import * as cheerio from 'cheerio';

/** Result of scraping a single search-results page. */
type ScrapeResult = {
  /** Every positive price parsed from the page, in document order. */
  prices: number[];
  /** True when the HTML contains one of the known bot-check phrases. */
  blocked: boolean;
  /** Present only when diagnostics were requested. */
  diagnostics?: {
    /** How many listings each CSS selector was the first to match. */
    selectorHits: Record<string, number>;
    /** Up to MAX_DIAGNOSTIC_SAMPLES raw price texts, before parsing. */
    sampleTexts: string[];
    /** Up to MAX_DIAGNOSTIC_SAMPLES successfully parsed prices. */
    samplePrices: number[];
  };
};

/** One page entry in the debug report. */
type DebugPageReport = {
  page: number;
  ok: boolean;
  count: number;
  selectorHits?: Record<string, number>;
  sampleTexts?: string[];
  samplePrices?: number[];
};

/** One query attempt in the debug report. */
type DebugAttempt = {
  query: string;
  totalCount: number;
  blocked: boolean;
  details: { pages: DebugPageReport[] };
};

/** Number of result pages requested per query. */
const PAGES_PER_QUERY = 2;

/** Pause inserted between consecutive HTTP requests, in milliseconds. */
const REQUEST_DELAY_MS = 600;

/** Cap on the number of sample texts / sample prices kept for diagnostics. */
const MAX_DIAGNOSTIC_SAMPLES = 10;

/** Selectors tried in order inside each listing; the first non-empty text wins. */
const PRICE_SELECTORS = [
  '.s-item__price',
  '.s-item__detail--primary .s-item__price',
  'span[class*="s-item__price"]',
];

/** Fallback used on a listing's raw HTML when no selector yields text. */
const FALLBACK_PRICE_PATTERN = /\$\s*[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})?/;

/** First `$`-prefixed amount in a text; group 1 is the number (may contain commas). */
const PRICE_TEXT_PATTERN = /\$\s*([0-9][0-9,]*(?:\.[0-9]+)?|\.[0-9]+)/;

/** Browser-like headers sent with every search request. */
const REQUEST_HEADERS: Record<string, string> = {
  'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
  'Accept-Language': 'en-US,en;q=0.9',
};

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Extracts the first dollar amount from a price text.
 *
 * Only the first `$`-prefixed number is read, so a range such as
 * "$10.00 to $20.00" yields 10 and unrelated digits elsewhere in the text
 * (e.g. "2 for $10") are ignored instead of being concatenated into the price.
 *
 * @returns The positive finite amount, or `null` when none is found.
 */
function parsePriceText(text: string): number | null {
  const digits = PRICE_TEXT_PATTERN.exec(text)?.[1];
  if (!digits) return null;
  const value = Number(digits.replace(/,/g, ''));
  return Number.isFinite(value) && value > 0 ? value : null;
}

/**
 * Median of `values`, rounded to two decimals. For an even count the two
 * middle values are averaged. The input array is not mutated.
 *
 * @returns The median, or `null` for an empty input.
 */
function medianOf(values: readonly number[]): number | null {
  const sorted = [...values].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  const upper = sorted[mid];
  if (upper === undefined) return null;
  const lower = sorted[mid - 1];
  const median = sorted.length % 2 === 0 && lower !== undefined ? (lower + upper) / 2 : upper;
  return Number(median.toFixed(2));
}

/**
 * Parses listing prices out of one search-results HTML page.
 *
 * Each `li.s-item` element contributes at most one price: the text of the
 * first matching selector in PRICE_SELECTORS, or, failing that, the first
 * dollar amount found in the listing's raw HTML.
 *
 * @param html Raw HTML of the results page.
 * @param wantDiagnostics When true, selector hit counts and up to
 *   MAX_DIAGNOSTIC_SAMPLES raw texts / parsed prices are returned as well.
 * @returns Parsed prices, the bot-check flag, and optional diagnostics.
 */
async function scrapeSoldPricesUSDPage(html: string, wantDiagnostics = false): Promise<ScrapeResult> {
  const $ = cheerio.load(html);
  const prices: number[] = [];
  const selectorHits: Record<string, number> = {};
  const sampleTexts: string[] = [];
  const samplePrices: number[] = [];

  const blocked =
    html.includes('To continue, please verify') || html.toLowerCase().includes('robot check');

  for (const el of $('li.s-item').toArray()) {
    const $el = $(el);

    let text = '';
    for (const sel of PRICE_SELECTORS) {
      const candidate = $el.find(sel).first().text().trim();
      if (candidate) {
        text = candidate;
        selectorHits[sel] = (selectorHits[sel] ?? 0) + 1;
        break;
      }
    }

    if (!text) {
      // Regex fallback within this listing's HTML.
      text = FALLBACK_PRICE_PATTERN.exec($el.html() ?? '')?.[0] ?? '';
    }
    if (!text) continue;

    // Sample the raw text before parsing so unparseable texts remain visible.
    if (wantDiagnostics && sampleTexts.length < MAX_DIAGNOSTIC_SAMPLES) sampleTexts.push(text);

    const value = parsePriceText(text);
    if (value === null) continue;

    prices.push(value);
    if (wantDiagnostics && samplePrices.length < MAX_DIAGNOSTIC_SAMPLES) samplePrices.push(value);
  }

  return {
    prices,
    blocked,
    diagnostics: wantDiagnostics ? { selectorHits, sampleTexts, samplePrices } : undefined,
  };
}

/**
 * Downloads one page of the ebay.com search for `query` with the
 * `LH_Sold=1` and `LH_Complete=1` filters applied.
 *
 * @param query Search text; it is URL-encoded here.
 * @param page 1-based page number passed as `_pgn`.
 * @returns `ok` mirrors the HTTP response's `ok` flag; `html` is the body
 *   text regardless of status. Errors thrown by `fetch` propagate.
 */
async function fetchSoldSearchHtml(query: string, page = 1): Promise<{ ok: boolean; html: string }> {
  const url = `https://www.ebay.com/sch/i.html?_nkw=${encodeURIComponent(query)}&LH_Sold=1&LH_Complete=1&rt=nc&_ipg=200&_pgn=${page}`;
  const res = await fetch(url, { headers: REQUEST_HEADERS });
  const html = await res.text();
  return { ok: res.ok, html };
}

/**
 * Computes the median sold price for a SKU.
 *
 * The quoted SKU is searched first; the unquoted SKU is used only when the
 * quoted search yields no prices. Up to PAGES_PER_QUERY pages are read per
 * query, with a REQUEST_DELAY_MS pause before every request after the first.
 *
 * @param sku SKU to search for.
 * @returns The median rounded to two decimals, or `null` when the SKU is
 *   blank, a bot-check page is detected, or no prices are found.
 */
export async function fetchMedianSoldPriceUSDForSku(sku: string): Promise<number | null> {
  // A blank query would match unrelated listings and give a meaningless median.
  if (!sku.trim()) return null;

  const queries = [
    `"${sku}"`,
    sku, // fallback without quotes
  ];

  let requestsMade = 0;
  for (const query of queries) {
    const collected: number[] = [];

    for (let page = 1; page <= PAGES_PER_QUERY; page++) {
      // Small delay before every follow-up request to be gentle.
      if (requestsMade > 0) await sleep(REQUEST_DELAY_MS);
      requestsMade++;

      const { ok, html } = await fetchSoldSearchHtml(query, page);
      if (!ok) continue;

      const { prices, blocked } = await scrapeSoldPricesUSDPage(html);
      if (blocked) return null;
      collected.push(...prices);
    }

    const median = medianOf(collected);
    if (median !== null) return median;
  }

  return null;
}

/**
 * Diagnostic variant of the sold-price lookup: runs the same quoted-then-
 * unquoted query sequence and reports, per page, the HTTP `ok` flag, the
 * number of prices parsed, selector hit counts and sample texts/prices.
 *
 * Unlike the median lookup, pages are scraped even when the response is not
 * `ok`, and a detected bot-check is recorded rather than aborting. The
 * sequence stops after the first query that yields any price.
 *
 * @param sku SKU to search for.
 * @returns One attempt entry per query that was tried.
 */
export async function debugFetchSoldPricesUSDForSku(sku: string): Promise<{ attempts: DebugAttempt[] }> {
  const queries = [`"${sku}"`, sku];
  const attempts: DebugAttempt[] = [];

  let requestsMade = 0;
  for (const query of queries) {
    let totalCount = 0;
    let blocked = false;
    const pages: DebugPageReport[] = [];

    for (let page = 1; page <= PAGES_PER_QUERY; page++) {
      // Same pacing as the median lookup: pause before every follow-up request.
      if (requestsMade > 0) await sleep(REQUEST_DELAY_MS);
      requestsMade++;

      const { ok, html } = await fetchSoldSearchHtml(query, page);
      const result = await scrapeSoldPricesUSDPage(html, true);

      pages.push({
        page,
        ok,
        count: result.prices.length,
        selectorHits: result.diagnostics?.selectorHits,
        sampleTexts: result.diagnostics?.sampleTexts,
        samplePrices: result.diagnostics?.samplePrices,
      });
      totalCount += result.prices.length;
      blocked = blocked || result.blocked;
    }

    attempts.push({ query, totalCount, blocked, details: { pages } });
    if (totalCount > 0) break;
  }

  return { attempts };
}