// Listing metadata from the original viewer: 118 lines, 4.2 KiB, TypeScript.
import * as cheerio from 'cheerio';
/** Debug counters and samples gathered while scraping a single results page. */
type ScrapeDiagnostics = {
  /** How many listings had their price text located by each CSS selector. */
  selectorHits: Record<string, number>;
  /** Raw price texts as found in the page, before numeric parsing. */
  sampleTexts: string[];
  /** Prices that were successfully parsed into numbers. */
  samplePrices: number[];
};

/** Outcome of scraping one search-results HTML page. */
type ScrapeResult = {
  /** Every positive price parsed from the page's listings. */
  prices: number[];
  /** True when the page looks like a bot-check / verification page. */
  blocked: boolean;
  /** Present only when the caller asked for diagnostics. */
  diagnostics?: ScrapeDiagnostics;
};

/** CSS selectors tried, in order, to locate a listing's price element. */
const SOLD_PRICE_SELECTORS = [
  '.s-item__price',
  '.s-item__detail--primary .s-item__price',
  'span[class*="s-item__price"]',
] as const;

/** Fallback: a formatted dollar amount anywhere in a listing's raw HTML. */
const USD_IN_LISTING_HTML = /\$\s*[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})?/;

/** First dollar amount in a text, e.g. `$1,234.56`, `$ 5` or `$.99`. */
const FIRST_USD_AMOUNT = /\$\s*(\d[\d,]*(?:\.\d+)?|\.\d+)/;

/** Upper bound on the number of samples kept per diagnostics list. */
const MAX_SCRAPE_DIAGNOSTIC_SAMPLES = 10;

/**
 * Parses the first `$` amount in `text` into a positive number.
 *
 * Only the digits that directly follow the `$` are used, so unrelated digits
 * elsewhere in the text (e.g. "2 for $10.00") cannot leak into the value.
 * For a range such as "$10.00 to $20.00" this yields the lower bound.
 */
function parseFirstUsdAmount(text: string): number | null {
  const digits = FIRST_USD_AMOUNT.exec(text)?.[1];
  if (digits === undefined) return null;
  const value = Number(digits.replace(/,/g, ''));
  return Number.isFinite(value) && value > 0 ? value : null;
}

/**
 * Extracts listing prices from one search-results HTML page.
 *
 * @param html            Raw HTML of the results page.
 * @param wantDiagnostics When true, selector hit counts and up to
 *                        {@link MAX_SCRAPE_DIAGNOSTIC_SAMPLES} raw texts / parsed prices are returned.
 * @returns Parsed prices, a flag telling whether the page looks like a
 *          bot-check page, and (optionally) diagnostics.
 */
async function scrapeSoldPricesUSDPage(html: string, wantDiagnostics = false): Promise<ScrapeResult> {
  const $ = cheerio.load(html);
  const prices: number[] = [];
  const selectorHits: Record<string, number> = {};
  const sampleTexts: string[] = [];
  const samplePrices: number[] = [];

  // Both markers are matched case-insensitively.
  const loweredHtml = html.toLowerCase();
  const blocked =
    loweredHtml.includes('to continue, please verify') || loweredHtml.includes('robot check');

  // NOTE(review): result pages may contain placeholder `li.s-item` entries that are
  // not real sold listings — confirm against live markup whether they need filtering.
  $('li.s-item').each((_index, element) => {
    const $listing = $(element);

    // Prefer structured price elements; the first selector yielding text wins.
    let priceText = '';
    for (const selector of SOLD_PRICE_SELECTORS) {
      const candidate = $listing.find(selector).first().text().trim();
      if (candidate) {
        priceText = candidate;
        selectorHits[selector] = (selectorHits[selector] ?? 0) + 1;
        break;
      }
    }

    // Regex fallback within this listing's HTML.
    if (!priceText) {
      const fragment = $listing.html() ?? '';
      priceText = USD_IN_LISTING_HTML.exec(fragment)?.[0] ?? '';
    }
    if (!priceText) return;

    if (wantDiagnostics && sampleTexts.length < MAX_SCRAPE_DIAGNOSTIC_SAMPLES) {
      sampleTexts.push(priceText);
    }

    // Texts without a `$` amount (other currencies, junk) are skipped.
    const value = parseFirstUsdAmount(priceText);
    if (value === null) return;

    prices.push(value);
    if (wantDiagnostics && samplePrices.length < MAX_SCRAPE_DIAGNOSTIC_SAMPLES) {
      samplePrices.push(value);
    }
  });

  return {
    prices,
    blocked,
    diagnostics: wantDiagnostics ? { selectorHits, sampleTexts, samplePrices } : undefined,
  };
}

/**
 * Downloads one page of eBay search results for `query`.
 *
 * The URL sets `LH_Sold=1` and `LH_Complete=1` (presumably restricting results
 * to sold/completed listings — confirm against eBay's URL parameters) and asks
 * for `_ipg=200` entries per page.
 *
 * @param query Search text; it is URL-encoded here, so pass it raw.
 * @param page  Results page number placed in `_pgn` (defaults to 1).
 * @returns `ok` mirrors the HTTP response's `ok` flag; `html` is the response
 *          body text, returned even when `ok` is false.
 * @throws Propagates any rejection from `fetch` or from reading the body.
 */
async function fetchSoldSearchHtml(query: string, page = 1): Promise<{ ok: boolean; html: string }> {
  const url = `https://www.ebay.com/sch/i.html?_nkw=${encodeURIComponent(query)}&LH_Sold=1&LH_Complete=1&rt=nc&_ipg=200&_pgn=${page}`;

  // Browser-like headers; a plain string record needs no type cast.
  const headers: Record<string, string> = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.9',
  };

  const res = await fetch(url, { headers });
  const html = await res.text();
  return { ok: res.ok, html };
}

/** Number of result pages fetched per query. */
const MEDIAN_SEARCH_MAX_PAGES = 2;

/** Pause inserted before every follow-up request, in milliseconds. */
const MEDIAN_SEARCH_REQUEST_GAP_MS = 600;

/**
 * Median of `values`, rounded to two decimals; null for an empty list.
 * The input array is not mutated.
 */
function medianRoundedToCents(values: readonly number[]): number | null {
  const sorted = [...values].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  const upper = sorted[mid];
  if (upper === undefined) return null;
  // Even count: average the two middle values. Odd count: the middle value itself.
  const lower = sorted.length % 2 === 0 ? sorted[mid - 1] ?? upper : upper;
  return Number(((lower + upper) / 2).toFixed(2));
}

/**
 * Looks up the median sold price (USD) for a SKU from search results.
 *
 * The quoted SKU (exact phrase) is searched first; if that yields no prices,
 * the unquoted SKU is tried. Up to {@link MEDIAN_SEARCH_MAX_PAGES} pages are
 * read per query, and pages with a non-OK response are skipped.
 *
 * @param sku SKU / search text. A blank value returns null without any request.
 * @returns The median rounded to cents, or null when no price was found or a
 *          bot-check page was encountered.
 */
export async function fetchMedianSoldPriceUSDForSku(sku: string): Promise<number | null> {
  // A blank SKU would search for `""`; a median of that would be meaningless.
  if (sku.trim() === '') return null;

  const queries = [
    `"${sku}"`, // exact phrase first
    sku, // fallback without quotes
  ];

  let requestsSent = 0;
  for (const query of queries) {
    const collected: number[] = [];

    for (let page = 1; page <= MEDIAN_SEARCH_MAX_PAGES; page++) {
      // Be gentle: pause before every request except the very first, including
      // after a failed response, and never after the final request.
      if (requestsSent > 0) {
        await new Promise<void>((resolve) => setTimeout(resolve, MEDIAN_SEARCH_REQUEST_GAP_MS));
      }
      requestsSent++;

      const { ok, html } = await fetchSoldSearchHtml(query, page);
      if (!ok) continue;

      const { prices, blocked } = await scrapeSoldPricesUSDPage(html);
      // Once blocked, further requests are pointless; give up entirely.
      if (blocked) return null;

      collected.push(...prices);
    }

    const median = medianRoundedToCents(collected);
    if (median !== null) return median;
  }

  return null;
}

/** Per-page scrape summary reported by the debug helper. */
type SoldPriceDebugPage = {
  page: number;
  /** HTTP `ok` flag of the page request. */
  ok: boolean;
  /** Number of prices parsed from the page. */
  count: number;
  selectorHits: Record<string, number> | undefined;
  sampleTexts: string[] | undefined;
  samplePrices: number[] | undefined;
};

/** Summary of one search query tried by the debug helper. */
type SoldPriceDebugAttempt = {
  query: string;
  /** Prices found across all pages of this query. */
  totalCount: number;
  /** True when any page of this query looked like a bot-check page. */
  blocked: boolean;
  details: { pages: SoldPriceDebugPage[] };
};

/**
 * Debug helper: runs the sold-price searches for a SKU and reports what each
 * page yielded instead of computing a median.
 *
 * The quoted SKU is tried first, then the unquoted SKU; later queries are
 * skipped as soon as one query produced at least one price. Each page's HTML
 * is scraped with diagnostics enabled even when the response was not OK, so
 * error and bot-check pages still show up in the report.
 *
 * @param sku SKU / search text.
 * @returns One entry per attempted query, in the order tried.
 */
export async function debugFetchSoldPricesUSDForSku(
  sku: string,
): Promise<{ attempts: SoldPriceDebugAttempt[] }> {
  const queries = [`"${sku}"`, sku];
  const attempts: SoldPriceDebugAttempt[] = [];

  for (const query of queries) {
    const pages: SoldPriceDebugPage[] = [];
    let totalCount = 0;
    let blocked = false;

    for (let page = 1; page <= 2; page++) {
      const { ok, html } = await fetchSoldSearchHtml(query, page);
      const scraped = await scrapeSoldPricesUSDPage(html, true);

      pages.push({
        page,
        ok,
        count: scraped.prices.length,
        selectorHits: scraped.diagnostics?.selectorHits,
        sampleTexts: scraped.diagnostics?.sampleTexts,
        samplePrices: scraped.diagnostics?.samplePrices,
      });
      totalCount += scraped.prices.length;
      blocked = blocked || scraped.blocked;

      // Pause before page 2 only when page 1 produced nothing.
      if (scraped.prices.length === 0 && page === 1) {
        await new Promise<void>((resolve) => setTimeout(resolve, 600));
      }
    }

    attempts.push({ query, totalCount, blocked, details: { pages } });
    if (totalCount > 0) break;
  }

  return { attempts };
}