This commit is contained in:
@@ -1,38 +1,117 @@
|
||||
import * as cheerio from 'cheerio';
|
||||
|
||||
export async function fetchMedianSoldPriceUSDForSku(sku: string): Promise<number | null> {
|
||||
const query = encodeURIComponent(`"${sku}"`);
|
||||
const url = `https://www.ebay.com/sch/i.html?_nkw=${query}&LH_Sold=1&LH_Complete=1&rt=nc`;
|
||||
const res = await fetch(url, {
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
|
||||
} as any
|
||||
} as any);
|
||||
if (!res.ok) return null;
|
||||
const html = await res.text();
|
||||
/**
 * Outcome of scraping a single search-results page.
 */
type ScrapeResult = {
  /** Positive, finite prices parsed from the page's listings, in the order the listings were visited. */
  prices: number[];
  /** True when the page HTML contains one of the bot-check phrases the scraper looks for. */
  blocked: boolean;
  /** Populated only when the scraper is called with diagnostics enabled; otherwise undefined. */
  diagnostics?: {
    /** How many listings were resolved by each CSS selector (keyed by selector string). */
    selectorHits: Record<string, number>;
    /** A small sample of the raw price texts that were found. */
    sampleTexts: string[];
    /** A small sample of the numeric prices that were accepted. */
    samplePrices: number[];
  };
};
|
||||
|
||||
/**
 * Parses one search-results page and collects the listing prices found on it.
 *
 * For every `li.s-item` element the price text is looked up with a list of CSS
 * selectors (first non-empty match wins). If none yields text, a regex scan of
 * the listing's own HTML for a `$`-prefixed amount is used instead.
 *
 * A price text is accepted only if it contains `$`; for ranges such as
 * "$10.00 to $20.00" only the part before " to " is used. Values that do not
 * parse to a finite number greater than zero are dropped.
 *
 * @param html - Raw HTML of the results page.
 * @param wantDiagnostics - When true, the result also carries selector hit
 *   counts plus up to 10 sample texts and 10 sample prices.
 * @returns The parsed prices, a `blocked` flag (set when the HTML contains
 *   'To continue, please verify' or, case-insensitively, 'robot check'), and
 *   optional diagnostics.
 */
async function scrapeSoldPricesUSDPage(html: string, wantDiagnostics = false): Promise<ScrapeResult> {
  const sampleLimit = 10;
  // "$", optional whitespace, digits with optional thousands separators, optional cents.
  const dollarAmount = /\$\s*[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]{2})?/;

  const $ = cheerio.load(html);
  const prices: number[] = [];
  const selectorHits: Record<string, number> = {};
  const sampleTexts: string[] = [];
  const samplePrices: number[] = [];

  const blocked = html.includes('To continue, please verify') || html.toLowerCase().includes('robot check');

  // Tried in order; the first selector that yields non-empty text is used for a listing.
  const priceSelectors = [
    '.s-item__price',
    '.s-item__detail--primary .s-item__price',
    'span[class*="s-item__price"]',
  ];

  $('li.s-item').each((_i, el) => {
    const $el = $(el);

    let text: string | null = null;
    for (const sel of priceSelectors) {
      const t = $el.find(sel).first().text().trim();
      if (t) {
        text = t;
        selectorHits[sel] = (selectorHits[sel] ?? 0) + 1;
        break;
      }
    }

    if (!text) {
      // Regex fallback within this listing's HTML.
      const htmlFrag = $el.html() ?? '';
      const m = htmlFrag.match(dollarAmount);
      if (m) text = m[0];
    }
    if (!text) return;

    // Sampled before the "$" filter so diagnostics also show rejected texts.
    if (wantDiagnostics && sampleTexts.length < sampleLimit) sampleTexts.push(text);

    // Accept only texts carrying a dollar sign.
    if (!text.includes('$')) return;

    // For ranges like "$10.00 to $20.00" keep the lower bound only.
    const single = text.split(' to ')[0];
    const num = single.replace(/[^0-9.]/g, '');
    if (!num) return;

    const value = Number(num);
    if (!Number.isFinite(value) || value <= 0) return;

    prices.push(value);
    if (wantDiagnostics && samplePrices.length < sampleLimit) samplePrices.push(value);
  });

  return {
    prices,
    blocked,
    diagnostics: wantDiagnostics ? { selectorHits, sampleTexts, samplePrices } : undefined,
  };
}
|
||||
|
||||
/**
 * Downloads one page of eBay search results for `query`.
 *
 * The request goes to `https://www.ebay.com/sch/i.html` with the query
 * URL-encoded into `_nkw`, the fixed flags `LH_Sold=1&LH_Complete=1&rt=nc`,
 * `_ipg=200`, and `_pgn` set to `page`. (Presumably: sold/completed listings
 * only, 200 items per page — confirm against eBay's URL parameters.)
 * A desktop-browser `User-Agent` and an English `Accept-Language` header are sent.
 *
 * The body is read regardless of the HTTP status so callers can inspect it.
 *
 * @param query - Search text; encoded with `encodeURIComponent`.
 * @param page - Value for the `_pgn` parameter (defaults to 1).
 * @returns `ok` mirrors `Response.ok`; `html` is the response body text.
 *   Rejections from `fetch` itself are not caught here and propagate to the caller.
 */
async function fetchSoldSearchHtml(query: string, page = 1): Promise<{ ok: boolean; html: string }> {
  const url = `https://www.ebay.com/sch/i.html?_nkw=${encodeURIComponent(query)}&LH_Sold=1&LH_Complete=1&rt=nc&_ipg=200&_pgn=${page}`;
  const res = await fetch(url, {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
      'Accept-Language': 'en-US,en;q=0.9',
    },
  });
  const html = await res.text();
  return { ok: res.ok, html };
}
|
||||
|
||||
/** Median of a non-empty list, rounded to two decimals. The input array is not mutated. */
function medianRoundedToCents(values: readonly number[]): number {
  const sorted = [...values].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  const median = sorted.length % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
  return Number(median.toFixed(2));
}

/**
 * Looks up the median price of listings returned by the sold-listings search for a SKU.
 *
 * Two queries are tried in order: the SKU wrapped in double quotes (exact
 * phrase), then the bare SKU. For each query up to two result pages are
 * fetched and their prices pooled; the first query that yields any price
 * determines the result.
 *
 * @param sku - SKU to search for; surrounding whitespace is ignored.
 * @returns The median of the pooled prices rounded to two decimals, or `null`
 *   when the SKU is blank, when a page is flagged as blocked by the scraper,
 *   or when no query produced a price. Pages whose HTTP response is not OK are
 *   skipped.
 */
export async function fetchMedianSoldPriceUSDForSku(sku: string): Promise<number | null> {
  const maxPages = 2;
  const pauseMs = 600;

  const term = sku.trim();
  // A blank SKU would turn into a search for `""`, which cannot identify a product.
  if (!term) return null;

  // Quoted exact search first, then fallback without quotes.
  const queries = [`"${term}"`, term];

  for (let qi = 0; qi < queries.length; qi++) {
    const collected: number[] = [];

    for (let page = 1; page <= maxPages; page++) {
      const { ok, html } = await fetchSoldSearchHtml(queries[qi], page);
      if (!ok) continue;

      const { prices, blocked } = await scrapeSoldPricesUSDPage(html);
      // A bot-check page means further requests are pointless; give up entirely.
      if (blocked) return null;

      collected.push(...prices);

      // While nothing has been found yet, pause briefly before the next request
      // to be gentle. Skipped when no further request will be made.
      const anotherRequestFollows = page < maxPages || qi < queries.length - 1;
      if (collected.length === 0 && anotherRequestFollows) {
        await new Promise<void>((resolve) => setTimeout(resolve, pauseMs));
      }
    }

    if (collected.length > 0) return medianRoundedToCents(collected);
  }

  return null;
}
|
||||
|
||||
/** Per-page entry of a debug attempt. */
export type SoldPriceDebugPage = {
  page: number;
  /** Whether the HTTP response for this page was OK. */
  ok: boolean;
  /** Number of prices parsed from this page. */
  count: number;
  selectorHits: Record<string, number> | undefined;
  sampleTexts: string[] | undefined;
  samplePrices: number[] | undefined;
};

/** Summary of one query tried by the debug lookup. */
export type SoldPriceDebugAttempt = {
  query: string;
  /** Sum of `count` over all pages of this query. */
  totalCount: number;
  /** True if any page of this query was flagged as blocked. */
  blocked: boolean;
  details: { pages: SoldPriceDebugPage[] };
};

/**
 * Diagnostic variant of the SKU price lookup: runs the same query sequence
 * (quoted SKU, then bare SKU) but reports what was found instead of a median.
 *
 * For each query two pages are fetched and scraped with diagnostics enabled.
 * Unlike the median lookup, pages are scraped even when the HTTP response was
 * not OK, and a blocked page does not abort the run. The sequence stops after
 * the first query that produced at least one price.
 *
 * @param sku - SKU to search for (used verbatim).
 * @returns One attempt record per query that was tried.
 */
export async function debugFetchSoldPricesUSDForSku(sku: string): Promise<{ attempts: SoldPriceDebugAttempt[] }> {
  const maxPages = 2;
  const pauseMs = 600;

  const queries = [`"${sku}"`, sku];
  const attempts: SoldPriceDebugAttempt[] = [];

  for (const query of queries) {
    let totalCount = 0;
    let blocked = false;
    const pages: SoldPriceDebugPage[] = [];

    for (let page = 1; page <= maxPages; page++) {
      const { ok, html } = await fetchSoldSearchHtml(query, page);
      const scraped = await scrapeSoldPricesUSDPage(html, true);

      pages.push({
        page,
        ok,
        count: scraped.prices.length,
        selectorHits: scraped.diagnostics?.selectorHits,
        sampleTexts: scraped.diagnostics?.sampleTexts,
        samplePrices: scraped.diagnostics?.samplePrices,
      });

      totalCount += scraped.prices.length;
      blocked = blocked || scraped.blocked;

      // Pause only after an empty first page, before requesting the second one.
      if (scraped.prices.length === 0 && page === 1) {
        await new Promise<void>((resolve) => setTimeout(resolve, pauseMs));
      }
    }

    attempts.push({ query, totalCount, blocked, details: { pages } });
    if (totalCount > 0) break;
  }

  return { attempts };
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user