update scraper

This commit is contained in:
2026-06-06 01:27:19 -04:00
parent f998d7ff64
commit 65b1079de3
3 changed files with 150 additions and 139 deletions
+54 -47
View File
@@ -33,72 +33,79 @@ export class EldoradoScraper extends BaseScraper {
// Track seen combinations to avoid duplicates // Track seen combinations to avoid duplicates
const seenListings = new Set<string>(); const seenListings = new Set<string>();
const blockedSellerTerms = new Set([
'price',
'delivery time',
'delivery instructions',
'current offer',
'in stock',
'min. qty.',
'min. qty',
'min qty',
'recommended',
'cheapest first',
'lowest min. quantity',
'other sellers',
'star citizen',
'auec',
'items',
'accounts',
'top ups',
'boosting',
'gift cards',
]);
for (let i = 0; i < lines.length; i++) { for (let i = 0; i < lines.length; i++) {
const line = lines[i]; const line = lines[i];
// Look for "$/M" pattern - this is the direct price per million // Eldorado currently exposes unit pricing as either /K or /M.
// Examples: "$0.00007 / M", "$0.00018 / M", "$0.00007/M" const unitPriceMatch = line.match(/\$\s*([\d.]+)\s*\/\s*([KM])/i);
const pricePerMMatch = line.match(/\$\s*([\d.]+)\s*\/?\s*\/\s*M/i) || line.match(/\$\s*([\d.]+)\s*\/\s*M/i);
if (pricePerMMatch) { if (unitPriceMatch) {
const pricePerMillion = parseFloat(pricePerMMatch[1]); const unitPrice = parseFloat(unitPriceMatch[1]);
const priceUnit = unitPriceMatch[2].toUpperCase();
const pricePerMillion = priceUnit === 'K' ? unitPrice * 1000 : unitPrice;
// Look for the minimum quantity nearby and normalize it to aUEC.
let amountAUEC = 1_000_000;
// Look for "Min. qty" or "Min qty" nearby to get the quantity
let minQtyM = 10000; // Default to 10000M
for (let j = Math.max(0, i - 5); j < Math.min(lines.length, i + 5); j++) { for (let j = Math.max(0, i - 5); j < Math.min(lines.length, i + 5); j++) {
const qtyLine = lines[j]; const qtyLine = lines[j];
// Match patterns like "Min. qty. 6000 M" or "Min qty: 16,000 M" const qtyMatch = qtyLine.match(/Min\.?\s*qty\.?\s*:?\s*([\d,]+)\s*([KM])/i);
const qtyMatch = qtyLine.match(/Min\.?\s*qty\.?\s*:?\s*([\d,]+)\s*M/i);
if (qtyMatch) { if (qtyMatch) {
minQtyM = parseFloat(qtyMatch[1].replace(/,/g, '')); const quantity = parseFloat(qtyMatch[1].replace(/,/g, ''));
const quantityUnit = qtyMatch[2].toUpperCase();
amountAUEC = quantity * (quantityUnit === 'K' ? 1_000 : 1_000_000);
break; break;
} }
} }
const amountAUEC = minQtyM * 1_000_000; const priceUSD = (amountAUEC / 1_000_000) * pricePerMillion;
const priceUSD = pricePerMillion * minQtyM;
// Find seller name - look both backwards and forwards
// For featured seller, name appears BEFORE the price
// For other sellers, name appears in a structured list
let seller: string | undefined; let seller: string | undefined;
for (let j = i - 1; j >= Math.max(0, i - 13); j--) {
const candidate = lines[j];
const normalized = candidate.toLowerCase();
// Search backwards first (for featured seller and some list items)
for (let j = Math.max(0, i - 20); j < i; j++) {
const sellerLine = lines[j];
// Skip common non-seller text
if ( if (
sellerLine.includes('$') || !candidate ||
sellerLine.includes('Price') || candidate.includes('$') ||
sellerLine.includes('qty') || candidate.includes('%') ||
sellerLine.includes('stock') || /\breviews?\b/i.test(candidate) ||
sellerLine.includes('Delivery') || /\borders?\b/i.test(candidate) ||
sellerLine.toLowerCase().includes('review') || /\bmember since\b/i.test(candidate) ||
sellerLine.includes('Rating') || /\bminutes?\b|\bhours?\b|\bdays?\b/i.test(candidate) ||
sellerLine.includes('Offer') || /\d,\d{3}/.test(candidate) ||
sellerLine.includes('Details') || blockedSellerTerms.has(normalized) ||
sellerLine.includes('FEATURED') || normalized.startsWith('other sellers') ||
sellerLine.includes('Other') || candidate.length < 3 ||
sellerLine.includes('sellers') || candidate.length > 30
sellerLine.includes('aUEC') ||
sellerLine === 'Star Citizen' || // Exclude the game title but not seller names
sellerLine.includes('IMF') ||
sellerLine.includes('in-game') ||
sellerLine.includes('currency') ||
sellerLine.length < 3 ||
sellerLine.length > 30
) { ) {
continue; continue;
} }
// Match seller name patterns - alphanumeric with underscores/hyphens if (/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(candidate)) {
// Allow some special cases like "StarCitizen" seller = candidate;
if (/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(sellerLine)) { break;
seller = sellerLine;
// Don't break - keep looking for a closer match
} }
} }
@@ -117,7 +124,7 @@ export class EldoradoScraper extends BaseScraper {
} }
// Create unique key to avoid duplicates // Create unique key to avoid duplicates
const key = `${pricePerMillion}-${minQtyM}`; const key = `${seller || 'unknown'}-${pricePerMillion}-${amountAUEC}`;
if (seenListings.has(key)) continue; if (seenListings.has(key)) continue;
seenListings.add(key); seenListings.add(key);
+88 -84
View File
@@ -12,7 +12,6 @@ export class PlayerAuctionsScraper extends BaseScraper {
} }
async extractListings(page: Page): Promise<VendorListing[]> { async extractListings(page: Page): Promise<VendorListing[]> {
// Wait for page readiness
await page.waitForTimeout(3000); await page.waitForTimeout(3000);
// Close cookie popup if it exists // Close cookie popup if it exists
@@ -26,119 +25,124 @@ export class PlayerAuctionsScraper extends BaseScraper {
// No cookie popup or already closed // No cookie popup or already closed
} }
// Find offer cards - they have class "offer-item" const listings = await page.evaluate(() => {
const offerCards = await page.locator('.offer-item, [class*="offer-item"]').all(); const results: Array<{
amountAUEC: number;
priceUSD: number;
pricePerMillion: number;
seller?: string;
deliveryTime?: string;
}> = [];
if (offerCards.length === 0) { const lines = document.body.innerText
return this.extractListingsAlternative(page); .split('\n')
} .map(line => line.trim())
.filter(line => line.length > 0);
const listings: VendorListing[] = []; const blockedSellerTerms = new Set([
const targetQuantityM = 1000000; // 1000000 M = 1 trillion AUEC (field is already in millions) 'main server',
'offer details',
'buy now',
'm',
'go',
'price',
'server',
'rating',
'delivery',
'new seller',
]);
// Step 2-5: Process each offer card const seenListings = new Set<string>();
for (let i = 0; i < Math.min(offerCards.length, 20); i++) {
try {
const card = offerCards[i];
// Find the quantity input (shows number with "M" suffix, has +/- buttons) for (let i = 0; i < lines.length; i++) {
const qtyInput = card.locator('input[type="number"]').first(); const line = lines[i];
const pricePerMillionMatch = line.match(/^\$\s*([\d,]+(?:\.\d+)?)\s*\/\s*M\b/i);
if (!(await qtyInput.isVisible({ timeout: 1000 }).catch(() => false))) { if (!pricePerMillionMatch) {
continue; continue;
} }
// Set quantity to 1000000 (which means 1000000 M = 1 trillion AUEC) const pricePerMillion = parseFloat(pricePerMillionMatch[1].replace(/,/g, ''));
await qtyInput.scrollIntoViewIfNeeded();
await qtyInput.click({ force: true });
await qtyInput.fill('');
await qtyInput.pressSequentially(targetQuantityM.toString(), { delay: 10 });
await qtyInput.press('Enter'); // Trigger update
// Wait for price to update (0.5-2 seconds as per instructions)
await page.waitForTimeout(2500);
// Step 3: Extract the total price from the BUY NOW button area
// Look for the price near the BUY NOW button - it's typically in a large font
let totalPriceUSD = 0; let totalPriceUSD = 0;
// Try to find the price element near BUY NOW button for (let j = i + 1; j <= Math.min(lines.length - 1, i + 3); j++) {
const buyNowButton = card.locator('button:has-text("BUY NOW"), [class*="buy"]').first(); const totalMatch = lines[j].match(/^\$\s*([\d,]+(?:\.\d{2})?)$/);
if (await buyNowButton.isVisible().catch(() => false)) { if (totalMatch) {
// Get the parent container and look for price nearby totalPriceUSD = parseFloat(totalMatch[1].replace(/,/g, ''));
const priceContainer = buyNowButton.locator('xpath=..').first();
const priceText = await priceContainer.textContent().catch(() => '');
// Extract price - should be like "$5.00" in large text
if (priceText) {
const priceMatch = priceText.match(/\$\s*([\d,]+\.\d{2})/);
if (priceMatch) {
totalPriceUSD = parseFloat(priceMatch[1].replace(/,/g, ''));
}
}
}
// Fallback: look for price in the card, but exclude "Minutes" context
if (totalPriceUSD === 0) {
const cardText = await card.textContent().catch(() => '');
if (cardText) {
const lines = cardText.split('\n').map(l => l.trim());
for (const line of lines) {
// Skip lines that contain time indicators
if (line.includes('Minutes') || line.includes('Hours') || line.includes('Days')) {
continue;
}
// Look for price pattern with decimal
const priceMatch = line.match(/\$\s*([\d,]+\.\d{2})/);
if (priceMatch) {
const price = parseFloat(priceMatch[1].replace(/,/g, ''));
if (price > 0 && price < 100000) {
totalPriceUSD = price;
break; break;
} }
} }
}
}
}
if (totalPriceUSD === 0) { if (!totalPriceUSD || !pricePerMillion) {
continue; continue;
} }
// Step 4: Compute USD per 1M const amountInMillions = totalPriceUSD / pricePerMillion;
const pricePerMillion = totalPriceUSD / targetQuantityM; const amountAUEC = Math.round(amountInMillions * 1_000_000);
let seller: string | undefined;
for (let j = i - 1; j >= Math.max(0, i - 10); j--) {
const candidate = lines[j];
const normalized = candidate.toLowerCase();
// Extract seller name and delivery time from card text if (
const fullCardText = await card.textContent().catch(() => ''); blockedSellerTerms.has(normalized) ||
const sellerMatch = fullCardText ? fullCardText.match(/([a-zA-Z0-9_-]{3,20})/) : null; candidate.includes('$') ||
const seller = sellerMatch ? sellerMatch[1] : 'Unknown'; /\(\d+\)/.test(candidate) ||
/\bminutes?\b|\bhours?\b|\bdays?\b/i.test(candidate) ||
/\bmember since\b|\btotal orders\b/i.test(candidate) ||
/^\d+(\.\d+)?$/.test(candidate) ||
candidate.length < 3 ||
candidate.length > 24
) {
continue;
}
const deliveryMatch = fullCardText ? fullCardText.match(/(\d+\s*(?:Minutes?|Hours?|Days?))/i) : null; if (/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(candidate)) {
const deliveryTime = deliveryMatch ? deliveryMatch[1] : undefined; seller = candidate;
break;
}
}
listings.push({ let deliveryTime: string | undefined;
vendor: 'playerauctions', for (let j = Math.max(0, i - 6); j < i; j++) {
amountAUEC: targetQuantityM * 1_000_000, if (/\d+\s*(Minutes?|Hours?|Days?)/i.test(lines[j])) {
deliveryTime = lines[j];
break;
}
}
const key = `${seller || 'unknown'}-${pricePerMillion}-${totalPriceUSD}`;
if (seenListings.has(key)) {
continue;
}
seenListings.add(key);
results.push({
amountAUEC,
priceUSD: totalPriceUSD, priceUSD: totalPriceUSD,
pricePerMillion, pricePerMillion,
seller: seller.trim(), seller,
deliveryTime, deliveryTime,
scrapedAt: new Date(),
url: this.getTargetUrl(),
}); });
}
} catch (error) { return results;
// Skip this card });
}
}
if (listings.length === 0) { if (listings.length === 0) {
return this.extractListingsAlternative(page); return this.extractListingsAlternative(page);
} }
return listings; return listings.map(listing => ({
vendor: 'playerauctions' as const,
amountAUEC: listing.amountAUEC,
priceUSD: listing.priceUSD,
pricePerMillion: listing.pricePerMillion,
seller: listing.seller,
deliveryTime: listing.deliveryTime,
scrapedAt: new Date(),
url: this.getTargetUrl(),
}));
} }
private async extractListingsAlternative(page: Page): Promise<VendorListing[]> { private async extractListingsAlternative(page: Page): Promise<VendorListing[]> {
+2 -2
View File
@@ -1,7 +1,7 @@
{ {
"compilerOptions": { "compilerOptions": {
"target": "ES2022", "target": "ES2022",
"module": "commonjs", "module": "Node16",
"lib": ["ES2022", "DOM"], "lib": ["ES2022", "DOM"],
"outDir": "./dist", "outDir": "./dist",
"rootDir": "./src", "rootDir": "./src",
@@ -10,7 +10,7 @@
"skipLibCheck": true, "skipLibCheck": true,
"forceConsistentCasingInFileNames": true, "forceConsistentCasingInFileNames": true,
"resolveJsonModule": true, "resolveJsonModule": true,
"moduleResolution": "node", "moduleResolution": "Node16",
"declaration": true, "declaration": true,
"declarationMap": true, "declarationMap": true,
"sourceMap": true "sourceMap": true