From 65b1079de3b66ef4fc05c5c3d02a18149fd22499 Mon Sep 17 00:00:00 2001 From: HRiggs Date: Sat, 6 Jun 2026 01:27:19 -0400 Subject: [PATCH] update scraper --- backend/src/scrapers/eldorado-scraper.ts | 105 +++++----- .../src/scrapers/playerauctions-scraper.ts | 180 +++++++++--------- backend/tsconfig.json | 4 +- 3 files changed, 150 insertions(+), 139 deletions(-) diff --git a/backend/src/scrapers/eldorado-scraper.ts b/backend/src/scrapers/eldorado-scraper.ts index 785acab..4fca078 100644 --- a/backend/src/scrapers/eldorado-scraper.ts +++ b/backend/src/scrapers/eldorado-scraper.ts @@ -33,72 +33,79 @@ export class EldoradoScraper extends BaseScraper { // Track seen combinations to avoid duplicates const seenListings = new Set(); + const blockedSellerTerms = new Set([ + 'price', + 'delivery time', + 'delivery instructions', + 'current offer', + 'in stock', + 'min. qty.', + 'min. qty', + 'min qty', + 'recommended', + 'cheapest first', + 'lowest min. quantity', + 'other sellers', + 'star citizen', + 'auec', + 'items', + 'accounts', + 'top ups', + 'boosting', + 'gift cards', + ]); + for (let i = 0; i < lines.length; i++) { const line = lines[i]; - // Look for "$/M" pattern - this is the direct price per million - // Examples: "$0.00007 / M", "$0.00018 / M", "$0.00007/M" - const pricePerMMatch = line.match(/\$\s*([\d.]+)\s*\/?\s*\/\s*M/i) || line.match(/\$\s*([\d.]+)\s*\/\s*M/i); + // Eldorado currently exposes unit pricing as either /K or /M. + const unitPriceMatch = line.match(/\$\s*([\d.]+)\s*\/\s*([KM])/i); - if (pricePerMMatch) { - const pricePerMillion = parseFloat(pricePerMMatch[1]); - + if (unitPriceMatch) { + const unitPrice = parseFloat(unitPriceMatch[1]); + const priceUnit = unitPriceMatch[2].toUpperCase(); + const pricePerMillion = priceUnit === 'K' ? unitPrice * 1000 : unitPrice; - - // Look for "Min. qty" or "Min qty" nearby to get the quantity - let minQtyM = 10000; // Default to 10000M + // Look for the minimum quantity nearby and normalize it to aUEC. + let amountAUEC = 1_000_000; for (let j = Math.max(0, i - 5); j < Math.min(lines.length, i + 5); j++) { const qtyLine = lines[j]; - // Match patterns like "Min. qty. 6000 M" or "Min qty: 16,000 M" - const qtyMatch = qtyLine.match(/Min\.?\s*qty\.?\s*:?\s*([\d,]+)\s*M/i); + const qtyMatch = qtyLine.match(/Min\.?\s*qty\.?\s*:?\s*([\d,]+)\s*([KM])/i); if (qtyMatch) { - minQtyM = parseFloat(qtyMatch[1].replace(/,/g, '')); + const quantity = parseFloat(qtyMatch[1].replace(/,/g, '')); + const quantityUnit = qtyMatch[2].toUpperCase(); + amountAUEC = quantity * (quantityUnit === 'K' ? 1_000 : 1_000_000); break; } } - const amountAUEC = minQtyM * 1_000_000; - const priceUSD = pricePerMillion * minQtyM; + const priceUSD = (amountAUEC / 1_000_000) * pricePerMillion; - // Find seller name - look both backwards and forwards - // For featured seller, name appears BEFORE the price - // For other sellers, name appears in a structured list let seller: string | undefined; - - // Search backwards first (for featured seller and some list items) - for (let j = Math.max(0, i - 20); j < i; j++) { - const sellerLine = lines[j]; - - // Skip common non-seller text + for (let j = i - 1; j >= Math.max(0, i - 13); j--) { + const candidate = lines[j]; + const normalized = candidate.toLowerCase(); + if ( - sellerLine.includes('$') || - sellerLine.includes('Price') || - sellerLine.includes('qty') || - sellerLine.includes('stock') || - sellerLine.includes('Delivery') || - sellerLine.toLowerCase().includes('review') || - sellerLine.includes('Rating') || - sellerLine.includes('Offer') || - sellerLine.includes('Details') || - sellerLine.includes('FEATURED') || - sellerLine.includes('Other') || - sellerLine.includes('sellers') || - sellerLine.includes('aUEC') || - sellerLine === 'Star Citizen' || // Exclude the game title but not seller names - sellerLine.includes('IMF') || - sellerLine.includes('in-game') || - sellerLine.includes('currency') || - sellerLine.length < 3 || - sellerLine.length > 30 + !candidate || + candidate.includes('$') || + candidate.includes('%') || + /\breviews?\b/i.test(candidate) || + /\borders?\b/i.test(candidate) || + /\bmember since\b/i.test(candidate) || + /\bminutes?\b|\bhours?\b|\bdays?\b/i.test(candidate) || + /\d,\d{3}/.test(candidate) || + blockedSellerTerms.has(normalized) || + normalized.startsWith('other sellers') || + candidate.length < 3 || + candidate.length > 30 ) { continue; } - - // Match seller name patterns - alphanumeric with underscores/hyphens - // Allow some special cases like "StarCitizen" - if (/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(sellerLine)) { - seller = sellerLine; - // Don't break - keep looking for a closer match + + if (/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(candidate)) { + seller = candidate; + break; } } @@ -117,7 +124,7 @@ export class EldoradoScraper extends BaseScraper { } // Create unique key to avoid duplicates - const key = `${pricePerMillion}-${minQtyM}`; + const key = `${seller || 'unknown'}-${pricePerMillion}-${amountAUEC}`; if (seenListings.has(key)) continue; seenListings.add(key); diff --git a/backend/src/scrapers/playerauctions-scraper.ts b/backend/src/scrapers/playerauctions-scraper.ts index 0675ae1..4b41897 100644 --- a/backend/src/scrapers/playerauctions-scraper.ts +++ b/backend/src/scrapers/playerauctions-scraper.ts @@ -12,7 +12,6 @@ export class PlayerAuctionsScraper extends BaseScraper { } async extractListings(page: Page): Promise { - // Wait for page readiness await page.waitForTimeout(3000); // Close cookie popup if it exists @@ -26,119 +25,124 @@ export class PlayerAuctionsScraper extends BaseScraper { // No cookie popup or already closed } - // Find offer cards - they have class "offer-item" - const offerCards = await page.locator('.offer-item, [class*="offer-item"]').all(); + const listings = await page.evaluate(() => { + const results: Array<{ + amountAUEC: number; + priceUSD: number; + pricePerMillion: number; + seller?: string; + deliveryTime?: string; + }> = []; - if (offerCards.length === 0) { - return this.extractListingsAlternative(page); - } + const lines = document.body.innerText + .split('\n') + .map(line => line.trim()) + .filter(line => line.length > 0); - const listings: VendorListing[] = []; - const targetQuantityM = 1000000; // 1000000 M = 1 trillion AUEC (field is already in millions) + const blockedSellerTerms = new Set([ + 'main server', + 'offer details', + 'buy now', + 'm', + 'go', + 'price', + 'server', + 'rating', + 'delivery', + 'new seller', + ]); - // Step 2-5: Process each offer card - for (let i = 0; i < Math.min(offerCards.length, 20); i++) { - try { - const card = offerCards[i]; + const seenListings = new Set(); - // Find the quantity input (shows number with "M" suffix, has +/- buttons) - const qtyInput = card.locator('input[type="number"]').first(); - - if (!(await qtyInput.isVisible({ timeout: 1000 }).catch(() => false))) { + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const pricePerMillionMatch = line.match(/^\$\s*([\d,]+(?:\.\d+)?)\s*\/\s*M\b/i); + + if (!pricePerMillionMatch) { continue; } - // Set quantity to 1000000 (which means 1000000 M = 1 trillion AUEC) - await qtyInput.scrollIntoViewIfNeeded(); - await qtyInput.click({ force: true }); - await qtyInput.fill(''); - await qtyInput.pressSequentially(targetQuantityM.toString(), { delay: 10 }); - await qtyInput.press('Enter'); // Trigger update - - // Wait for price to update (0.5-2 seconds as per instructions) - await page.waitForTimeout(2500); - - // Step 3: Extract the total price from the BUY NOW button area - // Look for the price near the BUY NOW button - it's typically in a large font + const pricePerMillion = parseFloat(pricePerMillionMatch[1].replace(/,/g, '')); let totalPriceUSD = 0; - - // Try to find the price element near BUY NOW button - const buyNowButton = card.locator('button:has-text("BUY NOW"), [class*="buy"]').first(); - if (await buyNowButton.isVisible().catch(() => false)) { - // Get the parent container and look for price nearby - const priceContainer = buyNowButton.locator('xpath=..').first(); - const priceText = await priceContainer.textContent().catch(() => ''); - - // Extract price - should be like "$5.00" in large text - if (priceText) { - const priceMatch = priceText.match(/\$\s*([\d,]+\.\d{2})/); - if (priceMatch) { - totalPriceUSD = parseFloat(priceMatch[1].replace(/,/g, '')); - } + + for (let j = i + 1; j <= Math.min(lines.length - 1, i + 3); j++) { + const totalMatch = lines[j].match(/^\$\s*([\d,]+(?:\.\d{2})?)$/); + if (totalMatch) { + totalPriceUSD = parseFloat(totalMatch[1].replace(/,/g, '')); + break; } } - - // Fallback: look for price in the card, but exclude "Minutes" context - if (totalPriceUSD === 0) { - const cardText = await card.textContent().catch(() => ''); - if (cardText) { - const lines = cardText.split('\n').map(l => l.trim()); - - for (const line of lines) { - // Skip lines that contain time indicators - if (line.includes('Minutes') || line.includes('Hours') || line.includes('Days')) { - continue; - } - - // Look for price pattern with decimal - const priceMatch = line.match(/\$\s*([\d,]+\.\d{2})/); - if (priceMatch) { - const price = parseFloat(priceMatch[1].replace(/,/g, '')); - if (price > 0 && price < 100000) { - totalPriceUSD = price; - break; - } - } - } - } - } - - if (totalPriceUSD === 0) { + + if (!totalPriceUSD || !pricePerMillion) { continue; } - // Step 4: Compute USD per 1M - const pricePerMillion = totalPriceUSD / targetQuantityM; + const amountInMillions = totalPriceUSD / pricePerMillion; + const amountAUEC = Math.round(amountInMillions * 1_000_000); + let seller: string | undefined; + for (let j = i - 1; j >= Math.max(0, i - 10); j--) { + const candidate = lines[j]; + const normalized = candidate.toLowerCase(); - // Extract seller name and delivery time from card text - const fullCardText = await card.textContent().catch(() => ''); - const sellerMatch = fullCardText ? fullCardText.match(/([a-zA-Z0-9_-]{3,20})/) : null; - const seller = sellerMatch ? sellerMatch[1] : 'Unknown'; + if ( + blockedSellerTerms.has(normalized) || + candidate.includes('$') || + /\(\d+\)/.test(candidate) || + /\bminutes?\b|\bhours?\b|\bdays?\b/i.test(candidate) || + /\bmember since\b|\btotal orders\b/i.test(candidate) || + /^\d+(\.\d+)?$/.test(candidate) || + candidate.length < 3 || + candidate.length > 24 + ) { + continue; + } - const deliveryMatch = fullCardText ? fullCardText.match(/(\d+\s*(?:Minutes?|Hours?|Days?))/i) : null; - const deliveryTime = deliveryMatch ? deliveryMatch[1] : undefined; + if (/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(candidate)) { + seller = candidate; + break; + } + } - listings.push({ - vendor: 'playerauctions', - amountAUEC: targetQuantityM * 1_000_000, + let deliveryTime: string | undefined; + for (let j = Math.max(0, i - 6); j < i; j++) { + if (/\d+\s*(Minutes?|Hours?|Days?)/i.test(lines[j])) { + deliveryTime = lines[j]; + break; + } + } + + const key = `${seller || 'unknown'}-${pricePerMillion}-${totalPriceUSD}`; + if (seenListings.has(key)) { + continue; + } + seenListings.add(key); + + results.push({ + amountAUEC, priceUSD: totalPriceUSD, pricePerMillion, - seller: seller.trim(), + seller, deliveryTime, - scrapedAt: new Date(), - url: this.getTargetUrl(), }); - - } catch (error) { - // Skip this card } - } + + return results; + }); if (listings.length === 0) { return this.extractListingsAlternative(page); } - return listings; + return listings.map(listing => ({ + vendor: 'playerauctions' as const, + amountAUEC: listing.amountAUEC, + priceUSD: listing.priceUSD, + pricePerMillion: listing.pricePerMillion, + seller: listing.seller, + deliveryTime: listing.deliveryTime, + scrapedAt: new Date(), + url: this.getTargetUrl(), + })); } private async extractListingsAlternative(page: Page): Promise { diff --git a/backend/tsconfig.json b/backend/tsconfig.json index e924c59..7a5944e 100644 --- a/backend/tsconfig.json +++ b/backend/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { "target": "ES2022", - "module": "commonjs", + "module": "Node16", "lib": ["ES2022", "DOM"], "outDir": "./dist", "rootDir": "./src", @@ -10,7 +10,7 @@ "skipLibCheck": true, "forceConsistentCasingInFileNames": true, "resolveJsonModule": true, - "moduleResolution": "node", + "moduleResolution": "Node16", "declaration": true, "declarationMap": true, "sourceMap": true