Update Eldorado and PlayerAuctions scrapers; switch tsconfig module settings to Node16

This commit is contained in:
2026-06-06 01:27:19 -04:00
parent f998d7ff64
commit 65b1079de3
3 changed files with 150 additions and 139 deletions
+56 -49
View File
@@ -33,72 +33,79 @@ export class EldoradoScraper extends BaseScraper {
// Track seen combinations to avoid duplicates
const seenListings = new Set<string>();
const blockedSellerTerms = new Set([
'price',
'delivery time',
'delivery instructions',
'current offer',
'in stock',
'min. qty.',
'min. qty',
'min qty',
'recommended',
'cheapest first',
'lowest min. quantity',
'other sellers',
'star citizen',
'auec',
'items',
'accounts',
'top ups',
'boosting',
'gift cards',
]);
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
// Look for "$/M" pattern - this is the direct price per million
// Examples: "$0.00007 / M", "$0.00018 / M", "$0.00007/M"
const pricePerMMatch = line.match(/\$\s*([\d.]+)\s*\/?\s*\/\s*M/i) || line.match(/\$\s*([\d.]+)\s*\/\s*M/i);
// Eldorado currently exposes unit pricing as either /K or /M.
const unitPriceMatch = line.match(/\$\s*([\d.]+)\s*\/\s*([KM])/i);
if (pricePerMMatch) {
const pricePerMillion = parseFloat(pricePerMMatch[1]);
if (unitPriceMatch) {
const unitPrice = parseFloat(unitPriceMatch[1]);
const priceUnit = unitPriceMatch[2].toUpperCase();
const pricePerMillion = priceUnit === 'K' ? unitPrice * 1000 : unitPrice;
// Look for "Min. qty" or "Min qty" nearby to get the quantity
let minQtyM = 10000; // Default to 10000M
// Look for the minimum quantity nearby and normalize it to aUEC.
let amountAUEC = 1_000_000;
for (let j = Math.max(0, i - 5); j < Math.min(lines.length, i + 5); j++) {
const qtyLine = lines[j];
// Match patterns like "Min. qty. 6000 M" or "Min qty: 16,000 M"
const qtyMatch = qtyLine.match(/Min\.?\s*qty\.?\s*:?\s*([\d,]+)\s*M/i);
const qtyMatch = qtyLine.match(/Min\.?\s*qty\.?\s*:?\s*([\d,]+)\s*([KM])/i);
if (qtyMatch) {
minQtyM = parseFloat(qtyMatch[1].replace(/,/g, ''));
const quantity = parseFloat(qtyMatch[1].replace(/,/g, ''));
const quantityUnit = qtyMatch[2].toUpperCase();
amountAUEC = quantity * (quantityUnit === 'K' ? 1_000 : 1_000_000);
break;
}
}
const amountAUEC = minQtyM * 1_000_000;
const priceUSD = pricePerMillion * minQtyM;
const priceUSD = (amountAUEC / 1_000_000) * pricePerMillion;
// Find seller name - look both backwards and forwards
// For featured seller, name appears BEFORE the price
// For other sellers, name appears in a structured list
let seller: string | undefined;
// Search backwards first (for featured seller and some list items)
for (let j = Math.max(0, i - 20); j < i; j++) {
const sellerLine = lines[j];
// Skip common non-seller text
for (let j = i - 1; j >= Math.max(0, i - 13); j--) {
const candidate = lines[j];
const normalized = candidate.toLowerCase();
if (
sellerLine.includes('$') ||
sellerLine.includes('Price') ||
sellerLine.includes('qty') ||
sellerLine.includes('stock') ||
sellerLine.includes('Delivery') ||
sellerLine.toLowerCase().includes('review') ||
sellerLine.includes('Rating') ||
sellerLine.includes('Offer') ||
sellerLine.includes('Details') ||
sellerLine.includes('FEATURED') ||
sellerLine.includes('Other') ||
sellerLine.includes('sellers') ||
sellerLine.includes('aUEC') ||
sellerLine === 'Star Citizen' || // Exclude the game title but not seller names
sellerLine.includes('IMF') ||
sellerLine.includes('in-game') ||
sellerLine.includes('currency') ||
sellerLine.length < 3 ||
sellerLine.length > 30
!candidate ||
candidate.includes('$') ||
candidate.includes('%') ||
/\breviews?\b/i.test(candidate) ||
/\borders?\b/i.test(candidate) ||
/\bmember since\b/i.test(candidate) ||
/\bminutes?\b|\bhours?\b|\bdays?\b/i.test(candidate) ||
/\d,\d{3}/.test(candidate) ||
blockedSellerTerms.has(normalized) ||
normalized.startsWith('other sellers') ||
candidate.length < 3 ||
candidate.length > 30
) {
continue;
}
// Match seller name patterns - alphanumeric with underscores/hyphens
// Allow some special cases like "StarCitizen"
if (/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(sellerLine)) {
seller = sellerLine;
// Don't break - keep looking for a closer match
if (/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(candidate)) {
seller = candidate;
break;
}
}
@@ -117,7 +124,7 @@ export class EldoradoScraper extends BaseScraper {
}
// Create unique key to avoid duplicates
const key = `${pricePerMillion}-${minQtyM}`;
const key = `${seller || 'unknown'}-${pricePerMillion}-${amountAUEC}`;
if (seenListings.has(key)) continue;
seenListings.add(key);
+92 -88
View File
@@ -12,7 +12,6 @@ export class PlayerAuctionsScraper extends BaseScraper {
}
async extractListings(page: Page): Promise<VendorListing[]> {
// Wait for page readiness
await page.waitForTimeout(3000);
// Close cookie popup if it exists
@@ -26,119 +25,124 @@ export class PlayerAuctionsScraper extends BaseScraper {
// No cookie popup or already closed
}
// Find offer cards - they have class "offer-item"
const offerCards = await page.locator('.offer-item, [class*="offer-item"]').all();
const listings = await page.evaluate(() => {
const results: Array<{
amountAUEC: number;
priceUSD: number;
pricePerMillion: number;
seller?: string;
deliveryTime?: string;
}> = [];
if (offerCards.length === 0) {
return this.extractListingsAlternative(page);
}
const lines = document.body.innerText
.split('\n')
.map(line => line.trim())
.filter(line => line.length > 0);
const listings: VendorListing[] = [];
const targetQuantityM = 1000000; // 1000000 M = 1 trillion AUEC (field is already in millions)
const blockedSellerTerms = new Set([
'main server',
'offer details',
'buy now',
'm',
'go',
'price',
'server',
'rating',
'delivery',
'new seller',
]);
// Step 2-5: Process each offer card
for (let i = 0; i < Math.min(offerCards.length, 20); i++) {
try {
const card = offerCards[i];
const seenListings = new Set<string>();
// Find the quantity input (shows number with "M" suffix, has +/- buttons)
const qtyInput = card.locator('input[type="number"]').first();
if (!(await qtyInput.isVisible({ timeout: 1000 }).catch(() => false))) {
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
const pricePerMillionMatch = line.match(/^\$\s*([\d,]+(?:\.\d+)?)\s*\/\s*M\b/i);
if (!pricePerMillionMatch) {
continue;
}
// Set quantity to 1000000 (which means 1000000 M = 1 trillion AUEC)
await qtyInput.scrollIntoViewIfNeeded();
await qtyInput.click({ force: true });
await qtyInput.fill('');
await qtyInput.pressSequentially(targetQuantityM.toString(), { delay: 10 });
await qtyInput.press('Enter'); // Trigger update
// Wait for price to update (0.5-2 seconds as per instructions)
await page.waitForTimeout(2500);
// Step 3: Extract the total price from the BUY NOW button area
// Look for the price near the BUY NOW button - it's typically in a large font
const pricePerMillion = parseFloat(pricePerMillionMatch[1].replace(/,/g, ''));
let totalPriceUSD = 0;
// Try to find the price element near BUY NOW button
const buyNowButton = card.locator('button:has-text("BUY NOW"), [class*="buy"]').first();
if (await buyNowButton.isVisible().catch(() => false)) {
// Get the parent container and look for price nearby
const priceContainer = buyNowButton.locator('xpath=..').first();
const priceText = await priceContainer.textContent().catch(() => '');
// Extract price - should be like "$5.00" in large text
if (priceText) {
const priceMatch = priceText.match(/\$\s*([\d,]+\.\d{2})/);
if (priceMatch) {
totalPriceUSD = parseFloat(priceMatch[1].replace(/,/g, ''));
}
for (let j = i + 1; j <= Math.min(lines.length - 1, i + 3); j++) {
const totalMatch = lines[j].match(/^\$\s*([\d,]+(?:\.\d{2})?)$/);
if (totalMatch) {
totalPriceUSD = parseFloat(totalMatch[1].replace(/,/g, ''));
break;
}
}
// Fallback: look for price in the card, but exclude "Minutes" context
if (totalPriceUSD === 0) {
const cardText = await card.textContent().catch(() => '');
if (cardText) {
const lines = cardText.split('\n').map(l => l.trim());
for (const line of lines) {
// Skip lines that contain time indicators
if (line.includes('Minutes') || line.includes('Hours') || line.includes('Days')) {
continue;
}
// Look for price pattern with decimal
const priceMatch = line.match(/\$\s*([\d,]+\.\d{2})/);
if (priceMatch) {
const price = parseFloat(priceMatch[1].replace(/,/g, ''));
if (price > 0 && price < 100000) {
totalPriceUSD = price;
break;
}
}
}
}
}
if (totalPriceUSD === 0) {
if (!totalPriceUSD || !pricePerMillion) {
continue;
}
// Step 4: Compute USD per 1M
const pricePerMillion = totalPriceUSD / targetQuantityM;
const amountInMillions = totalPriceUSD / pricePerMillion;
const amountAUEC = Math.round(amountInMillions * 1_000_000);
let seller: string | undefined;
for (let j = i - 1; j >= Math.max(0, i - 10); j--) {
const candidate = lines[j];
const normalized = candidate.toLowerCase();
// Extract seller name and delivery time from card text
const fullCardText = await card.textContent().catch(() => '');
const sellerMatch = fullCardText ? fullCardText.match(/([a-zA-Z0-9_-]{3,20})/) : null;
const seller = sellerMatch ? sellerMatch[1] : 'Unknown';
if (
blockedSellerTerms.has(normalized) ||
candidate.includes('$') ||
/\(\d+\)/.test(candidate) ||
/\bminutes?\b|\bhours?\b|\bdays?\b/i.test(candidate) ||
/\bmember since\b|\btotal orders\b/i.test(candidate) ||
/^\d+(\.\d+)?$/.test(candidate) ||
candidate.length < 3 ||
candidate.length > 24
) {
continue;
}
const deliveryMatch = fullCardText ? fullCardText.match(/(\d+\s*(?:Minutes?|Hours?|Days?))/i) : null;
const deliveryTime = deliveryMatch ? deliveryMatch[1] : undefined;
if (/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(candidate)) {
seller = candidate;
break;
}
}
listings.push({
vendor: 'playerauctions',
amountAUEC: targetQuantityM * 1_000_000,
let deliveryTime: string | undefined;
for (let j = Math.max(0, i - 6); j < i; j++) {
if (/\d+\s*(Minutes?|Hours?|Days?)/i.test(lines[j])) {
deliveryTime = lines[j];
break;
}
}
const key = `${seller || 'unknown'}-${pricePerMillion}-${totalPriceUSD}`;
if (seenListings.has(key)) {
continue;
}
seenListings.add(key);
results.push({
amountAUEC,
priceUSD: totalPriceUSD,
pricePerMillion,
seller: seller.trim(),
seller,
deliveryTime,
scrapedAt: new Date(),
url: this.getTargetUrl(),
});
} catch (error) {
// Skip this card
}
}
return results;
});
if (listings.length === 0) {
return this.extractListingsAlternative(page);
}
return listings;
return listings.map(listing => ({
vendor: 'playerauctions' as const,
amountAUEC: listing.amountAUEC,
priceUSD: listing.priceUSD,
pricePerMillion: listing.pricePerMillion,
seller: listing.seller,
deliveryTime: listing.deliveryTime,
scrapedAt: new Date(),
url: this.getTargetUrl(),
}));
}
private async extractListingsAlternative(page: Page): Promise<VendorListing[]> {
+2 -2
View File
@@ -1,7 +1,7 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "commonjs",
"module": "Node16",
"lib": ["ES2022", "DOM"],
"outDir": "./dist",
"rootDir": "./src",
@@ -10,7 +10,7 @@
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true,
"moduleResolution": "node",
"moduleResolution": "Node16",
"declaration": true,
"declarationMap": true,
"sourceMap": true