update scraper
This commit is contained in:
@@ -33,72 +33,79 @@ export class EldoradoScraper extends BaseScraper {
|
||||
// Track seen combinations to avoid duplicates
|
||||
const seenListings = new Set<string>();
|
||||
|
||||
const blockedSellerTerms = new Set([
|
||||
'price',
|
||||
'delivery time',
|
||||
'delivery instructions',
|
||||
'current offer',
|
||||
'in stock',
|
||||
'min. qty.',
|
||||
'min. qty',
|
||||
'min qty',
|
||||
'recommended',
|
||||
'cheapest first',
|
||||
'lowest min. quantity',
|
||||
'other sellers',
|
||||
'star citizen',
|
||||
'auec',
|
||||
'items',
|
||||
'accounts',
|
||||
'top ups',
|
||||
'boosting',
|
||||
'gift cards',
|
||||
]);
|
||||
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
const line = lines[i];
|
||||
|
||||
// Look for "$/M" pattern - this is the direct price per million
|
||||
// Examples: "$0.00007 / M", "$0.00018 / M", "$0.00007/M"
|
||||
const pricePerMMatch = line.match(/\$\s*([\d.]+)\s*\/?\s*\/\s*M/i) || line.match(/\$\s*([\d.]+)\s*\/\s*M/i);
|
||||
// Eldorado currently exposes unit pricing as either /K or /M.
|
||||
const unitPriceMatch = line.match(/\$\s*([\d.]+)\s*\/\s*([KM])/i);
|
||||
|
||||
if (pricePerMMatch) {
|
||||
const pricePerMillion = parseFloat(pricePerMMatch[1]);
|
||||
|
||||
if (unitPriceMatch) {
|
||||
const unitPrice = parseFloat(unitPriceMatch[1]);
|
||||
const priceUnit = unitPriceMatch[2].toUpperCase();
|
||||
const pricePerMillion = priceUnit === 'K' ? unitPrice * 1000 : unitPrice;
|
||||
|
||||
|
||||
// Look for "Min. qty" or "Min qty" nearby to get the quantity
|
||||
let minQtyM = 10000; // Default to 10000M
|
||||
// Look for the minimum quantity nearby and normalize it to aUEC.
|
||||
let amountAUEC = 1_000_000;
|
||||
for (let j = Math.max(0, i - 5); j < Math.min(lines.length, i + 5); j++) {
|
||||
const qtyLine = lines[j];
|
||||
// Match patterns like "Min. qty. 6000 M" or "Min qty: 16,000 M"
|
||||
const qtyMatch = qtyLine.match(/Min\.?\s*qty\.?\s*:?\s*([\d,]+)\s*M/i);
|
||||
const qtyMatch = qtyLine.match(/Min\.?\s*qty\.?\s*:?\s*([\d,]+)\s*([KM])/i);
|
||||
if (qtyMatch) {
|
||||
minQtyM = parseFloat(qtyMatch[1].replace(/,/g, ''));
|
||||
const quantity = parseFloat(qtyMatch[1].replace(/,/g, ''));
|
||||
const quantityUnit = qtyMatch[2].toUpperCase();
|
||||
amountAUEC = quantity * (quantityUnit === 'K' ? 1_000 : 1_000_000);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const amountAUEC = minQtyM * 1_000_000;
|
||||
const priceUSD = pricePerMillion * minQtyM;
|
||||
const priceUSD = (amountAUEC / 1_000_000) * pricePerMillion;
|
||||
|
||||
// Find seller name - look both backwards and forwards
|
||||
// For featured seller, name appears BEFORE the price
|
||||
// For other sellers, name appears in a structured list
|
||||
let seller: string | undefined;
|
||||
|
||||
// Search backwards first (for featured seller and some list items)
|
||||
for (let j = Math.max(0, i - 20); j < i; j++) {
|
||||
const sellerLine = lines[j];
|
||||
|
||||
// Skip common non-seller text
|
||||
for (let j = i - 1; j >= Math.max(0, i - 13); j--) {
|
||||
const candidate = lines[j];
|
||||
const normalized = candidate.toLowerCase();
|
||||
|
||||
if (
|
||||
sellerLine.includes('$') ||
|
||||
sellerLine.includes('Price') ||
|
||||
sellerLine.includes('qty') ||
|
||||
sellerLine.includes('stock') ||
|
||||
sellerLine.includes('Delivery') ||
|
||||
sellerLine.toLowerCase().includes('review') ||
|
||||
sellerLine.includes('Rating') ||
|
||||
sellerLine.includes('Offer') ||
|
||||
sellerLine.includes('Details') ||
|
||||
sellerLine.includes('FEATURED') ||
|
||||
sellerLine.includes('Other') ||
|
||||
sellerLine.includes('sellers') ||
|
||||
sellerLine.includes('aUEC') ||
|
||||
sellerLine === 'Star Citizen' || // Exclude the game title but not seller names
|
||||
sellerLine.includes('IMF') ||
|
||||
sellerLine.includes('in-game') ||
|
||||
sellerLine.includes('currency') ||
|
||||
sellerLine.length < 3 ||
|
||||
sellerLine.length > 30
|
||||
!candidate ||
|
||||
candidate.includes('$') ||
|
||||
candidate.includes('%') ||
|
||||
/\breviews?\b/i.test(candidate) ||
|
||||
/\borders?\b/i.test(candidate) ||
|
||||
/\bmember since\b/i.test(candidate) ||
|
||||
/\bminutes?\b|\bhours?\b|\bdays?\b/i.test(candidate) ||
|
||||
/\d,\d{3}/.test(candidate) ||
|
||||
blockedSellerTerms.has(normalized) ||
|
||||
normalized.startsWith('other sellers') ||
|
||||
candidate.length < 3 ||
|
||||
candidate.length > 30
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Match seller name patterns - alphanumeric with underscores/hyphens
|
||||
// Allow some special cases like "StarCitizen"
|
||||
if (/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(sellerLine)) {
|
||||
seller = sellerLine;
|
||||
// Don't break - keep looking for a closer match
|
||||
|
||||
if (/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(candidate)) {
|
||||
seller = candidate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -117,7 +124,7 @@ export class EldoradoScraper extends BaseScraper {
|
||||
}
|
||||
|
||||
// Create unique key to avoid duplicates
|
||||
const key = `${pricePerMillion}-${minQtyM}`;
|
||||
const key = `${seller || 'unknown'}-${pricePerMillion}-${amountAUEC}`;
|
||||
if (seenListings.has(key)) continue;
|
||||
seenListings.add(key);
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ export class PlayerAuctionsScraper extends BaseScraper {
|
||||
}
|
||||
|
||||
async extractListings(page: Page): Promise<VendorListing[]> {
|
||||
// Wait for page readiness
|
||||
await page.waitForTimeout(3000);
|
||||
|
||||
// Close cookie popup if it exists
|
||||
@@ -26,119 +25,124 @@ export class PlayerAuctionsScraper extends BaseScraper {
|
||||
// No cookie popup or already closed
|
||||
}
|
||||
|
||||
// Find offer cards - they have class "offer-item"
|
||||
const offerCards = await page.locator('.offer-item, [class*="offer-item"]').all();
|
||||
const listings = await page.evaluate(() => {
|
||||
const results: Array<{
|
||||
amountAUEC: number;
|
||||
priceUSD: number;
|
||||
pricePerMillion: number;
|
||||
seller?: string;
|
||||
deliveryTime?: string;
|
||||
}> = [];
|
||||
|
||||
if (offerCards.length === 0) {
|
||||
return this.extractListingsAlternative(page);
|
||||
}
|
||||
const lines = document.body.innerText
|
||||
.split('\n')
|
||||
.map(line => line.trim())
|
||||
.filter(line => line.length > 0);
|
||||
|
||||
const listings: VendorListing[] = [];
|
||||
const targetQuantityM = 1000000; // 1000000 M = 1 trillion AUEC (field is already in millions)
|
||||
const blockedSellerTerms = new Set([
|
||||
'main server',
|
||||
'offer details',
|
||||
'buy now',
|
||||
'm',
|
||||
'go',
|
||||
'price',
|
||||
'server',
|
||||
'rating',
|
||||
'delivery',
|
||||
'new seller',
|
||||
]);
|
||||
|
||||
// Step 2-5: Process each offer card
|
||||
for (let i = 0; i < Math.min(offerCards.length, 20); i++) {
|
||||
try {
|
||||
const card = offerCards[i];
|
||||
const seenListings = new Set<string>();
|
||||
|
||||
// Find the quantity input (shows number with "M" suffix, has +/- buttons)
|
||||
const qtyInput = card.locator('input[type="number"]').first();
|
||||
|
||||
if (!(await qtyInput.isVisible({ timeout: 1000 }).catch(() => false))) {
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
const line = lines[i];
|
||||
const pricePerMillionMatch = line.match(/^\$\s*([\d,]+(?:\.\d+)?)\s*\/\s*M\b/i);
|
||||
|
||||
if (!pricePerMillionMatch) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Set quantity to 1000000 (which means 1000000 M = 1 trillion AUEC)
|
||||
await qtyInput.scrollIntoViewIfNeeded();
|
||||
await qtyInput.click({ force: true });
|
||||
await qtyInput.fill('');
|
||||
await qtyInput.pressSequentially(targetQuantityM.toString(), { delay: 10 });
|
||||
await qtyInput.press('Enter'); // Trigger update
|
||||
|
||||
// Wait for price to update (0.5-2 seconds as per instructions)
|
||||
await page.waitForTimeout(2500);
|
||||
|
||||
// Step 3: Extract the total price from the BUY NOW button area
|
||||
// Look for the price near the BUY NOW button - it's typically in a large font
|
||||
const pricePerMillion = parseFloat(pricePerMillionMatch[1].replace(/,/g, ''));
|
||||
let totalPriceUSD = 0;
|
||||
|
||||
// Try to find the price element near BUY NOW button
|
||||
const buyNowButton = card.locator('button:has-text("BUY NOW"), [class*="buy"]').first();
|
||||
if (await buyNowButton.isVisible().catch(() => false)) {
|
||||
// Get the parent container and look for price nearby
|
||||
const priceContainer = buyNowButton.locator('xpath=..').first();
|
||||
const priceText = await priceContainer.textContent().catch(() => '');
|
||||
|
||||
// Extract price - should be like "$5.00" in large text
|
||||
if (priceText) {
|
||||
const priceMatch = priceText.match(/\$\s*([\d,]+\.\d{2})/);
|
||||
if (priceMatch) {
|
||||
totalPriceUSD = parseFloat(priceMatch[1].replace(/,/g, ''));
|
||||
}
|
||||
|
||||
for (let j = i + 1; j <= Math.min(lines.length - 1, i + 3); j++) {
|
||||
const totalMatch = lines[j].match(/^\$\s*([\d,]+(?:\.\d{2})?)$/);
|
||||
if (totalMatch) {
|
||||
totalPriceUSD = parseFloat(totalMatch[1].replace(/,/g, ''));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: look for price in the card, but exclude "Minutes" context
|
||||
if (totalPriceUSD === 0) {
|
||||
const cardText = await card.textContent().catch(() => '');
|
||||
if (cardText) {
|
||||
const lines = cardText.split('\n').map(l => l.trim());
|
||||
|
||||
for (const line of lines) {
|
||||
// Skip lines that contain time indicators
|
||||
if (line.includes('Minutes') || line.includes('Hours') || line.includes('Days')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Look for price pattern with decimal
|
||||
const priceMatch = line.match(/\$\s*([\d,]+\.\d{2})/);
|
||||
if (priceMatch) {
|
||||
const price = parseFloat(priceMatch[1].replace(/,/g, ''));
|
||||
if (price > 0 && price < 100000) {
|
||||
totalPriceUSD = price;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (totalPriceUSD === 0) {
|
||||
|
||||
if (!totalPriceUSD || !pricePerMillion) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Step 4: Compute USD per 1M
|
||||
const pricePerMillion = totalPriceUSD / targetQuantityM;
|
||||
const amountInMillions = totalPriceUSD / pricePerMillion;
|
||||
const amountAUEC = Math.round(amountInMillions * 1_000_000);
|
||||
let seller: string | undefined;
|
||||
for (let j = i - 1; j >= Math.max(0, i - 10); j--) {
|
||||
const candidate = lines[j];
|
||||
const normalized = candidate.toLowerCase();
|
||||
|
||||
// Extract seller name and delivery time from card text
|
||||
const fullCardText = await card.textContent().catch(() => '');
|
||||
const sellerMatch = fullCardText ? fullCardText.match(/([a-zA-Z0-9_-]{3,20})/) : null;
|
||||
const seller = sellerMatch ? sellerMatch[1] : 'Unknown';
|
||||
if (
|
||||
blockedSellerTerms.has(normalized) ||
|
||||
candidate.includes('$') ||
|
||||
/\(\d+\)/.test(candidate) ||
|
||||
/\bminutes?\b|\bhours?\b|\bdays?\b/i.test(candidate) ||
|
||||
/\bmember since\b|\btotal orders\b/i.test(candidate) ||
|
||||
/^\d+(\.\d+)?$/.test(candidate) ||
|
||||
candidate.length < 3 ||
|
||||
candidate.length > 24
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const deliveryMatch = fullCardText ? fullCardText.match(/(\d+\s*(?:Minutes?|Hours?|Days?))/i) : null;
|
||||
const deliveryTime = deliveryMatch ? deliveryMatch[1] : undefined;
|
||||
if (/^[a-zA-Z][a-zA-Z0-9_-]*$/.test(candidate)) {
|
||||
seller = candidate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
listings.push({
|
||||
vendor: 'playerauctions',
|
||||
amountAUEC: targetQuantityM * 1_000_000,
|
||||
let deliveryTime: string | undefined;
|
||||
for (let j = Math.max(0, i - 6); j < i; j++) {
|
||||
if (/\d+\s*(Minutes?|Hours?|Days?)/i.test(lines[j])) {
|
||||
deliveryTime = lines[j];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const key = `${seller || 'unknown'}-${pricePerMillion}-${totalPriceUSD}`;
|
||||
if (seenListings.has(key)) {
|
||||
continue;
|
||||
}
|
||||
seenListings.add(key);
|
||||
|
||||
results.push({
|
||||
amountAUEC,
|
||||
priceUSD: totalPriceUSD,
|
||||
pricePerMillion,
|
||||
seller: seller.trim(),
|
||||
seller,
|
||||
deliveryTime,
|
||||
scrapedAt: new Date(),
|
||||
url: this.getTargetUrl(),
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
// Skip this card
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
});
|
||||
|
||||
if (listings.length === 0) {
|
||||
return this.extractListingsAlternative(page);
|
||||
}
|
||||
|
||||
return listings;
|
||||
return listings.map(listing => ({
|
||||
vendor: 'playerauctions' as const,
|
||||
amountAUEC: listing.amountAUEC,
|
||||
priceUSD: listing.priceUSD,
|
||||
pricePerMillion: listing.pricePerMillion,
|
||||
seller: listing.seller,
|
||||
deliveryTime: listing.deliveryTime,
|
||||
scrapedAt: new Date(),
|
||||
url: this.getTargetUrl(),
|
||||
}));
|
||||
}
|
||||
|
||||
private async extractListingsAlternative(page: Page): Promise<VendorListing[]> {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "commonjs",
|
||||
"module": "Node16",
|
||||
"lib": ["ES2022", "DOM"],
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
@@ -10,7 +10,7 @@
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"resolveJsonModule": true,
|
||||
"moduleResolution": "node",
|
||||
"moduleResolution": "Node16",
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true
|
||||
|
||||
Reference in New Issue
Block a user