#!/usr/bin/env node
/**
 * Fetches ALL currently discounted products from each supermarket.
 *
 * Method: VTEX Intelligent Search API — paginate every category, collect products
 * whose clusterHighlights/productClusters contain promo-sounding labels
 * (2x1, 3x2, N% off, segunda unidad, etc.)
 *
 * Stores: Jumbo, Disco, Carrefour, Changomas, DIA
 * Output: data/discounts/product-promos-latest.json
 *         super-ranking-report/product-promos-data.js
 */

'use strict';

const https = require('https');
const fs = require('fs/promises');
const path = require('path');

const WORKSPACE = '/home/ubuntu/.openclaw/workspace';
const CONCURRENCY = 6; // parallel requests per store

// ── HTTP helper ───────────────────────────────────────────────────────────────

/**
 * Performs an HTTPS GET and parses the response body as JSON.
 *
 * @param {string} url - Absolute https:// URL to request.
 * @returns {Promise<{status: number, data: *}>} The HTTP status code and the
 *   parsed body; `data` is `null` when the body is not valid JSON.
 * @throws Rejects on request errors, after 20 s of socket inactivity
 *   ('timeout'), or when the response stream ends before the body is complete.
 */
function httpGet(url) {
  return new Promise((resolve, reject) => {
    const options = {
      headers: {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0 Safari/537.36',
        'Accept': 'application/json',
      },
    };

    const req = https.request(url, options, (res) => {
      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));

      // A connection dropped mid-body never emits 'end'. Without these two
      // handlers the promise would stay pending forever and stall the caller.
      res.on('error', reject);
      res.on('close', () => {
        if (!res.complete) reject(new Error('response closed before completion'));
      });

      res.on('end', () => {
        const body = Buffer.concat(chunks).toString('utf8');
        let data = null;
        try {
          data = JSON.parse(body);
        } catch (e) {
          // Deliberate best-effort: non-JSON bodies (HTML error pages, empty
          // responses) are reported as `data: null` next to the status code.
        }
        resolve({ status: res.statusCode, data });
      });
    });

    req.on('error', reject);
    req.setTimeout(20000, () => req.destroy(new Error('timeout')));
    req.end();
  });
}

/**
 * Runs an array of thunks with at most `concurrency` of them in flight.
 *
 * @param {Array<function(): Promise<*>>} tasks - Functions that start a task
 *   when called and return its promise.
 * @param {number} concurrency - Maximum number of tasks running at once.
 * @returns {Promise<Array<*>>} Results in the same order as `tasks`.
 *   Rejects as soon as any task rejects.
 */
async function runPool(tasks, concurrency) {
  const results = [];
  let idx = 0;

  // Each worker repeatedly claims the next unclaimed index until none remain.
  async function worker() {
    while (idx < tasks.length) {
      const i = idx++;
      results[i] = await tasks[i]();
    }
  }

  const workerCount = Math.min(concurrency, tasks.length);
  await Promise.all(Array.from({ length: workerCount }, worker));
  return results;
}

// ── Promo detection ───────────────────────────────────────────────────────────

// Matches promo cluster names: 2x1, 3x2, 4x3, N% off, segunda/2da unidad, gratis, etc.
// The lookahead skips "promocion de banco" in both unaccented and accented
// ("promoción", \u00f3) spellings.
const PROMO_RE = /\d+x\d+|\d+\s*%|2do|3ro|segunda|2da|3ra|gratis|descuento|oferta|hasta\s+\d|\d+\s*csi|ahorro\s+\d|llevate|promo(?!ci[o\u00f3]n\s+de\s+banco)/i;

// Names to EXCLUDE (bank promos, internal codes, brand clusters)
const EXCLUDE_RE = /^(JUMBO|DISCO|VEA|CENCOSUD|rpainf|rpatmp|rpagal|rpamac|rpasant|rpanaranja|[A-Z_]{8,})/;

// Page size requested from the search endpoint, and the page cap applied per category.
const PAGE_SIZE = 50;
const MAX_PAGES = 50; // IS API max 50 pages

/**
 * Tells whether a cluster name looks like a product promotion.
 *
 * @param {string} name - Cluster name as returned by the API.
 * @returns {boolean} True when the name matches PROMO_RE and is not excluded.
 */
function isPromoCluster(name) {
  if (typeof name !== 'string') return false;
  // Trim first: EXCLUDE_RE is anchored at the start of the string, so leading
  // whitespace would otherwise let an excluded name slip through.
  const label = name.trim();
  return PROMO_RE.test(label) && !EXCLUDE_RE.test(label);
}

/**
 * Collects the distinct promo labels attached to a product.
 *
 * @param {object} product - Product object; `clusterHighlights` and
 *   `productClusters` are read as arrays of `{ name }` and ignored otherwise.
 * @returns {string[]} Trimmed, de-duplicated labels; highlights come first.
 */
function extractPromoLabel(product) {
  const asArray = (value) => (Array.isArray(value) ? value : []);
  const clusters = [
    ...asArray(product.clusterHighlights),
    ...asArray(product.productClusters),
  ];

  // A Set keeps insertion order and removes duplicates across both lists.
  const labels = new Set();
  for (const cluster of clusters) {
    const name = cluster?.name;
    if (typeof name !== 'string') continue;
    const label = name.trim();
    if (label && isPromoCluster(label)) labels.add(label);
  }
  return [...labels];
}

// ── VTEX IS scraper ───────────────────────────────────────────────────────────

/**
 * Fetches the category tree (depth 1) for a store.
 *
 * @param {string} host - Store hostname.
 * @returns {Promise<Array<{id: *, name: *, url: *}>>} Categories, or an empty
 *   array when the response body is not a JSON array.
 */
async function getCategories(host) {
  const { status, data } = await httpGet(`https://${host}/api/catalog_system/pub/category/tree/1`);
  if (!Array.isArray(data)) {
    // Error responses may be JSON objects or non-JSON; neither can be mapped.
    console.warn(`[${host}] category tree unavailable (HTTP ${status})`);
    return [];
  }
  return data.map((c) => ({ id: c.id, name: c.name, url: c.url }));
}

/**
 * Converts a category name into a lowercase, accent-free, dash-separated slug.
 *
 * @param {string} name - Category display name.
 * @returns {string} Slug containing only `a-z`, `0-9` and single dashes.
 */
function slugify(name) {
  return name.toLowerCase()
    .normalize('NFD').replace(/[\u0300-\u036f]/g, '')
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '');
}

/**
 * Requests one page of products for a category from the search endpoint.
 *
 * @param {string} host - Store hostname.
 * @param {string} categorySlug - Value for the `category-1` facet.
 * @param {number} page - 1-based page number.
 * @returns {Promise<*>} Parsed response body, or `null` when it was not JSON.
 */
async function fetchCategoryPage(host, categorySlug, page) {
  const facets = JSON.stringify([{ key: 'category-1', value: categorySlug }]);
  const url = `https://${host}/_v/api/intelligent-search/product_search?page=${page}&count=${PAGE_SIZE}&selectedFacets=${encodeURIComponent(facets)}`;
  const { data } = await httpGet(url);
  return data;
}

/**
 * Scans every category of a store and collects products carrying promo labels.
 *
 * A failed page request is logged and counted in `failedRequests` instead of
 * aborting the store, so one timeout does not discard the products already found.
 *
 * @param {string} storeName - Short store name used in log output.
 * @param {string} host - Store hostname.
 * @returns {Promise<{source: string, host: string, totalScanned: number,
 *   totalDiscounted: number, failedRequests: number, items: object[]}>}
 */
async function scanVtexStore(storeName, host) {
  console.log(`\n[${storeName}] Getting categories...`);
  const cats = await getCategories(host);
  console.log(`[${storeName}] ${cats.length} categories`);

  const discountedProducts = new Map(); // productId → product entry
  let totalScanned = 0;
  let failedRequests = 0;

  // Records the promo products of one result page.
  const collect = (products, categoryName) => {
    totalScanned += products.length;
    for (const p of products) {
      const labels = extractPromoLabel(p);
      if (labels.length) discountedProducts.set(p.productId, buildEntry(p, labels, categoryName));
    }
  };

  // Fetches one page; returns null (and counts the failure) instead of throwing.
  const fetchPageSafely = async (slug, page, categoryName) => {
    try {
      return await fetchCategoryPage(host, slug, page);
    } catch (err) {
      failedRequests++;
      console.warn(`[${storeName}] ${categoryName} page ${page} failed: ${err.message}`);
      return null;
    }
  };

  for (const cat of cats) {
    const slug = slugify(cat.name);

    // Page 1 to get total
    const first = await fetchPageSafely(slug, 1, cat.name);
    if (!first || !Array.isArray(first.products)) continue;

    const total = first.recordsFiltered || 0;
    const totalPages = Math.min(Math.ceil(total / PAGE_SIZE), MAX_PAGES);
    collect(first.products, cat.name);

    // Paginate remaining pages in parallel batches
    const pageTasks = [];
    for (let pg = 2; pg <= totalPages; pg++) {
      pageTasks.push(async () => {
        const r = await fetchPageSafely(slug, pg, cat.name);
        if (r && Array.isArray(r.products)) collect(r.products, cat.name);
      });
    }
    await runPool(pageTasks, CONCURRENCY);

    // Page 1 is always fetched, even when the reported total is 0.
    const pagesScanned = Math.max(totalPages, 1);
    process.stdout.write(`  ${cat.name}: ${total} products, ${pagesScanned} pages scanned\n`);
  }

  const items = [...discountedProducts.values()];
  items.sort((a, b) => b.promoLabels.length - a.promoLabels.length);

  console.log(`[${storeName}] Scanned ${totalScanned} products, found ${items.length} with active promos`);
  return {
    source: 'vtex-is-cluster-scan',
    host,
    totalScanned,
    totalDiscounted: items.length,
    failedRequests,
    items,
  };
}

/**
 * Builds the output record for a discounted product.
 *
 * Prices are read from the first seller of the first item. A list price is
 * reported only when it exceeds the selling price by more than 5 %.
 *
 * @param {object} p - Product object from the search response.
 * @param {string[]} labels - Promo labels found for the product.
 * @param {string} category - Name of the category the product was found in.
 * @returns {object} Entry with ids, names, rounded prices, discount % and labels.
 */
function buildEntry(p, labels, category) {
  const offer = p.items?.[0]?.sellers?.[0]?.commertialOffer;
  const price = offer?.Price;
  const listPrice = offer?.ListPrice;

  const hasHigherListPrice = Boolean(listPrice) && listPrice > price * 1.05;
  const discPct = price && hasHigherListPrice
    ? Math.round((1 - price / listPrice) * 100)
    : null;

  return {
    productId: p.productId,
    name: p.productName,
    brand: p.brand,
    category,
    link: p.link,
    price: price ? Math.round(price) : null,
    listPrice: hasHigherListPrice ? Math.round(listPrice) : null,
    discPct,
    promoLabels: labels,
  };
}

// ── MAIN ──────────────────────────────────────────────────────────────────────

const STORES = [
  { name: 'jumbo', host: 'www.jumbo.com.ar' },
  { name: 'disco', host: 'www.disco.com.ar' },
  { name: 'carrefour', host: 'www.carrefour.com.ar' },
  { name: 'changomas', host: 'www.masonline.com.ar' },
  { name: 'dia', host: 'diaonline.supermercadosdia.com.ar' },
];

/**
 * Scans every store in STORES sequentially and writes the combined report as
 * a "latest" JSON file, a timestamped JSON snapshot and a browser-loadable JS file.
 *
 * A store whose scan throws is recorded with an `error` field and no items.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const results = {};
  const start = Date.now();

  for (const { name, host } of STORES) {
    try {
      results[name] = await scanVtexStore(name, host);
    } catch (e) {
      console.error(`[${name}] FAILED: ${e.message}`);
      results[name] = { source: 'vtex-is-cluster-scan', error: e.message, totalDiscounted: 0, items: [] };
    }
  }

  const elapsed = Math.round((Date.now() - start) / 1000);
  const out = {
    generatedAt: new Date().toISOString(),
    source: 'vtex-is-cluster-scan',
    elapsedSec: elapsed,
    stores: results,
  };
  const json = JSON.stringify(out, null, 2); // serialised once, written three times

  const outDir = path.join(WORKSPACE, 'data', 'discounts');
  const latestPath = path.join(outDir, 'product-promos-latest.json');
  const jsPath = path.join(WORKSPACE, 'super-ranking-report', 'product-promos-data.js');

  // Create both target directories up front so a missing report folder cannot
  // fail the last write after the whole scan has already run.
  await fs.mkdir(outDir, { recursive: true });
  await fs.mkdir(path.dirname(jsPath), { recursive: true });

  await fs.writeFile(latestPath, json);

  const ts = out.generatedAt.replace(/[:.]/g, '-').slice(0, 19);
  await fs.writeFile(path.join(outDir, `product-promos-${ts}.json`), json);

  await fs.writeFile(jsPath,
    `// Auto-generated by fetch_product_promos.js — do not edit\nwindow.PRODUCT_PROMOS_REPORT = ${json};\n`);

  console.log(`\n=== DONE in ${elapsed}s ===`);
  for (const [store, d] of Object.entries(results)) {
    if (d.error) {
      console.log(`${store}: ERROR — ${d.error}`);
      continue;
    }
    console.log(`${store}: ${d.totalDiscounted} discounted products (of ${d.totalScanned} scanned)`);
  }
  console.log(`Saved: ${latestPath}`);
  console.log(`Saved: ${jsPath}`);
}

main().catch(e => { console.error(e); process.exit(1); });