#!/usr/bin/env node
/**
 * Scrapes bank/day promotions via Chrome Relay (CDP).
 * Opens each store's medios-de-pago page, waits for JS render, extracts promos.
 *
 * Usage: node scrape_bank_promos_relay.js
 */
'use strict';

const WebSocket = require('/usr/lib/node_modules/openclaw/node_modules/ws');
const fs = require('fs/promises');
const path = require('path');

// NOTE(review): a relay token is committed in source. The environment variable
// takes precedence when set; the literal is kept only so existing invocations
// keep working. Consider rotating it and dropping the fallback.
const TOKEN = process.env.OPENCLAW_RELAY_TOKEN || '0537d84a67f2e43b525964bb43d93f6dfae1ec1b50946455';
const RELAY_WS = 'ws://127.0.0.1:18792/cdp';
const WORKSPACE = '/home/ubuntu/.openclaw/workspace';

const STORES = [
  { name: 'dia', url: 'https://diaonline.supermercadosdia.com.ar/medios-de-pago' },
  { name: 'carrefour', url: 'https://www.carrefour.com.ar/medios-de-pago' },
  { name: 'disco', url: 'https://www.disco.com.ar/medios-de-pago' },
  { name: 'jumbo', url: 'https://www.jumbo.com.ar/medios-de-pago' },
  { name: 'changomas', url: 'https://www.masonline.com.ar/medios-de-pago' },
];

// Timing knobs (milliseconds).
const CDP_TIMEOUT_MS = 30000; // per-request reply deadline
const NETWORK_IDLE_MS = 3000; // quiet period that counts as "network idle"
const NAVIGATION_CAP_MS = 30000; // hard cap on waiting for idle
const HYDRATION_MS = 5000; // extra buffer for client-side rendering
const BETWEEN_STORES_MS = 800; // pause between consecutive stores

// Events that count as page/network activity while waiting for idle.
const ACTIVITY_EVENTS = [
  'Page.loadEventFired',
  'Network.requestWillBeSent',
  'Network.loadingFinished',
  'Network.loadingFailed',
];

/** Resolves after `ms` milliseconds. */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

// ── CDP connection (top-level, no flat sessions) ─────────────────────────────

/**
 * Opens the WebSocket to the relay and wraps it in a minimal CDP client.
 *
 * The returned client exposes:
 *  - `send(method, params?, sessionId?)`: resolves with the reply's `result`,
 *    rejects on a protocol error, on timeout, or when the socket closes.
 *  - `on(event, fn)`: registers `fn(params, sessionId)` for a CDP event.
 *  - `off(event, fn?)`: removes one listener, or all listeners of `event`
 *    when `fn` is omitted.
 *  - `close()`: closes the socket.
 *
 * @returns {Promise<object>} resolves with the client once the socket is open;
 *   rejects if the socket errors or closes before opening.
 */
function connectCDP() {
  return new Promise((resolve, reject) => {
    const ws = new WebSocket(RELAY_WS, {
      headers: { 'x-openclaw-relay-token': TOKEN },
    });

    let nextId = 1;
    const pending = new Map(); // request id -> { res, rej, timer }
    // A Map (not a plain object) because event names arrive from the remote
    // end and must never resolve to Object.prototype members.
    const listeners = new Map(); // event name -> fn[]

    // Removes a pending request and cancels its timeout so a finished request
    // does not keep the event loop alive. Returns the entry, or null.
    const settle = (id) => {
      const entry = pending.get(id);
      if (!entry) return null;
      pending.delete(id);
      clearTimeout(entry.timer);
      return entry;
    };

    const cdp = {
      send(method, params = {}, sessionId = null) {
        return new Promise((res, rej) => {
          const id = nextId++;
          const timer = setTimeout(() => {
            if (settle(id)) rej(new Error(`CDP timeout: ${method}`));
          }, CDP_TIMEOUT_MS);
          pending.set(id, { res, rej, timer });

          const msg = { id, method, params };
          if (sessionId) msg.sessionId = sessionId;
          try {
            ws.send(JSON.stringify(msg));
          } catch (err) {
            // Sending can throw (e.g. socket not open); don't leave the
            // request registered with a live timer.
            settle(id);
            rej(err);
          }
        });
      },
      on(event, fn) {
        if (!listeners.has(event)) listeners.set(event, []);
        listeners.get(event).push(fn);
      },
      off(event, fn) {
        if (fn === undefined) {
          listeners.delete(event);
          return;
        }
        // Build a new array so a dispatch that is currently iterating the old
        // one is not disturbed.
        const remaining = (listeners.get(event) ?? []).filter((f) => f !== fn);
        if (remaining.length > 0) listeners.set(event, remaining);
        else listeners.delete(event);
      },
      close() {
        ws.close();
      },
    };

    ws.on('message', (raw) => {
      let msg;
      try {
        msg = JSON.parse(raw);
      } catch (err) {
        console.warn(`Ignoring non-JSON relay message: ${err.message}`);
        return;
      }
      if (msg === null || typeof msg !== 'object') return;

      if (msg.id !== undefined && pending.has(msg.id)) {
        const { res, rej } = settle(msg.id);
        if (msg.error) rej(new Error(JSON.stringify(msg.error)));
        else res(msg.result);
      } else if (msg.method) {
        for (const fn of listeners.get(msg.method) ?? []) {
          fn(msg.params, msg.sessionId);
        }
      }
    });

    ws.on('open', () => resolve(cdp));
    ws.on('error', reject);
    ws.on('close', () => {
      // Fail in-flight requests right away instead of letting each one wait
      // for its own timeout.
      const err = new Error('CDP connection closed');
      for (const id of [...pending.keys()]) settle(id).rej(err);
      reject(err); // no-op once the connect promise has already settled
    });
  });
}

// ── Navigate to a URL using an existing tab ──────────────────────────────────

/**
 * Navigates the attached session to `url` and waits until no page/network
 * activity has been seen for NETWORK_IDLE_MS, or NAVIGATION_CAP_MS elapsed.
 * Listeners and timers are always released, also when navigation fails.
 *
 * @param {object} cdp - client returned by connectCDP().
 * @param {Function} s - `(method, params?) => Promise`, bound to the session.
 * @param {string} sessionId - session whose events are considered.
 * @param {string} url - address to load.
 * @returns {Promise<void>}
 * @throws if the Page.navigate request is rejected or times out.
 */
async function navigateAndWaitForIdle(cdp, s, sessionId, url) {
  let idleTimer = null;
  let finish;
  const settled = new Promise((resolve) => {
    finish = resolve;
  });

  // (Re)starts the quiet-period countdown.
  const armIdleTimer = () => {
    clearTimeout(idleTimer);
    idleTimer = setTimeout(finish, NETWORK_IDLE_MS);
  };
  const onActivity = (_params, sid) => {
    if (sid === sessionId) armIdleTimer();
  };

  for (const event of ACTIVITY_EVENTS) cdp.on(event, onActivity);
  const hardCapTimer = setTimeout(finish, NAVIGATION_CAP_MS);

  try {
    // Awaited directly (not inside a Promise executor) so a failure reaches
    // the caller instead of becoming an unhandled rejection.
    await s('Page.navigate', { url });
    armIdleTimer();
    await settled;
  } finally {
    clearTimeout(idleTimer);
    clearTimeout(hardCapTimer);
    for (const event of ACTIVITY_EVENTS) cdp.off(event, onActivity);
  }
}

/**
 * Loads `url` in the given tab and returns a snapshot of its visible text.
 *
 * @param {object} cdp - client returned by connectCDP().
 * @param {string} targetId - id of the tab to drive.
 * @param {string} url - page to load.
 * @returns {Promise<{url: string, title: string, text: string, textLen: number} | null>}
 *   `text` is capped at 12000 characters, `textLen` is the uncapped length;
 *   null when the evaluation produced no value.
 * @throws if attaching, enabling domains, navigating or evaluating fails.
 */
async function scrapeViaTab(cdp, targetId, url) {
  // Attach (creates a flat session)
  const { sessionId } = await cdp.send('Target.attachToTarget', { targetId, flatten: true });
  const s = (method, params = {}) => cdp.send(method, params, sessionId);

  try {
    // Enable events on this session
    await s('Page.enable');
    await s('Runtime.enable');
    await s('Network.enable');

    // Navigate then wait for network idle (no activity for 3s) or 30s hard cap
    await navigateAndWaitForIdle(cdp, s, sessionId, url);

    // Additional hydration buffer for VTEX React components
    await sleep(HYDRATION_MS);

    // Extract text content
    const evalResult = await s('Runtime.evaluate', {
      expression: `(function() {
        const txt = document.body ? document.body.innerText : '';
        return {
          url: location.href,
          title: document.title,
          text: txt.slice(0, 12000),
          textLen: txt.length,
        };
      })()`,
      returnByValue: true,
    });

    return evalResult?.result?.value || null;
  } finally {
    // Best-effort: a failed detach must not mask the scrape outcome.
    await s('Target.detachFromTarget', { sessionId }).catch(() => {});
  }
}

// ── Parse bank promos from page text ────────────────────────────────────────

// Lower-cased day expression -> label used as key in the output.
const DAYS_CANONICAL = {
  'lunes a domingo': 'Todos los días',
  'todos los días': 'Todos los días',
  'todos los dias': 'Todos los días', // DAY_RE also accepts the unaccented form
  'lunes': 'Lunes',
  'martes': 'Martes',
  'miércoles': 'Miércoles',
  'miercoles': 'Miércoles',
  'jueves': 'Jueves',
  'viernes': 'Viernes',
  'sábado': 'Sábado',
  'sabado': 'Sábado',
  'domingo': 'Domingo',
};
const DAY_RE = /\b(lunes a domingo|todos los d[ií]as|lunes|martes|mi[eé]rcoles|jueves|viernes|s[aá]bado|domingo)\b/i;
const DISC_RE = /(\d+)\s*%\s*(?:de\s+)?descuento/i;
const CUOTAS_RE = /(\d+)\s+cuotas?\s+sin\s+inter[eé]s/i;
// The trailing boundary is an explicit lookahead rather than \b: without the
// `u` flag \b treats accented letters as non-word characters, so a name that
// ends in one ("Ualá") could never be followed by a \b match.
const BANK_RE = /\b(visa(?:\s+electr[oó]n)?|mastercard|amex|american express|naranja(?:\s*x)?|galicia|santander|macro|supervielle|icbc|mercado\s*pago|modo|naci[oó]n|provincia|ciudad|cabal|hsbc|bbva|patagonia|comafi|brubank|ual[aá]|oca|lemon|prex|personal\s*pay|clar[ií]n\s*365|cuenta\s*dni|getnet|coto)(?![\wáéíóúüñ])/gi;
const TOPE_RE = /tope[^$\n]*\$\s*[\d.,]+/i;
const VIGENCIA_RE = /hasta\s+el\s+\d+\/\d+/i;

/**
 * Returns the canonical label of the first day expression found in `text`,
 * or null when there is none.
 */
function findDay(text) {
  const m = DAY_RE.exec(text);
  if (!m) return null;
  return DAYS_CANONICAL[m[1].toLowerCase()] ?? m[1];
}

/**
 * Extracts promotions from the text snapshot produced by scrapeViaTab().
 *
 * A line containing "N% descuento" or "N cuotas sin interés" is a promo.
 * Banks, day, cap ("tope") and validity ("hasta el d/m") are looked up in a
 * window of the 3 lines before and after it. When the window names no day,
 * the most recent day seen on an earlier line is used, starting from
 * 'Todos los días'. Promos with the same offer text and bank set are kept
 * once per day.
 *
 * @param {{url?: string, title?: string, text?: string, textLen?: number} | null} raw
 * @returns {object} `{ source, url, title, textLen, totalPromos, byDay }`, or
 *   `{ error, url }` when there is no text to parse.
 */
function parsePromos(raw) {
  if (!raw || !raw.text) return { error: 'no text extracted', url: raw?.url };

  const lines = raw.text
    .split('\n')
    .map((l) => l.trim())
    .filter((l) => l.length > 3);

  const byDay = {};
  const seenByDay = new Map(); // day -> Set of dedupe keys
  let currentDay = 'Todos los días';

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    // Update current day context
    const lineDay = findDay(line);
    if (lineDay) currentDay = lineDay;

    // Check for discount/cuotas in this line (a discount wins over cuotas)
    const offerM = DISC_RE.exec(line) || CUOTAS_RE.exec(line);
    if (!offerM) continue;
    const discount = offerM[0];

    // Context window: ±3 lines
    const ctx = lines.slice(Math.max(0, i - 3), i + 4).join(' ');

    // Day from context
    const day = findDay(ctx) ?? currentDay;

    // Banks from context, de-duplicated and sorted once so the dedupe key
    // below is stable without re-sorting stored promos.
    const banks = [...new Set((ctx.match(BANK_RE) || []).map((b) => b.trim()))].sort();

    const topeM = ctx.match(TOPE_RE);
    const vigenciaM = ctx.match(VIGENCIA_RE);

    const promo = {
      descuento: discount,
      bancos: banks,
      tope: topeM ? topeM[0].trim() : '',
      vigencia: vigenciaM ? vigenciaM[0].trim() : '',
      detalle: ctx.replace(/\s+/g, ' ').trim().slice(0, 250),
    };

    // Deduplicate
    let seen = seenByDay.get(day);
    if (!seen) {
      seen = new Set();
      seenByDay.set(day, seen);
      byDay[day] = [];
    }
    const key = `${discount}|${banks.join(',')}`;
    if (seen.has(key)) continue;
    seen.add(key);
    byDay[day].push(promo);
  }

  const total = Object.values(byDay).reduce((sum, promos) => sum + promos.length, 0);
  return {
    source: 'chrome-relay-cdp',
    url: raw.url,
    title: raw.title,
    textLen: raw.textLen,
    totalPromos: total,
    byDay,
  };
}

// ── Main ─────────────────────────────────────────────────────────────────────

/**
 * Scrapes every store in STORES one after another through a single tab,
 * prints a short summary per store and writes the combined result to
 * `<WORKSPACE>/data/discounts/` twice: as `bank-promos-relay-latest.json` and
 * as a timestamped snapshot. A failure in one store is recorded in its entry
 * and does not stop the run.
 *
 * @returns {Promise<object>} results keyed by store name.
 */
async function main() {
  console.log('Connecting to Chrome Relay...');
  const cdp = await connectCDP();
  const results = {};

  try {
    const { targetInfos } = await cdp.send('Target.getTargets');
    const pages = targetInfos.filter((t) => t.type === 'page');
    console.log(`Tabs open: ${pages.length}`);

    // Use first available page tab as our scraping tab
    let scrapeTargetId = pages[0]?.targetId;
    if (!scrapeTargetId) {
      const { targetId } = await cdp.send('Target.createTarget', { url: 'about:blank' });
      scrapeTargetId = targetId;
      console.log('Created new tab for scraping');
    } else {
      console.log(`Using tab: ${pages[0].url.slice(0, 60)}`);
    }

    for (const store of STORES) {
      process.stdout.write(`\n[${store.name}] Scraping ${store.url} ... `);
      try {
        const raw = await scrapeViaTab(cdp, scrapeTargetId, store.url);
        if (!raw) throw new Error('null result from CDP');
        console.log(`${raw.textLen} chars extracted`);

        const r = parsePromos(raw);
        results[store.name] = r;
        if (r.error) {
          console.log(` -> ERROR: ${r.error}`);
        } else {
          console.log(` -> ${r.totalPromos} promos, días: [${Object.keys(r.byDay).join(', ')}]`);
          for (const [day, promos] of Object.entries(r.byDay)) {
            for (const p of promos.slice(0, 4)) {
              console.log(` ${day}: ${p.descuento} — ${p.bancos.join(', ') || '?'} ${p.tope}`);
            }
          }
        }
      } catch (e) {
        console.log(`FAILED: ${e.message}`);
        results[store.name] = { source: 'chrome-relay-cdp', error: e.message };
      }
      await sleep(BETWEEN_STORES_MS);
    }
  } finally {
    // Release the relay connection even when target discovery fails.
    cdp.close();
  }

  // Save
  const outDir = path.join(WORKSPACE, 'data', 'discounts');
  await fs.mkdir(outDir, { recursive: true });

  const generatedAt = new Date().toISOString();
  const out = { generatedAt, source: 'chrome-relay-cdp', stores: results };
  const json = JSON.stringify(out, null, 2);

  const outFile = path.join(outDir, 'bank-promos-relay-latest.json');
  await fs.writeFile(outFile, json);

  // Snapshot name shares the timestamp recorded inside the file.
  const ts = generatedAt.replace(/[:.]/g, '-').slice(0, 19);
  await fs.writeFile(path.join(outDir, `bank-promos-relay-${ts}.json`), json);

  console.log(`\nSaved: ${outFile}`);
  return results;
}

main().catch((e) => {
  console.error(e);
  process.exit(1);
});