Skip to content

Instantly share code, notes, and snippets.

@supermarsx
Created November 3, 2025 18:27
Show Gist options
  • Select an option

  • Save supermarsx/69675ac56da8160aef2b9015dc5b31a8 to your computer and use it in GitHub Desktop.

Select an option

Save supermarsx/69675ac56da8160aef2b9015dc5b31a8 to your computer and use it in GitHub Desktop.
StoresAce Product Scraper — CSV Export with UI (parallel per-item)
// ==UserScript==
// @name StoresAce Product Scraper — CSV Export with UI (parallel per-item)
// @namespace https://supermarsx.github.io/userscripts
// @version 1.1.0
// @description Scrape all product data and export a flattened CSV. Now requests per-item HTML, KPIs, composition (and optionally cardex) IN PARALLEL so each product is fetched “all at once”. Includes overlay UI with Start/Cancel, progress bar, retry/backoff, and failure list.
// @author Supermarsx
// @match https://COMPANYID.storesace.com/*
// @run-at document-end
// @grant none
// ==/UserScript==
/*
WHAT'S NEW (v1.1.0)
- Per-item subrequests run in PARALLEL via Promise.allSettled:
* /items/{id}/ (HTML)
* /items/{id}/kpis/ (JSON)
* /items/{id}/composition/grid/ (JSON)
* /items/{id}/cardex/grid/ (JSON) ← optional prefetch; can be required later if type==2
- Keeps overall iteration across product IDs sequential (gentle on server),
but each product's 3–4 requests happen at once.
- If cardex is required (type==2) and wasn’t prefetched or failed, we do a follow-up fetch.
UI
- Floating bottom-left button: "Scrape → CSV"
- Bottom-center progress bar + counters (processed, total, errors, 429/403/404/500, ETA)
- Live list of permanently failed items (id + comp_name when known)
- Cancel button: stops after current request and immediately exports what was collected so far
Reliability
- Each request retries up to MAX_RETRIES with exponential backoff
- Global soft backoff grows on 429s and decays on success
- Product IDs processed strictly sequentially; subrequests parallelized per product
*/
(function () {
'use strict';
// ----------------------------
// Config
// ----------------------------
// Origin of the page the script runs on; every endpoint path below is appended to it.
const ORIGIN = location.origin; // https://COMPANYID.storesace.com
/**
 * Relative paths (with pre-encoded query strings) of every endpoint the scraper calls.
 * - gridIds: the product grid, rows 0–60000 sorted ascending by pcode; only each row's `id` is used.
 * - itemHtml / itemKpis / itemComposition / itemCardex: per-product endpoints built from the product id.
 * The URL-encoded sortModel/filterModel values are JSON and are sent exactly as written here.
 */
const ENDPOINTS = {
gridIds:
"/items/grid/data/?items_no_store=0&family_type=2&startRow=0&endRow=60000&rowGroupCols=%5B%5D&groupKeys=%5B%5D&sortModel=%5B%7B%22sort%22%3A%22asc%22%2C%22colId%22%3A%22pcode%22%7D%5D&filterModel=%7B%7D&valueCols=%5B%5D",
// Product detail page; its form fields are read by parseItemHtml().
itemHtml: (id) => `/items/${id}/`,
itemKpis: (id) => `/items/${id}/kpis/`,
// Composition grid, sorted ascending by comp_name.
itemComposition: (id) => `/items/${id}/composition/grid/?sortModel=%5B%7B%22colId%22%3A%22comp_name%22%2C%22sort%22%3A%22asc%22%7D%5D&rowGroupCols=%5B%5D&startRow=0&endRow=100000`,
// Cardex grid, sorted by supplier_name, start_date, unit_name.
// NOTE(review): endRow=30 presumably caps the export at the first 30 cardex rows — confirm this is intended.
itemCardex: (id) => `/items/${id}/cardex/grid/?startRow=0&endRow=30&rowGroupCols=%5B%5D&groupKeys=%5B%5D&sortModel=%5B%7B%22sort%22%3A%22asc%22%2C%22colId%22%3A%22supplier_name%22%7D%2C%7B%22sort%22%3A%22asc%22%2C%22colId%22%3A%22start_date%22%7D%2C%7B%22sort%22%3A%22asc%22%2C%22colId%22%3A%22unit_name%22%7D%5D&filterModel=%7B%7D&valueCols=%5B%5D`,
};
// Toggle behavior here if you want
const SETTINGS = {
PARALLEL_PER_ITEM: true, // run the 3–4 requests for each product at once
PREFETCH_CARDEX: true, // start cardex request immediately (even before we know type==2)
};
// A request is attempted once plus up to MAX_RETRIES more times before it fails permanently.
const MAX_RETRIES = 3; // per request retries
// Base of the per-retry exponential delay (BASE_DELAY_MS * 2^attempt); also seeds the global
// backoff on the first 429 and is one term of the ETA estimate.
const BASE_DELAY_MS = 50; // initial backoff delay (per retry)
// Pause inserted after each product in the main loop (skipped once a cancel is requested).
const GLOBAL_GAP_MS = 200; // small gap between products
// Upper bound for globalBackoffMs no matter how many 429 responses arrive.
const MAX_GLOBAL_BACKOFF_MS = 8000; // cap for global backoff after many 429s
// ----------------------------
// State
// ----------------------------
// True while a scrape is in progress; onStart() and requestCancel() both check it.
let isRunning = false;
// Set by requestCancel(); checked before each product and before each request attempt.
let cancelRequested = false;
/**
 * Progress and HTTP-status tallies rendered by updateCounters().
 * The http* fields count individual non-OK responses (one per attempt), not products.
 */
const counts = {
total: 0, // number of product ids returned by the grid request
processed: 0, // products fetched and flattened successfully
permanentErrors: 0, // products whose fetch threw after all retries
http429: 0,
http403: 0,
http404: 0,
http500: 0,
};
/**
 * Failed items: {id, status, message, comp_name?}
 */
const failures = [];
/**
 * Collected results (one object per product, already flattened)
 */
const results = [];
// Delay (ms) applied before every request attempt: doubled on each 429 (capped at
// MAX_GLOBAL_BACKOFF_MS) and multiplied by 0.6 after each successful response.
let globalBackoffMs = 0;
// ----------------------------
// UI helpers
// ----------------------------
/**
 * Tiny element factory used by all UI helpers.
 *
 * @param {string} tag - Tag name passed to document.createElement.
 * @param {Object} [props] - Copied onto the element with Object.assign, so keys are
 *   DOM properties (e.g. className, id, title), not HTML attributes.
 * @param {...(Node|string|null|undefined)} children - Strings become text nodes,
 *   null/undefined entries are skipped, anything else is appended as-is.
 * @returns {Element} The newly created element.
 */
function h(tag, props = {}, ...children) {
  const node = Object.assign(document.createElement(tag), props);
  children
    .filter((child) => child != null)
    .forEach((child) => {
      const isText = typeof child === 'string';
      node.appendChild(isText ? document.createTextNode(child) : child);
    });
  return node;
}
/**
 * Build the scraper's <style> element and append it to document.head.
 * The element id ('storesace-scraper-styles') is what mountUI() looks for to
 * decide whether the UI has already been mounted.
 */
function createStyles() {
  // Kept flush-left on purpose: the text is assigned verbatim as the stylesheet content.
  const css = `
.sa-fixed { position: fixed; z-index: 999999; font-family: system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, "Apple Color Emoji", "Segoe UI Emoji"; }
.sa-btn { background: #0f172a; color: white; border: none; padding: 10px 14px; border-radius: 10px; cursor: pointer; box-shadow: 0 4px 14px rgba(0,0,0,.2); font-weight: 600; }
.sa-btn[disabled] { opacity: .6; cursor: not-allowed; }
.sa-start { bottom: 20px; left: 20px; }
.sa-panel { left: 50%; transform: translateX(-50%); bottom: 12px; background: rgba(15,23,42,.92); color: #e5e7eb; padding: 12px 14px; border-radius: 12px; display: flex; gap: 14px; align-items: center; box-shadow: 0 10px 30px rgba(0,0,0,.25); }
.sa-bar-wrap { width: 380px; height: 10px; background: #334155; border-radius: 999px; overflow: hidden; }
.sa-bar { height: 100%; width: 0%; background: linear-gradient(90deg, #34d399, #60a5fa); transition: width .25s ease; }
.sa-counters { font-size: 12px; display: flex; gap: 10px; align-items: center; white-space: nowrap; }
.sa-fails { position: fixed; left: 50%; transform: translateX(-50%); bottom: 74px; max-height: 28vh; overflow: auto; padding: 6px 10px; background: rgba(30,41,59,.92); color: #f1f5f9; border-radius: 10px; box-shadow: 0 10px 30px rgba(0,0,0,.25); font-size: 12px; min-width: 380px; }
.sa-fails h4 { margin: 0 0 6px 0; font-size: 12px; font-weight: 700; color: #fda4af; }
.sa-fails ul { margin: 0; padding-left: 16px; }
.sa-chip { display:inline-flex; align-items:center; gap:6px; background:#0b1220; padding:4px 8px; border-radius:999px; }
.sa-chip b { color:#fff; }
.sa-chip small { color:#93c5fd; }
.sa-cancel { background: #ef4444; }
.sa-hidden { display: none !important; }
`;
  const sheet = h('style', { id: 'storesace-scraper-styles', textContent: css });
  document.head.appendChild(sheet);
}
// References to the overlay elements; assigned once by mountUI() and read by the
// other UI helpers and by the orchestration code.
let startBtn;
let panel;
let bar;
let countersSpan;
let cancelBtn;
let failsBox;
let failsList;

/**
 * Build the overlay (start button, progress panel, failure list) and attach it to
 * document.body. Does nothing when the scraper stylesheet is already present,
 * which makes repeated calls harmless.
 */
function mountUI() {
  const alreadyMounted = Boolean(document.getElementById('storesace-scraper-styles'));
  if (alreadyMounted) return; // idempotent
  createStyles();

  // Floating launcher button.
  startBtn = h(
    'button',
    { className: 'sa-fixed sa-btn sa-start', title: 'Scrape all products and export CSV' },
    'Scrape → CSV',
  );
  startBtn.addEventListener('click', onStart);

  // Progress bar plus the initial (all-zero) counter chips.
  bar = h('div', { className: 'sa-bar' });
  const barWrap = h('div', { className: 'sa-bar-wrap' }, bar);
  const initialChips = [
    ['Processed', '0/0'],
    ['Errors', '0'],
    ['429', '0'],
    ['403', '0'],
    ['404', '0'],
    ['500', '0'],
    ['ETA', '—'],
  ].map(([label, value]) => chip(label, value));
  countersSpan = h('div', { className: 'sa-counters' }, ...initialChips);

  cancelBtn = h('button', { className: 'sa-btn sa-cancel' }, 'Cancel & Export');
  cancelBtn.addEventListener('click', requestCancel);

  // The panel and the failure box start hidden; onStart()/appendFailure() reveal them.
  panel = h(
    'div',
    { className: 'sa-fixed sa-panel sa-hidden', id: 'sa-panel' },
    barWrap,
    countersSpan,
    cancelBtn,
  );
  failsList = h('ul');
  const failsTitle = h('h4', {}, 'Failed items (after retries):');
  failsBox = h('div', { className: 'sa-fixed sa-fails sa-hidden' }, failsTitle, failsList);

  for (const node of [startBtn, panel, failsBox]) {
    document.body.appendChild(node);
  }
}
/**
 * Build one "label: value" pill for the counters row.
 *
 * @param {string} label - Caption; a colon is appended.
 * @param {*} value - Shown after the caption, converted with String().
 * @returns {Element} A span.sa-chip containing a <b> caption and a <small> value.
 */
function chip(label, value) {
  const caption = h('b', {}, `${label}:`);
  const reading = h('small', {}, String(value));
  return h('span', { className: 'sa-chip' }, caption, reading);
}
/**
 * Refresh the progress bar width and rebuild every counter chip from `counts`.
 *
 * Progress counts both successful and permanently failed products as done. The ETA
 * is a rough estimate: remaining products multiplied by
 * (GLOBAL_GAP_MS + globalBackoffMs + BASE_DELAY_MS); it shows '—' when nothing is left.
 */
function updateCounters() {
  const done = counts.processed + counts.permanentErrors;
  const pct = counts.total ? Math.round((done / counts.total) * 100) : 0;
  bar.style.width = `${pct}%`;

  const left = Math.max(counts.total - done, 0);
  const perItemMs = GLOBAL_GAP_MS + globalBackoffMs + BASE_DELAY_MS;
  const eta = left === 0 ? '—' : msToHuman(left * perItemMs);

  const readings = [
    ['Processed', `${counts.processed}/${counts.total}`],
    ['Errors', `${counts.permanentErrors}`],
    ['429', `${counts.http429}`],
    ['403', `${counts.http403}`],
    ['404', `${counts.http404}`],
    ['500', `${counts.http500}`],
    ['ETA', eta],
  ];

  // Throw away the old chips and rebuild them all (simple & robust).
  countersSpan.innerHTML = '';
  for (const [label, value] of readings) {
    countersSpan.appendChild(chip(label, value));
  }
}
/**
 * Format a duration for the ETA chip.
 *
 * @param {number} ms - Duration in milliseconds; rounded to whole seconds.
 * @returns {string} "<s>s" below one minute, otherwise "<m>m <s>s" (minutes are not
 *   folded into hours).
 */
function msToHuman(ms) {
  const totalSeconds = Math.round(ms / 1000);
  const underAMinute = totalSeconds < 60;
  if (!underAMinute) {
    const minutes = Math.floor(totalSeconds / 60);
    const seconds = totalSeconds % 60;
    return `${minutes}m ${seconds}s`;
  }
  return `${totalSeconds}s`;
}
/**
 * Reveal the failure box and add one line describing a permanently failed product.
 *
 * @param {{id: *, status: *, message?: string, comp_name?: string}} item - Failure
 *   record. The line reads "#id — comp_name — status — message"; comp_name and
 *   message are left out when falsy.
 */
function appendFailure(item) {
  failsBox.classList.remove('sa-hidden');
  const { id, status, message, comp_name } = item;
  const segments = [`#${id}`];
  if (comp_name) segments.push(`${comp_name}`);
  segments.push(`${status}`);
  if (message) segments.push(`${message}`);
  failsList.appendChild(h('li', {}, segments.join(' — ')));
}
/**
 * Show or hide the progress panel by removing/adding the 'sa-hidden' class.
 *
 * @param {*} show - Truthy to reveal the panel, falsy to hide it.
 */
function showPanel(show) {
  if (show) {
    panel.classList.remove('sa-hidden');
  } else {
    panel.classList.add('sa-hidden');
  }
}
// ----------------------------
// Networking helpers (with retries and backoff)
// ----------------------------
/**
 * GET a URL with same-origin credentials and parse the body as JSON, going through
 * requestWithRetries() for retry/backoff handling.
 *
 * @param {string} url - Absolute URL to request.
 * @param {string} descForLogs - Label included in error messages.
 * @returns {Promise<*>} The parsed JSON body.
 */
async function fetchJSON(url, descForLogs) {
  const send = () => fetch(url, { credentials: 'same-origin' });
  const readJson = (resp) => resp.json();
  return requestWithRetries(send, descForLogs, readJson);
}
/**
 * GET a URL with same-origin credentials and read the body as text, going through
 * requestWithRetries() for retry/backoff handling.
 *
 * @param {string} url - Absolute URL to request.
 * @param {string} descForLogs - Label included in error messages.
 * @returns {Promise<string>} The response body.
 */
async function fetchText(url, descForLogs) {
  const send = () => fetch(url, { credentials: 'same-origin' });
  const readText = (resp) => resp.text();
  return requestWithRetries(send, descForLogs, readText);
}
/**
 * Run a request with retries, exponential per-request delay and a shared global backoff.
 *
 * The request is attempted up to MAX_RETRIES + 1 times. Any non-OK response, network
 * error or parse error triggers a retry after BASE_DELAY_MS * 2^attempt milliseconds.
 * NOTE(review): every non-OK status is retried, including 403 and 404, and each
 * attempt increments the matching counter.
 *
 * @param {function(): Promise<Response>} doFetch - Issues the request; called once per attempt.
 * @param {string} label - Description used in the final error message.
 * @param {function(Response): *} parse - Reads the body of an OK response; its result is returned.
 * @returns {Promise<*>} Whatever `parse` produced.
 * @throws {Error} 'Cancelled' when a cancel was requested before an attempt; otherwise the
 *   last HTTP, network or parse error once the attempts are exhausted.
 */
async function requestWithRetries(doFetch, label, parse) {
let lastErr;
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
// Thrown outside the try block so a cancellation is never retried.
if (cancelRequested) throw new Error('Cancelled');
try {
// Honour the shared pacing delay accumulated from earlier 429 responses.
if (globalBackoffMs) await sleep(globalBackoffMs);
const resp = await doFetch();
if (!resp.ok) {
// status-specific handling
updateStatusCounters(resp.status);
if (resp.status === 429) {
counts.http429++;
// Double the global backoff (starting from BASE_DELAY_MS), capped at MAX_GLOBAL_BACKOFF_MS.
globalBackoffMs = Math.min((globalBackoffMs || BASE_DELAY_MS) * 2, MAX_GLOBAL_BACKOFF_MS);
}
if (attempt < MAX_RETRIES) {
await sleep(BASE_DELAY_MS * Math.pow(2, attempt));
continue; // retry
}
// permanent failure
// The message carries the status, the label and up to 200 characters of the body.
// It is thrown inside the try, so the catch below sees it and rethrows it on the last attempt.
const text = await safeReadText(resp);
throw new Error(`HTTP ${resp.status} on ${label || ''}${text ? ` — ${text.slice(0, 200)}` : ''}`);
}
// success → decay global backoff slowly
if (globalBackoffMs) globalBackoffMs = Math.max(0, Math.floor(globalBackoffMs * 0.6));
// A failure while parsing lands in the catch below and re-runs the whole request.
const data = await parse(resp);
return data;
} catch (e) {
lastErr = e;
if (attempt < MAX_RETRIES) {
await sleep(BASE_DELAY_MS * Math.pow(2, attempt));
continue;
}
throw e;
}
}
// Defensive fallback: the final loop iteration always returns or throws, so this
// line appears to be unreachable.
throw lastErr || new Error('Unknown error');
}
/**
 * Read a response body as text without ever throwing.
 *
 * @param {Response} resp - Response whose body should be read.
 * @returns {Promise<string>} The body text, or '' when reading fails for any reason.
 */
async function safeReadText(resp) {
  let body = '';
  try {
    body = await resp.text();
  } catch {
    body = '';
  }
  return body;
}
/**
 * Bump the per-status tally in `counts` for a non-OK response.
 * Only 403, 404 and 500 are tracked here; every other status is ignored
 * (429 is counted by the caller).
 *
 * @param {number} status - HTTP status code of the response.
 */
function updateStatusCounters(status) {
  switch (status) {
    case 403:
      counts.http403++;
      break;
    case 404:
      counts.http404++;
      break;
    case 500:
      counts.http500++;
      break;
    default:
      break;
  }
}
/**
 * Promise-based delay.
 * @param {number} ms - Milliseconds to wait.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
// ----------------------------
// Data extraction per product
// ----------------------------
/**
 * Fetch the product grid and extract the ids to scrape.
 *
 * An id is kept only when it is a finite number or a non-blank numeric string.
 * Missing, null, empty or non-numeric ids (and null rows) are dropped so they never
 * turn into requests such as `/items/null/`.
 *
 * @returns {Promise<{ids: Array<number|string>, total: number}>} The usable ids in
 *   grid order and their count. Both are empty/0 when the response has no `rows` array.
 * @throws {Error} When the grid request fails after all retries.
 */
async function getProductIds() {
  const url = ORIGIN + ENDPOINTS.gridIds;
  const json = await fetchJSON(url, 'grid-ids');
  const rows = Array.isArray(json?.rows) ? json.rows : [];

  // Number(null) and Number('') are both 0, so a bare Number.isFinite(Number(v))
  // check would let null/blank ids through; test the raw type first.
  const isUsableId = (v) => {
    if (typeof v === 'number') return Number.isFinite(v);
    if (typeof v === 'string') return v.trim() !== '' && Number.isFinite(Number(v));
    return false;
  };

  const ids = rows.map((r) => r?.id).filter(isUsableId);
  return { ids, total: ids.length };
}
/**
 * Pull the product fields out of an item page by reading the `.value` of known
 * form elements.
 *
 * @param {string} html - Markup of the item page.
 * @returns {{product_name: string, product_type: string, product_type_text: string,
 *   enabled: string, family_type: string, pcode: string}} Trimmed field values; a
 *   field is '' when its element is missing or has no value. product_type_text is
 *   'Compra' for type '2', 'Venda' for type '1' and 'Producao/OUTRO' otherwise.
 */
function parseItemHtml(html) {
  const doc = new DOMParser().parseFromString(html, 'text/html');

  // Value of the first element matching the selector, as a trimmed string.
  const readField = (selector) => {
    const field = doc.querySelector(selector);
    return field ? (field.value ?? '').toString().trim() : '';
  };

  const typeLabels = new Map([
    ['2', 'Compra'],
    ['1', 'Venda'],
  ]);

  const product_name = readField('#id_description');
  const product_type = readField('#id_type'); // numeric text
  return {
    product_name,
    product_type,
    product_type_text: typeLabels.get(product_type) ?? 'Producao/OUTRO',
    enabled: readField('#id_status'),
    family_type: readField('#id_family'),
    pcode: readField('#id_pcode'),
  };
}
// ---- NEW: parallelized per-product fetch ----
/**
 * Fetch everything for one product with its sub-requests running concurrently.
 *
 * The item page, KPIs and composition are required: if any of them fails the whole
 * product fails. Cardex is only kept for purchase products (type '2' / 'Compra');
 * when its prefetch failed or was disabled, it is requested again and that second
 * failure does fail the product. With SETTINGS.PARALLEL_PER_ITEM off, the work is
 * delegated to fetchPerProductSequential().
 *
 * @param {number|string} id - Product id.
 * @returns {Promise<{flat: Object, comp_name: (string|undefined)}>} The flattened
 *   CSV row and the first composition row's name, if any.
 * @throws {Error} When a required request fails after its retries.
 */
async function fetchPerProduct(id) {
  if (!SETTINGS.PARALLEL_PER_ITEM) {
    // Fallback to sequential (old behavior)
    return await fetchPerProductSequential(id);
  }

  // Start every request before awaiting anything so they overlap.
  const coreRequests = [
    fetchText(ORIGIN + ENDPOINTS.itemHtml(id), `item-html#${id}`),
    fetchJSON(ORIGIN + ENDPOINTS.itemKpis(id), `kpis#${id}`),
    fetchJSON(ORIGIN + ENDPOINTS.itemComposition(id), `composition#${id}`),
  ];
  let cardexPrefetch;
  if (SETTINGS.PREFETCH_CARDEX) {
    // A prefetch failure is turned into a marker object so it can never surface as
    // an unhandled rejection for products that end up not needing cardex.
    cardexPrefetch = fetchJSON(ORIGIN + ENDPOINTS.itemCardex(id), `cardex(prefetch)#${id}`)
      .catch((e) => ({ __failed: true, __error: e }));
  } else {
    cardexPrefetch = Promise.resolve({ __skipped: true });
  }

  // The core trio must all succeed.
  const [html, kpis, composition] = await allOrThrow(coreRequests, id);
  const out = { id, ...parseItemHtml(html), kpis, composition };

  const firstRow = composition?.rows?.[0];
  const compNameForFailures = firstRow?.comp_name || firstRow?.compName || undefined;

  // Cardex: if type==2 (Compra)
  const isPurchase =
    String(out.product_type).trim() === '2' ||
    String(out.product_type_text).toLowerCase() === 'compra';
  if (isPurchase) {
    let cardex = await cardexPrefetch; // could be data, {__failed}, or {__skipped}
    const needsRefetch = cardex && (cardex.__failed || cardex.__skipped);
    if (needsRefetch) {
      // Required now, so fetch it properly (with retries); a failure here propagates.
      cardex = await fetchJSON(ORIGIN + ENDPOINTS.itemCardex(id), `cardex#${id}`);
    }
    out.cardex = cardex && !cardex.__failed ? cardex : undefined;
  }

  return { flat: flattenForCsv(out), comp_name: compNameForFailures };
}
/**
 * Wait for every promise to settle, then either return all values or fail once
 * with a combined message.
 *
 * @param {Array<*>} promises - Promises (or plain values) to wait for.
 * @param {number|string} id - Product id, used only in the error message.
 * @returns {Promise<Array<*>>} Fulfilled values in input order.
 * @throws {Error} "Parallel fetch failed for #<id>: <msg> | <msg>…" when at least
 *   one input rejected; each part is the reason's message, or String(reason) when
 *   it has none.
 */
async function allOrThrow(promises, id) {
  const outcomes = await Promise.allSettled(promises);
  const values = [];
  const problems = [];
  for (const outcome of outcomes) {
    if (outcome.status === 'rejected') {
      const { reason } = outcome;
      problems.push(reason?.message || String(reason));
    } else {
      values.push(outcome.value);
    }
  }
  if (problems.length) {
    // One aggregated error routes the product into the caller's failure handling.
    throw new Error(`Parallel fetch failed for #${id}: ${problems.join(' | ')}`);
  }
  return values;
}
// Legacy sequential per-item path (kept for completeness / optional use)
/**
 * Fetch everything for one product, one request after another: item page, KPIs,
 * composition and — only for purchase products (type '2' / 'Compra') — cardex.
 * Any failing request fails the whole product.
 *
 * @param {number|string} id - Product id.
 * @returns {Promise<{flat: Object, comp_name: (string|undefined)}>} The flattened
 *   CSV row and the first composition row's name, if any.
 * @throws {Error} When any request fails after its retries.
 */
async function fetchPerProductSequential(id) {
  const html = await fetchText(ORIGIN + ENDPOINTS.itemHtml(id), `item-html#${id}`);
  const parsed = parseItemHtml(html);
  const kpis = await fetchJSON(ORIGIN + ENDPOINTS.itemKpis(id), `kpis#${id}`);
  const composition = await fetchJSON(ORIGIN + ENDPOINTS.itemComposition(id), `composition#${id}`);
  const out = { id, ...parsed, kpis, composition };

  const firstRow = composition?.rows?.[0];
  const compNameForFailures = firstRow?.comp_name || firstRow?.compName || undefined;

  const isPurchase =
    String(out.product_type).trim() === '2' ||
    String(out.product_type_text).toLowerCase() === 'compra';
  if (isPurchase) {
    out.cardex = await fetchJSON(ORIGIN + ENDPOINTS.itemCardex(id), `cardex#${id}`);
  }

  return { flat: flattenForCsv(out), comp_name: compNameForFailures };
}
// ----------------------------
// Flatten → CSV helpers
// ----------------------------
/**
 * Recursively flatten a nested value into a single-level object of strings.
 *
 * Object keys are joined with '.', array positions are written as '[i]'
 * (e.g. `composition.rows[0].comp_name`). null/undefined values and empty
 * containers contribute no key; every other leaf is stored as String(value).
 *
 * @param {*} obj - Value to flatten.
 * @param {string} [prefix] - Key path accumulated so far.
 * @param {Object} [acc] - Object that receives the flattened entries.
 * @returns {Object} `acc`, with one string entry per leaf.
 */
function flattenForCsv(obj, prefix = '', acc = {}) {
  if (obj == null) return acc;

  // Leaf: anything that is not an object or array.
  if (typeof obj !== 'object') {
    acc[prefix] = String(obj);
    return acc;
  }

  if (Array.isArray(obj)) {
    for (const [index, item] of obj.entries()) {
      flattenForCsv(item, `${prefix}[${index}]`, acc);
    }
    return acc;
  }

  for (const name of Object.keys(obj)) {
    const path = prefix ? `${prefix}.${name}` : name;
    flattenForCsv(obj[name], path, acc);
  }
  return acc;
}
/**
 * Serialize flattened row objects into CSV text.
 *
 * The header row is the union of every row's own keys, in first-seen order. A row
 * that lacks a column gets an empty field. A field is wrapped in double quotes (with
 * embedded quotes doubled) when it contains a double quote, a comma, a line feed or
 * a carriage return — a bare "\r" would otherwise be read as a record break.
 *
 * @param {Array<Object>} rows - Flattened rows (see flattenForCsv).
 * @returns {string} CSV text with "\n" between records and no trailing newline;
 *   '' when there are no columns at all.
 */
function toCsv(rows) {
  // Collect union of keys
  const headerSet = new Set();
  for (const row of rows) {
    for (const key of Object.keys(row)) headerSet.add(key);
  }
  const headers = Array.from(headerSet);

  // Own-property check: a column named e.g. "toString" must not pick up the
  // inherited method from rows that do not define it.
  const ownValue = (row, key) =>
    (Object.prototype.hasOwnProperty.call(row, key) ? row[key] : '');

  const escape = (val) => {
    if (val == null) return '';
    const s = String(val);
    return /[",\r\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
  };

  const lines = [headers.map(escape).join(',')];
  for (const row of rows) {
    lines.push(headers.map((key) => escape(ownValue(row, key))).join(','));
  }
  return lines.join('\n');
}
/**
 * Offer CSV text to the user as a file download.
 * Wraps the text in a Blob, points a temporary <a download> at its object URL,
 * clicks it, then removes the link and revokes the URL.
 *
 * @param {string} filename - Suggested file name.
 * @param {string} csv - CSV content.
 */
function downloadCsv(filename, csv) {
  const payload = new Blob([csv], { type: 'text/csv;charset=utf-8;' });
  const objectUrl = URL.createObjectURL(payload);
  const link = Object.assign(document.createElement('a'), {
    href: objectUrl,
    download: filename,
  });
  // The link has to be in the document while it is clicked.
  document.body.appendChild(link);
  link.click();
  link.remove();
  URL.revokeObjectURL(objectUrl);
}
// ----------------------------
// Orchestration
// ----------------------------
/**
 * Click handler for the start button: runs one complete scrape.
 *
 * Resets all state, fetches the product ids, processes the products one at a time
 * (recording permanent failures as it goes) and finally downloads the CSV.
 *
 * Cancellation: requestCancel() sets `cancelRequested` and exports the partial CSV
 * itself. This function therefore
 *   - does not export again once a cancel was requested (no duplicate download), and
 *   - does not record the request that the cancel aborted as a product failure.
 *
 * Errors outside the per-product handling (e.g. the id request failing) are logged
 * and shown with alert(). The UI is always restored in `finally`.
 *
 * @returns {Promise<void>}
 */
async function onStart() {
  if (isRunning) return;
  isRunning = true;
  cancelRequested = false;
  startBtn.disabled = true;
  showPanel(true);
  failsBox.classList.add('sa-hidden');
  failsList.innerHTML = '';
  Object.assign(counts, { total: 0, processed: 0, permanentErrors: 0, http429: 0, http403: 0, http404: 0, http500: 0 });
  results.length = 0;
  failures.length = 0;
  globalBackoffMs = 0;
  updateCounters();
  try {
    // 1) Gather IDs
    const { ids, total } = await getProductIds();
    counts.total = total;
    updateCounters();

    // 2) Iterate sequentially over products; do per-item subrequests in parallel
    for (const id of ids) {
      if (cancelRequested) break;
      try {
        const { flat } = await fetchPerProduct(id);
        results.push(flat);
        counts.processed++;
      } catch (e) {
        // An error that surfaces after a cancel was requested is the aborted
        // in-flight request, not a real product failure: stop without recording it.
        if (cancelRequested) break;

        counts.permanentErrors++;
        const status = extractStatusFromMessage(e?.message);
        const failure = { id, status: status || 'error', message: e?.message || String(e) };
        // Best-effort enrich with comp_name
        try {
          const comp = await fetchJSON(ORIGIN + ENDPOINTS.itemComposition(id), `composition-after-fail#${id}`);
          failure.comp_name = comp?.rows?.[0]?.comp_name || comp?.rows?.[0]?.compName || undefined;
        } catch { /* ignore: the name is optional decoration for the failure list */ }
        failures.push(failure);
        appendFailure(failure);
      }
      updateCounters();
      if (!cancelRequested) await sleep(GLOBAL_GAP_MS);
    }

    // 3) Export — skipped after a cancel, because requestCancel() already
    // downloaded the partial file.
    if (!cancelRequested) {
      const csv = toCsv(results);
      const ts = new Date().toISOString().replace(/[:T]/g, '-').slice(0, 19);
      const fname = `storesace-products-${ts}.csv`;
      downloadCsv(fname, csv);
    }
  } catch (e) {
    console.error('[StoresAce Scraper] Aborted with error:', e);
    alert('Scraper aborted: ' + (e?.message || String(e)));
  } finally {
    startBtn.disabled = false;
    showPanel(false);
    isRunning = false;
  }
}
/**
 * Click handler for "Cancel & Export".
 * Ignored when no scrape is running. Otherwise raises the cancel flag and right
 * away downloads whatever rows have been collected, under a "PARTIAL" file name.
 * An export error is logged and shown with alert() instead of being thrown.
 */
function requestCancel() {
  if (!isRunning) return;
  cancelRequested = true;

  // Immediately export what we have so far
  const exportPartial = () => {
    const csv = toCsv(results);
    const stamp = new Date().toISOString().replace(/[:T]/g, '-').slice(0, 19);
    downloadCsv(`storesace-products-PARTIAL-${stamp}.csv`, csv);
  };

  try {
    exportPartial();
  } catch (e) {
    console.error('[StoresAce Scraper] Cancel/export failed:', e);
    alert(`Cancel/export failed: ${e?.message || String(e)}`);
  }
}
/**
 * Best-effort extraction of an HTTP status code from an error message.
 *
 * An explicit "HTTP <3 digits>" marker wins. Otherwise the message is searched for a
 * standalone 429, 403, 404 or 500 (checked in that order). "Standalone" means the
 * number is not preceded by '#' or a word character and not followed by a word
 * character, so digits inside a product id such as "#14045" are not mistaken for a
 * status code.
 *
 * @param {string} [msg] - Error message to inspect.
 * @returns {string} The status code as a string, or '' when none is found.
 */
function extractStatusFromMessage(msg) {
  if (!msg) return '';
  const text = String(msg);

  const explicit = /HTTP\s(\d{3})/.exec(text);
  if (explicit) return explicit[1];

  const fallbacks = [
    ['429', /(?<![#\w])429(?!\w)/],
    ['403', /(?<![#\w])403(?!\w)/],
    ['404', /(?<![#\w])404(?!\w)/],
    ['500', /(?<![#\w])500(?!\w)/],
  ];
  for (const [code, pattern] of fallbacks) {
    if (pattern.test(text)) return code;
  }
  return '';
}
// ----------------------------
// Boot
// ----------------------------
/**
 * Entry point: mount the scraper UI, but only when the page origin contains the
 * expected StoresAce host ('COMPANYID.storesace.com').
 */
function boot() {
  const onTargetHost = location.origin.includes('COMPANYID.storesace.com');
  if (onTargetHost) {
    mountUI();
  }
}
// Run boot() once the document has been parsed: on the next tick when parsing is
// already done, otherwise on DOMContentLoaded.
if (document.readyState !== 'loading') {
  setTimeout(boot, 0);
} else {
  document.addEventListener('DOMContentLoaded', boot);
}
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment