Created
September 18, 2025 17:38
-
-
Save Sdy603/b7f64b3d2adb8a549673054c44453746 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // enrich_duo_csv.js | |
| // CommonJS version. Only env var required: DATABASE_URL | |
| const fs = require("fs"); | |
| const { parse } = require("csv-parse"); | |
| const { stringify } = require("csv-stringify"); | |
| const { Pool } = require("pg"); | |
| const DATABASE_URL = process.env.DATABASE_URL; | |
| if (!DATABASE_URL) { | |
| console.error("Missing DATABASE_URL env var"); | |
| process.exit(1); | |
| } | |
| // Fixed filenames next to this script | |
| const INPUT_CSV = "./gitlab_duo_daily_usages.csv"; | |
| const OUTPUT_CSV = "./gitlab_duo_daily_usages_enriched.csv"; | |
| const USER_COL = "username"; // hardcoded per your requirement | |
| function normalizePostgresURL(url) { | |
| return url.startsWith("postgres://") ? url.replace("postgres://", "postgresql://") : url; | |
| } | |
| // Use SSL like your snippet, applied to the pg Pool | |
| const pool = new Pool({ | |
| connectionString: normalizePostgresURL(DATABASE_URL), | |
| ssl: { rejectUnauthorized: false } | |
| }); | |
| function ymd(dateLike) { | |
| const d = new Date(dateLike); | |
| if (Number.isNaN(d.getTime())) throw new Error(`Bad date value: ${dateLike}`); | |
| const yyyy = d.getUTCFullYear(); | |
| const mm = String(d.getUTCMonth() + 1).padStart(2, "0"); | |
| const dd = String(d.getUTCDate()).padStart(2, "0"); | |
| return `${yyyy}-${mm}-${dd}`; | |
| } | |
| function eachDayUTC(startYmd, endYmd) { | |
| const out = []; | |
| const start = new Date(startYmd + "T00:00:00Z"); | |
| const end = new Date(endYmd + "T00:00:00Z"); | |
| for (let d = start; d <= end; d.setUTCDate(d.getUTCDate() + 1)) { | |
| out.push(ymd(d)); | |
| } | |
| return out; | |
| } | |
| function readCsv(path) { | |
| return new Promise((resolve, reject) => { | |
| const rows = []; | |
| fs.createReadStream(path) | |
| .pipe(parse({ columns: true, trim: true })) | |
| .on("data", (r) => rows.push(r)) | |
| .on("end", () => resolve(rows)) | |
| .on("error", reject); | |
| }); | |
| } | |
| async function buildUsernameEmailMap(usernames) { | |
| if (!usernames.size) return new Map(); | |
| const client = await pool.connect(); | |
| try { | |
| const arr = Array.from(usernames).map((u) => u.toLowerCase()); | |
| const sql = ` | |
| SELECT LOWER(gitlab_username) AS gitlab_username, email | |
| FROM dx_users | |
| WHERE LOWER(gitlab_username) = ANY($1::text[]) | |
| `; | |
| const res = await client.query(sql, [arr]); | |
| const map = new Map(); | |
| for (const row of res.rows) { | |
| if (row.gitlab_username && row.email) map.set(row.gitlab_username, row.email); | |
| } | |
| return map; | |
| } finally { | |
| client.release(); | |
| } | |
| } | |
| function coerceBool(v) { | |
| if (typeof v === "boolean") return v; | |
| if (v == null || v === "") return false; | |
| const s = String(v).toLowerCase(); | |
| return s === "true" || s === "t" || s === "1" || s === "yes" || s === "y"; | |
| } | |
| function coerceInt(v) { | |
| if (v === "" || v == null) return 0; | |
| const n = Number(v); | |
| return Number.isFinite(n) ? n : 0; | |
| } | |
| async function main() { | |
| if (!fs.existsSync(INPUT_CSV)) { | |
| console.error(`Input CSV not found at ${INPUT_CSV}`); | |
| process.exit(1); | |
| } | |
| const inputRows = await readCsv(INPUT_CSV); | |
| if (inputRows.length === 0) { | |
| console.error("CSV is empty"); | |
| process.exit(1); | |
| } | |
| // Collect usernames from USER_COL | |
| const usernames = new Set(); | |
| for (const r of inputRows) { | |
| const uname = (r[USER_COL] || "").toLowerCase().trim(); | |
| if (uname) usernames.add(uname); | |
| } | |
| // Lookup emails in dx_users | |
| const unameToEmail = await buildUsernameEmailMap(usernames); | |
| // Normalize rows and attach email if missing | |
| const norm = []; | |
| for (const r of inputRows) { | |
| const username = (r[USER_COL] || "").toLowerCase().trim(); | |
| if (!username) continue; | |
| const email = r.email && String(r.email).includes("@") | |
| ? r.email | |
| : (unameToEmail.get(username) || null); | |
| let extras = r.extras; | |
| try { | |
| if (typeof extras === "string" && extras.length > 0) JSON.parse(extras); | |
| else extras = "{}"; | |
| } catch { | |
| extras = "{}"; | |
| } | |
| norm.push({ | |
| date: ymd(r.date), | |
| email, | |
| username, | |
| is_active: coerceBool(r.is_active), | |
| total_accepts_size: coerceInt(r.total_accepts_size), | |
| total_rejects_size: coerceInt(r.total_rejects_size), | |
| total_accepts_count: coerceInt(r.total_accepts_count), | |
| total_rejects_count: coerceInt(r.total_rejects_count), | |
| extras, | |
| source: r.source || "codeSuggestionEvents" | |
| }); | |
| } | |
| // Fill missing days per user from earliest date to today | |
| const today = ymd(new Date()); | |
| const byUser = new Map(); | |
| for (const r of norm) { | |
| const u = byUser.get(r.username) || { email: r.email || null, dates: new Set(), minDate: today }; | |
| if (!u.email && r.email) u.email = r.email; | |
| u.dates.add(r.date); | |
| if (r.date < u.minDate) u.minDate = r.date; | |
| byUser.set(r.username, u); | |
| } | |
| const out = [...norm]; | |
| for (const [uname, meta] of byUser.entries()) { | |
| const days = eachDayUTC(meta.minDate, today); | |
| for (const d of days) { | |
| if (!meta.dates.has(d)) { | |
| out.push({ | |
| date: d, | |
| email: meta.email || null, | |
| username: uname, | |
| is_active: false, | |
| total_accepts_size: 0, | |
| total_rejects_size: 0, | |
| total_accepts_count: 0, | |
| total_rejects_count: 0, | |
| extras: "{}", | |
| source: "codeSuggestionEvents" | |
| }); | |
| } | |
| } | |
| } | |
| // Sort and write output | |
| out.sort((a, b) => { | |
| const ua = a.username.localeCompare(b.username); | |
| if (ua !== 0) return ua; | |
| return a.date.localeCompare(b.date); | |
| }); | |
| const headers = [ | |
| "date", | |
| "email", | |
| "username", | |
| "is_active", | |
| "total_accepts_size", | |
| "total_rejects_size", | |
| "total_accepts_count", | |
| "total_rejects_count", | |
| "extras", | |
| "source" | |
| ]; | |
| await new Promise((resolve, reject) => { | |
| const stringifier = stringify({ header: true, columns: headers }); | |
| const outStream = fs.createWriteStream(OUTPUT_CSV); | |
| stringifier.on("error", reject); | |
| outStream.on("finish", resolve); | |
| for (const r of out) { | |
| stringifier.write([ | |
| r.date, | |
| r.email || "", | |
| r.username || "", | |
| r.is_active ? "true" : "false", | |
| r.total_accepts_size, | |
| r.total_rejects_size, | |
| r.total_accepts_count, | |
| r.total_rejects_count, | |
| r.extras, | |
| r.source || "" | |
| ]); | |
| } | |
| stringifier.end(); | |
| stringifier.pipe(outStream); | |
| }); | |
| console.log(`Wrote ${out.length} rows to ${OUTPUT_CSV}`); | |
| } | |
| main() | |
| .catch((e) => { console.error(e); process.exit(1); }) | |
| .finally(async () => { await pool.end().catch(() => {}); }); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment