Skip to content

Instantly share code, notes, and snippets.

@Sdy603
Created September 18, 2025 17:38
Show Gist options
  • Select an option

  • Save Sdy603/b7f64b3d2adb8a549673054c44453746 to your computer and use it in GitHub Desktop.

Select an option

Save Sdy603/b7f64b3d2adb8a549673054c44453746 to your computer and use it in GitHub Desktop.
// enrich_duo_csv.js
// CommonJS version. Only env var required: DATABASE_URL
const fs = require("fs");
const { parse } = require("csv-parse");
const { stringify } = require("csv-stringify");
const { Pool } = require("pg");
const DATABASE_URL = process.env.DATABASE_URL;
if (!DATABASE_URL) {
console.error("Missing DATABASE_URL env var");
process.exit(1);
}
// Fixed filenames next to this script
const INPUT_CSV = "./gitlab_duo_daily_usages.csv";
const OUTPUT_CSV = "./gitlab_duo_daily_usages_enriched.csv";
const USER_COL = "username"; // hardcoded per your requirement
function normalizePostgresURL(url) {
return url.startsWith("postgres://") ? url.replace("postgres://", "postgresql://") : url;
}
// Use SSL like your snippet, applied to the pg Pool
const pool = new Pool({
connectionString: normalizePostgresURL(DATABASE_URL),
ssl: { rejectUnauthorized: false }
});
function ymd(dateLike) {
const d = new Date(dateLike);
if (Number.isNaN(d.getTime())) throw new Error(`Bad date value: ${dateLike}`);
const yyyy = d.getUTCFullYear();
const mm = String(d.getUTCMonth() + 1).padStart(2, "0");
const dd = String(d.getUTCDate()).padStart(2, "0");
return `${yyyy}-${mm}-${dd}`;
}
function eachDayUTC(startYmd, endYmd) {
const out = [];
const start = new Date(startYmd + "T00:00:00Z");
const end = new Date(endYmd + "T00:00:00Z");
for (let d = start; d <= end; d.setUTCDate(d.getUTCDate() + 1)) {
out.push(ymd(d));
}
return out;
}
function readCsv(path) {
return new Promise((resolve, reject) => {
const rows = [];
fs.createReadStream(path)
.pipe(parse({ columns: true, trim: true }))
.on("data", (r) => rows.push(r))
.on("end", () => resolve(rows))
.on("error", reject);
});
}
async function buildUsernameEmailMap(usernames) {
if (!usernames.size) return new Map();
const client = await pool.connect();
try {
const arr = Array.from(usernames).map((u) => u.toLowerCase());
const sql = `
SELECT LOWER(gitlab_username) AS gitlab_username, email
FROM dx_users
WHERE LOWER(gitlab_username) = ANY($1::text[])
`;
const res = await client.query(sql, [arr]);
const map = new Map();
for (const row of res.rows) {
if (row.gitlab_username && row.email) map.set(row.gitlab_username, row.email);
}
return map;
} finally {
client.release();
}
}
function coerceBool(v) {
if (typeof v === "boolean") return v;
if (v == null || v === "") return false;
const s = String(v).toLowerCase();
return s === "true" || s === "t" || s === "1" || s === "yes" || s === "y";
}
function coerceInt(v) {
if (v === "" || v == null) return 0;
const n = Number(v);
return Number.isFinite(n) ? n : 0;
}
async function main() {
if (!fs.existsSync(INPUT_CSV)) {
console.error(`Input CSV not found at ${INPUT_CSV}`);
process.exit(1);
}
const inputRows = await readCsv(INPUT_CSV);
if (inputRows.length === 0) {
console.error("CSV is empty");
process.exit(1);
}
// Collect usernames from USER_COL
const usernames = new Set();
for (const r of inputRows) {
const uname = (r[USER_COL] || "").toLowerCase().trim();
if (uname) usernames.add(uname);
}
// Lookup emails in dx_users
const unameToEmail = await buildUsernameEmailMap(usernames);
// Normalize rows and attach email if missing
const norm = [];
for (const r of inputRows) {
const username = (r[USER_COL] || "").toLowerCase().trim();
if (!username) continue;
const email = r.email && String(r.email).includes("@")
? r.email
: (unameToEmail.get(username) || null);
let extras = r.extras;
try {
if (typeof extras === "string" && extras.length > 0) JSON.parse(extras);
else extras = "{}";
} catch {
extras = "{}";
}
norm.push({
date: ymd(r.date),
email,
username,
is_active: coerceBool(r.is_active),
total_accepts_size: coerceInt(r.total_accepts_size),
total_rejects_size: coerceInt(r.total_rejects_size),
total_accepts_count: coerceInt(r.total_accepts_count),
total_rejects_count: coerceInt(r.total_rejects_count),
extras,
source: r.source || "codeSuggestionEvents"
});
}
// Fill missing days per user from earliest date to today
const today = ymd(new Date());
const byUser = new Map();
for (const r of norm) {
const u = byUser.get(r.username) || { email: r.email || null, dates: new Set(), minDate: today };
if (!u.email && r.email) u.email = r.email;
u.dates.add(r.date);
if (r.date < u.minDate) u.minDate = r.date;
byUser.set(r.username, u);
}
const out = [...norm];
for (const [uname, meta] of byUser.entries()) {
const days = eachDayUTC(meta.minDate, today);
for (const d of days) {
if (!meta.dates.has(d)) {
out.push({
date: d,
email: meta.email || null,
username: uname,
is_active: false,
total_accepts_size: 0,
total_rejects_size: 0,
total_accepts_count: 0,
total_rejects_count: 0,
extras: "{}",
source: "codeSuggestionEvents"
});
}
}
}
// Sort and write output
out.sort((a, b) => {
const ua = a.username.localeCompare(b.username);
if (ua !== 0) return ua;
return a.date.localeCompare(b.date);
});
const headers = [
"date",
"email",
"username",
"is_active",
"total_accepts_size",
"total_rejects_size",
"total_accepts_count",
"total_rejects_count",
"extras",
"source"
];
await new Promise((resolve, reject) => {
const stringifier = stringify({ header: true, columns: headers });
const outStream = fs.createWriteStream(OUTPUT_CSV);
stringifier.on("error", reject);
outStream.on("finish", resolve);
for (const r of out) {
stringifier.write([
r.date,
r.email || "",
r.username || "",
r.is_active ? "true" : "false",
r.total_accepts_size,
r.total_rejects_size,
r.total_accepts_count,
r.total_rejects_count,
r.extras,
r.source || ""
]);
}
stringifier.end();
stringifier.pipe(outStream);
});
console.log(`Wrote ${out.length} rows to ${OUTPUT_CSV}`);
}
main()
.catch((e) => { console.error(e); process.exit(1); })
.finally(async () => { await pool.end().catch(() => {}); });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment