Skip to content

Instantly share code, notes, and snippets.

@Sdy603
Last active September 22, 2025 16:18
Show Gist options
  • Select an option

  • Save Sdy603/f76bc7a299f07b808ea8fa07bb4fd42c to your computer and use it in GitHub Desktop.

Select an option

Save Sdy603/f76bc7a299f07b808ea8fa07bb4fd42c to your computer and use it in GitHub Desktop.
'use strict';
/**
* export_dx_users.js
*
* Streams a DX Postgres query to a CSV file.
* - Uses DATABASE_URL from environment (preferred)
* - Falls back to HARDCODED_DB_URL if set
* - Normalizes postgres:// → postgresql://
* - Streams results to avoid memory issues
* - Includes graceful shutdown
*/
const fs = require('fs');
const path = require('path');
const { pipeline } = require('stream');
const { promisify } = require('util');
const { Pool } = require('pg');
const QueryStream = require('pg-query-stream');
const { stringify } = require('csv-stringify');
require('dotenv').config();
// Promisified stream.pipeline so the stream chain below can be `await`ed
// and errors from any stage reject instead of being emitted silently.
const pipe = promisify(pipeline);
// Optional hardcoded connection string — fallback for local runs when
// DATABASE_URL is not exported. Leave empty to require the env var.
// Example: 'postgresql://user:pass@host:5432/dbname'
// NOTE(review): never commit real credentials here.
const HARDCODED_DB_URL = ''; // set if needed
// Canonicalize a legacy "postgres://" scheme to "postgresql://".
// Falsy values and URLs with any other scheme pass through unchanged.
const normalizePostgresURL = (url) => {
  if (!url || !url.startsWith('postgres://')) {
    return url;
  }
  return `postgresql://${url.slice('postgres://'.length)}`;
};
// Choose connection string: env var wins over the hardcoded fallback.
// `||` (not `??`) matches the original ternary: an empty-string
// DATABASE_URL is treated as "unset" and falls back to HARDCODED_DB_URL.
// Normalizing once after the pick removes the duplicated call.
const dbUrl = normalizePostgresURL(process.env.DATABASE_URL || HARDCODED_DB_URL);
if (!dbUrl) {
  console.error('❌ Missing DATABASE_URL environment variable or HARDCODED_DB_URL');
  process.exit(1);
}
// Database connection pool. application_name makes this job easy to
// identify in pg_stat_activity on the server side.
const pool = new Pool({
connectionString: dbUrl,
// NOTE(review): TLS certificate verification is disabled here. Common for
// hosted Postgres with self-signed certs, but it permits MITM — confirm
// this is acceptable for the target environment.
ssl: { rejectUnauthorized: false },
application_name: 'dx-export-users-profiles',
});
// SQL query: one row per non-deleted DX user that belongs to a team.
// - LEFT JOINs team metadata (name + flattened parent hierarchy)
// - aggregates the user's tags as "tag group: tag" pairs
// - aggregates DISTINCT AI tools from bespoke_ai_tool_daily_metrics
//   (matched on email, active rows only) plus a distinct-tool count
// start_date is rendered as a UTC YYYY-MM-DD string in SQL so the CSV
// gets a stable date format regardless of driver date handling.
const QUERY = `
SELECT
du.name AS user_name,
du.email,
dt.name AS team_name,
dt.flattened_parent AS team_hierarchy,
to_char(du.start_date::timestamp AT TIME ZONE 'UTC', 'YYYY-MM-DD') AS start_date,
string_agg(DISTINCT CONCAT(dtg.name, ': ', dt_tags.name), ', ') AS tags,
string_agg(DISTINCT ai_tools.tool, ', ') AS ai_tools_used,
COUNT(DISTINCT ai_tools.tool) AS total_ai_tools_count
FROM dx_users du
LEFT JOIN dx_teams dt ON du.team_id = dt.id
LEFT JOIN dx_user_tags dut ON du.id = dut.user_id
LEFT JOIN dx_tags dt_tags ON dut.tag_id = dt_tags.id
LEFT JOIN dx_tag_groups dtg ON dt_tags.tag_group_id = dtg.id
LEFT JOIN bespoke_ai_tool_daily_metrics ai_tools
ON du.email = ai_tools.email AND ai_tools.is_active = true
WHERE du.team_id IS NOT NULL
AND du.deleted_at IS NULL
GROUP BY
du.id,
du.name,
du.email,
dt.name,
dt.flattened_parent,
du.start_date
ORDER BY du.name;
`;
// CSV columns in order. Each entry must match a column alias produced by
// QUERY above — csv-stringify uses this list both to order the fields of
// every row and to emit the header line.
const CSV_COLUMNS = [
'user_name',
'email',
'team_name',
'team_hierarchy',
'start_date',
'tags',
'ai_tools_used',
'total_ai_tools_count',
];
/**
 * Streams the QUERY result set through csv-stringify into a CSV file,
 * logging progress every 1000 rows. Rows are never buffered in full —
 * pg-query-stream fetches them in batches of 1000.
 *
 * @param {string} [outputPath='dx_users_profiles.csv'] - Destination file
 *   path (resolved against the current working directory).
 * @returns {Promise<void>} Resolves once the file is fully written.
 * @throws Propagates any query/stream/filesystem error from the pipeline.
 */
async function exportToCsv(outputPath = 'dx_users_profiles.csv') {
  const client = await pool.connect();
  try {
    const queryStream = new QueryStream(QUERY, [], { batchSize: 1000 });
    const rowStream = client.query(queryStream);
    const csvStream = stringify({ header: true, columns: CSV_COLUMNS });
    const resolved = path.resolve(outputPath);
    const fileStream = fs.createWriteStream(resolved);
    let rowCount = 0;
    // Progress logging only; pipeline() below does the actual piping and
    // coexists with an extra 'data' listener.
    rowStream.on('data', () => {
      rowCount += 1;
      if (rowCount % 1000 === 0) {
        console.log(`Processed ${rowCount} rows...`);
      }
    });
    await pipe(rowStream, csvStream, fileStream);
    console.log(`✅ Export complete. Wrote ${rowCount} rows to ${path.basename(resolved)}`);
    client.release();
  } catch (err) {
    // Pass the error to release() so pg destroys the (possibly mid-query,
    // broken) connection instead of returning it to the pool.
    client.release(err);
    throw err;
  }
}
// Entry point: run the export, then always close the pool.
// On failure we set process.exitCode instead of calling process.exit(1):
// process.exit() terminates immediately and would skip the
// `finally { await pool.end() }` cleanup. Setting exitCode lets the
// process drain naturally and still report failure to the shell.
(async () => {
  try {
    await exportToCsv();
  } catch (err) {
    console.error('❌ Export failed:', err);
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment