Skip to content

Instantly share code, notes, and snippets.

@Sdy603
Created October 24, 2025 15:03
Show Gist options
  • Select an option

  • Save Sdy603/3c7bfe9322654d11dc6694e0a0427fa4 to your computer and use it in GitHub Desktop.

Select an option

Save Sdy603/3c7bfe9322654d11dc6694e0a0427fa4 to your computer and use it in GitHub Desktop.
// fetch_github_to_csv.js
// Fetch GitHub orgs, repos, PRs, and reviews -> write 4 CSVs.
//
// Usage:
// mac/linux:
// GITHUB_TOKEN=ghp_xxx node fetch_github_to_csv.js
// windows (PowerShell):
// $env:GITHUB_TOKEN="ghp_xxx"; node fetch_github_to_csv.js
//
// Optional env vars:
// GITHUB_BASE_URL=https://ghe.example.com/api/v3
// ORGS=org1,org2
//
// Outputs:
// Github_Organizations_trim.csv
// Github_Repositories_trim.csv
// Github_pulls_trim.csv
// Github_Reviews_trim.csv
const fs = require("fs");
const https = require("https");
const TOKEN = process.env.GITHUB_TOKEN;
if (!TOKEN) {
console.error("Error: GITHUB_TOKEN not set.");
process.exit(1);
}
const BASE = (process.env.GITHUB_BASE_URL || "https://api.github.com").replace(/\/+$/, "");
const ORGS = (process.env.ORGS || "").split(",").map(o => o.trim()).filter(Boolean);
// Basic GET with pagination
function ghGet(url) {
return new Promise((resolve, reject) => {
const options = {
headers: {
"User-Agent": "dx-github-csv",
"Authorization": `Bearer ${TOKEN}`,
"Accept": "application/vnd.github+json"
}
};
https.get(url, options, (res) => {
let data = "";
res.on("data", chunk => data += chunk);
res.on("end", () => {
if (res.statusCode !== 200) {
return reject(new Error(`HTTP ${res.statusCode}: ${data}`));
}
let parsed;
try { parsed = JSON.parse(data); } catch (e) { return reject(e); }
const link = res.headers.link || "";
const nextMatch = link.match(/<([^>]+)>;\s*rel="next"/);
resolve({ data: parsed, next: nextMatch ? nextMatch[1] : null });
});
}).on("error", reject);
});
}
async function ghGetAll(url) {
const all = [];
let next = url;
while (next) {
const { data, next: nextLink } = await ghGet(next);
if (Array.isArray(data)) all.push(...data);
else all.push(data);
next = nextLink;
}
return all;
}
function csvEscape(v) {
if (v == null) return "";
const s = String(v);
if (/[",\n]/.test(s)) return `"${s.replace(/"/g, '""')}"`;
return s;
}
function writeCsv(file, headers, rows) {
const lines = [headers.join(",")];
for (const row of rows) {
lines.push(headers.map(h => csvEscape(row[h])).join(","));
}
fs.writeFileSync(file, lines.join("\n"));
console.log(`Wrote ${rows.length} rows -> ${file}`);
}
async function getOrgs() {
if (ORGS.length) {
const out = [];
for (const login of ORGS) {
const { data } = await ghGet(`${BASE}/orgs/${login}`);
out.push(data);
}
return out;
}
return await ghGetAll(`${BASE}/user/orgs?per_page=100`);
}
async function getRepos(org) {
return await ghGetAll(`${BASE}/orgs/${org}/repos?per_page=100&type=all`);
}
async function getPulls(owner, repo) {
return await ghGetAll(`${BASE}/repos/${owner}/${repo}/pulls?state=all&per_page=100`);
}
async function getPullDetails(owner, repo, number) {
const { data } = await ghGet(`${BASE}/repos/${owner}/${repo}/pulls/${number}`);
return data;
}
async function getReviews(owner, repo, number) {
return await ghGetAll(`${BASE}/repos/${owner}/${repo}/pulls/${number}/reviews?per_page=100`);
}
async function getReviewComments(owner, repo, number, reviewId) {
return await ghGetAll(`${BASE}/repos/${owner}/${repo}/pulls/${number}/reviews/${reviewId}/comments?per_page=100`);
}
function mapOrg(org) {
return {
source_id: org.id || "",
installation_id: "",
login: org.login || "",
source_url: org.html_url || org.url || ""
};
}
function mapRepo(repo) {
return {
source_id: repo.id || "",
organization_id: repo.owner?.id || "",
name: repo.name || "",
private: repo.private,
archived: repo.archived,
default_branch: repo.default_branch || "",
source_url: repo.html_url || repo.url || ""
};
}
function mapPull(pr, repoId, details) {
return {
source_id: pr.id || "",
repository_id: repoId,
user_login: pr.user?.login || "",
number: pr.number,
title: pr.title || "",
head_ref: pr.head?.ref || "",
base_ref: pr.base?.ref || "",
created: pr.created_at || "",
updated: pr.updated_at || "",
closed: pr.closed_at || "",
merged: pr.merged_at || "",
ready_for_review: pr.draft ? false : true,
draft: pr.draft,
merge_commit_sha: pr.merge_commit_sha || "",
additions: details?.additions || "",
deletions: details?.deletions || "",
changed_files: details?.changed_files || "",
commits: details?.commits || "",
source_url: pr.html_url || pr.url || ""
};
}
function mapReview(rv, prId, repoId, commentCount, prUrl) {
return {
source_id: rv.id || "",
pull_source_id: prId,
user_login: rv.user?.login || "",
repository_id: repoId,
state: rv.state || "",
submitted: rv.submitted_at || "",
comment_count: commentCount || 0,
source_url: prUrl ? `${prUrl}#pullrequestreview-${rv.id}` : ""
};
}
async function main() {
console.log("Fetching from:", BASE);
const orgs = await getOrgs();
const orgRows = [];
const repoRows = [];
const pullRows = [];
const reviewRows = [];
for (const org of orgs) {
const login = org.login;
orgRows.push(mapOrg(org));
const repos = await getRepos(login);
for (const repo of repos) {
repoRows.push(mapRepo(repo));
const owner = repo.owner?.login || login;
const pulls = await getPulls(owner, repo.name);
for (const pr of pulls) {
let details = null;
try { details = await getPullDetails(owner, repo.name, pr.number); } catch {}
pullRows.push(mapPull(pr, repo.id, details));
let reviews = [];
try { reviews = await getReviews(owner, repo.name, pr.number); } catch {}
for (const rv of reviews) {
let comments = [];
try { comments = await getReviewComments(owner, repo.name, pr.number, rv.id); } catch {}
reviewRows.push(mapReview(rv, pr.id, repo.id, comments.length, pr.html_url));
}
}
}
}
writeCsv("Github_Organizations_trim.csv",
["source_id","installation_id","login","source_url"], orgRows);
writeCsv("Github_Repositories_trim.csv",
["source_id","organization_id","name","private","archived","default_branch","source_url"], repoRows);
writeCsv("Github_pulls_trim.csv",
["source_id","repository_id","user_login","number","title","head_ref","base_ref","created","updated","closed","merged","ready_for_review","draft","merge_commit_sha","additions","deletions","changed_files","commits","source_url"], pullRows);
writeCsv("Github_Reviews_trim.csv",
["source_id","pull_source_id","user_login","repository_id","state","submitted","comment_count","source_url"], reviewRows);
console.log("All CSVs written successfully.");
}
main().catch(e => {
console.error(e);
process.exit(1);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment