Created
October 24, 2025 15:03
-
-
Save Sdy603/3c7bfe9322654d11dc6694e0a0427fa4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // fetch_github_to_csv.js | |
| // Fetch GitHub orgs, repos, PRs, and reviews -> write 4 CSVs. | |
| // | |
| // Usage: | |
| // mac/linux: | |
| // GITHUB_TOKEN=ghp_xxx node fetch_github_to_csv.js | |
| // windows (PowerShell): | |
| // $env:GITHUB_TOKEN="ghp_xxx"; node fetch_github_to_csv.js | |
| // | |
| // Optional env vars: | |
| // GITHUB_BASE_URL=https://ghe.example.com/api/v3 | |
| // ORGS=org1,org2 | |
| // | |
| // Outputs: | |
| // Github_Organizations_trim.csv | |
| // Github_Repositories_trim.csv | |
| // Github_pulls_trim.csv | |
| // Github_Reviews_trim.csv | |
| const fs = require("fs"); | |
| const https = require("https"); | |
| const TOKEN = process.env.GITHUB_TOKEN; | |
| if (!TOKEN) { | |
| console.error("Error: GITHUB_TOKEN not set."); | |
| process.exit(1); | |
| } | |
| const BASE = (process.env.GITHUB_BASE_URL || "https://api.github.com").replace(/\/+$/, ""); | |
| const ORGS = (process.env.ORGS || "").split(",").map(o => o.trim()).filter(Boolean); | |
| // Basic GET with pagination | |
| function ghGet(url) { | |
| return new Promise((resolve, reject) => { | |
| const options = { | |
| headers: { | |
| "User-Agent": "dx-github-csv", | |
| "Authorization": `Bearer ${TOKEN}`, | |
| "Accept": "application/vnd.github+json" | |
| } | |
| }; | |
| https.get(url, options, (res) => { | |
| let data = ""; | |
| res.on("data", chunk => data += chunk); | |
| res.on("end", () => { | |
| if (res.statusCode !== 200) { | |
| return reject(new Error(`HTTP ${res.statusCode}: ${data}`)); | |
| } | |
| let parsed; | |
| try { parsed = JSON.parse(data); } catch (e) { return reject(e); } | |
| const link = res.headers.link || ""; | |
| const nextMatch = link.match(/<([^>]+)>;\s*rel="next"/); | |
| resolve({ data: parsed, next: nextMatch ? nextMatch[1] : null }); | |
| }); | |
| }).on("error", reject); | |
| }); | |
| } | |
| async function ghGetAll(url) { | |
| const all = []; | |
| let next = url; | |
| while (next) { | |
| const { data, next: nextLink } = await ghGet(next); | |
| if (Array.isArray(data)) all.push(...data); | |
| else all.push(data); | |
| next = nextLink; | |
| } | |
| return all; | |
| } | |
| function csvEscape(v) { | |
| if (v == null) return ""; | |
| const s = String(v); | |
| if (/[",\n]/.test(s)) return `"${s.replace(/"/g, '""')}"`; | |
| return s; | |
| } | |
| function writeCsv(file, headers, rows) { | |
| const lines = [headers.join(",")]; | |
| for (const row of rows) { | |
| lines.push(headers.map(h => csvEscape(row[h])).join(",")); | |
| } | |
| fs.writeFileSync(file, lines.join("\n")); | |
| console.log(`Wrote ${rows.length} rows -> ${file}`); | |
| } | |
| async function getOrgs() { | |
| if (ORGS.length) { | |
| const out = []; | |
| for (const login of ORGS) { | |
| const { data } = await ghGet(`${BASE}/orgs/${login}`); | |
| out.push(data); | |
| } | |
| return out; | |
| } | |
| return await ghGetAll(`${BASE}/user/orgs?per_page=100`); | |
| } | |
| async function getRepos(org) { | |
| return await ghGetAll(`${BASE}/orgs/${org}/repos?per_page=100&type=all`); | |
| } | |
| async function getPulls(owner, repo) { | |
| return await ghGetAll(`${BASE}/repos/${owner}/${repo}/pulls?state=all&per_page=100`); | |
| } | |
| async function getPullDetails(owner, repo, number) { | |
| const { data } = await ghGet(`${BASE}/repos/${owner}/${repo}/pulls/${number}`); | |
| return data; | |
| } | |
| async function getReviews(owner, repo, number) { | |
| return await ghGetAll(`${BASE}/repos/${owner}/${repo}/pulls/${number}/reviews?per_page=100`); | |
| } | |
| async function getReviewComments(owner, repo, number, reviewId) { | |
| return await ghGetAll(`${BASE}/repos/${owner}/${repo}/pulls/${number}/reviews/${reviewId}/comments?per_page=100`); | |
| } | |
| function mapOrg(org) { | |
| return { | |
| source_id: org.id || "", | |
| installation_id: "", | |
| login: org.login || "", | |
| source_url: org.html_url || org.url || "" | |
| }; | |
| } | |
| function mapRepo(repo) { | |
| return { | |
| source_id: repo.id || "", | |
| organization_id: repo.owner?.id || "", | |
| name: repo.name || "", | |
| private: repo.private, | |
| archived: repo.archived, | |
| default_branch: repo.default_branch || "", | |
| source_url: repo.html_url || repo.url || "" | |
| }; | |
| } | |
| function mapPull(pr, repoId, details) { | |
| return { | |
| source_id: pr.id || "", | |
| repository_id: repoId, | |
| user_login: pr.user?.login || "", | |
| number: pr.number, | |
| title: pr.title || "", | |
| head_ref: pr.head?.ref || "", | |
| base_ref: pr.base?.ref || "", | |
| created: pr.created_at || "", | |
| updated: pr.updated_at || "", | |
| closed: pr.closed_at || "", | |
| merged: pr.merged_at || "", | |
| ready_for_review: pr.draft ? false : true, | |
| draft: pr.draft, | |
| merge_commit_sha: pr.merge_commit_sha || "", | |
| additions: details?.additions || "", | |
| deletions: details?.deletions || "", | |
| changed_files: details?.changed_files || "", | |
| commits: details?.commits || "", | |
| source_url: pr.html_url || pr.url || "" | |
| }; | |
| } | |
| function mapReview(rv, prId, repoId, commentCount, prUrl) { | |
| return { | |
| source_id: rv.id || "", | |
| pull_source_id: prId, | |
| user_login: rv.user?.login || "", | |
| repository_id: repoId, | |
| state: rv.state || "", | |
| submitted: rv.submitted_at || "", | |
| comment_count: commentCount || 0, | |
| source_url: prUrl ? `${prUrl}#pullrequestreview-${rv.id}` : "" | |
| }; | |
| } | |
| async function main() { | |
| console.log("Fetching from:", BASE); | |
| const orgs = await getOrgs(); | |
| const orgRows = []; | |
| const repoRows = []; | |
| const pullRows = []; | |
| const reviewRows = []; | |
| for (const org of orgs) { | |
| const login = org.login; | |
| orgRows.push(mapOrg(org)); | |
| const repos = await getRepos(login); | |
| for (const repo of repos) { | |
| repoRows.push(mapRepo(repo)); | |
| const owner = repo.owner?.login || login; | |
| const pulls = await getPulls(owner, repo.name); | |
| for (const pr of pulls) { | |
| let details = null; | |
| try { details = await getPullDetails(owner, repo.name, pr.number); } catch {} | |
| pullRows.push(mapPull(pr, repo.id, details)); | |
| let reviews = []; | |
| try { reviews = await getReviews(owner, repo.name, pr.number); } catch {} | |
| for (const rv of reviews) { | |
| let comments = []; | |
| try { comments = await getReviewComments(owner, repo.name, pr.number, rv.id); } catch {} | |
| reviewRows.push(mapReview(rv, pr.id, repo.id, comments.length, pr.html_url)); | |
| } | |
| } | |
| } | |
| } | |
| writeCsv("Github_Organizations_trim.csv", | |
| ["source_id","installation_id","login","source_url"], orgRows); | |
| writeCsv("Github_Repositories_trim.csv", | |
| ["source_id","organization_id","name","private","archived","default_branch","source_url"], repoRows); | |
| writeCsv("Github_pulls_trim.csv", | |
| ["source_id","repository_id","user_login","number","title","head_ref","base_ref","created","updated","closed","merged","ready_for_review","draft","merge_commit_sha","additions","deletions","changed_files","commits","source_url"], pullRows); | |
| writeCsv("Github_Reviews_trim.csv", | |
| ["source_id","pull_source_id","user_login","repository_id","state","submitted","comment_count","source_url"], reviewRows); | |
| console.log("All CSVs written successfully."); | |
| } | |
| main().catch(e => { | |
| console.error(e); | |
| process.exit(1); | |
| }); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment