Skip to content

Instantly share code, notes, and snippets.

@tritao
Created January 25, 2026 11:41
Show Gist options
  • Select an option

  • Save tritao/f5bb6d25c5d6bedb8a44d46e23099d54 to your computer and use it in GitHub Desktop.

Select an option

Save tritao/f5bb6d25c5d6bedb8a44d46e23099d54 to your computer and use it in GitHub Desktop.
FreeCAD self-merge stats (last 2 years) via GitHub GraphQL
#!/usr/bin/env python3
import csv
import json
import subprocess
import sys
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Dict, Iterable, List, Optional, Tuple
# Repository (owner/name) whose merged pull requests are analysed.
OWNER = "FreeCAD"
REPO = "FreeCAD"

# GitHub logins for which authored / self-merged counts are reported.
MAINTAINERS = [
    "adrianinsaval",
    "hyarion",
    "kadet1090",
    "Roy-043",
    "WandererFan",
    "sliptonic",
    "chennes",
    "wwmayer",
    "yorikvanhavre",
]

# "Last 2 years" from current date (Jan 25, 2026) => since Jan 25, 2024 (UTC).
CUTOFF_ISO = "2024-01-25T00:00:00Z"
# GraphQL document sent for every page of results.
# It asks for MERGED pull requests of the repository identified by $owner/$name,
# 100 per page, ordered by updatedAt descending, starting after the cursor
# $after (omitted for the first page). For each PR it selects the number, URL,
# updatedAt / mergedAt timestamps and the logins of the author and the merger;
# pageInfo carries the hasNextPage flag and the endCursor for the next request.
QUERY = r"""
query($owner: String!, $name: String!, $after: String) {
repository(owner: $owner, name: $name) {
pullRequests(states: MERGED, first: 100, after: $after, orderBy: {field: UPDATED_AT, direction: DESC}) {
pageInfo { hasNextPage endCursor }
nodes {
number
url
updatedAt
mergedAt
author { login }
mergedBy { login }
}
}
}
}
"""
def parse_dt(s: str) -> datetime:
    """Parse an RFC 3339 / ISO 8601 timestamp into a timezone-aware UTC datetime.

    Args:
        s: Timestamp text such as ``2024-01-25T12:34:56Z`` or
            ``2024-01-25T14:34:56+02:00``. A trailing ``Z``/``z`` is read as
            UTC. A timestamp without any offset is assumed to be UTC.

    Returns:
        The same instant expressed as an aware ``datetime`` in UTC.

    Raises:
        ValueError: If ``datetime.fromisoformat`` cannot parse the text.
    """
    # GitHub returns RFC3339 like 2024-01-25T12:34:56Z; fromisoformat() only
    # accepts the "Z" suffix on newer Python versions, so spell the offset out.
    if s.endswith(("Z", "z")):
        s = s[:-1] + "+00:00"
    parsed = datetime.fromisoformat(s)
    if parsed.tzinfo is None:
        # astimezone() would interpret a naive value as machine-local time,
        # making the result depend on the host's timezone; pin it to UTC.
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)
def gh_graphql(query: str, variables: Dict[str, Any]) -> Dict[str, Any]:
    """Run a GraphQL query through the GitHub CLI and return the decoded reply.

    The request is executed as ``gh api graphql -f query=... -f name=value ...``.

    Args:
        query: GraphQL document text.
        variables: Query variables. Entries whose value is ``None`` are not
            passed on the command line; every other value is formatted with
            ``str()`` into a ``-f key=value`` argument.

    Returns:
        The JSON object printed by ``gh`` on stdout, decoded into a dict.

    Raises:
        SystemExit: With code 127 if the ``gh`` executable cannot be found,
            with ``gh``'s own exit code if it fails (its stderr is forwarded),
            or with code 1 if its stdout is not valid JSON.
    """
    cmd = ["gh", "api", "graphql", "-f", f"query={query}"]
    for key, value in variables.items():
        # Leaving a variable out lets the query see it as unset (e.g. the
        # pagination cursor on the first page).
        if value is not None:
            cmd.extend(["-f", f"{key}={value}"])

    try:
        proc = subprocess.run(cmd, capture_output=True, text=True)
    except FileNotFoundError:
        # Without this, a missing GitHub CLI surfaces as a bare traceback.
        sys.stderr.write("gh: executable not found; install the GitHub CLI and make sure it is on PATH\n")
        raise SystemExit(127) from None

    if proc.returncode != 0:
        sys.stderr.write(proc.stderr)
        raise SystemExit(proc.returncode)

    try:
        return json.loads(proc.stdout)
    except json.JSONDecodeError as exc:
        sys.stderr.write(f"gh: could not decode response as JSON: {exc}\n")
        raise SystemExit(1) from exc
@dataclass
class PR:
    """A merged pull request, reduced to the fields this script reports on."""

    # Pull request number within the repository.
    number: int
    # Web URL of the pull request.
    url: str
    # Timestamp from the API's updatedAt field.
    updated_at: datetime
    # Timestamp from the API's mergedAt field.
    merged_at: datetime
    # Login of the PR author.
    author: str
    # Login of the account that merged the PR.
    merged_by: str
def fetch_merged_prs_since(cutoff: datetime) -> List[PR]:
    """Collect every merged pull request whose merge time is at or after *cutoff*.

    Pages through the repository's merged PRs (as ordered by ``QUERY``) via
    ``gh_graphql`` and keeps the ones with ``mergedAt >= cutoff``.

    Args:
        cutoff: Earliest merge time to include; compared against the parsed
            ``mergedAt`` / ``updatedAt`` values.

    Returns:
        The matching pull requests, in the order the API returned them.
    """
    collected: List[PR] = []
    cursor: Optional[str] = None

    while True:
        variables = {"owner": OWNER, "name": REPO, "after": cursor}
        response = gh_graphql(QUERY, variables)
        connection = response["data"]["repository"]["pullRequests"]

        page = connection.get("nodes") or []
        if not page:
            return collected

        # After the loop this holds the updatedAt of the page's final entry.
        page_tail_updated: Optional[datetime] = None
        for node in page:
            updated = parse_dt(node["updatedAt"])
            merged = parse_dt(node["mergedAt"])
            page_tail_updated = updated

            # A missing author / merger object is recorded as an empty login.
            author_login = (node.get("author") or {}).get("login") or ""
            merger_login = (node.get("mergedBy") or {}).get("login") or ""

            if merged < cutoff:
                continue
            collected.append(
                PR(
                    number=int(node["number"]),
                    url=node["url"],
                    updated_at=updated,
                    merged_at=merged,
                    author=author_login,
                    merged_by=merger_login,
                )
            )

        # Stop once we're past the cutoff. updatedAt is always >= mergedAt, so
        # if the last updatedAt is older than cutoff, no further PRs can be
        # merged after cutoff.
        if page_tail_updated is not None and page_tail_updated < cutoff:
            return collected

        if not connection["pageInfo"]["hasNextPage"]:
            return collected
        cursor = connection["pageInfo"]["endCursor"]
def main() -> int:
    """Fetch merged PRs since the cutoff, write them to CSV and print self-merge stats.

    Steps:
        1. Parse ``CUTOFF_ISO`` and fetch all PRs merged at or after it.
        2. For each login in ``MAINTAINERS``, count PRs they authored and how
           many of those they merged themselves.
        3. Write every fetched PR (newest merge first) to a CSV file under
           ``/tmp``.
        4. Print a summary and the per-maintainer self-merge rate, sorted by
           authored count (descending) and then login.

    Returns:
        ``0`` on completion, for use as the process exit code.
    """

    def iso_z(dt: datetime) -> str:
        # Render a UTC datetime with the "Z" suffix instead of "+00:00".
        return dt.isoformat().replace("+00:00", "Z")

    cutoff = parse_dt(CUTOFF_ISO)
    prs = fetch_merged_prs_since(cutoff)

    stats: Dict[str, Dict[str, int]] = {m: {"authored": 0, "self_merged": 0} for m in MAINTAINERS}
    for pr in prs:
        counts = stats.get(pr.author)
        if counts is None:
            # Author is not one of the tracked maintainers.
            continue
        counts["authored"] += 1
        if pr.merged_by == pr.author:
            counts["self_merged"] += 1

    # Build the file name from the parsed cutoff so it cannot drift from
    # CUTOFF_ISO when that constant is edited.
    csv_path = f"/tmp/freecad-merged-prs-since-{cutoff.date().isoformat()}.csv"
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["number", "url", "mergedAt", "author", "mergedBy", "updatedAt"])
        for pr in sorted(prs, key=lambda p: p.merged_at, reverse=True):
            w.writerow(
                [
                    pr.number,
                    pr.url,
                    iso_z(pr.merged_at),
                    pr.author,
                    pr.merged_by,
                    iso_z(pr.updated_at),
                ]
            )

    print(f"Repo: {OWNER}/{REPO}")
    print(f"Cutoff (UTC): {iso_z(cutoff)}")
    print(f"Merged PRs since cutoff: {len(prs)}")
    print(f"CSV: {csv_path}")
    print("\nMaintainer self-merge rates (authored PRs only):")

    rows: List[Tuple[str, int, int, float]] = []
    for m in MAINTAINERS:
        authored = stats[m]["authored"]
        self_merged = stats[m]["self_merged"]
        # Avoid dividing by zero for maintainers with no authored PRs.
        pct = (self_merged / authored * 100.0) if authored else 0.0
        rows.append((m, authored, self_merged, pct))

    # stable sort by authored desc, then login
    rows.sort(key=lambda r: (-r[1], r[0].lower()))
    for login, authored, self_merged, pct in rows:
        print(f"- {login}: {self_merged}/{authored} self-merged ({pct:.1f}%)")
    return 0
# Run the report only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment