Skip to content

Instantly share code, notes, and snippets.

@samber
Created November 14, 2025 15:19
Show Gist options
  • Select an option

  • Save samber/a67a4c404d80c3a9f6834f5f62ef3d23 to your computer and use it in GitHub Desktop.

Select an option

Save samber/a67a4c404d80c3a9f6834f5f62ef3d23 to your computer and use it in GitHub Desktop.
Extract repository stargazers along with their full profile information
#!/usr/bin/env python3
import os
import sys
import csv
import time
from datetime import datetime, timezone
import requests
# Base URL of the GitHub REST API; endpoint paths are appended to it.
GITHUB_API = "https://api.github.com"
def github_get(url, token=None):
    """Perform a GET request against the GitHub API and return the parsed JSON.

    The request asks for the ``application/vnd.github.v3.star+json`` media
    type. When the API answers with a rate-limit response, the function
    sleeps until the limit is lifted and then retries the same URL.

    Args:
        url: Absolute URL to fetch.
        token: Optional GitHub token; when truthy it is sent in the
            ``Authorization`` header.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: For any non-success status that is not a
            recognised rate-limit response.
        requests.RequestException: On connection problems or timeouts.
    """
    headers = {"Accept": "application/vnd.github.v3.star+json"}
    if token:
        headers["Authorization"] = f"token {token}"

    # Retry in a loop rather than recursively, so a long series of
    # rate-limit waits cannot exhaust the interpreter's call stack.
    while True:
        # An explicit timeout keeps a stalled connection from hanging forever.
        resp = requests.get(url, headers=headers, timeout=30)

        if resp.status_code in (403, 429):
            wait_for = None
            retry_after = resp.headers.get("Retry-After")
            if resp.headers.get("X-RateLimit-Remaining") == "0":
                # Primary rate limit: wait until the advertised reset time.
                reset = int(resp.headers.get("X-RateLimit-Reset", 0))
                wait_for = max(0, reset - int(time.time())) + 1
            elif retry_after and retry_after.isdigit():
                # Secondary rate limit: the server says how long to back off.
                wait_for = int(retry_after) + 1

            if wait_for is not None:
                print(f"Rate limit hit. Waiting {wait_for}s...")
                time.sleep(wait_for)
                continue

        resp.raise_for_status()
        return resp.json()
def iter_stargazers(owner, repo, token=None):
    """Yield ``(login, user_id, starred_at)`` for each stargazer of a repository.

    Pages through ``/repos/{owner}/{repo}/stargazers`` 100 entries at a time
    and stops at the first empty page.

    Args:
        owner: Account that owns the repository.
        repo: Repository name.
        token: Optional GitHub token forwarded to ``github_get``.

    Yields:
        Tuples of the stargazer's login, numeric id and the ``starred_at``
        value of the entry (``None`` when the entry has no such key).
    """
    page = 1
    while True:
        url = f"{GITHUB_API}/repos/{owner}/{repo}/stargazers?per_page=100&page={page}"
        data = github_get(url, token)
        if not data:
            break
        for item in data:
            user = item.get("user") or {}
            login = user.get("login")
            if not login:
                # An entry without a usable user object cannot be looked up
                # later; skip it instead of raising KeyError mid-pagination.
                continue
            yield login, user.get("id"), item.get("starred_at")
        page += 1
def get_user_details(login, token=None):
    """Fetch the profile of *login* and return a fixed subset of its fields.

    Args:
        login: GitHub login of the user to look up.
        token: Optional GitHub token forwarded to ``github_get``.

    Returns:
        A dict with the keys listed in ``wanted`` below; a key absent from
        the API response maps to ``None``.
    """
    wanted = (
        "name",
        "email",
        "company",
        "location",
        "bio",
        "twitter_username",
        "blog",
        "followers",
        "following",
        "public_repos",
        "public_gists",
    )
    profile = github_get(f"{GITHUB_API}/users/{login}", token)
    return {field: profile.get(field) for field in wanted}
def main():
    """Export a repository's stargazers and their profiles to a CSV file.

    Expects exactly two command-line arguments, ``owner`` and ``repo``, and
    exits with status 1 after printing a usage line otherwise. An optional
    token is read from the ``GITHUB_TOKEN`` environment variable. Rows are
    written to ``{owner}-{repo}-stargazers-details.csv`` in the current
    directory. A stargazer whose profile cannot be fetched is reported and
    skipped.
    """
    if len(sys.argv) != 3:
        print("Usage: python get_stargazers_with_company_country.py owner repo")
        sys.exit(1)

    owner, repo = sys.argv[1], sys.argv[2]
    token = os.environ.get("GITHUB_TOKEN")

    # One UTC timestamp (second precision, "Z" suffix) shared by every row.
    now = datetime.now(timezone.utc).replace(
        microsecond=0).isoformat().replace("+00:00", "Z")

    fieldnames = [
        "repository", "user_id", "login", "name", "email", "company",
        "location", "bio", "twitter", "blog", "followers", "following",
        "public_repos", "public_gists", "starred_at", "scrapped_at",
        "updated_at",
    ]

    outname = f"{owner}-{repo}-stargazers-details.csv"
    with open(outname, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

        for login, user_id, starred_at in iter_stargazers(owner, repo, token):
            # Only the profile fetch is best-effort. Keeping the CSV write
            # outside the try block means a failure to write the output is
            # not misreported as a fetch error and silently skipped.
            try:
                details = get_user_details(login, token)
            except Exception as e:
                print(f"Error fetching {login}: {e}")
                continue

            writer.writerow({
                "repository": f"{owner}/{repo}",
                "user_id": user_id,
                "login": login,
                "name": details["name"],
                "email": details["email"],
                "company": details["company"],
                "location": details["location"],
                "bio": details["bio"],
                "twitter": details["twitter_username"],
                "blog": details["blog"],
                "followers": details["followers"],
                "following": details["following"],
                "public_repos": details["public_repos"],
                "public_gists": details["public_gists"],
                "starred_at": starred_at,
                "scrapped_at": now,
                "updated_at": now,
            })
            print(f"✔ {login}")
            time.sleep(0.3)  # Be polite to GitHub API

    print(f"\nSaved stargazer details to {outname}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment