Skip to content

Instantly share code, notes, and snippets.

@davidaknowles
Last active November 5, 2025 14:59
Show Gist options
  • Select an option

  • Save davidaknowles/b315ec42602de9b8230b2171054b2709 to your computer and use it in GitHub Desktop.

Select an option

Save davidaknowles/b315ec42602de9b8230b2171054b2709 to your computer and use it in GitHub Desktop.
Get the NSF COA (Collaborators and Other Affiliations) coauthor list, including affiliations, from PubMed.
from Bio import Entrez
import csv
from collections import defaultdict
from datetime import datetime
# --- Configuration -----------------------------------------------------------
# Author to build the list for, in PubMed's "LastName Initials" author format.
AUTHOR_NAME = "Knowles DA"
# Only publications dated from this year onward are searched.
START_YEAR = datetime.now().year - 4
# Assigned to Entrez.email before any request is made.
# NOTE(review): this value looks like a redacted placeholder -- confirm it is
# a real contact address before running.
EMAIL = "[email protected]"
# Destination CSV (columns: Name, Institution, Last Active).
OUTFILE = "nsf_coauthors.csv"


def build_search_term(author_name, start_year):
    """Return the PubMed query for *author_name* from *start_year* onward.

    The field tag must sit outside the quoted phrase (``"Knowles DA"[Author]``);
    placing it inside the quotes makes it part of the phrase being searched.
    """
    return f'"{author_name}"[Author] AND ("{start_year}"[dp] : "3000"[dp])'


def fetch_records(term, retmax=1000):
    """Search PubMed for *term* and return the matching ``PubmedArticle`` list.

    Returns an empty list when the search has no hits, so that ``efetch`` is
    never called with an empty id list.
    """
    search = Entrez.esearch(db="pubmed", term=term, retmax=retmax)
    try:
        ids = Entrez.read(search)["IdList"]
    finally:
        search.close()
    if not ids:
        return []

    handle = Entrez.efetch(db="pubmed", id=",".join(ids), retmode="xml")
    try:
        records = Entrez.read(handle)
    finally:
        handle.close()
    return records["PubmedArticle"]


def _split_author_name(author_name):
    """Split "LastName Initials" into ``(last_name, initials)``."""
    last, _, initials = author_name.strip().rpartition(" ")
    if not last:
        # No space at all: treat the whole string as a last name.
        return initials, ""
    return last.strip(), initials


def _author_initials(author):
    """Return an author's upper-cased initials, derived from ForeName if needed."""
    initials = author.get("Initials", "")
    if not initials:
        parts = str(author.get("ForeName", "")).replace("-", " ").split()
        initials = "".join(part[0] for part in parts)
    return str(initials).replace(".", "").upper()


def _is_self(author, target_last, target_initials):
    """Return True when *author* is the person the list is being built for."""
    if str(author["LastName"]).casefold() != target_last.casefold():
        return False
    if not target_initials:
        return True
    return _author_initials(author) == target_initials.upper()


def _publication_year(article):
    """Return the article's publication year as an int, or 0 when unknown.

    Falls back to the first four-digit token of ``MedlineDate``
    (e.g. "2023 Jan-Feb") when ``PubDate`` carries no ``Year``.
    """
    pub_date = article["Journal"]["JournalIssue"]["PubDate"]
    year = str(pub_date.get("Year", ""))
    if year.isdigit():
        return int(year)
    medline_date = str(pub_date.get("MedlineDate", ""))
    for token in medline_date.replace("-", " ").split():
        if len(token) == 4 and token.isdigit():
            return int(token)
    return 0


def collect_coauthors(articles, author_name=AUTHOR_NAME):
    """Collect coauthor affiliations and last-active years from *articles*.

    Args:
        articles: iterable of parsed ``PubmedArticle`` records.
        author_name: the list owner in "LastName Initials" form; this author
            is excluded from the result.

    Returns:
        ``(coauthors, last_active)`` where ``coauthors`` maps
        "LastName, ForeName" to the first non-empty affiliation seen and
        ``last_active`` maps the same names to their latest publication year
        (names with no known year are absent).
    """
    target_last, target_initials = _split_author_name(author_name)
    coauthors = {}
    last_active = {}
    for record in articles:
        article = record["MedlineCitation"]["Article"]
        year = _publication_year(article)
        for author in article.get("AuthorList", []):
            # Entries lacking a personal name (no ForeName/LastName) are skipped.
            if "ForeName" not in author or "LastName" not in author:
                continue
            if _is_self(author, target_last, target_initials):
                continue
            # Format as "LastName, FirstName MiddleInitial"
            name = f"{author['LastName']}, {author['ForeName']}".strip()
            affiliations = author.get("AffiliationInfo", [])
            affiliation = affiliations[0]["Affiliation"] if affiliations else ""
            # Keep the first affiliation seen, but let a later paper fill in
            # an entry that is still blank.
            if not coauthors.get(name):
                coauthors[name] = affiliation
            if year:
                last_active[name] = max(last_active.get(name, 0), year)
    return coauthors, last_active


def write_csv(coauthors, last_active, outfile):
    """Write the coauthor table to *outfile*, sorted by name."""
    # utf-8-sig writes a BOM at the start of the file.
    with open(outfile, "w", newline="", encoding="utf-8-sig") as f:
        w = csv.writer(f)
        w.writerow(["Name", "Institution", "Last Active"])
        for name in sorted(coauthors):
            w.writerow([name, coauthors[name], last_active.get(name, "")])


def main():
    """Query PubMed, build the coauthor table and write it to ``OUTFILE``."""
    Entrez.email = EMAIL
    term = build_search_term(AUTHOR_NAME, START_YEAR)
    articles = fetch_records(term)
    coauthors, last_active = collect_coauthors(articles, AUTHOR_NAME)
    write_csv(coauthors, last_active, OUTFILE)
    print(f"Wrote {len(coauthors)} coauthors to {OUTFILE}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment