Skip to content

Instantly share code, notes, and snippets.

@cthoyt
Created November 25, 2025 08:33
Show Gist options
  • Select an option

  • Save cthoyt/307144059fd0a398463a3b975b65df56 to your computer and use it in GitHub Desktop.

Select an option

Save cthoyt/307144059fd0a398463a3b975b65df56 to your computer and use it in GitHub Desktop.
Get Bioschemas JSON-LD for CURIEs via the Bioregistry
from typing import Any
import curies
from bioregistry import get_iri
import requests
from bs4 import BeautifulSoup
import json
def get_jsonld(reference: curies.ReferenceTuple | curies.Reference, **kwargs) -> list[dict[str, Any]]:
    """Fetch the page for a reference and return the JSON-LD embedded in it.

    The reference's prefix and identifier are turned into a URL with
    :func:`bioregistry.get_iri`, the page is downloaded, and the content of every
    ``<script type="application/ld+json">`` element is parsed as JSON.

    :param reference: The prefix/identifier pair to look up.
    :param kwargs: Extra keyword arguments forwarded to :func:`bioregistry.get_iri`.
    :returns: The parsed content of each JSON-LD script element, in document
        order. Script elements that are empty or do not hold valid JSON are
        skipped, so the list may be empty.
    """
    # NOTE(review): get_iri presumably returns None when no IRI can be built for
    # the prefix; that case is passed to requests unchanged here - confirm.
    url = get_iri(reference.prefix, reference.identifier, **kwargs)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.text, "html.parser")
    rv = []
    for script in soup.find_all("script", type="application/ld+json"):
        content = script.string
        # ``.string`` is None for an empty element or one with several child
        # nodes; json.loads(None) raises TypeError, which the handler below
        # does not catch, so skip such elements explicitly.
        if content is None:
            continue
        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            # Best effort: one malformed script should not discard the others.
            continue
        rv.append(data)
    return rv
def _demo():
    """Print every JSON-LD document found for an example ``massbank`` reference."""
    example = curies.ReferenceTuple("massbank", "MSBNK-IPB_Halle-PB000166")
    documents = get_jsonld(example)
    for document in documents:
        print(document)
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    _demo()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment