We want summary statistics for the Kew herbarium, grouped at family level according to the WCVP (World Checklist of Vascular Plants) taxonomy, using the herbarium data published in GBIF.
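Any field that is searchable in the GBIF data portal is also eligible for faceting, e.g. it is possible to get a summary report of the number of records in each country using an API call such as:
URL: https://api.gbif.org/v1/occurrence/search?dataset_key=cd6e21c8-9e8a-493a-8a76-fbf7862069e5&facet=country&limit=0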
Although each occurrence record has a family field, that field is not searchable, so it cannot be used as a facet.
The relevant datasets in GBIF are:
- Kew herbarium, an occurrence dataset (dataset ID: cd6e21c8-9e8a-493a-8a76-fbf7862069e5)
- WCVP taxonomy, a checklist dataset (dataset ID: f382f0ce-323a-4091-bb9f-add557f3a9a2)
- GBIF backbone taxonomy, to which occurrences in GBIF are mapped (dataset ID: d7dddbf4-2cf0-4f39-9b2a-bb099caae36c)
A sample occurrence record from the Kew herbarium, accessed using the GBIF API
URL: https://api.gbif.org/v1/occurrence/search?dataset_key=cd6e21c8-9e8a-493a-8a76-fbf7862069e5&limit=1
{
"offset": 0,
"limit": 1,
"endOfRecords": false,
"count": 5871122,
"results": [
{
"key": 5152217304,
"datasetKey": "cd6e21c8-9e8a-493a-8a76-fbf7862069e5",
"publishingOrgKey": "061b4f20-f241-11da-a328-b8a03c50a862",
"networkKeys": [
"17abcf75-2f1e-46dd-bf75-a5b21dd02655"
],
"installationKey": "996e30f8-f762-11e1-a439-00145eb45e9a",
"hostingOrganizationKey": "061b4f20-f241-11da-a328-b8a03c50a862",
"publishingCountry": "GB",
"protocol": "DWC_ARCHIVE",
"lastCrawled": "2025-11-21T09:20:17.179+00:00",
"lastParsed": "2025-11-21T09:56:53.417+00:00",
"crawlId": 495,
"extensions": {
},
"basisOfRecord": "PRESERVED_SPECIMEN",
"occurrenceStatus": "PRESENT",
"classifications": {
"7ddf754f-d193-4cc9-b351-99906754a03b": {
"usage": {
"key": "3JP9V",
"name": "Haplormosia monophylla (Harms) Harms",
"rank": "SPECIES",
"code": "BOTANICAL",
"authorship": "(Harms) Harms",
"genericName": "Haplormosia",
"specificEpithet": "monophylla",
"formattedName": "\u003Ci\u003EHaplormosia\u003C/i\u003E \u003Ci\u003Emonophylla\u003C/i\u003E (Harms) Harms"
},
"acceptedUsage": {
"key": "3JP9V",
"name": "Haplormosia monophylla (Harms) Harms",
"rank": "SPECIES",
"code": "BOTANICAL",
"authorship": "(Harms) Harms",
"genericName": "Haplormosia",
"specificEpithet": "monophylla",
"formattedName": "\u003Ci\u003EHaplormosia\u003C/i\u003E \u003Ci\u003Emonophylla\u003C/i\u003E (Harms) Harms"
},
"taxonomicStatus": "ACCEPTED",
"classification": [
{
"key": "CS5HF",
"name": "Eukaryota",
"rank": "DOMAIN"
},
{
"key": "P",
"name": "Plantae",
"rank": "KINGDOM"
},
{
"key": "CMQ8S",
"name": "Pteridobiotina",
"rank": "SUBKINGDOM"
},
{
"key": "TP",
"name": "Tracheophyta",
"rank": "PHYLUM"
},
{
"key": "MG",
"name": "Magnoliopsida",
"rank": "CLASS"
},
{
"key": "383",
"name": "Fabales",
"rank": "ORDER"
},
{
"key": "623QT",
"name": "Fabaceae",
"rank": "FAMILY"
},
{
"key": "9CKY6",
"name": "Haplormosia",
"rank": "GENUS"
},
{
"key": "3JP9V",
"name": "Haplormosia monophylla",
"rank": "SPECIES"
}
],
"iucnRedListCategoryCode": "VU",
"issues": []
},
"d7dddbf4-2cf0-4f39-9b2a-bb099caae36c": {
"usage": {
"key": "2951884",
"name": "Haplormosia monophylla (Harms) Harms",
"rank": "SPECIES",
"code": "BOTANICAL",
"authorship": "(Harms) Harms",
"genericName": "Haplormosia",
"specificEpithet": "monophylla",
"formattedName": "\u003Ci\u003EHaplormosia\u003C/i\u003E \u003Ci\u003Emonophylla\u003C/i\u003E (Harms) Harms"
},
"acceptedUsage": {
"key": "2951884",
"name": "Haplormosia monophylla (Harms) Harms",
"rank": "SPECIES",
"code": "BOTANICAL",
"authorship": "(Harms) Harms",
"genericName": "Haplormosia",
"specificEpithet": "monophylla",
"formattedName": "\u003Ci\u003EHaplormosia\u003C/i\u003E \u003Ci\u003Emonophylla\u003C/i\u003E (Harms) Harms"
},
"taxonomicStatus": "ACCEPTED",
"classification": [
{
"key": "6",
"name": "Plantae",
"rank": "KINGDOM"
},
{
"key": "7707728",
"name": "Tracheophyta",
"rank": "PHYLUM"
},
{
"key": "220",
"name": "Magnoliopsida",
"rank": "CLASS"
},
{
"key": "1370",
"name": "Fabales",
"rank": "ORDER"
},
{
"key": "5386",
"name": "Fabaceae",
"rank": "FAMILY"
},
{
"key": "2951883",
"name": "Haplormosia",
"rank": "GENUS"
},
{
"key": "2951884",
"name": "Haplormosia monophylla",
"rank": "SPECIES"
}
],
"iucnRedListCategoryCode": "VU",
"issues": []
}
},
"taxonKey": 2951884,
"kingdomKey": 6,
"phylumKey": 7707728,
"classKey": 220,
"orderKey": 1370,
"familyKey": 5386,
"genusKey": 2951883,
"speciesKey": 2951884,
"acceptedTaxonKey": 2951884,
"scientificName": "Haplormosia monophylla (Harms) Harms",
"scientificNameAuthorship": "(Harms) Harms",
"acceptedScientificName": "Haplormosia monophylla (Harms) Harms",
"kingdom": "Plantae",
"phylum": "Tracheophyta",
"order": "Fabales",
"family": "Fabaceae",
"genus": "Haplormosia",
"species": "Haplormosia monophylla",
"genericName": "Haplormosia",
"specificEpithet": "monophylla",
"taxonRank": "SPECIES",
"taxonomicStatus": "ACCEPTED",
"iucnRedListCategory": "VU",
"decimalLatitude": 6.97382,
"decimalLongitude": -11.55943,
"elevation": 19,
"continent": "AFRICA",
"gadm": {
"level0": {
"gid": "SLE",
"name": "Sierra Leone"
},
"level1": {
"gid": "SLE.3_1",
"name": "Southern"
},
"level2": {
"gid": "SLE.3.4_1",
"name": "Pujehun"
},
"level3": {
"gid": "SLE.3.4.2_1",
"name": "Gallinas Peri"
}
},
"year": 2025,
"month": 2,
"day": 14,
"eventDate": "2025-02-14T00:00",
"startDayOfYear": 45,
"endDayOfYear": 45,
"issues": [
"GEODETIC_DATUM_ASSUMED_WGS84",
"CONTINENT_DERIVED_FROM_COORDINATES",
"INSTITUTION_MATCH_FUZZY",
"INSTITUTION_COLLECTION_MISMATCH"
],
"modified": "2025-05-13T12:04:22.973+00:00",
"lastInterpreted": "2025-11-21T09:56:53.417+00:00",
"license": "http://creativecommons.org/licenses/by/4.0/legalcode",
"isSequenced": false,
"identifiers": [
{
"identifier": "K000923577"
}
],
"media": [],
"facts": [],
"relations": [],
"institutionKey": "fa252605-26f6-426c-9892-94d071c2c77f",
"isInCluster": false,
"recordedBy": "Hoban, G.; Burgt, X.M. van der; Sanders, D.; Johnny, J.; Sesay, M.P.; Sokpo, S.",
"identifiedBy": "Burgt, X.M. van der & Hoban, G.",
"dnaSequenceID": [],
"geodeticDatum": "WGS84",
"class": "Magnoliopsida",
"countryCode": "SL",
"recordedByIDs": [],
"identifiedByIDs": [],
"gbifRegion": "AFRICA",
"country": "Sierra Leone",
"publishedByGbifRegion": "EUROPE",
"identifier": "K000923577",
"recordNumber": "11",
"catalogNumber": "K000923577",
"habitat": "Muddy riverbank with Rhizophora sp., Raphia sp., Pandanus.",
"institutionCode": "K",
"locality": "Sierra Leone, Pujehun District, Sulima.",
"eventRemarks": "Tree overhanging river, 12 m in height with 70 cm dbh. Fruits pendant, green; seeds recalcitrant. Buttress roots. Two individuals.",
"collectionCode": "Herbarium",
"gbifID": "5152217304",
"occurrenceID": "http://specimens.kew.org/herbarium/K000923577",
"higherClassification": "Fabaceae"
}
],
"facets": []
}
A sample record from the WCVP checklist (dataset f382f0ce-323a-4091-bb9f-add557f3a9a2), accessed using the GBIF API:
{
"offset": 0,
"limit": 1,
"endOfRecords": false,
"count": 1440076,
"results": [
{
"key": 206462481,
"datasetKey": "f382f0ce-323a-4091-bb9f-add557f3a9a2",
"nubKey": 3084028,
"parentKey": 306815616,
"parent": "Achatocarpaceae",
"family": "Achatocarpaceae",
"genus": "Phaulothamnus",
"familyKey": 306815616,
"genusKey": 206462481,
"scientificName": "Phaulothamnus A.Gray",
"canonicalName": "Phaulothamnus",
"authorship": "A.Gray",
"publishedIn": "Proc. Amer. Acad. Arts 20: 293 (1885)",
"nameType": "SCIENTIFIC",
"taxonomicStatus": "ACCEPTED",
"rank": "GENUS",
"origin": "SOURCE",
"numDescendants": 0,
"numOccurrences": 0,
"taxonID": "500165",
"habitats": [],
"nomenclaturalStatus": [],
"threatStatuses": [],
"descriptions": [],
"vernacularNames": [],
"synonym": false,
"higherClassificationMap": {
"306815616": "Achatocarpaceae"
}
}
],
"facets": []
}
We can query the GBIF species API to get every family name in WCVP (624 records, per the count field in the response below; only the first record is shown because limit=1):
{
"offset": 0,
"limit": 1,
"endOfRecords": false,
"count": 624,
"results": [
{
"key": 306199155,
"datasetKey": "f382f0ce-323a-4091-bb9f-add557f3a9a2",
"nubKey": 2393,
"family": "Acanthaceae",
"familyKey": 306199155,
"scientificName": "Acanthaceae",
"canonicalName": "Acanthaceae",
"taxonomicStatus": "ACCEPTED",
"rank": "FAMILY",
"origin": "DENORMED_CLASSIFICATION",
"numDescendants": 0,
"numOccurrences": 0,
"habitats": [],
"nomenclaturalStatus": [],
"threatStatuses": [],
"descriptions": [],
"vernacularNames": [],
"synonym": false,
"higherClassificationMap": {
}
}
],
"facets": []
}
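Take the nubKey (the key of the matching taxon in the GBIF backbone taxonomy) from each family record and use it as the taxon_key parameter in an occurrence search, restricted to the Kew herbarium with dataset_key. If all we want are the totals, we can set limit=0 and read the count field of the API response.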
First, here is the equivalent search in the GBIF API, which shows that nubKey 2393 is correctly mapped to the family name Acanthaceae:
This is the equivalent data retrieved from the API (with limit=0, so results is empty and count holds the total number of matching records):
{
"offset": 0,
"limit": 0,
"endOfRecords": false,
"count": 85743,
"results": [],
"facets": []
}