Created
April 23, 2025 23:47
-
-
Save jbusecke/65d0e160ed1d90dac903c9c3f36cf9da to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "id": "d920b3e5", | |
| "metadata": {}, | |
| "source": [ | |
| "# Testing how to programmatically extract reference details from CMIP data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "82e0dd0e", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import aiohttp\n", | |
| "from tqdm.asyncio import tqdm_asyncio\n", | |
| "\n", | |
| "async def handle_to_url(handle):\n", | |
| " # Convert handle to URL\n", | |
| " return f\"https://hdl.handle.net/api/handles/{handle.replace('hdl:', '')}\"\n", | |
| "\n", | |
| "async def get_json(session, url):\n", | |
| " async with session.get(url) as response:\n", | |
| " if response.status == 200:\n", | |
| " return await response.json()\n", | |
| " else:\n", | |
| " raise ValueError(f\"Failed to retrieve data from {url}\")\n", | |
| "\n", | |
| "async def get_value(json_response, value_type):\n", | |
| " # Return only the value index with type \"value_type\"\n", | |
| " for value in json_response['values']:\n", | |
| " if value['type'] == value_type:\n", | |
| " return value['data']['value']\n", | |
| " raise ValueError(f\"Value of type {value_type} not found in response\")\n", | |
| "\n", | |
| "async def get_doi_from_tracking_id(tracking_id: str) -> str:\n", | |
| " \"\"\"\n", | |
| " Get the DOI from a tracking ID attribute string\n", | |
| " \"\"\"\n", | |
| " tracking_ids = tracking_id.split('\\n')\n", | |
| " async with aiohttp.ClientSession() as session:\n", | |
| " # Check that all handles point to the same root handle\n", | |
| " root_handles = await tqdm_asyncio.gather(\n", | |
| " *[get_value(await get_json(session, await handle_to_url(handle)), \"IS_PART_OF\") for handle in tracking_ids]\n", | |
| " )\n", | |
| " # If not all root_handles are the same, throw an error\n", | |
| " if len(set(root_handles)) > 1:\n", | |
| " raise ValueError(\"Not all handles point to the same root handle\")\n", | |
| " else:\n", | |
| " root_handle = root_handles[0]\n", | |
| " # Get the DOI of the root handle\n", | |
| " doi = await get_value(await get_json(session, await handle_to_url(root_handle)), \"IS_PART_OF\")\n", | |
| " # If root_doi does not start with \"doi:\", raise an error\n", | |
| " if not doi.startswith(\"doi:\"):\n", | |
| " raise ValueError(\"Root handle does not point to a DOI\")\n", | |
| " return doi\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "a040bc2f", | |
| "metadata": {}, | |
| "source": [ | |
| "## Load the CMIP Catalog" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "id": "78b86dc2", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import intake\n", | |
| "import xarray as xr\n", | |
| "\n", | |
| "\n", | |
| "# uncomment/comment lines to swap catalogs\n", | |
| "url = \"https://storage.googleapis.com/cmip6/cmip6-pgf-ingestion-test/catalog/catalog.json\" # Only stores that pass current tests\n", | |
| "col = intake.open_esm_datastore(url)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "da6c28e5", | |
| "metadata": {}, | |
| "source": [ | |
| "## A single test" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "cf22605c", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "100%|██████████| 4/4 [00:00<00:00, 8439.24it/s]\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'doi:10.22033/ESGF/CMIP6.11762'" | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "path = col.df['zstore'].tolist()[100]\n", | |
| "\n", | |
| "ds = xr.open_zarr(path, consolidated=True)\n", | |
| "# await get_dois_from_tracking_ids(ds.attrs['tracking_id'])\n", | |
| "await get_doi_from_tracking_id(ds.attrs['tracking_id'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "id": "2752bb31", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G-CC/historical/r1i1p1f1/Omon/fsitherm/gn/v20190815/'" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "path" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "ea93a8f4", | |
| "metadata": {}, | |
| "source": [ | |
| "I manually checked the facets against the DOI, and this seems to work 😁" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "cfb08e7c", | |
| "metadata": {}, | |
| "source": [ | |
| "## The data used in the pco2 testbed publication\n", | |
| "\n", | |
| "Starting from a list of partial instance_ids, get all available DOIs.\n", | |
| "\n", | |
| ">[!NOTE]\n", | |
| "> I found that the results are highly redundant: there is generally only one DOI per simulation (i.e. per combination of source_id and experiment_id). I therefore pruned the search heavily below to reduce runtime.\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "7d88cc25", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "partial_instance_ids = [\n", | |
| " \"ACCESS-ESM1-5.gn.ssp245.Omon.r4i1p1f1\",\n", | |
| " \"ACCESS-ESM1-5.gn.historical.Omon.r4i1p1f1\",\n", | |
| " \"ACCESS-ESM1-5.gn.ssp245.Omon.r5i1p1f1\",\n", | |
| " \"ACCESS-ESM1-5.gn.historical.Omon.r5i1p1f1\",\n", | |
| " \"CESM2.gn.ssp245.Omon.r10i1p1f1\",\n", | |
| " \"CESM2.gn.historical.Omon.r10i1p1f1\",\n", | |
| " \"CESM2.gn.ssp245.Omon.r11i1p1f1\",\n", | |
| " \"CESM2.gn.historical.Omon.r11i1p1f1\",\n", | |
| " \"CESM2.gn.ssp245.Omon.r4i1p1f1\",\n", | |
| " \"CESM2.gn.historical.Omon.r4i1p1f1\",\n", | |
| " \"CESM2-WACCM.gn.ssp245.Omon.r1i1p1f1\",\n", | |
| " \"CESM2-WACCM.gn.historical.Omon.r1i1p1f1\",\n", | |
| " \"CESM2-WACCM.gr.ssp245.Omon.r2i1p1f1\",\n", | |
| " \"CESM2-WACCM.gr.historical.Omon.r2i1p1f1\",\n", | |
| " \"CESM2-WACCM.gr.ssp245.Omon.r3i1p1f1\",\n", | |
| " \"CESM2-WACCM.gr.historical.Omon.r3i1p1f1\",\n", | |
| " \"CMCC-ESM2.gn.ssp245.Omon.r1i1p1f1\",\n", | |
| " \"CMCC-ESM2.gn.historical.Omon.r1i1p1f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r10i1p2f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r10i1p2f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r1i1p1f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r1i1p1f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r1i1p2f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r1i1p2f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r2i1p1f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r2i1p1f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r2i1p2f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r2i1p2f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r3i1p1f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r3i1p1f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r3i1p2f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r3i1p2f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r4i1p1f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r4i1p1f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r4i1p2f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r4i1p2f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r5i1p1f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r5i1p1f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r5i1p2f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r5i1p2f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r6i1p1f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r6i1p1f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r6i1p2f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r6i1p2f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r7i1p1f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r7i1p1f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r7i1p2f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r7i1p2f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r8i1p1f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r8i1p1f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r8i1p2f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r8i1p2f1\",\n", | |
| " \"CanESM5.gn.ssp245.Omon.r9i1p2f1\",\n", | |
| " \"CanESM5.gn.historical.Omon.r9i1p2f1\",\n", | |
| " \"CanESM5-CanOE.gn.ssp245.Omon.r1i1p2f1\",\n", | |
| " \"CanESM5-CanOE.gn.historical.Omon.r1i1p2f1\",\n", | |
| " \"CanESM5-CanOE.gn.ssp245.Omon.r2i1p2f1\",\n", | |
| " \"CanESM5-CanOE.gn.historical.Omon.r2i1p2f1\",\n", | |
| " \"CanESM5-CanOE.gn.ssp245.Omon.r3i1p2f1\",\n", | |
| " \"CanESM5-CanOE.gn.historical.Omon.r3i1p2f1\",\n", | |
| " \"GFDL-ESM4.gr.ssp245.Omon.r1i1p1f1\",\n", | |
| " \"GFDL-ESM4.gr.historical.Omon.r1i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.ssp245.Omon.r11i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.historical.Omon.r11i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.ssp245.Omon.r12i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.historical.Omon.r12i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.ssp245.Omon.r14i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.historical.Omon.r14i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.ssp245.Omon.r15i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.historical.Omon.r15i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.ssp245.Omon.r16i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.historical.Omon.r16i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.ssp245.Omon.r22i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.historical.Omon.r22i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.ssp245.Omon.r23i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.historical.Omon.r23i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.ssp245.Omon.r26i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.historical.Omon.r26i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.ssp245.Omon.r27i1p1f1\",\n", | |
| " \"MPI-ESM1-2-LR.gn.historical.Omon.r27i1p1f1\",\n", | |
| " \"UKESM1-0-LL.gn.ssp245.Omon.r1i1p1f2\",\n", | |
| " \"UKESM1-0-LL.gn.historical.Omon.r1i1p1f2\",\n", | |
| " \"UKESM1-0-LL.gn.ssp245.Omon.r2i1p1f2\",\n", | |
| " \"UKESM1-0-LL.gn.historical.Omon.r2i1p1f2\",\n", | |
| " \"UKESM1-0-LL.gn.ssp245.Omon.r3i1p1f2\",\n", | |
| " \"UKESM1-0-LL.gn.historical.Omon.r3i1p1f2\",\n", | |
| " \"UKESM1-0-LL.gn.ssp245.Omon.r4i1p1f2\",\n", | |
| " \"UKESM1-0-LL.gn.historical.Omon.r4i1p1f2\",\n", | |
| " \"UKESM1-0-LL.gn.ssp245.Omon.r8i1p1f2\",\n", | |
| " \"UKESM1-0-LL.gn.historical.Omon.r8i1p1f2\"\n", | |
| "]\n", | |
| "# I'm going to cheat here a bit. I am pretty sure that the DOIs are per simulation, so let's just search for the source_id and experiment_id\n", | |
| "\n", | |
| "partial_instance_ids_pruned = sorted(list(set(['.'.join(iid.split('.')[:-1]) for iid in partial_instance_ids])))\n", | |
| "partial_instance_ids_pruned" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "id": "8623fb75", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "0748d44cf648430ba7a5fbd1c16ec6ca", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Instance ID Loop: 0%| | 0/20 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "ACCESS-ESM1-5.gn.historical.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "6e3f99e4c13546b2b886327281720c57", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 1/1 [00:00<00:00, 1758.62it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "ACCESS-ESM1-5.historical {'doi:10.22033/ESGF/CMIP6.4272'}\n", | |
| "ACCESS-ESM1-5.gn.ssp245.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "5d7072d711d24e6c9979eecd21b6ba83", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 1/1 [00:00<00:00, 2123.70it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "ACCESS-ESM1-5.ssp245 {'doi:10.22033/ESGF/CMIP6.4322'}\n", | |
| "CESM2-WACCM.gn.historical.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "ce5f8f1299ba40e08f66f1beee04a567", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 1/1 [00:00<00:00, 1831.57it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CESM2-WACCM.historical {'doi:10.22033/ESGF/CMIP6.10071'}\n", | |
| "CESM2-WACCM.gn.ssp245.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "4322ef65150649b495c4b3f7f25fe73a", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 2/2 [00:00<00:00, 4606.59it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CESM2-WACCM.ssp245 {'doi:10.22033/ESGF/CMIP6.10101'}\n", | |
| "CESM2-WACCM.gr.historical.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "d5e8b77862ee44d1b06135694fd40651", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 1/1 [00:00<00:00, 1472.72it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CESM2-WACCM.historical {'doi:10.22033/ESGF/CMIP6.10071'}\n", | |
| "CESM2-WACCM.gr.ssp245.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "a8faf47c904b4618a4a5e28ce141bb5f", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 2/2 [00:00<00:00, 4202.71it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CESM2-WACCM.ssp245 {'doi:10.22033/ESGF/CMIP6.10101'}\n", | |
| "CESM2.gn.historical.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "9116eec45e6b496081d2b62ca9cc1686", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 4/4 [00:00<00:00, 5635.61it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CESM2.historical {'doi:10.22033/ESGF/CMIP6.7627'}\n", | |
| "CESM2.gn.ssp245.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "d7a429bdc3fc45ab90c2b77940a436d6", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 2/2 [00:00<00:00, 3067.13it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CESM2.ssp245 {'doi:10.22033/ESGF/CMIP6.7748'}\n", | |
| "CMCC-ESM2.gn.historical.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "a8a4c1d03cc748c696cf0b7516e1d5ca", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 1/1 [00:00<00:00, 1248.30it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CMCC-ESM2.historical {'doi:10.22033/ESGF/CMIP6.13195'}\n", | |
| "CMCC-ESM2.gn.ssp245.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "c91075a618534aeba89adc0974cdb468", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 1/1 [00:00<00:00, 1492.63it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CMCC-ESM2.ssp245 {'doi:10.22033/ESGF/CMIP6.13252'}\n", | |
| "CanESM5-CanOE.gn.historical.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "57c1cb85cea84ed49714f0f34c03004b", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 1/1 [00:00<00:00, 1838.80it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CanESM5-CanOE.historical {'doi:10.22033/ESGF/CMIP6.10260'}\n", | |
| "CanESM5-CanOE.gn.ssp245.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "e2c08ab2ca814afbbe5ed8edbda99d2e", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 1/1 [00:00<00:00, 1326.47it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CanESM5-CanOE.ssp245 {'doi:10.22033/ESGF/CMIP6.10270'}\n", | |
| "CanESM5.gn.historical.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "90a389cfe9b649e9b68d1148be20a3cd", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 1/1 [00:00<00:00, 3536.51it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CanESM5.historical {'doi:10.22033/ESGF/CMIP6.3610'}\n", | |
| "CanESM5.gn.ssp245.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "6270aec497674838a096b76ff191658e", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 1/1 [00:00<00:00, 1937.32it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CanESM5.ssp245 {'doi:10.22033/ESGF/CMIP6.3685'}\n", | |
| "GFDL-ESM4.gr.historical.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "294a76ffb18944b49f041fd8e6a7a3ee", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 9/9 [00:00<00:00, 10742.38it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "GFDL-ESM4.historical {'doi:10.22033/ESGF/CMIP6.8597'}\n", | |
| "GFDL-ESM4.gr.ssp245.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "fef3851f50f844ec88410a06277f386a", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 5/5 [00:00<00:00, 4425.30it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "GFDL-ESM4.ssp245 {'doi:10.22033/ESGF/CMIP6.8686'}\n", | |
| "MPI-ESM1-2-LR.gn.historical.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "4f779eb7a575412c8112bcb3c2f55b28", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 9/9 [00:00<00:00, 13414.62it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "MPI-ESM1-2-LR.historical {'doi:10.22033/ESGF/CMIP6.6595'}\n", | |
| "MPI-ESM1-2-LR.gn.ssp245.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "d2d085112576458895af2f7d07ab9772", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 5/5 [00:00<00:00, 10082.46it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "MPI-ESM1-2-LR.ssp245 {'doi:10.22033/ESGF/CMIP6.6693'}\n", | |
| "UKESM1-0-LL.gn.historical.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "28915813399a4f51b07d841699296d63", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 2/2 [00:00<00:00, 4914.24it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "UKESM1-0-LL.historical {'doi:10.22033/ESGF/CMIP6.6113'}\n", | |
| "UKESM1-0-LL.gn.ssp245.Omon\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "c117a01d4deb4c92989c688e5f59a0b4", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Dataset Loop: 0%| | 0/1 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "100%|██████████| 2/2 [00:00<00:00, 4017.53it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "UKESM1-0-LL.ssp245 {'doi:10.22033/ESGF/CMIP6.6339'}\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "from tqdm.auto import tqdm\n", | |
| "\n", | |
| "doi_dict = {}\n", | |
| "for iid in tqdm(partial_instance_ids_pruned, desc=\"Instance ID Loop\"):\n", | |
| " print(iid)\n", | |
| " source_id, grid_label, experiment_id, table_id = iid.split('.')\n", | |
| " cat = col.search(\n", | |
| " source_id=source_id,\n", | |
| " grid_label=grid_label,\n", | |
| " experiment_id=experiment_id,\n", | |
| " table_id=table_id,\n", | |
| " variable_id=['tos'], #TODO: I need to add the others that were used\n", | |
| " )\n", | |
| " # more thorough search but the results are redundant anyways and the above search is much faster\n", | |
| "\n", | |
| " # source_id, grid_label, experiment_id, table_id, member_id = iid.split('.')\n", | |
| " # cat = col.search(\n", | |
| " # source_id=source_id,\n", | |
| " # grid_label=grid_label,\n", | |
| " # experiment_id=experiment_id,\n", | |
| " # table_id=table_id,\n", | |
| " # member_id=member_id\n", | |
| " # variable_id=['tos'], #TODO: I need to add the others that were used\n", | |
| " # )\n", | |
| " datasets = cat.to_dataset_dict(\n", | |
| " aggregate=False,\n", | |
| " skip_on_error=True,\n", | |
| " xarray_open_kwargs={\n", | |
| " 'consolidated':True,\n", | |
| " 'decode_times':False\n", | |
| " },\n", | |
| " progressbar=False,\n", | |
| " )\n", | |
| " # some datasets have no tracking_id....seems to be a problem with some BGC variables? Needs further investigation\n", | |
| " # should not be a deal breaker since we are looking for simulation level DOIs here. So if we find a single one per simulation we are good\n", | |
| "\n", | |
| "\n", | |
| " # Im gonna cheat here a bit. I am pretty sure that the dois are per simulation, so lets just get the firste one that has the tracking_id attribute\n", | |
| " datasets_pruned = {}\n", | |
| " for name, ds in datasets.items():\n", | |
| " if 'tracking_id' in ds.attrs:\n", | |
| " datasets_pruned[name] = ds\n", | |
| " break\n", | |
| " \n", | |
| " dois = [await get_doi_from_tracking_id(ds.attrs['tracking_id']) for ds in tqdm(datasets_pruned.values(), desc=\"Dataset Loop\") if 'tracking_id' in ds.attrs]\n", | |
| "\n", | |
| " # create new source_id key in dict or if source_id key already existst append to list\n", | |
| " key = '.'.join([source_id, experiment_id])\n", | |
| " print(key,set(dois))\n", | |
| " if key not in doi_dict:\n", | |
| " doi_dict[key] = set(dois)\n", | |
| " else:\n", | |
| " doi_dict[key] = doi_dict[key].union(set(dois))\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 38, | |
| "id": "1a1b17ab", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'ACCESS-ESM1-5.historical': {'doi:10.22033/ESGF/CMIP6.4272'},\n", | |
| " 'ACCESS-ESM1-5.ssp245': {'doi:10.22033/ESGF/CMIP6.4322'},\n", | |
| " 'CESM2-WACCM.historical': {'doi:10.22033/ESGF/CMIP6.10071'},\n", | |
| " 'CESM2-WACCM.ssp245': {'doi:10.22033/ESGF/CMIP6.10101'},\n", | |
| " 'CESM2.historical': {'doi:10.22033/ESGF/CMIP6.7627'},\n", | |
| " 'CESM2.ssp245': {'doi:10.22033/ESGF/CMIP6.7748'},\n", | |
| " 'CMCC-ESM2.historical': {'doi:10.22033/ESGF/CMIP6.13195'},\n", | |
| " 'CMCC-ESM2.ssp245': {'doi:10.22033/ESGF/CMIP6.13252'},\n", | |
| " 'CanESM5-CanOE.historical': {'doi:10.22033/ESGF/CMIP6.10260'},\n", | |
| " 'CanESM5-CanOE.ssp245': {'doi:10.22033/ESGF/CMIP6.10270'},\n", | |
| " 'CanESM5.historical': {'doi:10.22033/ESGF/CMIP6.3610'},\n", | |
| " 'CanESM5.ssp245': {'doi:10.22033/ESGF/CMIP6.3685'},\n", | |
| " 'GFDL-ESM4.historical': {'doi:10.22033/ESGF/CMIP6.8597'},\n", | |
| " 'GFDL-ESM4.ssp245': {'doi:10.22033/ESGF/CMIP6.8686'},\n", | |
| " 'MPI-ESM1-2-LR.historical': {'doi:10.22033/ESGF/CMIP6.6595'},\n", | |
| " 'MPI-ESM1-2-LR.ssp245': {'doi:10.22033/ESGF/CMIP6.6693'},\n", | |
| " 'UKESM1-0-LL.historical': {'doi:10.22033/ESGF/CMIP6.6113'},\n", | |
| " 'UKESM1-0-LL.ssp245': {'doi:10.22033/ESGF/CMIP6.6339'}}" | |
| ] | |
| }, | |
| "execution_count": 38, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "doi_dict" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "id": "0c192c5a", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 42, | |
| "id": "376eec5b", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = pd.DataFrame.from_dict(doi_dict, orient='index').reset_index().rename(columns={'index':'source_id_experiment_id', 0:'dois'})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 43, | |
| "id": "1af84ab4", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>source_id_experiment_id</th>\n", | |
| " <th>dois</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>ACCESS-ESM1-5.historical</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.4272</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>ACCESS-ESM1-5.ssp245</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.4322</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>CESM2-WACCM.historical</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.10071</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>CESM2-WACCM.ssp245</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.10101</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>CESM2.historical</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.7627</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>CESM2.ssp245</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.7748</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>CMCC-ESM2.historical</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.13195</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>CMCC-ESM2.ssp245</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.13252</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>CanESM5-CanOE.historical</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.10260</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>CanESM5-CanOE.ssp245</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.10270</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>CanESM5.historical</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.3610</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>CanESM5.ssp245</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.3685</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>GFDL-ESM4.historical</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.8597</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td>GFDL-ESM4.ssp245</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.8686</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td>MPI-ESM1-2-LR.historical</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.6595</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15</th>\n", | |
| " <td>MPI-ESM1-2-LR.ssp245</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.6693</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>16</th>\n", | |
| " <td>UKESM1-0-LL.historical</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.6113</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>17</th>\n", | |
| " <td>UKESM1-0-LL.ssp245</td>\n", | |
| " <td>doi:10.22033/ESGF/CMIP6.6339</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " source_id_experiment_id dois\n", | |
| "0 ACCESS-ESM1-5.historical doi:10.22033/ESGF/CMIP6.4272\n", | |
| "1 ACCESS-ESM1-5.ssp245 doi:10.22033/ESGF/CMIP6.4322\n", | |
| "2 CESM2-WACCM.historical doi:10.22033/ESGF/CMIP6.10071\n", | |
| "3 CESM2-WACCM.ssp245 doi:10.22033/ESGF/CMIP6.10101\n", | |
| "4 CESM2.historical doi:10.22033/ESGF/CMIP6.7627\n", | |
| "5 CESM2.ssp245 doi:10.22033/ESGF/CMIP6.7748\n", | |
| "6 CMCC-ESM2.historical doi:10.22033/ESGF/CMIP6.13195\n", | |
| "7 CMCC-ESM2.ssp245 doi:10.22033/ESGF/CMIP6.13252\n", | |
| "8 CanESM5-CanOE.historical doi:10.22033/ESGF/CMIP6.10260\n", | |
| "9 CanESM5-CanOE.ssp245 doi:10.22033/ESGF/CMIP6.10270\n", | |
| "10 CanESM5.historical doi:10.22033/ESGF/CMIP6.3610\n", | |
| "11 CanESM5.ssp245 doi:10.22033/ESGF/CMIP6.3685\n", | |
| "12 GFDL-ESM4.historical doi:10.22033/ESGF/CMIP6.8597\n", | |
| "13 GFDL-ESM4.ssp245 doi:10.22033/ESGF/CMIP6.8686\n", | |
| "14 MPI-ESM1-2-LR.historical doi:10.22033/ESGF/CMIP6.6595\n", | |
| "15 MPI-ESM1-2-LR.ssp245 doi:10.22033/ESGF/CMIP6.6693\n", | |
| "16 UKESM1-0-LL.historical doi:10.22033/ESGF/CMIP6.6113\n", | |
| "17 UKESM1-0-LL.ssp245 doi:10.22033/ESGF/CMIP6.6339" | |
| ] | |
| }, | |
| "execution_count": 43, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "id": "7a418a28", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df.to_csv('doi_dict.csv', index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "52dcfb32", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "cmip6", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.13.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment