Skip to content

Instantly share code, notes, and snippets.

@csaybar
Last active November 4, 2025 09:57
Show Gist options
  • Select an option

  • Save csaybar/6406b172fdb9470d26bafe4bbf2d59df to your computer and use it in GitHub Desktop.

Select an option

Save csaybar/6406b172fdb9470d26bafe4bbf2d59df to your computer and use it in GitHub Desktop.
CLOUD_3D
# Canonical citation for the dataset, defined once so that the "attribution"
# and "citation" fields below cannot drift apart. (The two hand-copied versions
# previously disagreed on the workshop year: "NeurIPS 2024" vs "NeurIPS 2025",
# while both carried the publication year 2025.)
_TACO_CITATION = (
    "Ermis, S., Aybar, C., Freischem, L., Girtsou, S., Bintsi, K.-M., "
    "Diaz Salas-Porras, E., Eisinger, M., Jones, W., Jungbluth, A., & Tremblay, B. (2025). "
    "Global 3D Reconstruction of Clouds & Tropical Cyclones. "
    "Tackling Climate Change with Machine Learning Workshop at NeurIPS 2025."
)

# Collection-level metadata for the Global 3D Cloud Reconstruction dataset.
# This is a plain dict of strings, lists and nested dicts; long text fields are
# built with implicit adjacent-string concatenation, so every fragment keeps its
# own trailing space or explicit "\n\n" paragraph break.
TACO_METADATA = {
    # =========================================================================
    # CORE IDENTIFIERS
    # =========================================================================
    "id": "global-3d-cloud-reconstruction",
    "dataset_version": "1.0.0",
    "title": "Global 3D Cloud Reconstruction Dataset",
    # =========================================================================
    # DESCRIPTION
    # =========================================================================
    "description": (
        "A large-scale AI-ready dataset for 3D cloud structure reconstruction from "
        "geostationary satellite imagery. Contains paired 2D multispectral imagery from "
        "GOES-16/ABI, Himawari-8/AHI, and MSG/SEVIRI satellites with co-located 3D vertical "
        "cloud property profiles from CloudSat Cloud Profiling Radar (CPR). "
        "\n\n"
        "The dataset provides three primary target variables across 80 vertical height levels: "
        "radar reflectivity (Z in dBZ), ice water content (IWC in g/m³), and effective radius "
        "(re in micrometers). "
        "\n\n"
        "Dataset is organized into three subsets: "
        "(1) Pre-training: for self-supervised learning, "
        "(2) Fine-tuning: spatiotemporally aligned image-profile pairs for supervised training, "
        "(3) Tropical Cyclones: benchmark pairs for evaluation on intense storms. "
        "\n\n"
        "All geostationary imagery is normalized to 11 common spectral channels with 256x256 pixel patches. "
        "Temporal coverage spans 2004-2025 for MSG, 2015-2022 for Himawari-8, and 2018-2024 for GOES-16."
    ),
    # =========================================================================
    # LICENSES & ATTRIBUTION
    # =========================================================================
    "licenses": ["CC-BY-4.0"],
    # Data-source credits followed by the citation request. The citation text
    # is appended from _TACO_CITATION rather than retyped here.
    "attribution": (
        "This dataset contains modified EUMETSAT Meteosat High Rate SEVIRI level 1.5 data 2004-2025. "
        "CloudSat data courtesy NASA/JPL-Caltech. "
        "GOES-16 data courtesy NOAA. "
        "Himawari-8 data courtesy JMA. "
        "\n\n"
        "When using this dataset, please cite: "
        + _TACO_CITATION
    ),
    # =========================================================================
    # KEYWORDS
    # =========================================================================
    "keywords": [
        "cloud microphysics",
        "3d reconstruction",
        "geostationary satellites",
        "CloudSat",
        "radar reflectivity",
        "ice water content",
        "effective radius",
        "tropical cyclones",
        "deep learning",
        "remote sensing",
        "GOES-16",
        "Himawari-8",
        "MSG",
        "SEVIRI",
        "Earth observation",
        "climate",
        "machine learning",
    ],
    # =========================================================================
    # PROVIDERS
    # =========================================================================
    # Each entry has the same four keys: name, roles, url, description.
    "providers": [
        {
            "name": "European Space Agency (ESA)",
            "roles": ["licensor", "host"],
            "url": "https://www.esa.int",
            "description": "Principal sponsor and data host through Frontier Development Lab",
        },
        {
            "name": "EUMETSAT",
            "roles": ["producer"],
            "url": "https://www.eumetsat.int",
            "description": "Provider of MSG/SEVIRI High Rate Level 1.5 data (modified)",
        },
        {
            "name": "NASA CloudSat",
            "roles": ["producer"],
            "url": "https://cloudsat.atmos.colostate.edu/",
            "description": "Provider of CloudSat CPR vertical profile retrievals",
        },
        {
            "name": "NOAA",
            "roles": ["producer"],
            "url": "https://www.noaa.gov",
            "description": "Provider of GOES-16/ABI data",
        },
        {
            "name": "Japan Meteorological Agency (JMA)",
            "roles": ["producer"],
            "url": "https://www.jma.go.jp",
            "description": "Provider of Himawari-8/AHI data",
        },
        {
            "name": "Frontier Development Lab (FDL)",
            "roles": ["processor"],
            "url": "https://eslab.ai/",
            "description": "Dataset curation and processing",
        },
        {
            "name": "Universitat de València",
            "roles": ["processor"],
            "url": "https://www.uv.es",
            "description": "Dataset creation and processing infrastructure",
        },
        {
            "name": "University of Oxford",
            "roles": ["processor"],
            "url": "https://www.ox.ac.uk",
            "description": "Model development and validation",
        },
        {
            "name": "source.coop",
            "roles": ["host"],
            "url": "https://source.coop",
            "description": "Public data hosting via AWS S3",
        },
    ],
    # =========================================================================
    # TASKS
    # =========================================================================
    "tasks": ["reconstruction", "regression"],
    # =========================================================================
    # SPATIAL AND TEMPORAL EXTENT
    # =========================================================================
    "extent": {
        "spatial": [-180, -90, 180, 90],  # Global coverage
        # Start/end timestamps in UTC ("Z" suffix); the range matches the
        # 2004-2025 span quoted for MSG in the description above.
        "temporal": ["2004-01-01T00:00:00Z", "2025-12-31T23:59:59Z"],
    },
    # =========================================================================
    # CONTACTS
    # =========================================================================
    # NOTE(review): every "email" value below is the literal text
    # "[email protected]", which looks like an email-obfuscation placeholder
    # rather than a real address - confirm and restore the intended addresses.
    "contacts": [
        {
            "name": "Shirin Ermis",
            "organization": "University of Oxford",
            "email": "[email protected]",
            "role": "principal investigator",
        },
        {
            "name": "William Jones",
            "organization": "University of Oxford",
            "email": "[email protected]",
            "role": "principal investigator",
        },
        {
            "name": "Cesar Aybar",
            "organization": "Universitat de València",
            "email": "[email protected]",
            "role": "dataset curator",
        },
    ],
    # =========================================================================
    # CITATION
    # =========================================================================
    "citation": _TACO_CITATION,
    # =========================================================================
    # LINKS
    # =========================================================================
    # Each entry has the same four keys: rel, href, type, title.
    "links": [
        {
            "rel": "self",
            "href": "https://data.source.coop/csaybar/3dclouds/tacollection.json",
            "type": "application/json",
            "title": "This collection",
        },
        {
            "rel": "root",
            "href": "https://data.source.coop/csaybar/3dclouds/",
            "type": "application/json",
            "title": "Root catalog",
        },
        {
            "rel": "documentation",
            "href": "https://github.com/csaybar/3d-clouds-dataset",
            "type": "text/html",
            "title": "Dataset documentation and notebooks",
        },
        {
            "rel": "license",
            "href": "https://creativecommons.org/licenses/by/4.0/",
            "type": "text/html",
            "title": "CC-BY-4.0 License",
        },
        {
            "rel": "derived_from",
            "href": "https://cloudsat.atmos.colostate.edu/",
            "type": "text/html",
            "title": "CloudSat Data",
        },
        {
            "rel": "derived_from",
            "href": "https://www.eumetsat.int/seviri",
            "type": "text/html",
            "title": "MSG/SEVIRI Data",
        },
    ],
    # =========================================================================
    # LEGAL
    # =========================================================================
    "legal": {
        "eumetsat_attribution": (
            "Contains modified EUMETSAT Meteosat High Rate SEVIRI level 1.5 data 2004-2025"
        ),
        "eumetsat_policy_compliance": (
            "Dataset derived from MSG/SEVIRI High Rate Level 1.5 data with latency >1 hour. "
            "Original numerical radiance data not retrievable from derived products. "
            "Compliant with EUMETSAT data policy as confirmed 2025-11-03."
        ),
        "cloudsat_policy": (
            "CloudSat data courtesy NASA/JPL-Caltech. "
            "Public domain data available through CloudSat Data Processing Center."
        ),
    },
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment