Last active
November 4, 2025 09:57
-
-
Save csaybar/6406b172fdb9470d26bafe4bbf2d59df to your computer and use it in GitHub Desktop.
CLOUD_3D
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collection-level metadata for the "Global 3D Cloud Reconstruction Dataset".
# A single plain dict of strings, lists and nested dicts: identifiers,
# description, licensing/attribution, keywords, providers, tasks, extent,
# contacts, citation, links and legal notes.
TACO_METADATA = {
    # =========================================================================
    # CORE IDENTIFIERS
    # =========================================================================
    "id": "global-3d-cloud-reconstruction",
    "dataset_version": "1.0.0",
    "title": "Global 3D Cloud Reconstruction Dataset",
    # =========================================================================
    # DESCRIPTION
    # =========================================================================
    # Adjacent string literals are concatenated by Python into one string;
    # the explicit "\n\n" pieces are the paragraph breaks.
    "description": (
        "A large-scale AI-ready dataset for 3D cloud structure reconstruction from "
        "geostationary satellite imagery. Contains paired 2D multispectral imagery from "
        "GOES-16/ABI, Himawari-8/AHI, and MSG/SEVIRI satellites with co-located 3D vertical "
        "cloud property profiles from CloudSat Cloud Profiling Radar (CPR). "
        "\n\n"
        "The dataset provides three primary target variables across 80 vertical height levels: "
        "radar reflectivity (Z in dBZ), ice water content (IWC in g/m³), and effective radius "
        "(re in micrometers). "
        "\n\n"
        "Dataset is organized into three subsets: "
        "(1) Pre-training: for self-supervised learning, "
        "(2) Fine-tuning: spatiotemporally aligned image-profile pairs for supervised training, "
        "(3) Tropical Cyclones: benchmark pairs for evaluation on intense storms. "
        "\n\n"
        "All geostationary imagery is normalized to 11 common spectral channels with 256x256 pixel patches. "
        "Temporal coverage spans 2004-2025 for MSG, 2015-2022 for Himawari-8, and 2018-2024 for GOES-16."
    ),
    # =========================================================================
    # LICENSES & ATTRIBUTION
    # =========================================================================
    "licenses": ["CC-BY-4.0"],
    "attribution": (
        "This dataset contains modified EUMETSAT Meteosat High Rate SEVIRI level 1.5 data 2004-2025. "
        "CloudSat data courtesy NASA/JPL-Caltech. "
        "GOES-16 data courtesy NOAA. "
        "Himawari-8 data courtesy JMA. "
        "\n\n"
        # The reference below must stay word-for-word identical to the
        # "citation" field further down (same authors, year and venue).
        "When using this dataset, please cite: "
        "Ermis, S., Aybar, C., Freischem, L., Girtsou, S., Bintsi, K.-M., "
        "Diaz Salas-Porras, E., Eisinger, M., Jones, W., Jungbluth, A., & Tremblay, B. (2025). "
        "Global 3D Reconstruction of Clouds & Tropical Cyclones. "
        "Tackling Climate Change with Machine Learning Workshop at NeurIPS 2025."
    ),
    # =========================================================================
    # KEYWORDS
    # =========================================================================
    "keywords": [
        "cloud microphysics",
        "3d reconstruction",
        "geostationary satellites",
        "CloudSat",
        "radar reflectivity",
        "ice water content",
        "effective radius",
        "tropical cyclones",
        "deep learning",
        "remote sensing",
        "GOES-16",
        "Himawari-8",
        "MSG",
        "SEVIRI",
        "Earth observation",
        "climate",
        "machine learning"
    ],
    # =========================================================================
    # PROVIDERS
    # =========================================================================
    # Each provider entry has the same four keys: name, roles, url, description.
    "providers": [
        {
            "name": "European Space Agency (ESA)",
            "roles": ["licensor", "host"],
            "url": "https://www.esa.int",
            "description": "Principal sponsor and data host through Frontier Development Lab"
        },
        {
            "name": "EUMETSAT",
            "roles": ["producer"],
            "url": "https://www.eumetsat.int",
            "description": "Provider of MSG/SEVIRI High Rate Level 1.5 data (modified)"
        },
        {
            "name": "NASA CloudSat",
            "roles": ["producer"],
            "url": "https://cloudsat.atmos.colostate.edu/",
            "description": "Provider of CloudSat CPR vertical profile retrievals"
        },
        {
            "name": "NOAA",
            "roles": ["producer"],
            "url": "https://www.noaa.gov",
            "description": "Provider of GOES-16/ABI data"
        },
        {
            "name": "Japan Meteorological Agency (JMA)",
            "roles": ["producer"],
            "url": "https://www.jma.go.jp",
            "description": "Provider of Himawari-8/AHI data"
        },
        {
            "name": "Frontier Development Lab (FDL)",
            "roles": ["processor"],
            "url": "https://eslab.ai/",
            "description": "Dataset curation and processing"
        },
        {
            "name": "Universitat de València",
            "roles": ["processor"],
            "url": "https://www.uv.es",
            "description": "Dataset creation and processing infrastructure"
        },
        {
            "name": "University of Oxford",
            "roles": ["processor"],
            "url": "https://www.ox.ac.uk",
            "description": "Model development and validation"
        },
        {
            "name": "source.coop",
            "roles": ["host"],
            "url": "https://source.coop",
            "description": "Public data hosting via AWS S3"
        }
    ],
    # =========================================================================
    # TASKS
    # =========================================================================
    "tasks": ["reconstruction", "regression"],
    # =========================================================================
    # SPATIAL AND TEMPORAL EXTENT
    # =========================================================================
    "extent": {
        "spatial": [-180, -90, 180, 90],  # Global coverage
        "temporal": ["2004-01-01T00:00:00Z", "2025-12-31T23:59:59Z"]
    },
    # =========================================================================
    # CONTACTS
    # =========================================================================
    # NOTE(review): every "email" value below is the literal text
    # "[email protected]", which looks like an obfuscation placeholder rather than
    # a real address -- confirm and restore the real addresses before publishing.
    "contacts": [
        {
            "name": "Shirin Ermis",
            "organization": "University of Oxford",
            "email": "[email protected]",
            "role": "principal investigator"
        },
        {
            "name": "William Jones",
            "organization": "University of Oxford",
            "email": "[email protected]",
            "role": "principal investigator"
        },
        {
            "name": "Cesar Aybar",
            "organization": "Universitat de València",
            "email": "[email protected]",
            "role": "dataset curator"
        }
    ],
    # =========================================================================
    # CITATION
    # =========================================================================
    # Same reference as the one embedded at the end of "attribution".
    "citation": (
        "Ermis, S., Aybar, C., Freischem, L., Girtsou, S., Bintsi, K.-M., "
        "Diaz Salas-Porras, E., Eisinger, M., Jones, W., Jungbluth, A., & Tremblay, B. (2025). "
        "Global 3D Reconstruction of Clouds & Tropical Cyclones. "
        "Tackling Climate Change with Machine Learning Workshop at NeurIPS 2025."
    ),
    # =========================================================================
    # LINKS
    # =========================================================================
    # Each link entry has the same four keys: rel, href, type, title.
    "links": [
        {
            "rel": "self",
            "href": "https://data.source.coop/csaybar/3dclouds/tacollection.json",
            "type": "application/json",
            "title": "This collection"
        },
        {
            "rel": "root",
            "href": "https://data.source.coop/csaybar/3dclouds/",
            "type": "application/json",
            "title": "Root catalog"
        },
        {
            "rel": "documentation",
            "href": "https://github.com/csaybar/3d-clouds-dataset",
            "type": "text/html",
            "title": "Dataset documentation and notebooks"
        },
        {
            "rel": "license",
            "href": "https://creativecommons.org/licenses/by/4.0/",
            "type": "text/html",
            "title": "CC-BY-4.0 License"
        },
        {
            "rel": "derived_from",
            "href": "https://cloudsat.atmos.colostate.edu/",
            "type": "text/html",
            "title": "CloudSat Data"
        },
        {
            "rel": "derived_from",
            "href": "https://www.eumetsat.int/seviri",
            "type": "text/html",
            "title": "MSG/SEVIRI Data"
        }
    ],
    # =========================================================================
    # LEGAL
    # =========================================================================
    "legal": {
        "eumetsat_attribution": (
            "Contains modified EUMETSAT Meteosat High Rate SEVIRI level 1.5 data 2004-2025"
        ),
        "eumetsat_policy_compliance": (
            "Dataset derived from MSG/SEVIRI High Rate Level 1.5 data with latency >1 hour. "
            "Original numerical radiance data not retrievable from derived products. "
            "Compliant with EUMETSAT data policy as confirmed 2025-11-03."
        ),
        "cloudsat_policy": (
            "CloudSat data courtesy NASA/JPL-Caltech. "
            "Public domain data available through CloudSat Data Processing Center."
        )
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment