Skip to content

Instantly share code, notes, and snippets.

@stephenpascoe
Created February 5, 2013 16:37
Show Gist options
  • Select an option

  • Save stephenpascoe/4715666 to your computer and use it in GitHub Desktop.

Select an option

Save stephenpascoe/4715666 to your computer and use it in GitHub Desktop.
esgf-pyclient demo notebook from GO-ESSP-TECH telco on 2013-02-05
{
"metadata": {
"name": "esgf-pyclient-demo"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# esgf-pyclient demo\n",
"\n",
"## Basic dataset searches\n",
"\n",
"**NOTE**: The URL below is for use with the upcoming 0.1.1 release. With the current release, append \"/search\" to the end of the URL."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from pyesgf.search import SearchConnection\n",
"conn = SearchConnection('http://esgf-index1.ceda.ac.uk/esg-search')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ctx = conn.new_context(project='CMIP5', experiment='historicalGHG', replica=False)\n",
"print 'Datasets in historicalGHG = ', ctx.hit_count"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Datasets in historicalGHG = "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1306\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Working with facets"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"models = ctx.facet_counts['model']\n",
"print models"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"{u'IPSL-CM5A-LR': 213, u'CESM1(CAM5.1,FV2)': 29, u'GFDL-CM3': 32, u'MIROC-ESM': 44, u'CNRM-CM5': 155, u'CanESM2': 182, u'GISS-E2-H': 60, u'BNU-ESM': 11, u'MIROC-ESM-CHEM': 17, u'FGOALS-g2': 10, u'CCSM4': 85, u'HadGEM2-ES': 127, u'GISS-E2-R': 65, u'CSIRO-Mk3.6.0': 85, u'BCC-CSM1.1': 35, u'GFDL-ESM2M': 24, u'MRI-CGCM3': 21, u'NorESM1-M': 63, u'IPSL-CM5A-MR': 48}\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ctx2 = ctx.constrain(realm=['atmos', 'land'])\n",
"print 'Hits = ', ctx2.hit_count\n",
"print ctx2.facet_counts.keys()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Hits = "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"563\n",
"[u'cmor_table', u'product', u'realm', u'institute', u'cf_standard_name', u'experiment_family', u'variable_long_name', u'project', u'source_id', u'time_frequency', u'experiment', u'variable', u'model', u'ensemble']\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# A slightly more concise way of getting available facets. This removes facets names that are\n",
"# completely constrained, e.g. \"project\"\n",
"[(k, len(v)) for (k, v) in ctx2.get_facet_options().items()]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 5,
"text": [
"[(u'product', 2),\n",
" (u'realm', 2),\n",
" (u'institute', 15),\n",
" (u'cf_standard_name', 161),\n",
" (u'variable_long_name', 172),\n",
" (u'cmor_table', 10),\n",
" (u'time_frequency', 5),\n",
" (u'variable', 168),\n",
" (u'model', 19),\n",
" (u'ensemble', 11)]"
]
}
],
"prompt_number": 5
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Getting results documents"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ctx3 = ctx2.constrain(ensemble='r1i1p1', institute='MOHC')\n",
"ctx3.hit_count"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 6,
"text": [
"8"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"results = ctx3.search()\n",
"for r in results:\n",
" print r.dataset_id"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.day.atmos.day.r1i1p1.v20111102|cmip3.dkrz.de\n",
"cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.day.atmos.day.r1i1p1.v20110327|cmip3.dkrz.de\n",
"cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.mon.atmos.Amon.r1i1p1.v20111102|cmip3.dkrz.de\n",
"cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.mon.atmos.Amon.r1i1p1.v20110330|cmip3.dkrz.de\n",
"cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.mon.land.Lmon.r1i1p1.v20111102|cmip3.dkrz.de\n",
"cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.day.atmos.day.r1i1p1.v20111102|cmip-dn1.badc.rl.ac.uk\n",
"cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.mon.atmos.Amon.r1i1p1.v20111102|cmip-dn1.badc.rl.ac.uk\n",
"cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.mon.land.Lmon.r1i1p1.v20111102|cmip-dn1.badc.rl.ac.uk\n"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"results[5].json"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 10,
"text": [
"{u'cf_standard_name': [u'specific_humidity',\n",
" u'precipitation_flux',\n",
" u'air_pressure_at_sea_level',\n",
" u'wind_speed',\n",
" u'air_temperature',\n",
" u'air_temperature',\n",
" u'air_temperature'],\n",
" u'cmor_table': [u'day'],\n",
" u'data_node': u'cmip-dn1.badc.rl.ac.uk',\n",
" u'dataset_id_template_': [u'cmip5.%(product)s.%(institute)s.%(model)s.%(experiment)s.%(time_frequency)s.%(realm)s.%(cmor_table)s.%(ensemble)s'],\n",
" u'description': [u'HadGEM2-ES model output prepared for CMIP5 GHG-only'],\n",
" u'drs_id': [u'cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.day.atmos.day.r1i1p1'],\n",
" u'ensemble': [u'r1i1p1'],\n",
" u'experiment': [u'historicalGHG'],\n",
" u'experiment_family': [u'All', u'Historical'],\n",
" u'forcing': [u'GHG (GHG = CO2, N2O, CH4, CFCs)'],\n",
" u'format': [u'netCDF, CF-1.4'],\n",
" u'id': u'cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.day.atmos.day.r1i1p1.v20111102|cmip-dn1.badc.rl.ac.uk',\n",
" u'index_node': u'esgf-index1.ceda.ac.uk',\n",
" u'instance_id': u'cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.day.atmos.day.r1i1p1.v20111102',\n",
" u'institute': [u'MOHC'],\n",
" u'latest': True,\n",
" u'master_id': u'cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.day.atmos.day.r1i1p1',\n",
" u'metadata_format': u'THREDDS',\n",
" u'model': [u'HadGEM2-ES'],\n",
" u'number_of_aggregations': 0,\n",
" u'number_of_files': 280,\n",
" u'product': [u'output1'],\n",
" u'project': [u'CMIP5'],\n",
" u'realm': [u'atmos'],\n",
" u'replica': False,\n",
" u'score': [1.0],\n",
" u'size': 108243320840,\n",
" u'time_frequency': [u'day'],\n",
" u'timestamp': u'2012-03-20T22:37:56Z',\n",
" u'title': u'project=CMIP5 / IPCC Fifth Assessment Report, model=HadGEM2-ES, Met Office Hadley Centre, experiment=historical, well-mixed greenhouse gases only, time_frequency=day, modeling realm=atmos, ensemble=r1i1p1, version=20111102',\n",
" u'type': u'Dataset',\n",
" u'url': [u'http://cmip-dn1.badc.rl.ac.uk/thredds/esgcet/7/cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.day.atmos.day.r1i1p1.v20111102.xml#cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.day.atmos.day.r1i1p1.v20111102|application/xml+thredds|Catalog'],\n",
" u'variable': [u'huss',\n",
" u'pr',\n",
" u'psl',\n",
" u'sfcWind',\n",
" u'tas',\n",
" u'tasmax',\n",
" u'tasmin'],\n",
" u'variable_long_name': [u'Near-Surface Specific Humidity',\n",
" u'Precipitation',\n",
" u'Sea Level Pressure',\n",
" u'Daily-Mean Near-Surface Wind Speed',\n",
" u'Near-Surface Air Temperature',\n",
" u'Daily Maximum Near-Surface Air Temperature',\n",
" u'Daily Minimum Near-Surface Air Temperature'],\n",
" u'version': u'20111102'}"
]
}
],
"prompt_number": 10
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Finding files"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"files = results[0].file_context().search()\n",
"f1 = files[0]\n",
"print 'File id = \\n ', f1.file_id\n",
"print 'Download URL = \\n ', f1.download_url\n",
"print 'OPeNDAP URL = \\n ', f1.opendap_url"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"File id = \n",
" cmip5.output1.MOHC.HadGEM2-ES.historicalGHG.day.atmos.day.r1i1p1.v20111102.huss_day_HadGEM2-ES_historicalGHG_r1i1p1_18591201-18691130.nc_0|cmip3.dkrz.de\n",
"Download URL = \n",
" http://cmip3.dkrz.de/thredds/fileServer/cmip5/output1/MOHC/HadGEM2-ES/historicalGHG/day/atmos/day/r1i1p1/v20111102/huss/huss_day_HadGEM2-ES_historicalGHG_r1i1p1_18591201-18691130.nc\n",
"OPeNDAP URL = \n",
" http://cmip3.dkrz.de/thredds/dodsC/cmip5/output1/MOHC/HadGEM2-ES/historicalGHG/day/atmos/day/r1i1p1/v20111102/huss/huss_day_HadGEM2-ES_historicalGHG_r1i1p1_18591201-18691130.nc\n"
]
}
],
"prompt_number": 11
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## wget script support\n",
"This feature will be available in the next release (0.1.1)."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Build the download script for the first dataset returned by the search above.\n",
"# (`r1` was never defined in this notebook, so index into `results` directly.)\n",
"script = results[0].file_context().get_download_script()\n",
"print script"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Getting login credentials\n",
"\n",
"**NOTE**: This feature requires MyProxyClient (http://pypi.python.org/pypi/MyProxyClient)\n",
"\n",
"See http://esgf-pyclient.readthedocs.org/en/latest/logon.html#module-pyesgf-logon"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import os\n",
"\n",
"from pyesgf.logon import LogonManager\n",
"\n",
"# Read the credentials from the environment so they are defined on a fresh kernel\n",
"# and are never stored in the notebook. Set ESGF_OPENID and ESGF_PASSWORD before\n",
"# starting the notebook server.\n",
"openid = os.environ['ESGF_OPENID']\n",
"password = os.environ['ESGF_PASSWORD']\n",
"\n",
"lm = LogonManager()\n",
"lm.logon_with_openid(openid, password)"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment