Retrieve HRRR via Open Data on AWS and Zarr (forked from barronh/hrrr.py)
__doc__ = """
# Overview
This example shows how to open High Resolution Rapid Refresh (HRRR) meteorology
"surfaces." These surfaces are slices in vertical space to create single level
maps of variables like temperature, wind speed components, pressure, etc. Each
surface is opened using the Zarr archive described in Amazon's Open Data registry
at https://registry.opendata.aws/noaa-hrrr-pds/.
# Examples
## HRRR surface temperature
This example retrieves all surface temperature values and prints summary
statistics. Note that the original values are stored as float16 and are cast to
float32 so that the statistics are computed in higher precision.
ds = open_hrrrsfc('2023-11-23T20', varkeys=['TMP'])
df = ds['TMP'].to_dataframe().astype('f')
print(df.describe().round(2).to_csv())
# Output:
# ,TMP
# count,1905141.0
# mean,286.27
# std,10.6
# min,251.38
# 25%,278.25
# 50%,287.75
# 75%,295.5
# max,310.75
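## HRRR 80 m winds
The same call pattern works for other layers and variables. This illustrative
sketch (output not reproduced here) combines the 80 m wind components into a
wind speed; the UGRD/VGRD names and the 80m_above_ground layer are the same
ones used by the self-test at the bottom of this file.
ds = open_hrrrsfc('2023-11-20T20', varkeys=['UGRD', 'VGRD'], layer='80m_above_ground')
wspd = (ds['UGRD'].astype('f')**2 + ds['VGRD'].astype('f')**2)**0.5
print(wspd.to_dataframe(name='WSPD').describe().round(2).to_csv())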
# Requirements:
- Python3
- s3fs
- xarray
- pandas
- zarr>=3.06
# Install requirements with pip
pip install s3fs pandas xarray 'zarr>=3.06'
"""
__version__ = '0.2.0'
def open_hrrrsfc(date, varkeys, layer='surface', simtype='anl', verbose=0):
"""
Open HRRR hourly meteorology surface from a date (including hour) and variable
keys. Optionally, specify the layer and simulation type (anl: analysis or
fcst: forecast).
Arguments
---------
date : str or datetime
Any date construct accepted by pandas.to_datetime. Should include hour.
varkeys : str or list
List of variables from HRRR layer see https://hrrrzarr.s3.amazonaws.com/index.html
layer : str
2-dimensional slice name (default surface):
slice: surface, mean_sea_level, top_of_atmosphere
integration: entire_atmosphere_single_layer
*mb (e.g., 1000mb): 925, 850, 700, 500, 300, 250
*m_above_ground (e.g., 1m_above_ground): 2, 8, 10, 80, 1000, 4000
0_*m_above_ground: 500, 1000, 3000, 6000,
for more options see an example day
https://hrrrzarr.s3.amazonaws.com/index.html#sfc/20231127/20231127_22z_anl.zarr/
simtype : str
Options: anl (analysis) or fcst (forecast)
Returns
-------
ds : xarray.Dataset
Dataset with all varkeys specified. If using dask, data will be retrieved
on demand, which allows for subsetting so as not to require all data.
"""
    import s3fs
    import fsspec
    import xarray as xr
    import pandas as pd
    import zarr
    # s3 = s3fs.S3FileSystem(anon=True, asynchronous=True)
    # s3 = fsspec.filesystem("s3", asynchronous=True, )
    s3opts = dict(anon=True)
    # Copied from projparams.json
    # https://hrrrzarr.s3.amazonaws.com/grid/projparams.json
    # proj_kw = {
    #     'proj': 'lcc', 'a': 6371229, 'b': 6371229,
    #     'lon_0': 262.5, 'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5
    # }
    # proj = pyproj.Proj(proj_kw)
    # projstr = proj.srs
    # crsattrs = proj.crs.to_cf()
    # Hard coded to eliminate pyproj dependency
    crs_wkt = (
        'PROJCRS["unknown",BASEGEOGCRS["unknown",DATUM["unknown",'
        + 'ELLIPSOID["unknown",6371229,0,LENGTHUNIT["metre",1,ID["EPSG",9001]]]],'
        + 'PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433],'
        + 'ID["EPSG",8901]]],CONVERSION["unknown",'
        + 'METHOD["Lambert Conic Conformal (2SP)",ID["EPSG",9802]],'
        + 'PARAMETER["Latitude of false origin",38.5,'
        + 'ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8821]],'
        + 'PARAMETER["Longitude of false origin",262.5,'
        + 'ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8822]],'
        + 'PARAMETER["Latitude of 1st standard parallel",38.5,'
        + 'ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8823]],'
        + 'PARAMETER["Latitude of 2nd standard parallel",38.5,'
        + 'ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8824]],'
        + 'PARAMETER["Easting at false origin",0,LENGTHUNIT["metre",1],'
        + 'ID["EPSG",8826]],PARAMETER["Northing at false origin",0,'
        + 'LENGTHUNIT["metre",1],ID["EPSG",8827]]],CS[Cartesian,2],'
        + 'AXIS["(E)",east,ORDER[1],LENGTHUNIT["metre",1,ID["EPSG",9001]]],'
        + 'AXIS["(N)",north,ORDER[2],LENGTHUNIT["metre",1,ID["EPSG",9001]]]]'
    )
    crs_proj4 = (
        '+proj=lcc +lat_0=38.5 +lon_0=262.5 +lat_1=38.5 +lat_2=38.5 +x_0=0 +y_0=0'
        + ' +R=6371229 +units=m +no_defs'
    )
    crsattrs = dict(
        grid_mapping_name='lambert_conformal_conic',
        semi_major_axis=6371229.0, semi_minor_axis=6371229.0,
        longitude_of_prime_meridian=0.0, standard_parallel=(38.5, 38.5),
        latitude_of_projection_origin=38.5, longitude_of_central_meridian=262.5,
        false_easting=0.0, false_northing=0.0,
        crs_proj4=crs_proj4, crs_wkt=crs_wkt
    )
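    # Optional sketch (not used by this function; requires pyproj): the proj4
    # string above can translate longitude/latitude into the grid's projected
    # x/y metres, e.g. to select a nearest grid cell from the returned dataset:
    #   import pyproj
    #   lcc = pyproj.Proj(crs_proj4)
    #   x0, y0 = lcc(-97.5, 38.5)  # lon, lat -> x, y in metres
    #   # ds['TMP'].sel(x=x0, y=y0, method='nearest')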
    if isinstance(varkeys, str):
        varkeys = [varkeys]
    date = pd.to_datetime(date)
    # Root of the Zarr store for the requested date/hour and simulation type
    dpath = f's3://{date:hrrrzarr/sfc/%Y%m%d/%Y%m%d_%Hz}_{simtype}.zarr'
    if verbose > 0:
        print(dpath)
    # Shared grid store provides the 2-d latitude/longitude coordinates
    gridpath = 's3://hrrrzarr/grid/HRRR_chunk_index.zarr'
    if verbose > 0:
        print(gridpath)
    gridds = xr.open_zarr(gridpath, storage_options=s3opts)
    vards = [gridds[['latitude', 'longitude']]]
    for varkey in varkeys:
        # Each variable has a metadata group and a nested data group
        metapath = f'{dpath}/{layer}/{varkey}'
        if verbose > 0:
            print(metapath)
        metaf = xr.open_zarr(metapath, storage_options=s3opts)
        datapath = f'{dpath}/{layer}/{varkey}/{layer}'
        if verbose > 0:
            print(datapath)
        dataf = xr.open_zarr(datapath, storage_options=s3opts)
        ds = xr.merge([metaf, dataf])
        vards.append(ds.rename(projection_x_coordinate='x', projection_y_coordinate='y'))
    ds = xr.merge(vards)
    # Attach a CF-style grid mapping variable so the projection travels with the data
    ds['crs'] = xr.DataArray(0, name='crs', dims=(), attrs=crsattrs)
    return ds
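
# Commented sketch (referenced from the docstring above): because xarray opens
# the Zarr stores lazily, a subset can be selected before any values are
# downloaded; with dask installed, only the chunks overlapping the selection
# are fetched from S3. For example:
#   ds = open_hrrrsfc('2023-11-23T20', varkeys=['TMP'])
#   sub = ds['TMP'].isel(x=slice(0, 200), y=slice(0, 200)).astype('f')
#   print(float(sub.mean()))
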
if __name__ == '__main__':
    import numpy as np
    print('Testing 20231120T20 80m_above_ground UGRD VGRD')
    try:
        ds = open_hrrrsfc('20231120T20', ['UGRD', 'VGRD'], layer='80m_above_ground')
        print(ds)
        print('Test succeeded: Review output above to confirm')
        print('Checks below test basic expectations:')
        chks = {
            'xmin': (ds['x'].min(), -2697520.14252193),
            'xmax': (ds['x'].max(), 2696479.85747807),
            'ymin': (ds['y'].min(), -1587306.15255666),
            'ymax': (ds['y'].max(), 1586693.84744334),
            'UGRD shape': (ds['UGRD'].shape, (1059, 1799)),
            'UGRD units': (ds['UGRD'].units.strip(), 'm s-1'),
            'VGRD shape': (ds['VGRD'].shape, (1059, 1799)),
            'VGRD units': (ds['VGRD'].units.strip(), 'm s-1'),
            'forecast_reference_time': (
                ds['forecast_reference_time'].astype('d'), 1.7005104e+18
            )
        }
        for ckname, (got, val) in chks.items():
            if isinstance(got, str):
                same = got == val
            else:
                same = np.allclose([got], [val])
            print(' -', ckname, {True: 'Pass', False: 'Fail'}[same])
    except Exception as e:
        # Report the error rather than swallowing it silently
        print('**Test failed**:', repr(e))