Created
December 2, 2025 21:12
-
-
Save bmorris3/12e1be27b7a934395be795ba91be374b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "id": "622a4741-d0aa-4ba0-9aba-6e27417d20c6", | |
| "metadata": {}, | |
| "source": [ | |
| "# MAESTRO tests\n", | |
| "### fetch subsets of the remote ND opacity array" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "ac9161f5-4cf2-4713-8ff3-d8407a172627", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import os\n", | |
| "import fsspec\n", | |
| "import xarray as xr\n", | |
| "import zarr\n", | |
| "from zarr.experimental.cache_store import CacheStore\n", | |
| "\n", | |
| "uri = \"s3://stpubdata/mast/hlsp/maestro/12C-H4.zarr\"\n", | |
| "\n", | |
| "# access to a remote S3 bucket as a file system\n", | |
| "fs = fsspec.filesystem('s3', anon=True, asynchronous=True)\n", | |
| "remote_store = zarr.storage.FsspecStore(fs, read_only=True, path=uri)\n", | |
| "\n", | |
| "# create a local cache for reads from the remote array; the local zarr store\n", | |
| "# gets the same name as the remote zarr array\n", | |
| "cache_store = zarr.storage.LocalStore(os.path.basename(uri))\n", | |
| "\n", | |
| "# prepare a cache store that links the remote data to the local cache:\n", | |
| "max_size_gb = 20 # do not use >20 GB of local disk space for the cache\n", | |
| "cache = CacheStore(\n", | |
| " store=remote_store,\n", | |
| " cache_store=cache_store,\n", | |
| " max_size=max_size_gb * 1024 ** 3\n", | |
| ")\n", | |
| "\n", | |
| "# open the dataset with xarray, using the zarr engine:\n", | |
| "ds = xr.open_dataset(cache, engine='zarr')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "4ed1cfa7-d73e-40f0-92f2-8f57482a5141", | |
| "metadata": {}, | |
| "source": [ | |
| "We now have access to the metadata and coordinates. Let's preview the dataset:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "5a27cb0d-628a-4c90-a5b6-f2c309205648", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "ds" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "00e010f9-0d5d-4b8c-bc01-d4b678d92a06", | |
| "metadata": {}, | |
| "source": [ | |
| "The preview above shows some of the coordinate values (pressure, temperature, wavenumber) and the dimensions along which the `csx` data variable is indexed. Let's choose one pressure and two temperatures from the (p, T) grid shown above:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "a4147685-6e29-4752-b6f8-faaaa2f13909", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "grid_point = dict(\n", | |
| " temperature=[100, 1000],\n", | |
| " pressure=3e-6,\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "87ffa872-a1d8-4c83-abb7-07a389498784", | |
| "metadata": {}, | |
| "source": [ | |
| "We can remotely index the file at that grid point like so:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "ed1554d1-3915-49d6-b6ec-eb183ce675bd", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "sub_array = ds.sel(grid_point)\n", | |
| "\n", | |
| "sub_array" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "2f15e726-8432-4a6d-9468-dacf243c3ecb", | |
| "metadata": {}, | |
| "source": [ | |
| "`sub_array` above is a reference to the indexed portion of the remote ND array. At this point, it hasn't been downloaded locally.\n", | |
| "\n", | |
| "The remote download is initiated when you ask for the underlying `csx` values. You can retrieve the data by calling `sub_array.csx.values` or `sub_array.csx.to_numpy()`.\n", | |
| "\n", | |
| "The first time you retrieve the values, the download takes on the order of 10 seconds." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "94752177-de12-4c08-82f2-595c3ca99796", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "%%time\n", | |
| "\n", | |
| "sub_array.csx.values" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "323076e1-b3e7-4209-a02e-040c478ff371", | |
| "metadata": {}, | |
| "source": [ | |
| "The retrieved values are stored in a local cache at `./12C-H4.zarr`, so repeated access is fast:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "7d2dcff9-aa47-4437-8acc-de6270157de2", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "%%time\n", | |
| "\n", | |
| "sub_array.csx.values" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "6571d577-200e-4fad-8473-922a41dc5ce8", | |
| "metadata": {}, | |
| "source": [ | |
| "Let's see what we got:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "de5a3d87-69c2-4e66-848c-a0d975bbdcb1", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import matplotlib.pyplot as plt\n", | |
| "import astropy.units as u\n", | |
| "import numpy as np\n", | |
| "\n", | |
| "fig, ax = plt.subplots(dpi=200)\n", | |
| "ax.loglog(\n", | |
| " sub_array.wavenumber / u.cm, \n", | |
| " sub_array.csx, \n", | |
| " label=[f\"{x:.0f}\" for x in sub_array.temperature.values]\n", | |
| ")\n", | |
| "ax.legend(title='Temp. [K]', alignment='left', loc='lower left')\n", | |
| "\n", | |
| "def invert(x):\n", | |
| " return np.where(\n", | |
| " x != 0,\n", | |
| " (x / u.cm).to(u.um, u.spectral()).value,\n", | |
| " 0\n", | |
| " )\n", | |
| "\n", | |
| "secax = ax.secondary_xaxis('top', functions=(invert, invert))\n", | |
| "secax.set_xlabel('Wavelength [µm]')\n", | |
| "\n", | |
| "ax.set(\n", | |
| " xlabel='Wavenumber [cm-1]',\n", | |
| " ylabel='Cross section [cm2]',\n", | |
| " title='MAESTRO: 12C-H4',\n", | |
| ")\n", | |
| "plt.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "58cfea61-6f54-4ce4-a241-82323d4304f6", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.12.9" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment