ReferenceMaker/ReferenceFileSystem GRIB2/HRRR Example

Requires development version of fsspec_reference_maker

  • pip install --user git+https://github.com/intake/fsspec-reference-maker
In [1]:
import json
import fsspec
from fsspec_reference_maker.grib2 import scan_grib
import os

Create reference jsons

In [ ]:
# 1GB of data files, forming a time-series
filter={'typeOfLevel': 'heightAboveGround', 'level': 2}

files = ['s3://noaa-hrrr-bdp-pds/hrrr.20190101/conus/hrrr.t22z.wrfsfcf01.grib2',
         's3://noaa-hrrr-bdp-pds/hrrr.20190101/conus/hrrr.t23z.wrfsfcf01.grib2',
         's3://noaa-hrrr-bdp-pds/hrrr.20190102/conus/hrrr.t00z.wrfsfcf01.grib2',
         's3://noaa-hrrr-bdp-pds/hrrr.20190102/conus/hrrr.t01z.wrfsfcf01.grib2',
         's3://noaa-hrrr-bdp-pds/hrrr.20190102/conus/hrrr.t02z.wrfsfcf01.grib2',
         's3://noaa-hrrr-bdp-pds/hrrr.20190102/conus/hrrr.t03z.wrfsfcf01.grib2',
         's3://noaa-hrrr-bdp-pds/hrrr.20190102/conus/hrrr.t04z.wrfsfcf01.grib2',
         's3://noaa-hrrr-bdp-pds/hrrr.20190102/conus/hrrr.t05z.wrfsfcf01.grib2',
         's3://noaa-hrrr-bdp-pds/hrrr.20190102/conus/hrrr.t06z.wrfsfcf01.grib2']
so = {"anon": True, "default_cache_type": "readahead"}
common = ['time', 'step', 'latitude', 'longitude', 'valid_time']
In [ ]:
def create_jsons(files):
    for url in files:
        out = scan_grib(url, common, so, inline_threashold=100, filter=filter)
        with open(os.path.basename(url).replace("grib2", "json"), "w") as f:
            json.dump(out, f)
In [6]:
create_jsons(files)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/tmp/ipykernel_252/2518840911.py in <module>
----> 1 create_jsons(files)

NameError: name 'create_jsons' is not defined

Use MultiZarrToZarr() to combine into single reference

In [7]:
from glob import glob
json_list = sorted(glob('./hrrr.t*.json'))
In [10]:
from fsspec_reference_maker.combine import MultiZarrToZarr

mzz = MultiZarrToZarr(json_list, remote_protocol="s3", remote_options={"anon": True},
                        xarray_concat_args={"dim": 'time'})
mzz.translate("hrrr.total.json")

Access data and plot

In [23]:
import xarray as xr
from fsspec_reference_maker.grib2 import GRIBCodec
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import pandas as pd
In [3]:
fs = fsspec.filesystem('reference', fo='./hrrr.total.json', remote_protocol='s3', remote_options={'anon':True})
m = fs.get_mapper('')
ds = xr.open_dataset(m, engine='zarr')
/tmp/ipykernel_68/1688709781.py:3: RuntimeWarning: Failed to open Zarr store with consolidated metadata, falling back to try reading non-consolidated metadata. This is typically much slower for opening a dataset. To silence this warning, consider:
1. Consolidating metadata in this existing store with zarr.consolidate_metadata().
2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or
3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata.
  ds = xr.open_dataset(m, engine='zarr')
In [4]:
ds
Out[4]:
<xarray.Dataset>
Dimensions:            (y: 1059, x: 1799, time: 9)
Coordinates:
    heightAboveGround  float64 1e+03
    latitude           (y, x) float64 ...
    longitude          (y, x) float64 ...
    step               timedelta64[ns] 01:00:00
  * time               (time) datetime64[us] 2019-01-02 ... 2019-01-01T23:00:00
    valid_time         (time) datetime64[ns] 2019-01-02T01:00:00 ... NaT
Dimensions without coordinates: y, x
Data variables:
    refd               (time, y, x) float32 ...
    si10               (time, y, x) float32 ...
    u                  (time, y, x) float32 ...
    u10                (time, y, x) float32 ...
    unknown            (time, y, x) float32 ...
    v                  (time, y, x) float32 ...
    v10                (time, y, x) float32 ...
Attributes:
    Conventions:             CF-1.7
    GRIB_centre:             kwbc
    GRIB_centreDescription:  US National Weather Service - NCEP 
    GRIB_edition:            2
    GRIB_subCentre:          0
    history:                 2021-08-09T15:34 GRIB to CDM+CF via cfgrib-0.9.9...
    institution:             US National Weather Service - NCEP 
In [25]:
i = 0 # Time index

fig = plt.figure(figsize=(6,6))
ax = plt.subplot(111, projection=ccrs.LambertConformal())
p = ax.pcolormesh(ds.longitude, ds.latitude, ds.si10.isel(time=i), transform=ccrs.PlateCarree())
ax.coastlines()
ax.add_feature(cfeature.STATES)

time = pd.to_datetime(ds.time[i].data).strftime("%Y-%m-%d %H%M UTC")

ax.set_title(f"10m Wind Speed\n{time}")
plt.colorbar(p, orientation='horizontal', label='m/s')
Out[25]:
<matplotlib.colorbar.Colorbar at 0x7f178ffeb790>
In [ ]: