Read from kerchunked GRIB2 files, write to Zarr
# Open the HRRR "best time series" dataset through a kerchunk reference
# file stored on S3, exposing the GRIB2 chunks as a virtual Zarr store.
import xarray as xr
import fsspec
import hvplot.xarray
from kerchunk.grib2 import scan_grib  # needed here only for grib compression codec

# Location of the kerchunk JSON reference file and the S3 access options:
# the reference JSON lives in a requester-pays bucket, while the GRIB2
# chunks it points at are anonymously readable.
reference_url = 's3://esip-qhub-public/noaa/hrrr/hrrr_best.json'
reference_opts = {'requester_pays': True, 'skip_instance_cache': True}
chunk_opts = {'anon': True}

reference_fs = fsspec.filesystem(
    "reference",
    fo=reference_url,
    ref_storage_args=reference_opts,
    remote_protocol='s3',
    remote_options=chunk_opts,
)

# A mapper over the reference filesystem behaves like a Zarr store, so
# xarray can open it with the zarr engine; one dask chunk per time step.
mapper = reference_fs.get_mapper("")
ds = xr.open_dataset(
    mapper,
    engine="zarr",
    backend_kwargs=dict(consolidated=False),
    chunks={'valid_time': 1},
)
/home/rsignell/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/core/dataset.py:408: UserWarning: Specified Dask chunks (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) would separate on disks chunk shape 177 for dimension valid_time. This could degrade performance. Consider rechunking after loading instead. _check_chunks_compatibility(var, output_chunks, preferred_chunks)
# Drop the scalar/auxiliary coordinates left over from the GRIB scan and
# promote 'valid_time' to the record dimension name 'time'.
# ``Dataset.drop`` is deprecated (and removed in newer xarray releases);
# ``drop_vars`` is the supported, unambiguous spelling.
ds = ds.drop_vars(['time', 'step', 'heightAboveGround']).rename({'valid_time': 'time'})
ds
<xarray.Dataset> Dimensions: (time: 177, y: 1059, x: 1799) Coordinates: latitude (y, x) float64 dask.array<chunksize=(1059, 1799), meta=np.ndarray> longitude (y, x) float64 dask.array<chunksize=(1059, 1799), meta=np.ndarray> * time (time) datetime64[us] 2022-02-20T19:00:00 ... 2022-02-28T03:00:00 Dimensions without coordinates: y, x Data variables: d2m (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> pt (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> r2 (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> sh2 (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> si10 (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> t2m (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> u10 (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> unknown (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> v10 (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> Attributes: Conventions: CF-1.7 GRIB_centre: kwbc GRIB_centreDescription: US National Weather Service - NCEP GRIB_edition: 2 GRIB_subCentre: 0 history: 2022-02-27T19:09 GRIB to CDM+CF via cfgrib-0.9.9... institution: US National Weather Service - NCEP
ds.isel(time=slice(-3,-1)).to_zarr('foo.zarr', 'w')
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Input In [5], in <module> ----> 1 ds.isel(time=slice(-3,-1)).to_zarr('foo.zarr', 'w') File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/core/dataset.py:2035, in Dataset.to_zarr(self, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, storage_options) 2032 if encoding is None: 2033 encoding = {} -> 2035 return to_zarr( 2036 self, 2037 store=store, 2038 chunk_store=chunk_store, 2039 storage_options=storage_options, 2040 mode=mode, 2041 synchronizer=synchronizer, 2042 group=group, 2043 encoding=encoding, 2044 compute=compute, 2045 consolidated=consolidated, 2046 append_dim=append_dim, 2047 region=region, 2048 safe_chunks=safe_chunks, 2049 ) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/api.py:1431, in to_zarr(dataset, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, storage_options) 1429 writer = ArrayWriter() 1430 # TODO: figure out how to properly handle unlimited_dims -> 1431 dump_to_store(dataset, zstore, writer, encoding=encoding) 1432 writes = writer.sync(compute=compute) 1434 if compute: File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/api.py:1119, in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims) 1116 if encoder: 1117 variables, attrs = encoder(variables, attrs) -> 1119 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/zarr.py:517, in ZarrStore.store(self, variables, attributes, check_encoding_set, writer, unlimited_dims) 515 new_variables = set(variables) - existing_variable_names 516 variables_without_encoding = {vn: variables[vn] for vn in new_variables} --> 517 variables_encoded, attributes = self.encode( 518 
variables_without_encoding, attributes 519 ) 521 if existing_variable_names: 522 # Decode variables directly, without going via xarray.Dataset to 523 # avoid needing to load index variables into memory. 524 # TODO: consider making loading indexes lazy again? 525 existing_vars, _, _ = conventions.decode_cf_variables( 526 self.get_variables(), self.get_attrs() 527 ) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/common.py:200, in AbstractWritableDataStore.encode(self, variables, attributes) 183 def encode(self, variables, attributes): 184 """ 185 Encode the variables and attributes in this store 186 (...) 198 199 """ --> 200 variables = {k: self.encode_variable(v) for k, v in variables.items()} 201 attributes = {k: self.encode_attribute(v) for k, v in attributes.items()} 202 return variables, attributes File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/common.py:200, in <dictcomp>(.0) 183 def encode(self, variables, attributes): 184 """ 185 Encode the variables and attributes in this store 186 (...) 198 199 """ --> 200 variables = {k: self.encode_variable(v) for k, v in variables.items()} 201 attributes = {k: self.encode_attribute(v) for k, v in attributes.items()} 202 return variables, attributes File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/zarr.py:476, in ZarrStore.encode_variable(self, variable) 475 def encode_variable(self, variable): --> 476 variable = encode_zarr_variable(variable) 477 return variable File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/zarr.py:275, in encode_zarr_variable(var, needs_copy, name) 254 def encode_zarr_variable(var, needs_copy=True, name=None): 255 """ 256 Converts an Variable into an Variable which follows some 257 of the CF conventions: (...) 272 A variable which has been encoded as described above. 
273 """ --> 275 var = conventions.encode_cf_variable(var, name=name) 277 # zarr allows unicode, but not variable-length strings, so it's both 278 # simpler and more compact to always encode as UTF-8 explicitly. 279 # TODO: allow toggling this explicitly via dtype in encoding. 280 coder = coding.strings.EncodedStringCoder(allows_unicode=True) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/conventions.py:273, in encode_cf_variable(var, needs_copy, name) 264 ensure_not_multiindex(var, name=name) 266 for coder in [ 267 times.CFDatetimeCoder(), 268 times.CFTimedeltaCoder(), (...) 271 variables.UnsignedIntegerCoder(), 272 ]: --> 273 var = coder.encode(var, name=name) 275 # TODO(shoyer): convert all of these to use coders, too: 276 var = maybe_encode_nonstring_dtype(var, name=name) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/coding/times.py:659, in CFDatetimeCoder.encode(self, variable, name) 655 dims, data, attrs, encoding = unpack_for_encoding(variable) 656 if np.issubdtype(data.dtype, np.datetime64) or contains_cftime_datetimes( 657 variable 658 ): --> 659 (data, units, calendar) = encode_cf_datetime( 660 data, encoding.pop("units", None), encoding.pop("calendar", None) 661 ) 662 safe_setitem(attrs, "units", units, name=name) 663 safe_setitem(attrs, "calendar", calendar, name=name) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/coding/times.py:595, in encode_cf_datetime(dates, units, calendar) 592 dates = np.asarray(dates) 594 if units is None: --> 595 units = infer_datetime_units(dates) 596 else: 597 units = _cleanup_netcdf_time_units(units) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/coding/times.py:377, in infer_datetime_units(dates) 375 else: 376 reference_date = dates[0] if len(dates) > 0 else "1970-01-01" --> 377 reference_date = format_cftime_datetime(reference_date) 378 unique_timedeltas = np.unique(np.diff(dates)) 379 units = _infer_time_units_from_diff(unique_timedeltas) File 
~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/coding/times.py:388, in format_cftime_datetime(date) 383 def format_cftime_datetime(date): 384 """Converts a cftime.datetime object to a string with the format: 385 YYYY-MM-DD HH:MM:SS.UUUUUU 386 """ 387 return "{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}.{:06d}".format( --> 388 date.year, 389 date.month, 390 date.day, 391 date.hour, 392 date.minute, 393 date.second, 394 date.microsecond, 395 ) AttributeError: 'numpy.datetime64' object has no attribute 'year'