Read from kerchunked GRIB2 files, write to Zarr
# Open the HRRR "best time series" dataset through a kerchunk reference
# file stored on S3, exposing the GRIB2 chunks as a virtual Zarr store.
import xarray as xr
import fsspec
import hvplot.xarray
from kerchunk.grib2 import scan_grib  # needed here only for grib compression codec

# Location of the kerchunk JSON reference file and the S3 access options:
# the reference JSON lives in a requester-pays bucket, while the GRIB2
# chunks it points at are anonymously readable.
reference_url = 's3://esip-qhub-public/noaa/hrrr/hrrr_best.json'
reference_opts = {'requester_pays': True, 'skip_instance_cache': True}
chunk_opts = {'anon': True}

reference_fs = fsspec.filesystem(
    "reference",
    fo=reference_url,
    ref_storage_args=reference_opts,
    remote_protocol='s3',
    remote_options=chunk_opts,
)

# A mapper over the reference filesystem behaves like a Zarr store, so
# xarray can open it with the zarr engine; one dask chunk per time step.
mapper = reference_fs.get_mapper("")
ds = xr.open_dataset(
    mapper,
    engine="zarr",
    backend_kwargs=dict(consolidated=False),
    chunks={'valid_time': 1},
)
/home/rsignell/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/core/dataset.py:408: UserWarning: Specified Dask chunks (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) would separate on disks chunk shape 177 for dimension valid_time. This could degrade performance. Consider rechunking after loading instead. _check_chunks_compatibility(var, output_chunks, preferred_chunks)
# Drop the scalar/auxiliary coordinates left over from the GRIB scan and
# promote 'valid_time' to the record dimension name 'time'.
# ``Dataset.drop`` is deprecated (and removed in newer xarray releases);
# ``drop_vars`` is the supported, unambiguous spelling.
ds = ds.drop_vars(['time', 'step', 'heightAboveGround']).rename({'valid_time': 'time'})
ds
<xarray.Dataset> Dimensions: (time: 177, y: 1059, x: 1799) Coordinates: latitude (y, x) float64 dask.array<chunksize=(1059, 1799), meta=np.ndarray> longitude (y, x) float64 dask.array<chunksize=(1059, 1799), meta=np.ndarray> * time (time) datetime64[us] 2022-02-20T19:00:00 ... 2022-02-28T03:00:00 Dimensions without coordinates: y, x Data variables: d2m (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> pt (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> r2 (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> sh2 (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> si10 (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> t2m (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> u10 (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> unknown (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> v10 (time, y, x) float32 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray> Attributes: Conventions: CF-1.7 GRIB_centre: kwbc GRIB_centreDescription: US National Weather Service - NCEP GRIB_edition: 2 GRIB_subCentre: 0 history: 2022-02-27T19:09 GRIB to CDM+CF via cfgrib-0.9.9... institution: US National Weather Service - NCEP
ds.isel(time=slice(-3,-1)).to_zarr('foo.zarr', 'w')
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Input In [5], in <module> ----> 1 ds.isel(time=slice(-3,-1)).to_zarr('foo.zarr', 'w') File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/core/dataset.py:2035, in Dataset.to_zarr(self, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, storage_options) 2032 if encoding is None: 2033 encoding = {} -> 2035 return to_zarr( 2036 self, 2037 store=store, 2038 chunk_store=chunk_store, 2039 storage_options=storage_options, 2040 mode=mode, 2041 synchronizer=synchronizer, 2042 group=group, 2043 encoding=encoding, 2044 compute=compute, 2045 consolidated=consolidated, 2046 append_dim=append_dim, 2047 region=region, 2048 safe_chunks=safe_chunks, 2049 ) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/api.py:1431, in to_zarr(dataset, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, storage_options) 1429 writer = ArrayWriter() 1430 # TODO: figure out how to properly handle unlimited_dims -> 1431 dump_to_store(dataset, zstore, writer, encoding=encoding) 1432 writes = writer.sync(compute=compute) 1434 if compute: File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/api.py:1119, in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims) 1116 if encoder: 1117 variables, attrs = encoder(variables, attrs) -> 1119 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/zarr.py:517, in ZarrStore.store(self, variables, attributes, check_encoding_set, writer, unlimited_dims) 515 new_variables = set(variables) - existing_variable_names 516 variables_without_encoding = {vn: variables[vn] for vn in new_variables} --> 517 variables_encoded, attributes = self.encode( 518 
variables_without_encoding, attributes 519 ) 521 if existing_variable_names: 522 # Decode variables directly, without going via xarray.Dataset to 523 # avoid needing to load index variables into memory. 524 # TODO: consider making loading indexes lazy again? 525 existing_vars, _, _ = conventions.decode_cf_variables( 526 self.get_variables(), self.get_attrs() 527 ) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/common.py:200, in AbstractWritableDataStore.encode(self, variables, attributes) 183 def encode(self, variables, attributes): 184 """ 185 Encode the variables and attributes in this store 186 (...) 198 199 """ --> 200 variables = {k: self.encode_variable(v) for k, v in variables.items()} 201 attributes = {k: self.encode_attribute(v) for k, v in attributes.items()} 202 return variables, attributes File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/common.py:200, in <dictcomp>(.0) 183 def encode(self, variables, attributes): 184 """ 185 Encode the variables and attributes in this store 186 (...) 198 199 """ --> 200 variables = {k: self.encode_variable(v) for k, v in variables.items()} 201 attributes = {k: self.encode_attribute(v) for k, v in attributes.items()} 202 return variables, attributes File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/zarr.py:476, in ZarrStore.encode_variable(self, variable) 475 def encode_variable(self, variable): --> 476 variable = encode_zarr_variable(variable) 477 return variable File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/zarr.py:275, in encode_zarr_variable(var, needs_copy, name) 254 def encode_zarr_variable(var, needs_copy=True, name=None): 255 """ 256 Converts an Variable into an Variable which follows some 257 of the CF conventions: (...) 272 A variable which has been encoded as described above. 
273 """ --> 275 var = conventions.encode_cf_variable(var, name=name) 277 # zarr allows unicode, but not variable-length strings, so it's both 278 # simpler and more compact to always encode as UTF-8 explicitly. 279 # TODO: allow toggling this explicitly via dtype in encoding. 280 coder = coding.strings.EncodedStringCoder(allows_unicode=True) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/conventions.py:273, in encode_cf_variable(var, needs_copy, name) 264 ensure_not_multiindex(var, name=name) 266 for coder in [ 267 times.CFDatetimeCoder(), 268 times.CFTimedeltaCoder(), (...) 271 variables.UnsignedIntegerCoder(), 272 ]: --> 273 var = coder.encode(var, name=name) 275 # TODO(shoyer): convert all of these to use coders, too: 276 var = maybe_encode_nonstring_dtype(var, name=name) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/coding/times.py:659, in CFDatetimeCoder.encode(self, variable, name) 655 dims, data, attrs, encoding = unpack_for_encoding(variable) 656 if np.issubdtype(data.dtype, np.datetime64) or contains_cftime_datetimes( 657 variable 658 ): --> 659 (data, units, calendar) = encode_cf_datetime( 660 data, encoding.pop("units", None), encoding.pop("calendar", None) 661 ) 662 safe_setitem(attrs, "units", units, name=name) 663 safe_setitem(attrs, "calendar", calendar, name=name) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/coding/times.py:595, in encode_cf_datetime(dates, units, calendar) 592 dates = np.asarray(dates) 594 if units is None: --> 595 units = infer_datetime_units(dates) 596 else: 597 units = _cleanup_netcdf_time_units(units) File ~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/coding/times.py:377, in infer_datetime_units(dates) 375 else: 376 reference_date = dates[0] if len(dates) > 0 else "1970-01-01" --> 377 reference_date = format_cftime_datetime(reference_date) 378 unique_timedeltas = np.unique(np.diff(dates)) 379 units = _infer_time_units_from_diff(unique_timedeltas) File 
~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/coding/times.py:388, in format_cftime_datetime(date) 383 def format_cftime_datetime(date): 384 """Converts a cftime.datetime object to a string with the format: 385 YYYY-MM-DD HH:MM:SS.UUUUUU 386 """ 387 return "{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}.{:06d}".format( --> 388 date.year, 389 date.month, 390 date.day, 391 date.hour, 392 date.minute, 393 date.second, 394 date.microsecond, 395 ) AttributeError: 'numpy.datetime64' object has no attribute 'year'