#!/usr/bin/env python
# coding: utf-8

# # FillValue issues with kerchunk dataset
# NetCDF file has float32 var with `_FillValue` set as 1e37.
# * reading NetCDF with Xarray correctly sets these values to NaN
# * reading kerchunk JSON with Xarray sets these to something else
#   NOTE(review): the original note was left unfinished here; presumably the
#   fill values are not masked to NaN -- confirm against the output printed
#   by the kerchunk section below.

# In[1]:

import fsspec
import xarray as xr

# Object-store endpoint used for every S3 access in this script.
ENDPOINT_URL = 'https://mghp.osn.xsede.org'


# #### Open NetCDF file directly

# In[2]:

fs = fsspec.filesystem('s3', anon=True,
                       client_kwargs={'endpoint_url': ENDPOINT_URL})
url = 's3://rsignellbucket1/COAWST/coawst_us_20220101_01.nc'


# In[3]: / In[4]:
# Open with CF decoding enabled and show one element of `temp`.
# The remote file handle and the dataset are both closed when the block
# exits, so the value is read (and printed) while they are still open.

with fs.open(url) as f, xr.open_dataset(f, decode_cf=True) as ds:
    print(ds.temp[0, 0, 0, 0].values)


# In[5]: / In[6]: / In[7]:
# Same file with CF decoding disabled: show the raw stored value and the
# `_FillValue` attribute recorded on the variable.

with fs.open(url) as f, xr.open_dataset(f, decode_cf=False) as ds:
    print(ds.temp[0, 0, 0, 0].values)
    print(ds.temp._FillValue)


# #### Read Kerchunk JSON representation of the above NetCDF file

# In[8]:

json_url = 's3://rsignellbucket1/COAWST/jsons/coawst_us_20220101_01.nc.json'


# Try with `decode_cf=True`:

# In[9]:

# Options for fetching the reference JSON itself.
s_opts = dict(skip_instance_cache=True, anon=True,
              client_kwargs={'endpoint_url': ENDPOINT_URL})
# Options for fetching the data the references point to.
r_opts = dict(anon=True, client_kwargs={'endpoint_url': ENDPOINT_URL})

fs = fsspec.filesystem("reference", fo=json_url, ref_storage_args=s_opts,
                       remote_protocol='s3', remote_options=r_opts)
m = fs.get_mapper("")
ds = xr.open_dataset(m, engine="zarr", chunks={},
                     backend_kwargs=dict(consolidated=False), decode_cf=True)


# In[10]:

print(ds.temp[0, 0, 0, 0].values)


# Try with `decode_cf=False`:

# In[11]:

ds = xr.open_dataset(m, engine="zarr", chunks={},
                     backend_kwargs=dict(consolidated=False), decode_cf=False)


# In[12]:

print(ds.temp._FillValue)


# In[13]:

print(ds.temp[0, 0, 0, 0].values)


# Look at the JSON

# In[14]:

# Copy the variable's attribute document out of the reference filesystem
# into a local file named 'foo'.
fs.download('temp/.zattrs', 'foo')


# In[15]:

# Print the downloaded attributes with plain Python so the script also runs
# outside IPython (no `get_ipython()` and no external pager needed).
with open('foo', encoding='utf-8') as attrs_file:
    print(attrs_file.read())