#!/usr/bin/env python
# coding: utf-8

# # Explore Kerchunking Harmonie data
# Explore zipped test data from:
# https://stackoverflow.com/questions/70952226/how-to-merge-different-shaped-netcdf4-files
#
# Workflow: glob local netCDF files, inspect a few coordinate variables,
# build one kerchunk reference JSON per file, then open one reference
# back through fsspec's "reference" filesystem with the zarr engine.

# In[1]:

import fsspec
import xarray as xr
from kerchunk.hdf import SingleHdf5ToZarr
from pathlib import Path
import ujson

# In[2]:

# Local filesystem wrapper so the same fsspec API works for reads and writes.
fs = fsspec.filesystem('file')

# In[3]:

flist = fs.glob('data/test_data_stackoverflow/*.nc')
flist[:5]

# In[4]:

flist[0]

# In[5]:

# Inspect the x coordinate of EVERY file (bug fix: the original opened
# flist[0] on each iteration, printing the first file repeatedly).
# Context managers close each dataset so we don't leak file handles
# across the whole glob.
for f in flist:
    with xr.open_dataset(f) as ds:
        print(ds.x.values)

# In[6]:

# Inspect the analysis time (dt_calc) of every file.
for f in flist:
    with xr.open_dataset(f) as ds:
        print(ds.dt_calc.values)

# In[7]:

# Inspect the first forecast time (dt_fore[0]) of every file.
for f in flist:
    with xr.open_dataset(f) as ds:
        print(ds.dt_fore[0].values)

# In[8]:

# Output directory for the kerchunk reference JSONs.
json_dir = 'jsons'

# In[9]:

# Open-mode kwargs used by gen_json below. Defined before the function so
# the dependency is visible at definition time (the original defined it in
# a later cell, relying on call-time name lookup).
so = dict(mode='rb')


# Generate a kerchunk reference JSON from one netCDF file.
def gen_json(u):
    """Translate the HDF5 chunk layout of netCDF file *u* into a kerchunk
    reference JSON written to ``{json_dir}/{stem}.json``.

    Parameters
    ----------
    u : str
        Path to a local netCDF/HDF5 file.
    """
    # Open the file in binary mode (so = {'mode': 'rb'}).
    with fs.open(u, **so) as infile:
        # inline_threshold: chunks smaller than 300 bytes are inlined
        # directly into the reference JSON instead of being referenced.
        h5chunks = SingleHdf5ToZarr(infile, u, inline_threshold=300)
        # Path().stem returns the file name without its extension.
        fstem = Path(u).stem
        outf = f'{json_dir}/{fstem}.json'
        # Write the translated HDF5->Zarr reference set as JSON bytes.
        with fs.open(outf, 'wb') as f:
            f.write(ujson.dumps(h5chunks.translate()).encode())

# In[11]:

# Loop through the netCDF list and generate one reference JSON per file.
for f in flist:
    gen_json(f)

# In[12]:

# Create and view a sorted list of all JSONs created in the previous step.
json_list = fs.glob(f'{json_dir}/*.json')
json_list[:5]

# In[13]:

# Open one reference JSON back through fsspec's "reference" filesystem:
# the mapper presents the referenced chunks as a Zarr store, which xarray
# opens lazily (chunks={} -> dask-backed, one chunk per stored chunk).
rpath = json_list[0]
fs3 = fsspec.filesystem("reference", fo=rpath)
m = fs3.get_mapper("")
ds = xr.open_dataset(m, engine="zarr", chunks={},
                     backend_kwargs={'consolidated': False})
ds