#!/usr/bin/env python
# coding: utf-8

# # Explore Kerchunking Harmonie data
# Explore zipped test data from:
# https://stackoverflow.com/questions/70952226/how-to-merge-different-shaped-netcdf4-files
#
# Workflow: glob local netCDF files, inspect a few coordinate variables,
# build one kerchunk reference JSON per file, then open one reference
# back through fsspec's "reference" filesystem with the zarr engine.

# In[1]:

import fsspec
import xarray as xr
from kerchunk.hdf import SingleHdf5ToZarr
from pathlib import Path
import ujson

# In[2]:

# Local filesystem wrapper so the same fsspec API works for reads and writes.
fs = fsspec.filesystem('file')

# In[3]:

flist = fs.glob('data/test_data_stackoverflow/*.nc')
flist[:5]

# In[4]:

flist[0]

# In[5]:

# Inspect the x coordinate of EVERY file (bug fix: the original opened
# flist[0] on each iteration, printing the first file repeatedly).
# Context managers close each dataset so we don't leak file handles
# across the whole glob.
for f in flist:
    with xr.open_dataset(f) as ds:
        print(ds.x.values)

# In[6]:

# Inspect the analysis time (dt_calc) of every file.
for f in flist:
    with xr.open_dataset(f) as ds:
        print(ds.dt_calc.values)

# In[7]:

# Inspect the first forecast time (dt_fore[0]) of every file.
for f in flist:
    with xr.open_dataset(f) as ds:
        print(ds.dt_fore[0].values)

# In[8]:

# Output directory for the kerchunk reference JSONs.
json_dir = 'jsons'

# In[9]:

# Open-mode kwargs used by gen_json below. Defined before the function so
# the dependency is visible at definition time (the original defined it in
# a later cell, relying on call-time name lookup).
so = dict(mode='rb')


# Generate a kerchunk reference JSON from one netCDF file.
def gen_json(u):
    """Translate the HDF5 chunk layout of netCDF file *u* into a kerchunk
    reference JSON written to ``{json_dir}/{stem}.json``.

    Parameters
    ----------
    u : str
        Path to a local netCDF/HDF5 file.
    """
    # Open the file in binary mode (so = {'mode': 'rb'}).
    with fs.open(u, **so) as infile:
        # inline_threshold: chunks smaller than 300 bytes are inlined
        # directly into the reference JSON instead of being referenced.
        h5chunks = SingleHdf5ToZarr(infile, u, inline_threshold=300)
        # Path().stem returns the file name without its extension.
        fstem = Path(u).stem
        outf = f'{json_dir}/{fstem}.json'
        # Write the translated HDF5->Zarr reference set as JSON bytes.
        with fs.open(outf, 'wb') as f:
            f.write(ujson.dumps(h5chunks.translate()).encode())

# In[11]:

# Loop through the netCDF list and generate one reference JSON per file.
for f in flist:
    gen_json(f)

# In[12]:

# Create and view a sorted list of all JSONs created in the previous step.
json_list = fs.glob(f'{json_dir}/*.json')
json_list[:5]

# In[13]:

# Open one reference JSON back through fsspec's "reference" filesystem:
# the mapper presents the referenced chunks as a Zarr store, which xarray
# opens lazily (chunks={} -> dask-backed, one chunk per stored chunk).
rpath = json_list[0]
fs3 = fsspec.filesystem("reference", fo=rpath)
m = fs3.get_mapper("")
ds = xr.open_dataset(m, engine="zarr", chunks={},
                     backend_kwargs={'consolidated': False})
ds