Explore zipped test data from: https://stackoverflow.com/questions/70952226/how-to-merge-different-shaped-netcdf4-files
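The steps below assume the zip attached to that question has already been downloaded and extracted into data/test_data_stackoverflow/. A minimal sketch of the extraction step using the standard library, assuming the archive was saved as data/test_data_stackoverflow.zip (that file name is an assumption, not something given in the question):
import zipfile
#assumed local path of the downloaded archive; extract the .nc files into the data directory
with zipfile.ZipFile('data/test_data_stackoverflow.zip') as zf:
    zf.extractall('data/test_data_stackoverflow')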
import fsspec
import xarray as xr
from kerchunk.hdf import SingleHdf5ToZarr
from pathlib import Path
import ujson
fs = fsspec.filesystem('file')
flist = fs.glob('data/test_data_stackoverflow/*.nc')
flist[:5]
['/shared/users/rsignell/notebooks/testing/data/test_data_stackoverflow/harmonie_knmi_2021_03_20_18_30_18.nc', '/shared/users/rsignell/notebooks/testing/data/test_data_stackoverflow/harmonie_knmi_2021_03_20_18_30_19.nc', '/shared/users/rsignell/notebooks/testing/data/test_data_stackoverflow/harmonie_knmi_2021_03_20_18_30_20.nc', '/shared/users/rsignell/notebooks/testing/data/test_data_stackoverflow/harmonie_knmi_2021_03_20_18_30_21.nc', '/shared/users/rsignell/notebooks/testing/data/test_data_stackoverflow/harmonie_knmi_2021_03_20_18_30_22.nc']
flist[0]
'/shared/users/rsignell/notebooks/testing/data/test_data_stackoverflow/harmonie_knmi_2021_03_20_18_30_18.nc'
for f in flist:
    print(xr.open_dataset(f).x.values)
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30]
(identical output for every file in flist)
for f in flist:
    print(xr.open_dataset(f).dt_calc.values)
2021-03-20T18:00:00.000000000
(identical output for every file in flist)
for f in flist:
    print(xr.open_dataset(f).dt_fore[0].values)
1616263200000000000
(identical output for every file in flist)
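The three loops above suggest that every file shares the same x index and the same forecast-initialization time. A compact way to check this more systematically is sketched below; comparing against the first file with np.array_equal is just one reasonable choice, and the list of coordinate names is an assumption you may want to extend (e.g. with y):
import numpy as np
#compare each file's coordinates against those of the first file and report any mismatch
ds0 = xr.open_dataset(flist[0])
for f in flist[1:]:
    with xr.open_dataset(f) as ds_f:
        for coord in ['x', 'dt_calc']:
            if not np.array_equal(ds0[coord].values, ds_f[coord].values):
                print(f'{coord} differs in {f}')
ds0.close()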
#directory where the reference JSONs will be written (create it first if it doesn't already exist)
json_dir = 'jsons'
#generate a Kerchunk reference JSON from a netCDF file
def gen_json(u):
    #open the file (u) with the designated parameters (so)
    with fs.open(u, **so) as infile:
        #inline_threshold: chunks smaller than 300 bytes are stored inline in the
        #reference JSON instead of being referenced by byte range
        h5chunks = SingleHdf5ToZarr(infile, u, inline_threshold=300)
        #Path().stem is from the pathlib library; returns the file name w/o the extension
        fstem = Path(u).stem
        #here we create the output file path
        outf = f'{json_dir}/{fstem}.json'
        #print(outf)
        #wb: write binary
        with fs.open(outf, 'wb') as f:
            #translate() converts the HDF5 metadata and chunk layout to a Zarr-style
            #reference dict; ujson.dumps serializes it to a JSON string for writing
            f.write(ujson.dumps(h5chunks.translate()).encode())
#arguments for fs.open above: read the netCDF files in binary mode
so = dict(mode='rb')
#loop through list of netCDFs and put them through our function for generating jsons
for f in flist:
    gen_json(f)
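Each call to gen_json is independent, so for a large file list this step parallelizes trivially. A minimal sketch using the standard library (max_workers=8 is an arbitrary assumption; a dask.bag map would work just as well):
from concurrent.futures import ThreadPoolExecutor
#run gen_json over all the netCDFs concurrently instead of one at a time
with ThreadPoolExecutor(max_workers=8) as pool:
    list(pool.map(gen_json, flist))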
#create and view a sorted list of all jsons created in the previous step
json_list = fs.glob(f'{json_dir}/*.json')
json_list[:5]
['/shared/users/rsignell/notebooks/testing/jsons/harmonie_knmi_2021_03_20_18_30_18.json', '/shared/users/rsignell/notebooks/testing/jsons/harmonie_knmi_2021_03_20_18_30_19.json', '/shared/users/rsignell/notebooks/testing/jsons/harmonie_knmi_2021_03_20_18_30_20.json', '/shared/users/rsignell/notebooks/testing/jsons/harmonie_knmi_2021_03_20_18_30_21.json', '/shared/users/rsignell/notebooks/testing/jsons/harmonie_knmi_2021_03_20_18_30_22.json']
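To see what one of these reference files actually contains, load it back with ujson. A reference JSON produced by SingleHdf5ToZarr holds a version number and a 'refs' dictionary mapping Zarr keys (.zgroup, <var>/.zarray, <var>/0.0.0, ...) to either inlined metadata/data or [url, offset, length] byte ranges into the original netCDF. The quick peek below just prints the first few keys:
#inspect the structure of one reference JSON
with fs.open(json_list[0], 'r') as f:
    refs = ujson.loads(f.read())
print(list(refs.keys()))
print(list(refs['refs'].keys())[:10])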
#build a "reference" filesystem from the first JSON and wrap it in a mapper that
#xarray (via the zarr engine) can read as if it were a Zarr store
rpath = json_list[0]
fs3 = fsspec.filesystem("reference", fo=rpath)
m = fs3.get_mapper("")
ds = xr.open_dataset(m, engine="zarr", chunks={}, backend_kwargs={'consolidated': False})
ds
<xarray.Dataset>
Dimensions:                  (dt_fore: 49, x: 31, y: 54)
Coordinates:
    dt_calc                  datetime64[ns] ...
  * dt_fore                  (dt_fore) float64 1.616e+18 ... 1.616e+18
    lat                      (x, y) float64 dask.array<chunksize=(31, 54), meta=np.ndarray>
    lon                      (x, y) float64 dask.array<chunksize=(31, 54), meta=np.ndarray>
Dimensions without coordinates: x, y
Data variables: (12/27)
    air_temperature_2m       (dt_fore, x, y) float16 dask.array<chunksize=(13, 16, 27), meta=np.ndarray>
    air_temperature_500      (dt_fore, x, y) float16 dask.array<chunksize=(13, 16, 27), meta=np.ndarray>
    air_temperature_850      (dt_fore, x, y) float16 dask.array<chunksize=(13, 16, 27), meta=np.ndarray>
    dewpoint_2m              (dt_fore, x, y) float16 dask.array<chunksize=(13, 16, 27), meta=np.ndarray>
    geopotential_height_500  (dt_fore, x, y) float64 dask.array<chunksize=(13, 8, 14), meta=np.ndarray>
    geopotential_height_850  (dt_fore, x, y) float64 dask.array<chunksize=(13, 8, 14), meta=np.ndarray>
    ...                       ...
    wind_direction_50        (dt_fore, x, y) float64 dask.array<chunksize=(13, 8, 14), meta=np.ndarray>
    wind_speed_10            (dt_fore, x, y) float64 dask.array<chunksize=(13, 8, 14), meta=np.ndarray>
    wind_speed_100           (dt_fore, x, y) float64 dask.array<chunksize=(13, 8, 14), meta=np.ndarray>
    wind_speed_200           (dt_fore, x, y) float64 dask.array<chunksize=(13, 8, 14), meta=np.ndarray>
    wind_speed_300           (dt_fore, x, y) float64 dask.array<chunksize=(13, 8, 14), meta=np.ndarray>
    wind_speed_50            (dt_fore, x, y) float64 dask.array<chunksize=(13, 8, 14), meta=np.ndarray>
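For reference, newer fsspec/xarray versions can do the same thing in a single call through the "reference://" protocol, which builds the reference filesystem and mapper internally. A sketch of that alternative, with backend_kwargs spelled the way the zarr backend conventions suggest (treat it as a starting point rather than the canonical form):
#equivalent one-step open via the reference:// protocol
ds_alt = xr.open_dataset(
    "reference://", engine="zarr", chunks={},
    backend_kwargs={
        "consolidated": False,
        "storage_options": {"fo": rpath},
    },
)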