import xarray as xr
import fsspec
import numpy as np
import s3fs
import zarr
# Root S3 URL of the bucket; each yearly Zarr store is addressed below as
# <base_url>/<year>.zarr. The value already includes the "s3://" scheme.
base_url = 's3://noaa-nws-aorc-v1-1-1km'
import dask
from dask.distributed import Client
# Create a Dask distributed client with default arguments (per the
# dask.distributed docs, no address means a local cluster is started).
client = Client()
# Bare expression: shows the client summary when it is the last statement
# of a notebook cell; it has no effect otherwise.
client
# Year of the single store to open, kept as a string for URL interpolation.
year = '1979'
# Location of that year's store: <base_url>/<year>.zarr/
single_year_url = f'{base_url}/{year}.zarr/'
%%time
# Open the single-year Zarr store through an fsspec key-value mapper.
# anon=True is forwarded to the underlying filesystem (anonymous access;
# presumably the bucket is public -- confirm). consolidated=True asks xarray
# to read the store's consolidated metadata.
ds_single = xr.open_zarr(fsspec.get_mapper(single_year_url, anon=True), consolidated=True)
# Name of the data variable inspected below.
var='APCP_surface'
# Bare expression: displays the variable's summary when it is the last
# statement of a notebook cell.
ds_single[var]
# Report the variable's nbytes divided by 1e12, i.e. in decimal terabytes,
# rounded to one decimal place.
print(f'Variable size: {ds_single[var].nbytes/1e12:.1f} TB')
# Years to combine: 2018 through 2022 (range() excludes the stop value).
dataset_years = list(range(2018,2023))
# Anonymous S3 filesystem object shared by every store mapping below.
s3_out = s3fs.S3FileSystem(anon=True)
# One key-value mapping per yearly Zarr store. base_url already carries the
# "s3://" scheme, so it is used as-is; prefixing it again would produce a
# malformed "s3://s3://..." root. check=False skips the existence probe that
# the mapping would otherwise perform at construction time.
fileset = [
    s3fs.S3Map(root=f"{base_url}/{dataset_year}.zarr", s3=s3_out, check=False)
    for dataset_year in dataset_years
]
# Bare expression: displays the list of mappings when it is the last
# statement of a notebook cell.
fileset
%%time
# Open every yearly store in fileset as a single dataset. engine='zarr'
# selects the Zarr backend for each mapping; the stores are combined using
# open_mfdataset's default combine settings.
ds_multi_year = xr.open_mfdataset(fileset, engine='zarr')
# Name of the data variable inspected below.
var='APCP_surface'
# Bare expression: displays the variable's summary when it is the last
# statement of a notebook cell.
ds_multi_year[var]
# Report the variable's nbytes divided by 1e12, i.e. in decimal terabytes,
# rounded to one decimal place.
print(f'Variable size: {ds_multi_year[var].nbytes/1e12:.1f} TB')