The data contained in these grib2 files is described here: https://www.nco.ncep.noaa.gov/pmb/products/gens/
Approach: create reference JSONs for each grib message, then combine them along the
- step dimension (the forecast hour, or "tau" dimension)
- ensemble or "number" dimension
- time dimension (the daily forecast date)
A minimal sketch of this combination pattern is shown below.
%xmode minimal
Exception reporting mode: Minimal
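Here is a minimal sketch of one such combination step along a single dimension (the step dimension), using the kerchunk MultiZarrToZarr API that the workflow below relies on; the file names are hypothetical, and the same pattern is repeated later for the number and time dimensions.
# Minimal sketch (not run here): combine per-message reference JSONs along one dimension.
# The file names are placeholders; the real workflow below generates them with scan_grib.
from kerchunk.combine import MultiZarrToZarr

mzz = MultiZarrToZarr(['msg_000.json', 'msg_001.json'],
                      concat_dims=['step'],
                      identical_dims=['latitude', 'longitude'],
                      remote_protocol='s3',
                      remote_options={'anon': True})
combined_refs = mzz.translate()   # a single reference dict spanning all the inputs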
import os
import fsspec
from datetime import datetime, timedelta
import xarray as xr
import ujson
import kerchunk
from kerchunk.grib2 import scan_grib
from kerchunk.combine import MultiZarrToZarr
from pathlib import Path
import dask
from dask.distributed import LocalCluster, Client, performance_report
import dask.bag as db
from datatree import DataTree
import panel as pn
import pandas as pd
import hvplot.xarray
xr.__version__
'2023.3.0'
Create a set of fsspec file systems: one to read the GEFS data anonymously from S3 and one to write the reference files locally
fs_local = fsspec.filesystem('', skip_instance_cache = True, use_listings_cache=False)
fs_s3 = fsspec.filesystem('s3', anon = True)
s3://noaa-gefs-pds/gefs.{date}/{cycle}/atmos/pgrb2ap5/gep{ensemble_member}.t{cycle}.pgrb2a.0p50.f{forecast_hour}
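For illustration, the operational-forecast naming pattern above could be filled in like this (the values are hypothetical; the rest of this notebook reads the noaa-gefs-retrospective bucket instead):
# Hypothetical example of filling in the operational GEFS naming pattern shown above
date, cycle, ensemble_member, forecast_hour = '20191229', '00', '01', '024'
url = (f's3://noaa-gefs-pds/gefs.{date}/{cycle}/atmos/pgrb2ap5/'
       f'gep{ensemble_member}.t{cycle}.pgrb2a.0p50.f{forecast_hour}')
print(url)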
bucket = 's3://noaa-gefs-retrospective'
flist = fs_s3.ls(bucket)
flist
['noaa-gefs-retrospective/Description_of_reforecast_data.pdf', 'noaa-gefs-retrospective/GEFSv12', 'noaa-gefs-retrospective/index.html', 'noaa-gefs-retrospective/landsfc.pgrb2.0p25', 'noaa-gefs-retrospective/landsfc.pgrb2.0p50']
dates = fs_s3.glob(f'{bucket}/GEFSv12/reforecast/2019/??????????')
print(len(dates))
print(dates[0])
print(dates[-1])
365
noaa-gefs-retrospective/GEFSv12/reforecast/2019/2019010100
noaa-gefs-retrospective/GEFSv12/reforecast/2019/2019123100
dates = [Path(date).name for date in dates[-3:]]
dates
['2019122900', '2019123000', '2019123100']
Determine the ensemble members by searching all gribs for a specific date and variable
date = dates[0]
year = date[:4]
variable = 'weasd_sfc'
f = fs_s3.glob(f's3://noaa-gefs-retrospective/GEFSv12/reforecast/{year}/{date}/???/Days:1-10/{variable}_{date}_???.grib2')
np_ensembles = len(f)
print(np_ensembles)
f
5
['noaa-gefs-retrospective/GEFSv12/reforecast/2019/2019122900/c00/Days:1-10/weasd_sfc_2019122900_c00.grib2', 'noaa-gefs-retrospective/GEFSv12/reforecast/2019/2019122900/p01/Days:1-10/weasd_sfc_2019122900_p01.grib2', 'noaa-gefs-retrospective/GEFSv12/reforecast/2019/2019122900/p02/Days:1-10/weasd_sfc_2019122900_p02.grib2', 'noaa-gefs-retrospective/GEFSv12/reforecast/2019/2019122900/p03/Days:1-10/weasd_sfc_2019122900_p03.grib2', 'noaa-gefs-retrospective/GEFSv12/reforecast/2019/2019122900/p04/Days:1-10/weasd_sfc_2019122900_p04.grib2']
ensembles = ['c00', 'p01', 'p02', 'p03', 'p04']
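The member names could also be derived from the glob results instead of hardcoded (a sketch, assuming the file naming shown above):
# Sketch: pull the ensemble member names (c00, p01, ...) out of the grib file names
ensembles = sorted({Path(p).stem.split('_')[-1] for p in f})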
Find all the grib files (one per variable) by searching for a single ensemble member and date
flist = sorted(fs_s3.glob(f's3://noaa-gefs-retrospective/GEFSv12/reforecast/{year}/{date}/c00/Days:1-10/*.grib2'))
flist = [f's3://{f}' for f in flist]
len(flist)
60
Make a list of variable names (extracting them from the grib file names)
varnames = []
for f in flist:
    file = Path(f).name                    # e.g. 'acpcp_sfc_2019122900_c00.grib2'
    var = '_'.join(file.split('_')[:-2])   # drop the trailing date and member parts
    varnames.append(var)
varnames
['acpcp_sfc', 'apcp_sfc', 'cape_sfc', 'cin_sfc', 'dlwrf_sfc', 'dswrf_sfc', 'gflux_sfc', 'gust_sfc', 'hgt_ceiling', 'hgt_hybr', 'hgt_pres', 'hgt_pres_abv700mb', 'hgt_sfc', 'hlcy_hgt', 'lhtfl_sfc', 'ncpcp_sfc', 'pbl_hgt', 'pres_hybr', 'pres_msl', 'pres_pvor', 'pres_sfc', 'pvort_isen', 'pwat_eatm', 'rh_hybr', 'sfcr_sfc', 'shtfl_sfc', 'soilw_bgrnd', 'spfh_2m', 'spfh_pres', 'spfh_pres_abv700mb', 'tcdc_eatm', 'tmax_2m', 'tmin_2m', 'tmp_2m', 'tmp_hybr', 'tmp_pres', 'tmp_pres_abv700mb', 'tmp_pvor', 'tmp_sfc', 'tozne_eatm', 'tsoil_bgrnd', 'uflx_sfc', 'ugrd_hgt', 'ugrd_hybr', 'ugrd_pres', 'ugrd_pres_abv700mb', 'ugrd_pvor', 'ulwrf_sfc', 'ulwrf_tatm', 'uswrf_sfc', 'vflx_sfc', 'vgrd_hgt', 'vgrd_hybr', 'vgrd_pres', 'vgrd_pres_abv700mb', 'vgrd_pvor', 'vvel_pres', 'vvel_pres_abv700mb', 'watr_sfc', 'weasd_sfc']
n_ensembles = len(ensembles)
s3_so = {
'anon': True,
'skip_instance_cache': True
}
Try scan_grib on one grib file to determine the number of messages
Scanning one file would suffice if all files had the same number of messages, but not all of the reforecast files do. In these grib files, each file holds a single variable, and the messages cover not only every tau value but also every vertical level. Surface variables therefore have 80 messages, while variables on 4 levels have 320 messages, and so on.
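A quick back-of-the-envelope check of those counts (the step count is assumed from the 3-hourly, 10-day output shown later in the notebook):
# messages per file = number of forecast steps * number of vertical levels (assumed values)
n_steps = 80            # 3-hourly steps out to 10 days
print(n_steps * 1)      # 80 messages for a surface (single-level) variable
print(n_steps * 4)      # 320 messages for a variable on 4 levels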
%%time
out = scan_grib(flist[0], storage_options= s3_so)
CPU times: user 8.07 s, sys: 1.55 s, total: 9.62 s Wall time: 13.3 s
n_messages = len(out)
n_messages
80
messages = [f'{i:03d}' for i in range(n_messages)]
#client.close(); cluster.close()
individual_dir = 'individual_retro'
try:
fs_local.rm(individual_dir, recursive = True)
except:
pass
fs_local.mkdirs(individual_dir, exist_ok=True)
Path(flist[0]).parts
('s3:', 'noaa-gefs-retrospective', 'GEFSv12', 'reforecast', '2019', '2019122900', 'c00', 'Days:1-10', 'acpcp_sfc_2019122900_c00.grib2')
def make_json_name(url, grib_message_number, json_dir):
    p = Path(url).parts
    # p[8] is the grib file name for these s3 urls (see Path(flist[0]).parts above)
    return f'{json_dir}/{Path(p[8]).stem}_m{grib_message_number:03d}.json'
Test make_json_name on one grib file
make_json_name(flist[0], 0, individual_dir)
'individual_retro/acpcp_sfc_2019122900_c00_m000.json'
Define a function to generate the individual JSONs (one per grib message)
def gen_json(file_url, json_dir):
out = scan_grib(file_url, storage_options=s3_so)
for i, message in enumerate(out): # scan_grib outputs a list containing one reference file per grib message
out_file_name = make_json_name(file_url, i, json_dir) #get name
with fs_local.open(out_file_name, "w") as f:
f.write(ujson.dumps(message)) #write to file
Process all the messages in one grib file as a test
%%time
flist[0]
gen_json(flist[0], individual_dir)
CPU times: user 7.95 s, sys: 2.12 s, total: 10.1 s Wall time: 12.2 s
json_list = sorted(fs_local.ls(individual_dir))
len(json_list)
80
def return_ds(d):
    """Open a kerchunk reference (dict or JSON file path) as an xarray dataset."""
    fs_ = fsspec.filesystem("reference", fo=d, remote_protocol='s3', remote_options={'anon': True})
    m = fs_.get_mapper("")
    return xr.open_dataset(m, engine="zarr", backend_kwargs=dict(consolidated=False))
json_list[-1]
return_ds(json_list[-1])
Use dask.distributed to create the JSONs in parallel
#client.close(); cluster.close()
n_workers = 31
threads_per_worker = 2
cluster = LocalCluster(n_workers=n_workers, threads_per_worker=threads_per_worker)
client = Client(cluster)
dates
['2019122900', '2019123000', '2019123100']
%%time
flist = []
for date in dates:
year = date[:4]
flist.extend(sorted(fs_s3.glob(f's3://noaa-gefs-retrospective/GEFSv12/reforecast/{year}/{date}/???/Days:1-10/*.grib2')))
flist = [f's3://{f}' for f in flist]
len(flist)
CPU times: user 6.41 ms, sys: 2.16 ms, total: 8.57 ms Wall time: 7.59 ms
900
b = db.from_sequence(flist, npartitions=n_workers*threads_per_worker)
b1 = b.map(gen_json, individual_dir)
%%time
with performance_report(filename="dask-report.html"):
_ = b1.compute(retries=10)
CPU times: user 3min 28s, sys: 1min 3s, total: 4min 31s Wall time: 20min 35s
json_list = sorted(fs_local.ls(individual_dir))
print(len(json_list))
print(json_list[0])
print(json_list[-1])
260400
/home/rsignell/EarthMap/Projects/notebooks/gefs/individual_retro/acpcp_sfc_2019122900_c00_m000.json
/home/rsignell/EarthMap/Projects/notebooks/gefs/individual_retro/weasd_sfc_2019123100_p04_m079.json
ensemble='c00'
pattern = f'{individual_dir}/{varnames[0]}_{date}_{ensemble}_m???.json'
print(pattern)
json_list = fs_local.glob(pattern)
len(json_list)
individual_retro/acpcp_sfc_2019123100_c00_m???.json
80
Keep only the variables whose grib files have exactly 80 messages (the single-level variables):
def var80(var):
    """Return var if it has exactly 80 message JSONs for this date and ensemble, else 'null'."""
    pattern = f'{individual_dir}/{var}_{date}_{ensemble}_m???.json'
    json_list = fs_local.glob(pattern)
    if len(json_list) == 80:
        v = var
    else:
        v = 'null'
    return v
%%time
ensemble='c00'
a = dask.compute(*[dask.delayed(var80)(v) for v in varnames], retries=10);
v80 = [v for v in a if 'null' not in v]
CPU times: user 9.88 s, sys: 2.15 s, total: 12 s Wall time: 54.5 s
v80
['acpcp_sfc', 'apcp_sfc', 'cape_sfc', 'cin_sfc', 'dlwrf_sfc', 'dswrf_sfc', 'gflux_sfc', 'gust_sfc', 'hgt_ceiling', 'hgt_sfc', 'hlcy_hgt', 'lhtfl_sfc', 'ncpcp_sfc', 'pbl_hgt', 'pres_msl', 'pres_pvor', 'pres_sfc', 'pwat_eatm', 'sfcr_sfc', 'shtfl_sfc', 'spfh_2m', 'tcdc_eatm', 'tmax_2m', 'tmin_2m', 'tmp_2m', 'tmp_pvor', 'tmp_sfc', 'tozne_eatm', 'uflx_sfc', 'ugrd_pvor', 'ulwrf_sfc', 'ulwrf_tatm', 'uswrf_sfc', 'vflx_sfc', 'vgrd_pvor', 'watr_sfc', 'weasd_sfc']
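For reference, the variables this filter drops (the multi-level ones such as hgt_pres and tmp_pres) could be listed with:
# Variables excluded by the 80-message filter, i.e. the multi-level variables
sorted(set(varnames) - set(v80))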
Join along the step dimension for each variable in each ensemble member for a specific date. Each grib file has all the step fields as separate messages.
step_dir = 'step'
try:
fs_local.rm(step_dir, recursive=True)
except:
pass
fs_local.mkdirs(step_dir, exist_ok=True)
json_list[0]
'/home/rsignell/EarthMap/Projects/notebooks/gefs/individual_retro/acpcp_sfc_2019122900_c00_m000.json'
fs_local.ls(individual_dir)[0]
'/home/rsignell/EarthMap/Projects/notebooks/gefs/individual_retro/ugrd_pres_2019122900_p02_m253.json'
def combine_steps(date, ensemble):
for var in v80:
json_list = fs_local.glob(f'{individual_dir}/{var}_{date}_{ensemble}_*.json')
mzz = MultiZarrToZarr(json_list,
concat_dims = ['step'],
remote_protocol='s3',
remote_options=dict(anon=True),
identical_dims=['latitude', 'longitude'])
name = f'{step_dir}/{Path(json_list[0]).parts[-1]}'
with fs_local.open(name, 'w') as f:
f.write(ujson.dumps(mzz.translate()))
json_list[0]
'/home/rsignell/EarthMap/Projects/notebooks/gefs/individual_retro/acpcp_sfc_2019123100_c00_m000.json'
ensembles
['c00', 'p01', 'p02', 'p03', 'p04']
dates
['2019122900', '2019123000', '2019123100']
%%time
_ = dask.compute(*[dask.delayed(combine_steps)(date,ensemble) for ensemble in ensembles for date in dates], retries=10);
CPU times: user 1min 29s, sys: 19 s, total: 1min 48s Wall time: 8min 38s
json_list = sorted(fs_local.ls(step_dir))
len(json_list)
555
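That count checks out: one step-combined JSON per (variable, ensemble member, date) combination.
# 37 variables * 5 ensemble members * 3 dates = 555 step-combined reference files
print(len(v80) * len(ensembles) * len(dates))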
%%time
ensemble_dir = 'ensemble'
try:
fs_local.rm(ensemble_dir, recursive = True)
except:
pass
fs_local.mkdirs(ensemble_dir)
CPU times: user 1.55 ms, sys: 551 µs, total: 2.1 ms Wall time: 1.52 ms
def combine_ensemble(date):
for var in v80:
json_list = fs_local.glob(f'{step_dir}/{var}_{date}_???_m000.json')
mzz = MultiZarrToZarr(json_list,
concat_dims = ['number'],
remote_protocol='s3',
remote_options=dict(anon=True),
identical_dims=['latitude', 'longitude'])
name = f'{ensemble_dir}/{Path(json_list[0]).parts[-1]}'
with fs_local.open(name, 'w') as f:
f.write(ujson.dumps(mzz.translate()))
%%time
[combine_ensemble(date) for date in dates]
CPU times: user 4.06 s, sys: 1.44 s, total: 5.5 s Wall time: 4.99 s
[None, None, None]
json_list = fs_local.glob(f'{ensemble_dir}/*.json')
len(json_list)
111
return_ds(json_list[0])
<xarray.Dataset> Dimensions: (number: 5, step: 80, latitude: 721, longitude: 1440, surface: 1, time: 1, valid_time: 1) Coordinates: * latitude (latitude) float64 90.0 89.75 89.5 89.25 ... -89.5 -89.75 -90.0 * longitude (longitude) float64 0.0 0.25 0.5 0.75 ... 359.2 359.5 359.8 * number (number) int64 0 1 2 3 4 * step (step) timedelta64[ns] 0 days 03:00:00 ... 10 days 00:00:00 * surface (surface) int64 0 * time (time) datetime64[ns] 2019-12-29 * valid_time (valid_time) datetime64[ns] 2019-12-29T03:00:00 Data variables: acpcp (number, step, latitude, longitude) float64 ... Attributes: centre: kwbc centreDescription: US National Weather Service - NCEP edition: 2 subCentre: 2
json_list[0]
'/home/rsignell/EarthMap/Projects/notebooks/gefs/ensemble/acpcp_sfc_2019122900_c00_m000.json'
dates_dir = 'dates'
try:
fs_local.mkdirs(dates_dir)
except:
pass
def combine_dates(json_list, var):
mzz = MultiZarrToZarr(json_list,
concat_dims = 'time',
remote_protocol='s3',
remote_options=dict(anon=True),
identical_dims=['latitude', 'longitude', 'step'])
name = f'{dates_dir}/{var}.json'
with fs_local.open(name, 'w') as f:
f.write(ujson.dumps(mzz.translate()))
%%time
for var in v80:
json_list = sorted(fs_local.glob(f'{ensemble_dir}/{var}*.json'))
combine_dates(json_list, var)
CPU times: user 1.47 s, sys: 412 ms, total: 1.88 s Wall time: 1.72 s
json_list = fs_local.glob(f'{dates_dir}/*.json')
len(json_list)
json_list
['/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/acpcp_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/apcp_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/cape_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/cin_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/dlwrf_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/dswrf_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/gflux_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/gust_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/hgt_ceiling.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/hgt_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/hlcy_hgt.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/lhtfl_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/ncpcp_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/pbl_hgt.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/pres_msl.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/pres_pvor.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/pres_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/pwat_eatm.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/sfcr_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/shtfl_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/spfh_2m.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/tcdc_eatm.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/tmax_2m.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/tmin_2m.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/tmp_2m.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/tmp_pvor.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/tmp_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/tozne_eatm.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/uflx_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/ugrd_pvor.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/ulwrf_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/ulwrf_tatm.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/uswrf_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/vflx_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/vgrd_pvor.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/watr_sfc.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/dates/weasd_sfc.json']
return_ds(json_list[0]).squeeze(['surface', 'valid_time'],drop=True)
<xarray.Dataset> Dimensions: (time: 3, number: 5, step: 80, latitude: 721, longitude: 1440) Coordinates: * latitude (latitude) float64 90.0 89.75 89.5 89.25 ... -89.5 -89.75 -90.0 * longitude (longitude) float64 0.0 0.25 0.5 0.75 ... 359.0 359.2 359.5 359.8 * number (number) int64 0 1 2 3 4 * step (step) timedelta64[ns] 0 days 03:00:00 ... 10 days 00:00:00 * time (time) datetime64[ns] 2019-12-29 2019-12-30 2019-12-31 Data variables: acpcp (time, number, step, latitude, longitude) float64 ... Attributes: centre: kwbc centreDescription: US National Weather Service - NCEP edition: 2 subCentre: 2
Here we open each reference file and determine what type of vertical level it contains. We will use this later to combine the messages along these levels.
%%time
typeoflevel = {}
for i,ref in enumerate(json_list):
try:
ds = return_ds(ref)
        dim = [dim for dim in list(ds.dims) if dim not in ['valid_time', 'x', 'y', 'step', 'time', 'latitude', 'longitude', 'number']]  # exclude the dims common to all variables; what remains is the vertical-level type
typeoflevel[i] = dim[0]
except:
print(i, 'not included')
pass
CPU times: user 854 ms, sys: 147 ms, total: 1 s Wall time: 880 ms
%%time
groups = {}
for key, value in sorted(typeoflevel.items()):
groups.setdefault(value, []).append(key)
CPU times: user 18 µs, sys: 0 ns, total: 18 µs Wall time: 21.9 µs
groups
{'surface': [0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 16, 18, 19, 26, 28, 30, 32, 33, 35, 36], 'cloudCeiling': [8], 'heightAboveGroundLayer': [10], 'meanSea': [14], 'potentialVorticity': [15, 25, 29, 34], 'atmosphereSingleLayer': [17, 27], 'heightAboveGround': [20, 22, 23, 24], 'atmosphere': [21], 'nominalTop': [31]}
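As a quick check, every variable in v80 should land in exactly one of these groups:
# Each single-level variable should be assigned to exactly one level-type group
assert sum(len(idx) for idx in groups.values()) == len(v80)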
We can now use this groups dictionary to combine the compatible messages
groups_dir = 'groups'
try:
fs_local.mkdirs(groups_dir)
except:
pass
def combine_groups(json_list, group):
mzz = MultiZarrToZarr(json_list,
concat_dims = group,
remote_protocol='s3',
remote_options=dict(anon=True),
identical_dims=['latitude', 'longitude'])
name = f'{groups_dir}/{group}.json'
with fs_local.open(name, 'w') as f:
f.write(ujson.dumps(mzz.translate()))
for group,igroup in groups.items():
json_list=[]
print(group, len(igroup))
for i in igroup:
json_list.append(f'{dates_dir}/{v80[i]}.json')
combine_groups(json_list, group)
This leaves us with reference files that we can use to open the GEFS data as an xarray DataTree
json_list = fs_local.glob(f"{groups_dir}/*.json")
json_list
['/home/rsignell/EarthMap/Projects/notebooks/gefs/groups/atmosphere.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/groups/atmosphereSingleLayer.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/groups/cloudCeiling.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/groups/heightAboveGround.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/groups/heightAboveGroundLayer.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/groups/meanSea.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/groups/nominalTop.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/groups/potentialVorticity.json', '/home/rsignell/EarthMap/Projects/notebooks/gefs/groups/surface.json']
datasets = {}
for group in groups:
datasets[group] = return_ds(f'{groups_dir}/{group}.json')
dt = DataTree.from_dict(datasets)
dt.groups
('/', '/surface', '/cloudCeiling', '/heightAboveGroundLayer', '/meanSea', '/potentialVorticity', '/atmosphereSingleLayer', '/heightAboveGround', '/atmosphere', '/nominalTop')
ds_surface = dt['surface']
ds_surface
<xarray.DatasetView> Dimensions: (surface: 1, time: 3, number: 5, step: 80, latitude: 721, longitude: 1440, valid_time: 1) Coordinates: * latitude (latitude) float64 90.0 89.75 89.5 89.25 ... -89.5 -89.75 -90.0 * longitude (longitude) float64 0.0 0.25 0.5 0.75 ... 359.2 359.5 359.8 * number (number) int64 0 1 2 3 4 * step (step) timedelta64[ns] 0 days 03:00:00 ... 10 days 00:00:00 * surface (surface) int64 0 * time (time) datetime64[ns] 2019-12-29 2019-12-30 2019-12-31 * valid_time (valid_time) datetime64[ns] 2019-12-29T03:00:00 Data variables: (12/22) acpcp (surface, time, number, step, latitude, longitude) float64 ... cape (surface, time, number, step, latitude, longitude) float64 ... cin (surface, time, number, step, latitude, longitude) float64 ... dlwrf (surface, time, number, step, latitude, longitude) float64 ... dswrf (surface, time, number, step, latitude, longitude) float64 ... gflux (surface, time, number, step, latitude, longitude) float64 ... ... ... tp (surface, time, number, step, latitude, longitude) float64 ... uflx (surface, time, number, step, latitude, longitude) float64 ... ulwrf (surface, time, number, step, latitude, longitude) float64 ... uswrf (surface, time, number, step, latitude, longitude) float64 ... vflx (surface, time, number, step, latitude, longitude) float64 ... watr (surface, time, number, step, latitude, longitude) float64 ... Attributes: centre: kwbc centreDescription: US National Weather Service - NCEP edition: 2 subCentre: 2
ds_surface = dt['surface'].squeeze('surface',drop=True).squeeze('valid_time',drop=True)
ds_surface
<xarray.DatasetView> Dimensions: (time: 3, number: 5, step: 80, latitude: 721, longitude: 1440) Coordinates: * latitude (latitude) float64 90.0 89.75 89.5 89.25 ... -89.5 -89.75 -90.0 * longitude (longitude) float64 0.0 0.25 0.5 0.75 ... 359.0 359.2 359.5 359.8 * number (number) int64 0 1 2 3 4 * step (step) timedelta64[ns] 0 days 03:00:00 ... 10 days 00:00:00 * time (time) datetime64[ns] 2019-12-29 2019-12-30 2019-12-31 Data variables: (12/22) acpcp (time, number, step, latitude, longitude) float64 ... cape (time, number, step, latitude, longitude) float64 ... cin (time, number, step, latitude, longitude) float64 ... dlwrf (time, number, step, latitude, longitude) float64 ... dswrf (time, number, step, latitude, longitude) float64 ... gflux (time, number, step, latitude, longitude) float64 ... ... ... tp (time, number, step, latitude, longitude) float64 ... uflx (time, number, step, latitude, longitude) float64 ... ulwrf (time, number, step, latitude, longitude) float64 ... uswrf (time, number, step, latitude, longitude) float64 ... vflx (time, number, step, latitude, longitude) float64 ... watr (time, number, step, latitude, longitude) float64 ... Attributes: centre: kwbc centreDescription: US National Weather Service - NCEP edition: 2 subCentre: 2
ds_surface = ds_surface.assign_coords(longitude=(((ds_surface.longitude + 180) % 360) - 180)).sortby('longitude')
var = 't'
da = ds_surface[var]
da
<xarray.DataArray 't' (time: 3, number: 5, step: 80, latitude: 721, longitude: 1440)> [1245888000 values with dtype=float64] Coordinates: * latitude (latitude) float64 90.0 89.75 89.5 89.25 ... -89.5 -89.75 -90.0 * longitude (longitude) float64 -180.0 -179.8 -179.5 ... 179.2 179.5 179.8 * number (number) int64 0 1 2 3 4 * step (step) timedelta64[ns] 0 days 03:00:00 ... 10 days 00:00:00 * time (time) datetime64[ns] 2019-12-29 2019-12-30 2019-12-31 Attributes: (12/19) NV: 0 cfName: air_temperature cfVarName: t dataDate: 20191229 dataTime: 0 dataType: cf ... ... shortName: t stepType: instant stepUnits: 1 totalNumber: 10 typeOfLevel: surface units: K
Use hvplot selection widgets for all extra coordinates
for coord in da.indexes:
print(coord)
pn.pane.HoloViews.widgets[coord]=pn.widgets.Select
latitude
longitude
number
step
time
date = '2019-12-30'
step = 4
valid_time = da.time.sel(time=date).values + da.step.isel(step=step).values
valid_time
numpy.datetime64('2019-12-30T15:00:00.000000000')
datestr = pd.to_datetime(valid_time).strftime('%Y-%m-%d %H:%M')
%%time
das = da.sel(time=date).isel(step=step).load()
CPU times: user 252 ms, sys: 79.8 ms, total: 332 ms Wall time: 669 ms
das.hvplot(x='longitude', y='latitude', rasterize=True, alpha=0.65, cmap='viridis',
global_extent=True, geo=True, tiles='OSM', title=f'var={var}, valid time:{datestr}')
ds_surface[var].hvplot(x='longitude', y='latitude', rasterize=True, alpha=0.65, cmap='viridis',
global_extent=True, geo=True, tiles='OSM')
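When finished, the Dask cluster started earlier can be shut down:
# Release the local Dask cluster resources
client.close()
cluster.close()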