import intake, intake_esm
#Ref https://intake-esm.readthedocs.io/_/downloads/en/latest/pdf/
col_url = "https://cmip6-nc.s3.us-east-2.amazonaws.com/esgf-world.json"
from dask_gateway import Gateway
from dask.distributed import Client
def launchDask(options):
    """Start a new Dask Gateway cluster configured with ``options``.

    If the gateway already lists clusters, the first one listed is connected
    to and shut down before the new cluster is requested, so that the custom
    ``options`` are applied to a fresh cluster. Any other listed clusters are
    left untouched.

    Parameters
    ----------
    options
        Cluster options, passed unchanged to ``Gateway.new_cluster``.

    Returns
    -------
    The cluster object returned by ``Gateway.new_cluster``.
    """
    # TODO: wrap the gateway calls in try/except.
    gateway = Gateway()
    clusters = gateway.list_clusters()
    if clusters:
        # Shut the existing cluster down and start afresh; it is unclear how
        # to apply custom options to an already running cluster.
        existing = gateway.connect(clusters[0].name)
        print("lets close existing connection")
        existing.shutdown()
    return gateway.new_cluster(options)
# Request a Dask Gateway cluster with customised worker options and attach a
# distributed client to it.
gateway = Gateway()
options = gateway.cluster_options()
options.worker_memory=8  # presumably GiB per worker — confirm against the gateway's cluster options
cluster = launchDask(options)
from distributed import Client
client = Client(cluster)
client  # bare expression: displays the client in the notebook
# Let the cluster scale adaptively between 0 and 10 workers.
cluster.adapt(minimum=0, maximum=10)
client  # display again after enabling adaptive scaling
Client
|
Cluster
|
# Open the ESM collection and keep its description (esmcol_data) so that a
# filtered DataFrame can be turned back into a datastore further down.
col = intake.open_esm_datastore(col_url)
esmcol_data = col.esmcol_data
# Search for historical monthly-ocean (Omon) thetao on the native grid (gn),
# member r1i1p1f1, for the selected models.
cat_T = col.search(experiment_id=['historical'],
mip_table='Omon',
ensemble_member=["r1i1p1f1"],
model=['CESM2', 'CESM2-FV2', 'CESM2-WACCM-FV2','CIESM','IPSL-CM6A-LR','MPI-ESM1-2-HR','MPI-ESM1-2-LR','MIROC6','CanESM5',
'MPI-ESM-1-2-HAM','MRI-ESM2-0','SAM0-UNICON'],
#model=['GFDL-CM4','GFDL-ESM4'],
grid_label=['gn'],
variable=["thetao"])
# Explicit version pinning, kept for reference; latest_version() is used instead.
# version=['v20190308', 'v20191120', 'v20200220','v20191108','v20190323','v20190731','v20190710','v20180803',
# 'v20191007','v20190627','v20190311','v20190429','v20190627','v20191205','v20190914','v20190815','v20191108'])
cat_T.df.groupby(['model']).nunique()  # per-model distinct counts; CanESM5 is the only model with two versions
project | institute | experiment_id | frequency | modeling_realm | mip_table | ensemble_member | grid_label | variable | temporal subset | version | path | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
model | ||||||||||||
CESM2 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
CESM2-FV2 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 4 | 1 | 4 |
CESM2-WACCM-FV2 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 4 | 1 | 4 |
CIESM | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 4 | 1 | 4 |
CanESM5 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 17 | 2 | 34 |
IPSL-CM6A-LR | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 1 | 2 |
MIROC6 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 17 | 1 | 17 |
MPI-ESM-1-2-HAM | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 9 | 1 | 9 |
MPI-ESM1-2-HR | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 33 | 1 | 33 |
MPI-ESM1-2-LR | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 9 | 1 | 9 |
MRI-ESM2-0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 3 | 1 | 3 |
SAM0-UNICON | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 17 | 1 | 17 |
cat_T.df[cat_T.df['model']=='CanESM5'].groupby(['version']).nunique()  # before filtering: two versions (v20190306, v20190429), 17 paths each
project | institute | model | experiment_id | frequency | modeling_realm | mip_table | ensemble_member | grid_label | variable | temporal subset | path | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
version | ||||||||||||
v20190306 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 17 | 17 |
v20190429 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 17 | 17 |
def latest_version(ds):
    """Keep only the latest DRS version of every catalogued file.

    Rows of ``ds.df`` are grouped by temporal subset, model, mip_table,
    institute, variable, ensemble_member, grid_label and experiment_id; within
    each group the row with the highest ``version`` string is kept.

    Whole rows are selected (sort + ``drop_duplicates``) instead of using
    ``groupby(...).last()``, because the latter
    * drops every row whose key columns contain a missing value, and
    * takes the last non-null value per column, which can combine values
      coming from different versions.

    Parameters
    ----------
    ds
        Object exposing the catalog as a pandas DataFrame in ``ds.df``.

    Returns
    -------
    pandas.DataFrame
        One row per group, key columns first, sorted by the key columns and
        with a fresh 0..n-1 index.
    """
    keys = ['temporal subset', 'model', 'mip_table', 'institute', 'variable',
            'ensemble_member', 'grid_label', 'experiment_id']
    frame = ds.df
    # Rows without a version sort first so they are never preferred over a
    # row that has one.
    by_version = frame.sort_values('version', na_position='first')
    newest = by_version.drop_duplicates(subset=keys, keep='last')
    # Same layout as the previous groupby-based result: keys first, rows
    # ordered by the keys, index reset.
    columns = keys + [name for name in frame.columns if name not in keys]
    return newest[columns].sort_values(keys).reset_index(drop=True)
# Reduce the thetao search result to the latest version of each file, then
# re-check CanESM5, which had two versions before filtering.
cat_T_new = latest_version(cat_T)
cat_T_new[cat_T_new['model']=='CanESM5'].groupby(['version']).nunique() #one distinct version only, latest one.
temporal subset | model | mip_table | institute | variable | ensemble_member | grid_label | experiment_id | project | frequency | modeling_realm | path | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
version | ||||||||||||
v20190429 | 17 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 17 |
cat_T_new[cat_T_new['model']=='CanESM5']  # list the remaining CanESM5 rows: one distinct version only, the latest one
temporal subset | model | mip_table | institute | variable | ensemble_member | grid_label | experiment_id | project | frequency | modeling_realm | version | path | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | 185001-186012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
16 | 186101-187012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
23 | 187101-188012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
28 | 188101-189012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
35 | 189101-190012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
44 | 190101-191012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
51 | 191101-192012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
56 | 192101-193012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
63 | 193101-194012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
68 | 194101-195012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
80 | 195101-196012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
85 | 196101-197012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
92 | 197101-198012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
97 | 198101-199012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
104 | 199101-200012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
112 | 200101-201012 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
119 | 201101-201412 | CanESM5 | Omon | CCCma | thetao | r1i1p1f1 | gn | historical | CMIP6 | mon | ocean | v20190429 | s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... |
# Rebuild an intake-esm datastore from the filtered DataFrame, reusing the
# esmcol_data taken from the original collection. This rebinds cat_T.
cat_T = intake.open_esm_datastore(cat_T_new,esmcol_data=esmcol_data)
#cat_T['CMIP6.CCCma.CanESM5.historical.Omon']
cat_T.df[cat_T.df['model']=='CanESM5'].groupby(['version']).nunique() #one distinct version only, latest one.
temporal subset | model | mip_table | institute | variable | ensemble_member | grid_label | experiment_id | project | frequency | modeling_realm | path | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
version | ||||||||||||
v20190429 | 17 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 17 |
VOLCELLO test: repeat the latest-version filtering for the volcello variable (NorESM2-LM)
# Search for historical volcello from NorESM2-LM in both the Ofx and Omon
# tables, without restricting the grid label.
cat_T_v = col.search(experiment_id=['historical'],
mip_table=['Ofx','Omon'],
ensemble_member=["r1i1p1f1"],
model=['NorESM2-LM'],
#model=['GFDL-CM4','GFDL-ESM4'],
# grid_label=['gn'],
variable=["volcello"])
# Explicit version pinning, kept for reference; latest_version() is used instead.
# version=['v20190308', 'v20191120', 'v20200220','v20191108','v20190323','v20190731','v20190710','v20180803',
# 'v20191007','v20190627','v20190311','v20190429','v20190627','v20191205','v20190914','v20190815','v20191108'])
cat_T_v.df.groupby(['model']).nunique()  # NorESM2-LM has two versions
project | institute | experiment_id | frequency | modeling_realm | mip_table | ensemble_member | grid_label | variable | temporal subset | version | path | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
model | ||||||||||||
NorESM2-LM | 1 | 1 | 1 | 1 | 1 | 2 | 1 | 2 | 1 | 13 | 2 | 14 |
# Versions present in the volcello search result before filtering.
cat_T_v.df.version.unique()
array(['v20190815', 'v20191108'], dtype=object)
# Apply the latest-version filter and list the versions that remain.
cat_T_vol = latest_version(cat_T_v)
cat_T_vol.version.unique()
array(['v20191108'], dtype=object)
# Per-model distinct counts of the filtered volcello table.
cat_T_vol.groupby(['model']).nunique()
temporal subset | mip_table | institute | variable | ensemble_member | grid_label | experiment_id | project | frequency | modeling_realm | version | path | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
model | ||||||||||||
NorESM2-LM | 13 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 13 |
# Turn the filtered volcello table back into a datastore and confirm which
# versions it contains.
cat_T_volstore = intake.open_esm_datastore(cat_T_vol,esmcol_data=esmcol_data)
cat_T_volstore.df.version.unique()
array(['v20191108'], dtype=object)