Intake Example: https://github.com/intake/intake-examples/blob/master/tutorial/data_scientist.ipynb
import intake
# Location of the C3S intake catalog description (a YAML file fetched over HTTPS).
cat_url = "https://raw.githubusercontent.com/cp4cds/c3s_34g_manifests/update_intake_catalog/intake/catalogs/c3s.yaml"
# Open the catalog from that URL.
cat = intake.open_catalog(cat_url)
# Materialize whatever iterating the catalog yields (presumably the entry names -- TODO confirm).
list(cat)
# Print the 'c3s-cmip6' entry of the catalog.
print(cat['c3s-cmip6'])
Catalogs will be cached locally in ~/.intake/cache.
# Read the 'c3s-cmip6' catalog entry; the result is used below like a pandas DataFrame.
df = cat['c3s-cmip6'].read()
# Summarize the table, asking for deep memory-usage accounting.
df.info(memory_usage='deep')
# Preview the first rows.
df.head()
# Count the distinct values in the ds_id column.
df.ds_id.nunique()
def search(df, collection, time=None):
    """Return the sorted file paths of one dataset that overlap a time range.

    This is a common search done in rook.

    Args:
        df: Table with ``ds_id``, ``path``, ``start_time`` and ``end_time``
            columns. Times are compared as plain strings, so they are
            assumed to be ISO-8601-like text -- TODO confirm.
        collection: Dataset id matched exactly against ``ds_id``.
        time: Optional ``"start/end"`` range, or a single ``"start"`` value.
            A missing or empty side leaves that side effectively unbounded.

    Returns:
        A list of the matching ``path`` values, sorted ascending.

    Raises:
        ValueError: If ``time`` contains more than one ``"/"``.
    """
    start = end = None
    if time:
        if "/" in time:
            start, end = (part.strip() for part in time.split("/"))
        else:
            start = time.strip()
    # Wide defaults stand in for an open-ended side of the range.
    start = start or "1800-01-01"
    end = end or "2500-12-31"

    # Narrow to the requested dataset first so that fillna copies only its
    # rows instead of the whole table.
    sdf = df.loc[df.ds_id == collection].fillna(
        {"start_time": "1000-01-01T12:00:00", "end_time": "3000-12-31T12:00:00"}
    )

    # NOTE(review): this is a plain string comparison. A date-only end such
    # as "2001-12-31" sorts before "2001-12-31T12:00:00", so a row starting
    # on the end day is excluded -- confirm this is intended.
    overlaps = (sdf.end_time >= start) & (sdf.start_time <= end)

    # tolist() keeps every matching row; a round trip through to_dict()
    # would collapse rows that share an index label.
    return sdf.loc[overlaps, "path"].sort_values().tolist()
# Search one explicitly named dataset id for the window 2000-01-01/2001-12-31.
result = search(
    df,
    collection="c3s-cmip6.CMIP.SNU.SAM0-UNICON.historical.r1i1p1f1.day.pr.gn.v20190323",
    time="2000-01-01/2001-12-31",
)
result

# Show the dataset ids of all rows whose table_id is "fx".
df.loc[df["table_id"] == "fx", "ds_id"]

# Take the dataset id of the row at position 29 and search it over
# 2000-01-01/2010-12-31.
collection = df.iloc[29]["ds_id"]
collection
result = search(df, collection=collection, time="2000-01-01/2010-12-31")
result
# Keep only the rows that match all five facet values at once.
result = df[
    df.variable_id.eq("tas")
    & df.experiment_id.eq("historical")
    & df.table_id.eq("day")
    & df.member_id.eq("r1i1p1f1")
    & df.institution_id.eq("MIROC")
]
# Preview the matches, then list the distinct dataset ids among them.
result.head()
result.ds_id.unique()