#!/usr/bin/env python
# coding: utf-8
"""Pick the latest DRS version of selected CMIP6 ocean datasets.

Exported from a Jupyter notebook (the ``# In[N]:`` markers are the original
cell boundaries).  The script:

1. defines preprocessing helpers for CMIP6 datasets (chunking, calendar
   fix-up, unit correction, cell-area reconstruction);
2. opens an intake-esm catalog from ``col_url``;
3. searches it for ``thetao``, ``volcello`` and ``areacello`` entries
   (``historical``, ``r1i1p1f1``, ``grid_label='gn'``) for ``gn_models``;
4. keeps only the last version of every dataset and re-wraps the filtered
   tables as intake-esm datastores.

Bare expressions such as ``col`` or ``cat_VOfx_gn.df`` are notebook display
cells; they have no effect when the file is run as a plain script.
"""

# In[ ]:


from glob import glob
import xarray as xr
import cftime
import nc_time_axis
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
pd.options.display.max_rows = 200
import intake
import intake_esm

# NOTE: `get_ipython` only exists inside IPython/Jupyter; the install has to
# run before the cmip6_preprocessing import just below.
get_ipython().system(' pip install cmip6_preprocessing')
from cmip6_preprocessing.preprocessing import (correct_units, rename_cmip6)


# ## Functions for preprocessing CMIP6 data

# In[12]:


def chunk_time(ds):
    """Rechunk ``ds`` to one time step per chunk.

    Parameters
    ----------
    ds : dataset-like
        Object exposing ``dims`` and ``chunk`` (an xarray Dataset here).

    Returns
    -------
    The rechunked dataset, or ``ds`` untouched when it has no ``time``
    dimension.
    """
    if 'time' in ds.dims:
        ds = ds.chunk({'time': 1})
    return ds


# In[13]:


# Necessary for creating a common time axis for all models
# We want to create a common time axis so there will be no gaps when plotting the results
def fix_time(ds):
    """Force the ``time`` calendar attribute to ``noleap`` and decode.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset to fix.

    Returns
    -------
    xarray.Dataset
        ``ds`` unchanged when it has no ``time`` dimension; otherwise the
        result of ``xr.decode_cf(ds)`` after the calendar attribute has been
        normalised.  Note that the ``time`` attributes of the input are
        updated in place.
    """
    if "time" not in ds.dims:
        return ds

    # A missing calendar and an unexpected calendar are handled the same way:
    # both end up as "noleap".
    accepted_calendars = ("noleap", "NOLEAP", "365_day")
    if ds["time"].attrs.get("calendar") not in accepted_calendars:
        ds["time"].attrs.update({"calendar": "noleap"})

    # NOTE(review): the source had lost its indentation; decode_cf is assumed
    # to run for every dataset that has a time dimension - confirm.
    ds = xr.decode_cf(ds)
    return ds


# In[14]:


# Pass this function for preprocessing thetao data
def pp_thetao(ds):
    """Preprocess a ``thetao`` dataset: rename, fix calendar, correct units."""
    ds = rename_cmip6(ds)
    ds = fix_time(ds)
    ds = correct_units(ds)
    return ds


# In[15]:


# Pass this function for preprocessing volcello and areacello data
def pp_volcello(ds):
    """Preprocess a ``volcello``/``areacello`` dataset.

    Same steps as ``pp_thetao`` with an extra per-time-step rechunk right
    after the renaming.
    """
    ds = rename_cmip6(ds)
    ds = chunk_time(ds)
    ds = fix_time(ds)
    ds = correct_units(ds)
    return ds


# In[16]:


# Use this function to reconstruct areacello
def compute_area_regular_grid(ds, Rearth=6378e3):
    """Compute the cell areas of a regular grid.

    The grid spacing is hard-coded to 1 degree in both directions.
    ``ds["y"]`` is used as the latitude in degrees; ``ds["x"]`` only
    contributes its length.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset providing the ``x`` and ``y`` coordinates.
    Rearth : float, optional
        Earth radius (presumably in metres, so the area is in square
        metres - confirm).

    Returns
    -------
    xarray.DataArray
        Cell areas with dims ``('y', 'x')``.
    """
    # Arc length of one degree along a great circle.
    one_degree = 2 * np.pi * Rearth / 360
    dx_1deg = one_degree * 1
    dy_1deg = one_degree * 1

    _, lat2d = np.meshgrid(ds["x"].values, ds["y"].values)

    # The zonal extent shrinks with the cosine of latitude.
    dx = dx_1deg * np.cos(2 * np.pi * lat2d / 360)
    area = dx * dy_1deg
    return xr.DataArray(area, dims=('y', 'x'))


# ## Load the catalog with Intake-ESM

# In[17]:


col_url = "https://raw.githubusercontent.com/aradhakrishnanGFDL/gfdl-aws-analysis/community/esm-collection-spec-examples/esgf-world.json"


# In[18]:


col = intake.open_esm_datastore(col_url)
esmcol_data = col.esmcol_data
col


# In[62]:


# Earlier groupby-based attempt, left disabled:
#def latest_version(ds):
#    """filters latest DRS versions of datasets only"""
#    ds=ds.df.sort_values(['version'],ascending=False).groupby(['temporal subset','model','mip_table',
#                                                                'institute','variable','ensemble_member',
#                                                                'grid_label','experiment_id'],as_index=False)#.head(1)
#    return ds

def latest_version(cat):
    """Keep only the last version of every dataset in a catalog.

    Rows of ``cat.df`` are sorted by ``version`` then ``path``; for each
    combination of the identifying columns only the last row is kept.

    Parameters
    ----------
    cat : esm datastore
        Catalog (or search result) exposing its table as ``cat.df``.

    Returns
    -------
    pandas.DataFrame
        The filtered table.  This is a DataFrame, not a datastore; the caller
        has to wrap it again to get a datastore.
    """
    dataset_key = ['temporal subset', 'model', 'mip_table',
                   'institute', 'variable', 'ensemble_member',
                   'grid_label', 'experiment_id']
    latest_cat = (
        cat.df
        .sort_values(by=['version', 'path'])
        .drop_duplicates(dataset_key, keep='last')
    )
    return latest_cat


# In[52]:


# gn_models = ['ACCESS-ESM1-5','BCC-CSM2-MR','BCC-ESM1','CAMS-CSM1-0','EC-Earth3',
# 'EC-Earth3-Veg','FIO-ESM-2-0','GISS-E2-1-G','GISS-E2-1-G-CC','NESM3','ACCESS-CM2','CIESM','SAM0-UNICON','MPI-ESM1-2-HR',
# 'CanESM5','FGOALS-f3-L','IPSL-CM6A-LR','MIROC6','MPI-ESM-1-2-HAM','MPI-ESM1-2-LR']

gn_models = ['CESM2', 'CESM2-FV2', 'CESM2-WACCM-FV2', 'IPSL-CM6A-LR',
             'MPI-ESM1-2-HR', 'MPI-ESM1-2-LR', 'MIROC6', 'CanESM5',
             'MPI-ESM-1-2-HAM', 'MRI-ESM2-0', 'SAM0-UNICON']

cat_T_gn = col.search(experiment_id=['historical'],
                      mip_table='Omon',
                      ensemble_member=["r1i1p1f1"],
                      model=gn_models,
                      grid_label=['gn'],
                      variable=["thetao"])

cat_VOmon_gn = col.search(experiment_id=['historical'],
                          mip_table=['Omon'],
                          ensemble_member="r1i1p1f1",
                          model=gn_models,
                          grid_label='gn',
                          variable=["volcello"])

cat_VOfx_gn = col.search(experiment_id=['historical'],
                         mip_table=['Ofx'],
                         ensemble_member="r1i1p1f1",
                         model=gn_models,
                         grid_label='gn',
                         variable=["volcello"])

cat_A_gn = col.search(experiment_id=['historical'],
                      mip_table=['Omon', 'Ofx'],
                      ensemble_member="r1i1p1f1",
                      model=gn_models,
                      grid_label='gn',
                      variable=['areacello'])


# In[53]:


cat_VOfx_gn.df


# In[63]:


cat_T_gn_latest = latest_version(cat_T_gn)
cat_VOmon_gn_latest = latest_version(cat_VOmon_gn)
cat_VOfx_gn_latest = latest_version(cat_VOfx_gn)
cat_A_gn_latest = latest_version(cat_A_gn)


# In[64]:


cat_VOfx_gn_latest.head()


# In[59]:


cat_VOfx_gn.df


# In[24]:


# Re-wrap the filtered tables as datastores; from here on the cat_*_gn names
# refer to the latest-version catalogs, not to the raw search results.
cat_T_gn = intake.open_esm_datastore(cat_T_gn_latest, esmcol_data=esmcol_data)
cat_VOmon_gn = intake.open_esm_datastore(cat_VOmon_gn_latest, esmcol_data=esmcol_data)
cat_VOfx_gn = intake.open_esm_datastore(cat_VOfx_gn_latest, esmcol_data=esmcol_data)
cat_A_gn = intake.open_esm_datastore(cat_A_gn_latest, esmcol_data=esmcol_data)


# In[66]:


cat_VOfx_gn_latest


# In[79]:


cat_T_gn.df[cat_T_gn.df['model']=='CanESM5'].groupby(['version']).nunique() #one distinct version only, latest one.


# In[82]:


cat_T_gn_latest[cat_T_gn_latest['model']=='CanESM5'].groupby(['version']).nunique() #one distinct version only, latest one.


# In[90]:


cat_T_gn.df.groupby(['model']).nunique()


# In[91]:


cat_A_gn.df.groupby(['model']).nunique()


# In[95]:


cat_A_gn_latest.groupby(['model']).nunique() #.nunique()


# In[ ]: