#!/usr/bin/env python # coding: utf-8 # # Comparing CERA-20c and CRU TS 4.03 at glaciated locations # In[1]: import xarray as xr import cartopy import cartopy.crs as ccrs import matplotlib.pyplot as plt import numpy as np import pandas as pd from oggm import utils import hvplot.pandas # In[2]: import warnings warnings.filterwarnings('ignore', r'(invalid value encountered in|Mean of empty slice)') # ## The boring and necessary stuff: data preparation # ### Read the data # In[3]: # Download the data f_c20c = utils.file_downloader('https://cluster.klima.uni-bremen.de/~fmaussion/climate/cera-20c/cera-20c_t2m_1901-2010.nc') # Read it with xarray and chunk it for better memory management ds_c20c_o = xr.open_dataset(f_c20c) ds_c20c_o = ds_c20c_o.chunk(chunks={'number':1, 'longitude':90}) ds_c20c_o # In[4]: # Roll the coords because [0-360] is not nice ds_c20c = ds_c20c_o.roll(longitude=179, roll_coords=True) ds_c20c['longitude'] = np.where(ds_c20c['longitude'] > 180, ds_c20c['longitude'] - 360, ds_c20c['longitude']) ds_c20c = ds_c20c.chunk(chunks={'number':1, 'time':12}) # In[5]: # Convert to C° ds_c20c['t2m'] = ds_c20c['t2m'] - 273.15 # In[6]: # Read CRU ds_cru = xr.open_dataset(utils.get_cru_file('tmp', version='4.03')).chunk(chunks={'time':12}) ds_cru # In[7]: # Rename CRU's coordinates ds_cru = ds_cru.rename_dims({'lon':'longitude', 'lat':'latitude'}).rename({'lon':'longitude', 'lat':'latitude'}) # In[8]: # CRU's time is horribly centred on month middle ds_cru['time'] = pd.date_range('1901-01', '2018-12', freq='MS') # In[9]: # Station network should have missing data as well ds_cru['stn'] = ds_cru['stn'].where(ds_cru['stn']!=-999) # ## Work with annual data # We don't care about annual cycle for now: # In[10]: ds_cru = ds_cru.resample(time='AS').mean(dim='time') ds_c20c = ds_c20c.resample(time='AS').mean(dim='time') # ### Glacier masks # #### CRU # In[11]: # RGI statistics as csv frgi = utils.file_downloader('https://cluster.klima.uni-bremen.de/~fmaussion/misc/rgi62_allglaciers.csv') odf = pd.read_csv(frgi, index_col=0, converters={'Name':str, 'GLIMSId': str, 'BgnDate':str, 'EndDate':str, 'O1Region': str, 'O2Region':str, 'IsTidewater':bool, 'IsNominal':bool}) # In[12]: lon_bins = np.linspace(-180, 180, 721) lat_bins = np.linspace(-90., 90, 361) odf['lon_id'] = np.digitize(odf['CenLon'], lon_bins)-1 odf['lat_id'] = np.digitize(odf['CenLat'], lat_bins)-1 odf['unique_id'] = ['{:03d}_{:03d}'.format(lon, lat) for (lon, lat) in zip(odf['lon_id'], odf['lat_id'])] # In[13]: mdf = odf.drop_duplicates(subset='unique_id').set_index('unique_id') mdf['Area'] = odf.groupby('unique_id').sum()['Area'] print(len(mdf)) # In[14]: mask = np.zeros((360, 720), dtype=float) * np.NaN mask[mdf['lat_id'], mdf['lon_id']] = mdf['Area'] # Now CRU doesn't have data everywhere - Only keep glaciers where we also have data mask = np.where(np.isfinite(ds_cru.tmp.isel(time=0)), mask, np.NaN) ds_cru['glacier_mask'] = (('latitude', 'longitude'), np.isfinite(mask)) ds_cru['glacier_area'] = (('latitude', 'longitude'), mask) # In[15]: fig = plt.figure(figsize=(12, 5)) ax = plt.axes(projection=ccrs.Robinson()) ds_cru['glacier_area'].plot(ax=ax, transform=ccrs.PlateCarree()) ax.coastlines(); ax.gridlines(); # #### CERA-20c # In[16]: odf = pd.read_csv(frgi, index_col=0, converters={'Name':str, 'GLIMSId': str, 'BgnDate':str, 'EndDate':str, 'O1Region': str, 'O2Region':str, 'IsTidewater':bool, 'IsNominal':bool}) # In[17]: lon_bins = np.linspace(-179.5, 180.5, 361) lat_bins = np.linspace(90.5, -90.5, 182) odf['lon_id'] = np.digitize(odf['CenLon'], lon_bins)-1 odf['lat_id'] = np.digitize(odf['CenLat'], lat_bins)-1 odf['unique_id'] = ['{:03d}_{:03d}'.format(lon, lat) for (lon, lat) in zip(odf['lon_id'], odf['lat_id'])] # In[18]: mdf = odf.drop_duplicates(subset='unique_id').set_index('unique_id') mdf['Area'] = odf.groupby('unique_id').sum()['Area'] print(len(mdf)) # In[19]: mask = np.zeros((181, 360), dtype=float) * np.NaN mask[mdf['lat_id'], mdf['lon_id']] = mdf['Area'] ds_c20c['glacier_mask'] = (('latitude', 'longitude'), np.isfinite(mask)) ds_c20c['glacier_area'] = (('latitude', 'longitude'), mask) # In[20]: fig = plt.figure(figsize=(12, 5)) ax = plt.axes(projection=ccrs.Robinson()) ds_c20c['glacier_area'].plot(ax=ax, transform=ccrs.PlateCarree()) ax.coastlines(); ax.gridlines(); # ### Weights: mean is not equal to mean on spherical Earth # We also compute the weight of glaciated cells (weighted per glaciated area). # #### CRU # In[21]: # Weight weight = np.cos(np.deg2rad(ds_cru.latitude)) weight = ds_cru.tmp.isel(time=0) * 0. + weight ds_cru['weight'] = (('latitude', 'longitude'), weight / weight.sum()) ds_cru['weight'].plot(); # In[22]: # For glaciers we don't care about sphere, just about area per grid point weight = ds_cru['weight'] * 0 + ds_cru['glacier_area'] ds_cru['weight_glacier'] = (('latitude', 'longitude'), weight / weight.sum()) # In[23]: fig = plt.figure(figsize=(12, 5)) ax = plt.axes(projection=ccrs.Robinson()) (ds_cru['weight_glacier'] * 100).plot(ax=ax, transform=ccrs.PlateCarree(), cbar_kwargs={'label':'Glacier area (%)'}) ax.coastlines(); ax.gridlines(); plt.title('Glacier area per grid point (% of total)'); plt.savefig('cru_glacier_mask.png', dpi=150, bbox_inches='tight'); # In[24]: f, ax = plt.subplots(figsize=(4, 4)) (ds_cru['weight_glacier'] * 100).sum(dim='longitude').plot(y='latitude', ax=ax); plt.ylim(-90, 90); plt.ylabel('Latitude'); plt.xlabel('Glacier area (% of total)'); plt.title('Zonal average of glacier area'); plt.savefig('zonal_avg.png', dpi=150, bbox_inches='tight'); # #### CERA-20c # In[25]: weight = np.cos(np.deg2rad(ds_c20c.latitude)).clip(0) weight = ds_c20c.t2m.isel(time=0, number=0) * 0. + weight ds_c20c['weight'] = (('latitude', 'longitude'), weight / weight.sum()) ds_c20c['weight'].plot(); # In[26]: weight = ds_c20c['weight'] * 0 + ds_c20c['glacier_area'] ds_c20c['weight_glacier'] = (('latitude', 'longitude'), weight / weight.sum()) ds_c20c['weight_glacier'].plot(); # ### Last boring thing - bring CERA on CRU # In order to really compare CERA with CRU, we need to interpolate CERA on CRU - there is no way around it. This is a bit memory expensive. # In[27]: # But we don't bother with ensemble here c20c_t2m = ds_c20c.t2m.mean(dim='number').compute() # Interpolate ds_cru['cera_on_cru'] = c20c_t2m.interp(longitude=ds_cru.longitude, latitude=ds_cru.latitude) # In[28]: ds_cru['cera_on_cru'] = ds_cru['cera_on_cru'].where(np.isfinite(ds_cru.tmp.isel(time=-1))) # **OK! Now the fun things, finally!** # ## Comparison CERA-20c and CRU on map # The comparison is only partly meaningful because of the different reference topographies. For this reason we will stop working in absolute temp afterwards, but only talk about changes from the reference period '1961', '1990'. # In[29]: ds_cru_ref = ds_cru.sel(time=slice('1961', '1990')).mean(dim='time') # In[30]: fig = plt.figure(figsize=(12, 5)) ax = plt.axes(projection=ccrs.Robinson()) ds_cru_ref['tmp'].plot(ax=ax, transform=ccrs.PlateCarree(), vmin=-31, vmax=31, cmap='RdBu_r', cbar_kwargs={'label':'°C'}); ax.coastlines(); ax.gridlines(); plt.title('Average temperature [1961-1990] CRU TS 4.03'); plt.savefig('refperiod_cru_map.png', dpi=150, bbox_inches='tight'); # In[31]: fig = plt.figure(figsize=(12, 5)) ax = plt.axes(projection=ccrs.Robinson()) ds_cru_ref['cera_on_cru'].plot(ax=ax, transform=ccrs.PlateCarree(), vmin=-31, vmax=31, cmap='RdBu_r', cbar_kwargs={'label':'°C'}) ax.coastlines(); ax.gridlines(); plt.title('Average temperature [1961-1990] CERA-20C'); plt.savefig('refperiod_cera_map.png', dpi=150, bbox_inches='tight'); # In[32]: fig = plt.figure(figsize=(12, 5)) ax = plt.axes(projection=ccrs.Robinson()) (ds_cru_ref['tmp'] - ds_cru_ref['cera_on_cru']).plot(ax=ax, transform=ccrs.PlateCarree(), vmin=-12, vmax=12, cmap='RdBu_r', cbar_kwargs={'label':'°C'}) ax.coastlines(); ax.gridlines(); plt.title('Diff CRU - CERA-20C; average temperature [1961-1990]'); plt.savefig('refperiod_diff_cru_cera_map.png', dpi=150, bbox_inches='tight'); # ### Still, plot the absolute timeseries to check # In[33]: (ds_cru['tmp'] * ds_cru.weight).sum(dim=['longitude','latitude']).plot(label='CRU'); (ds_cru['cera_on_cru'] * ds_cru.weight).sum(dim=['longitude','latitude']).plot(label='CERA-20c on CRU'); plt.xlim('1901', '2009'); plt.ylim(11, 15); plt.legend(loc='best'); # ## Time series anomalies # In[34]: ds_cru['tmp'] = ds_cru['tmp'] - ds_cru_ref['tmp'] ds_cru['cera_on_cru'] = ds_cru['cera_on_cru'] - ds_cru_ref['cera_on_cru'] # In[35]: ts_cru_all = (ds_cru['tmp'] * ds_cru.weight).sum(dim=['longitude','latitude']) df = ts_cru_all.to_pandas().to_frame(name='cru_world') df['cera_on_cru_world'] = (ds_cru['cera_on_cru'] * ds_cru.weight).sum(dim=['longitude','latitude']) df['cera_on_cru_world'] = np.where(df['cera_on_cru_world'] == 0, np.NaN, df['cera_on_cru_world']) # In[36]: df.plot(color=['k', 'C0']); plt.legend(['CRU TS 4.01', 'CERA-20C']); plt.title('Average land temperatures, anomaly to 1961-1990'); plt.ylabel('°C'); plt.xlabel(''); plt.savefig('global_land_ts.png', dpi=150, bbox_inches='tight'); # Much better! # ### Now, for all glacier grid points # In[37]: df['cru_glaciers'] = (ds_cru['tmp'] * ds_cru.weight_glacier).sum(dim=['longitude','latitude']) df['cera_on_cru_glaciers'] = (ds_cru['cera_on_cru'] * ds_cru.weight_glacier).sum(dim=['longitude','latitude']) df['cera_on_cru_glaciers'] = np.where(df['cera_on_cru_glaciers'] == 0, np.NaN, df['cera_on_cru_glaciers']) # In[38]: f, axs = plt.subplots(2, 2, figsize=(16, 9)) df[['cru_world', 'cera_on_cru_world']].plot(ax=axs.flatten()[0]); df[['cru_glaciers', 'cera_on_cru_glaciers']].plot(ax=axs.flatten()[1]); df[['cru_world', 'cru_glaciers']].plot(ax=axs.flatten()[2]); df[['cera_on_cru_world', 'cera_on_cru_glaciers']].plot(ax=axs.flatten()[3]); # In[39]: df[['cru_world', 'cru_glaciers']].plot(color=['k', 'grey']); plt.legend(['CRU TS 4.01 (all land)', 'CRU TS 4.01 (glaciers)']); plt.title('Average temperatures, anomaly to 1961-1990'); plt.ylabel('°C'); plt.xlabel(''); plt.ylim(-1.7, 2.8); plt.savefig('global_cru_glacier_ts.png', dpi=150, bbox_inches='tight'); # In[40]: df[['cera_on_cru_world', 'cera_on_cru_glaciers']].plot(color=['C0', 'lightsteelblue']); plt.legend(['CERA-20C (all land)', 'CERA-20C (glaciers)']); plt.title('Average temperatures, anomaly to 1961-1990'); plt.ylabel('°C'); plt.xlabel(''); plt.ylim(-1.7, 2.8); plt.savefig('global_cera_glacier_ts.png', dpi=150, bbox_inches='tight'); # In[41]: df[['cru_glaciers', 'cera_on_cru_glaciers']].plot(color=['grey', 'lightsteelblue']); plt.legend(['CRU (glaciers)', 'CERA-20C (glaciers)']); plt.title('Average temperatures, anomaly to 1961-1990'); plt.ylabel('°C'); plt.xlabel(''); plt.ylim(-1.7, 2.8); plt.savefig('global_cru_cera_glacier_ts.png', dpi=150, bbox_inches='tight'); # ### What's happening in 1921-1950? # In[42]: ds_cru_1920 = ds_cru.sel(time=slice('1921', '1950')).mean(dim='time') # In[43]: fig = plt.figure(figsize=(12, 5)) ax = plt.axes(projection=ccrs.Robinson()) ds_cru_1920['tmp'].plot(ax=ax, transform=ccrs.PlateCarree(), vmin=-1.8, vmax=1.8, cmap='RdBu_r', cbar_kwargs={'label':'°C'}) ax.coastlines(); ax.gridlines(); plt.title('CRU: 1921-1950 temperature anomaly to 1961-1990'); plt.savefig('1920_cru_map.png', dpi=150, bbox_inches='tight'); # In[44]: fig = plt.figure(figsize=(12, 5)) ax = plt.axes(projection=ccrs.Robinson()) ds_cru_1920['cera_on_cru'].plot(ax=ax, transform=ccrs.PlateCarree(), vmin=-1.8, vmax=1.8, cmap='RdBu_r', cbar_kwargs={'label':'°C'}) ax.coastlines(); ax.gridlines(); plt.title('CERA: 1921-1950 temperature anomaly to 1961-1990'); plt.savefig('1920_cera_map.png', dpi=150, bbox_inches='tight'); # In[45]: fig = plt.figure(figsize=(12, 5)) ax = plt.axes(projection=ccrs.Robinson()) (ds_cru_1920['tmp'] - ds_cru_1920['cera_on_cru']).plot(ax=ax, transform=ccrs.PlateCarree()) ax.coastlines(); ax.gridlines(); # In[46]: fig = plt.figure(figsize=(12, 5)) ax = plt.axes(projection=ccrs.Robinson()) (ds_cru_1920['stn']).plot(ax=ax, transform=ccrs.PlateCarree(), cbar_kwargs={'label':'N stations (from 0 to 8)'}) ax.coastlines(); ax.gridlines(); plt.title('CRU: number of stations used in the interpolation 1921-2950'); plt.savefig('1920_cru_nstats.png', dpi=150, bbox_inches='tight');