If you are only interested in the final plots and a summary for preprocessing level 5 go to:
from oggm import cfg, workflow, utils, shop
import pandas as pd
import os, glob
import numpy as np
import xarray as xr
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("whitegrid")
cfg.initialize()
import matplotlib
matplotlib.rcParams['figure.figsize'] = (14, 8)
# makes sure that the pandas output columns are not cropped!
pd.set_option("max_colwidth", None)
def error_analysis_w_plot(dfserr={}, pd_rel_error_area = None,
level='L2', exp='elev_bands', pcp ='', clim='',
qc = '', match = '', border = 160,
rgi_reg = 'all',
path = '/home/www/oggm/gdirs/oggm_v1.4/', subplot = False, xlim=None, plot=True, rename_match =True):
""" Estimates for a specific preprocessed gdirs the relative glacier area with errors
and the relative amount of glaciers with errors (both in %).
Plots the error counts for the different error types.
Works only if the glacier statistics were estimated!
Parameters
----------
dfserr : dictionary
dictionary where already errors of largest glaciers from other preprocessed gdirs are saved, default is an empty dictionary
pd_rel_error_area : pd.DataFrame
pandas DataFrame with statistics about relative area and amount of errors on glaciers.
Default is None, which creates a new pandas DataFrame
level : str
the preprocessing level where the errors should be estimated and plotted.
So far, it works only for 'L2' and 'L5'. Default is 'L2'.
exp : str
which glacier flowlines are used. Either 'elev_bands' (default) or 'centerlines'
pcp : str
precipitation factor applied (depends on clim). Has to be set!
clim : str
which baseline is applied. Either 'ERA5' or 'CRU'. Has to be set!
qc : str
whether a glacier climate quality check and correction was applied. If yes, set it to 'qc3',
otherwise to 'qc0'. Has to be set!
match: str
which calibration & other methods are chosen: Has to be set if above level 2!
- "no_match" : only direct glaciological WGMS data used
- "match_geod" : same as no_match, but regionally the geodetic estimates are matched by changing epsilon
- "match_geod_pergla" : only per-glacier-individual geodetic estimates of Hugonnet et al. (2021) matched
- "match_geod_pergla_massredis" : same as match_geod_pergla, but instead of the Shallow-Ice Approximation,
a mass-redistribution is applied (see: https://docs.oggm.org/en/latest/mass-redistribution.html)
border : str
which size of the local glacier map was used (prepro_border). For qc0, e.g.,
most preprocessing directories are available for border=80. Default is 160
rgi_reg : str
default is all RGI regions. But you can also choose a single RGI region (e.g. rgi_reg = '11') or all RGI
regions except 19 (i.e. not available for CRU!, rgi_reg='all_without_19')
path : str
path to the general gdirs folder
subplot : bool
Default is False. If you want to use several plots, set it to True
xlim : int
Default is None. Maximum of single type errors in the plot. Useful for subplots
plot : bool
If an error type plot should be returned or not!
Returns
-------
dfserr : dictionary
dictionary where for each preprocessed gdir, a pd.DataFrame exists which lists the
type of errors and area of the 15 largest glaciers with errors
pd_rel_error_area : pd.DataFrame
pandas DataFrame with statistics about relative area and amount of errors on glaciers.
If repeated, for several preprocessed gdirs, each row represents the statistics of
one preprocessed gdirs.
"""
if clim == 'CRU' and rgi_reg == 'all':
raise InvalidParamsError("CRU is NOT available for RGI 19, use rgi_reg='all_without_19' instead")
if pd_rel_error_area is None:
pd_rel_error_area = pd.DataFrame(columns=['rel_error_area_in_percent', 'level', 'exp', 'pcp_clim', 'match', 'qc'])
if level == 'L2':
fd = f'{path}/L1-L2_files/{exp}/RGI62/b_{border:03d}/L2/summary/'
err_msg = "In preprocessing level 2, climate, precipitation factor and qc should not be defined"
assert pcp == '' and clim == '' and qc == '', err_msg
title = f'{level}: {exp}, border: {border}'
dict_key = f'{level}_{exp}_b{border}_rgi_{rgi_reg}'
elif level == 'L5':
if (pcp == 'via_winterprcp') or (pcp == '2.6'):
fd = f'{path}/L3-L5_files/{clim}/{exp}/{qc}/{match}/RGI62/b_{border:03d}/L5/summary/'
else:
fd = f'{path}/L3-L5_files/{clim}/{exp}/{qc}/{pcp}/{match}/RGI62/b_{border:03d}/L5/summary/'
err_msg = "In preprocessing level 2, need to prescribe climate (clim), match, precipitation factor (pcp) and quality check type (qc)"
assert pcp != '' and clim != '' and qc != '' and match !='' , err_msg
title = f'{level}: {exp}, {pcp}, {clim}, {match}, {qc}, border: {border}, rgi_{rgi_reg}'
dict_key = f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}_b{border}_rgi_{rgi_reg}'
else:
raise InvalidParamsError('Only L2 and L5 can be used, but the function can be adapted easily in order that it works for other levels!')
# get the statistics file which lists errors and their type
fs = glob.glob(fd+'glacier_statistics*.csv')
if rgi_reg == 'all_without_19':
for f in fs:
if 'statistics_19' in f:
fs.remove(f)
assert len(fs) == 18
elif rgi_reg == 'all':
print(len(fs))
assert len(fs) == 19
else:
for f in fs:
if f'statistics_{rgi_reg}' in f:
fs = [f]
break
assert len(fs) == 1
df = []
for f in fs:
df.append(pd.read_csv(f, index_col=0, low_memory=False))
df = pd.concat(df).sort_index()
rel_error_area = df.loc[~df['error_task'].isnull()].rgi_area_km2.sum() / df.rgi_area_km2.sum() * 100
# plot the amount of errors for each error type
if plot:
title = title + '\n relative glacier area with errors: {:.2f}%'.format(rel_error_area)
if not subplot:
plt.figure()
plt.title(title, fontsize=12)
sns.countplot(y="error_task", data=df.sort_values(by='error_task'));
if xlim is not None:
plt.xlim([0,xlim])
# save the glaciers that result in errors sorted from largest to smallest glacier area
dfserr[dict_key] = df.loc[~df['error_task'].isnull()].sort_values(by='rgi_area_km2', ascending=False)[['rgi_area_km2', 'error_task', 'error_msg']]
# .iloc[:15]
# save statistics about relative area and amount of errors on glaciers
pd_rel_error_area.loc[dict_key] = np.NaN
pd_rel_error_area.loc[dict_key]['rel_error_area_in_percent'] = rel_error_area
pd_rel_error_area.loc[dict_key]['rgi_reg'] = rgi_reg
pd_rel_error_area.loc[dict_key]['level'] = level
pd_rel_error_area.loc[dict_key]['border'] = border
pd_rel_error_area.loc[dict_key]['exp'] = exp
if rename_match:
if match == 'match_geod_pergla_2.6':
pd_rel_error_area.loc[dict_key]['match'] = match[:-4]
elif match == 'match_geod_pergla_winterprcp':
pd_rel_error_area.loc[dict_key]['match'] = 'match_geod_pergla'
else:
pd_rel_error_area.loc[dict_key]['match'] = match
else:
pd_rel_error_area.loc[dict_key]['match'] = match
pd_rel_error_area.loc[dict_key]['clim_pcp'] = f'{clim}_{pcp}'
pd_rel_error_area.loc[dict_key]['qc'] = qc
return dfserr, pd_rel_error_area
# could be changed, but not all pre-processed directories are available for all prepro_border options!
border = 160 # 80 if qc0
# these are just containers to save and later compare the error statistics!
dfserr = {}
pd_rel_error_area = pd.DataFrame(columns=['rel_error_area_in_percent', 'level', 'exp', 'clim_pcp', 'match', 'qc', 'border', 'rgi_reg'])
In pre-processing level 2, we only distinguish between elevation bands and centerlines (see the Flowlines documentation for an explanation about the differences).
plt.figure(figsize=(14,3))
plt.subplot(121)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area, level='L2', subplot=True,
exp='elev_bands', border=border, xlim = 800)
plt.subplot(122)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area, level='L2', subplot=True,
exp='centerlines', border=border, xlim = 800)
plt.tight_layout()
Here you can analyse the failing glaciers with the largest area and the type of error that has occurred!
dfserr['L2_elev_bands_b160_rgi_all'].head()
dfserr['L2_centerlines_b160_rgi_all'].head()
Let's just look at the level 2 errors for only RGI region 12
rgi_reg = '12'
plt.figure(figsize=(14,3))
plt.subplot(121)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area, level='L2', subplot=True,
exp='elev_bands', rgi_reg=rgi_reg,
border=border, xlim = 400)
plt.subplot(122)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area, level='L2', subplot=True,
exp='centerlines', rgi_reg=rgi_reg,
border=border, xlim = 400)
plt.tight_layout()
dfserr['L2_elev_bands_b160_rgi_12']['error_msg']
In RGI region 12, ~12% of the glacier area can not run even in pre-processing level 2. The reasons are glacier masks errors because many of the glaciers in RGI region 12 are "nominal" glaciers. Nominal means here that the glaciers don't have a glacier outline but only a location and area and hence can not be run with OGGM.
In pre-processing level 5, we distinguish between:
level = 'L5'
exp = 'elev_bands'
qc = 'qc3'
pcps = ['pcp2.5', 'pcp1.6']
clims = ['CRU', 'ERA5']
# when we compare between ERA5 and CRU, we have to omit region 19 because CRU has no climate date for RGI region 19!
rgi_reg = 'all_without_19'
missing = []
# different match options only available for elev_bands
plt.figure(figsize=(20,18))
n = 1
for match in ['no_match', 'match_geod', 'match_geod_pergla', 'match_geod_pergla_massredis']: #
for pcp, clim in zip(pcps, clims):
plt.subplot(4,2,n)
n += 1
try:
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area,
level=level,exp=exp, pcp =pcp, clim=clim,
qc=qc,border=border, match=match,
rgi_reg = rgi_reg,
subplot=True, xlim=3800)
except:
missing.append(f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}')
plt.title(f'missing: {level}_{exp}_{pcp}_{clim}_{match}_{qc}')
plt.tight_layout()
Let's repeat the same for the centerlines:
exp = 'centerlines'
match = 'no_match'
rgi_reg = 'all_without_19'
plt.figure(figsize=(14,4))
n = 1
for pcp, clim in zip(pcps, clims):
plt.subplot(1,2,n)
n += 1
try:
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area,
level=level,exp=exp, pcp =pcp, clim=clim,
qc=qc,border=border, match=match,
rgi_reg=rgi_reg, subplot=True)
except:
missing.append(f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}')
plt.title(f'missing: {level}_{exp}_{pcp}_{clim}_{match}_{qc}')
plt.tight_layout()
Let's have a quick look at the largest failing glaciers and their errors!
dfserr[f'L5_elev_bands_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19'].head()
dfserr[f'L5_elev_bands_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19'].head()
pd_rel_error_area
# get only the errors from preprocessing level L5
pd_rel_error_area_L5 = pd_rel_error_area[(pd_rel_error_area.level == 'L5') & (pd_rel_error_area.rgi_reg =='all_without_19')]
pd_rel_error_area_L5
Let's plot this in a nicer way!
sns.catplot(y="match", x="rel_error_area_in_percent", data=pd_rel_error_area_L5,
hue='clim_pcp', orient='h', col='exp',
kind='bar', palette='Greys') #['Black', 'Grey'])
plt.xlim([0,3.5])
plt.suptitle("Relative glacier area with missing glaciers for preprocessing level L5, qc3, border=160, rgi_reg = 'all_without_19'")
plt.tight_layout()
qc = 'qc0'
level = 'L5'
pcp = 'pcp2.5'
clim = 'CRU' #, 'ERA5']
exp = 'elev_bands' #, 'centerlines']:
rgi_reg = 'all_without_19'
missing = []
# different match options only available for elev_bands
plt.figure(figsize=(20,18))
n = 1
for match in ['no_match', 'match_geod', 'match_geod_pergla', 'match_geod_pergla_massredis']: #
for border in [80, 160, 240]:
plt.subplot(4,3,n)
n += 1
try:
if 'match_geod_pergla' in match:
path = '/home/www/oggm/gdirs/oggm_v1.6/'
else:
path = '/home/www/oggm/gdirs/oggm_v1.4/'
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area,
level=level,exp=exp, pcp =pcp, clim=clim,
qc=qc,border=border, match=match,path=path,
rgi_reg=rgi_reg, subplot=True, xlim=8000)
except:
missing.append(f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}_b{border}')
plt.title(f'missing: {level}_{exp}_{pcp}_{clim}_{match}_{qc}_b{border}')
plt.tight_layout()
plt.figure(figsize=(20,4))
plt.subplot(1,2,1)
dfserr_median_prcp_fac, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area = pd_rel_error_area,
level=level, exp='elev_bands', pcp = '2.6',
match ='match_geod_pergla_2.6', clim='GSWP3_W5E5',
qc = 'qc0', border = 160,xlim=3800,
rgi_reg = rgi_reg,
path = '/home/www/fmaussion/runs/new_gdirs/oggm_v1.6/', subplot = True, plot=True)
plt.subplot(1,2,2)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area = pd_rel_error_area,
level=level, exp='elev_bands', pcp = 'via_winterprcp',
match ='match_geod_pergla_winterprcp', clim='GSWP3_W5E5',
qc = 'qc0', border = 160,xlim=3800,
rgi_reg = rgi_reg,
path = '/home/www/fmaussion/runs/new_gdirs/oggm_v1.6/', subplot = True, plot=True)
plt.tight_layout()
qc = 'qc0'
level = 'L5'
pcp = 'pcp1.6'
clim = 'ERA5' #, 'ERA5']
exp = 'elev_bands' #, 'centerlines']:
rgi_reg = 'all_without_19'
missing = []
# different match options only available for elev_bands
plt.figure(figsize=(20,10))
n = 1
for match in ['match_geod_pergla', 'match_geod_pergla_massredis']: #
for border in [80, 160]:
plt.subplot(2,2,n)
n += 1
try:
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area,
level=level,exp=exp, pcp =pcp, clim=clim,
qc=qc,border=border, match=match,
rgi_reg=rgi_reg, subplot=True, xlim=8000,
path = '/home/www/oggm/gdirs/oggm_v1.6/')
except:
missing.append(f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}_b{border}')
plt.title(f'missing: {level}_{exp}_{pcp}_{clim}_{match}_{qc}_b{border}')
plt.tight_layout()
# Let's get all level 5 statistics (for both, qc0 and qc3!)
pd_rel_error_area_L5 = pd_rel_error_area[(pd_rel_error_area.level == 'L5') & (pd_rel_error_area.rgi_reg =='all_without_19')]
condi1 = ((pd_rel_error_area_L5.border == 160) & (pd_rel_error_area_L5.qc=='qc0'))
condi2 = ((pd_rel_error_area_L5.border == 160) & (pd_rel_error_area_L5.qc=='qc3'))
pd_rel_error_area_L5 = pd_rel_error_area_L5[condi1 | condi2]
sns.set(font_scale = 1.4)
sns.set_style('whitegrid')
pd_rel_error_area_L5['clim_pcp_qc_border'] = np.NaN
_hue = pd_rel_error_area_L5['clim_pcp'].values+'_'+pd_rel_error_area_L5['qc'].values+'_b'+ pd_rel_error_area_L5['border'].values.astype(str)
pd_rel_error_area_L5['clim_pcp_qc_border'] = _hue
pd_rel_error_area_L5 = pd_rel_error_area_L5.sort_values('clim_pcp_qc_border')
sns.catplot(y="match", x="rel_error_area_in_percent", data=pd_rel_error_area_L5,
hue='clim_pcp_qc_border', orient='h', col='exp',
kind='bar', palette='Greys',
height=6, aspect=1.3)
plt.suptitle("Relative glacier area with failing glaciers for preprocessing level L5 (all RGI regions except RGI 19)")
ax = plt.gca()
plt.xlim([0,3.5])
plt.tight_layout();
plt.savefig('relative_failing_glacier_area_160.png')
pd_rel_error_area_L5
# Let's get all level 5 statistics (for both, qc0 and qc3!)
pd_rel_error_area_L5 = pd_rel_error_area[(pd_rel_error_area.level == 'L5') & (pd_rel_error_area.rgi_reg =='all_without_19')]
# we only use border = 80 for qc0 as this is the option where most preprocessed directories are available
condi1 = ((pd_rel_error_area_L5.border == 80) & (pd_rel_error_area_L5.qc=='qc0'))
# we only use border = 160 for qc3 as this is the option where most preprocessed directories are available
condi2 = ((pd_rel_error_area_L5.border == 160) & (pd_rel_error_area_L5.qc=='qc3'))
#
condi3 = (('GSWP3_W5E5_2.6' == pd_rel_error_area['clim_pcp']) | ('GSWP3_W5E5_via_winterprcp' == pd_rel_error_area['clim_pcp']))
pd_rel_error_area_L5 = pd_rel_error_area_L5[(condi1 |condi2|condi3)]
sns.set(font_scale = 1.4)
sns.set_style('whitegrid')
pd_rel_error_area_L5['clim_pcp_qc_border'] = np.NaN
_hue = pd_rel_error_area_L5['clim_pcp'].values+'_'+pd_rel_error_area_L5['qc'].values+'_b'+ pd_rel_error_area_L5['border'].values.astype(str)
pd_rel_error_area_L5['clim_pcp_qc_border'] = _hue
pd_rel_error_area_L5 = pd_rel_error_area_L5.sort_values('clim_pcp_qc_border')
sns.catplot(y="match", x="rel_error_area_in_percent", data=pd_rel_error_area_L5,
hue='clim_pcp_qc_border', orient='h', col='exp',
kind='bar', palette='Greys',
height=6, aspect=1.3)
plt.suptitle("Relative glacier area with failing glaciers for preprocessing level L5 (all RGI regions except RGI 19)")
ax = plt.gca()
plt.xlim([0,3.5])
plt.tight_layout();
plt.savefig('relative_failing_glacier_area.png')