If you are only interested in the final plots and a summary for preprocessing level 5 go to:
from oggm import cfg, workflow, utils, shop
import pandas as pd
import os, glob
import numpy as np
import xarray as xr
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("whitegrid")
cfg.initialize()
import matplotlib
matplotlib.rcParams['figure.figsize'] = (14, 8)
# makes sure that the pandas output columns are not cropped!
pd.set_option("max_colwidth", None)
2022-07-15 11:10:21: oggm.cfg: Reading default parameters from the OGGM `params.cfg` configuration file. 2022-07-15 11:10:21: oggm.cfg: Multiprocessing switched OFF according to the parameter file. 2022-07-15 11:10:21: oggm.cfg: Multiprocessing: using all available processors (N=32)
def error_analysis_w_plot(dfserr={}, pd_rel_error_area = None,
level='L2', exp='elev_bands', pcp ='', clim='',
qc = '', match = '', border = 160,
rgi_reg = 'all',
path = '/home/www/oggm/gdirs/oggm_v1.4/', subplot = False, xlim=None, plot=True, rename_match =True):
""" Estimates for a specific preprocessed gdirs the relative glacier area with errors
and the relative amount of glaciers with errors (both in %).
Plots the error counts for the different error types.
Works only if the glacier statistics were estimated!
Parameters
----------
dfserr : dictionary
dictionary where already errors of largest glaciers from other preprocessed gdirs are saved, default is an empty dictionary
pd_rel_error_area : pd.DataFrame
pandas DataFrame with statistics about relative area and amount of errors on glaciers.
Default is None, which creates a new pandas DataFrame
level : str
the preprocessing level where the errors should be estimated and plotted.
So far, it works only for 'L2' and 'L5'. Default is 'L2'.
exp : str
which glacier flowlines are used. Either 'elev_bands' (default) or 'centerlines'
pcp : str
precipitation factor applied (depends on clim). Has to be set!
clim : str
which baseline is applied. Either 'ERA5' or 'CRU'. Has to be set!
qc : str
whether a glacier climate quality check and correction was applied. If yes, set it to 'qc3',
otherwise to 'qc0'. Has to be set!
match: str
which calibration & other methods are chosen: Has to be set if above level 2!
- "no_match" : only direct glaciological WGMS data used
- "match_geod" : same as no_match, but regionally the geodetic estimates are matched by changing epsilon
- "match_geod_pergla" : only per-glacier-individual geodetic estimates of Hugonnet et al. (2021) matched
- "match_geod_pergla_massredis" : same as match_geod_pergla, but instead of the Shallow-Ice Approximation,
a mass-redistribution is applied (see: https://docs.oggm.org/en/latest/mass-redistribution.html)
border : str
which size of the local glacier map was used (prepro_border). For qc0, e.g.,
most preprocessing directories are available for border=80. Default is 160
rgi_reg : str
default is all RGI regions. But you can also choose a single RGI region (e.g. rgi_reg = '11') or all RGI
regions except 19 (i.e. not available for CRU!, rgi_reg='all_without_19')
path : str
path to the general gdirs folder
subplot : bool
Default is False. If you want to use several plots, set it to True
xlim : int
Default is None. Maximum of single type errors in the plot. Useful for subplots
plot : bool
If an error type plot should be returned or not!
Returns
-------
dfserr : dictionary
dictionary where for each preprocessed gdir, a pd.DataFrame exists which lists the
type of errors and area of the 15 largest glaciers with errors
pd_rel_error_area : pd.DataFrame
pandas DataFrame with statistics about relative area and amount of errors on glaciers.
If repeated, for several preprocessed gdirs, each row represents the statistics of
one preprocessed gdirs.
"""
if clim == 'CRU' and rgi_reg == 'all':
raise InvalidParamsError("CRU is NOT available for RGI 19, use rgi_reg='all_without_19' instead")
if pd_rel_error_area is None:
pd_rel_error_area = pd.DataFrame(columns=['rel_error_area_in_percent', 'level', 'exp', 'pcp_clim', 'match', 'qc'])
if level == 'L2':
fd = f'{path}/L1-L2_files/{exp}/RGI62/b_{border:03d}/L2/summary/'
err_msg = "In preprocessing level 2, climate, precipitation factor and qc should not be defined"
assert pcp == '' and clim == '' and qc == '', err_msg
title = f'{level}: {exp}, border: {border}'
dict_key = f'{level}_{exp}_b{border}_rgi_{rgi_reg}'
elif level == 'L5':
if (pcp == 'via_winterprcp') or (pcp == '2.6'):
fd = f'{path}/L3-L5_files/{clim}/{exp}/{qc}/{match}/RGI62/b_{border:03d}/L5/summary/'
else:
fd = f'{path}/L3-L5_files/{clim}/{exp}/{qc}/{pcp}/{match}/RGI62/b_{border:03d}/L5/summary/'
err_msg = "In preprocessing level 2, need to prescribe climate (clim), match, precipitation factor (pcp) and quality check type (qc)"
assert pcp != '' and clim != '' and qc != '' and match !='' , err_msg
title = f'{level}: {exp}, {pcp}, {clim}, {match}, {qc}, border: {border}, rgi_{rgi_reg}'
dict_key = f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}_b{border}_rgi_{rgi_reg}'
else:
raise InvalidParamsError('Only L2 and L5 can be used, but the function can be adapted easily in order that it works for other levels!')
# get the statistics file which lists errors and their type
fs = glob.glob(fd+'glacier_statistics*.csv')
if rgi_reg == 'all_without_19':
for f in fs:
if 'statistics_19' in f:
fs.remove(f)
assert len(fs) == 18
elif rgi_reg == 'all':
print(len(fs))
assert len(fs) == 19
else:
for f in fs:
if f'statistics_{rgi_reg}' in f:
fs = [f]
break
assert len(fs) == 1
df = []
for f in fs:
df.append(pd.read_csv(f, index_col=0, low_memory=False))
df = pd.concat(df).sort_index()
rel_error_area = df.loc[~df['error_task'].isnull()].rgi_area_km2.sum() / df.rgi_area_km2.sum() * 100
# plot the amount of errors for each error type
if plot:
title = title + '\n relative glacier area with errors: {:.2f}%'.format(rel_error_area)
if not subplot:
plt.figure()
plt.title(title, fontsize=12)
sns.countplot(y="error_task", data=df.sort_values(by='error_task'));
if xlim is not None:
plt.xlim([0,xlim])
# save the glaciers that result in errors sorted from largest to smallest glacier area
dfserr[dict_key] = df.loc[~df['error_task'].isnull()].sort_values(by='rgi_area_km2', ascending=False)[['rgi_area_km2', 'error_task', 'error_msg']]
# .iloc[:15]
# save statistics about relative area and amount of errors on glaciers
pd_rel_error_area.loc[dict_key] = np.NaN
pd_rel_error_area.loc[dict_key]['rel_error_area_in_percent'] = rel_error_area
pd_rel_error_area.loc[dict_key]['rgi_reg'] = rgi_reg
pd_rel_error_area.loc[dict_key]['level'] = level
pd_rel_error_area.loc[dict_key]['border'] = border
pd_rel_error_area.loc[dict_key]['exp'] = exp
if rename_match:
if match == 'match_geod_pergla_2.6':
pd_rel_error_area.loc[dict_key]['match'] = match[:-4]
elif match == 'match_geod_pergla_winterprcp':
pd_rel_error_area.loc[dict_key]['match'] = 'match_geod_pergla'
else:
pd_rel_error_area.loc[dict_key]['match'] = match
else:
pd_rel_error_area.loc[dict_key]['match'] = match
pd_rel_error_area.loc[dict_key]['clim_pcp'] = f'{clim}_{pcp}'
pd_rel_error_area.loc[dict_key]['qc'] = qc
return dfserr, pd_rel_error_area
# could be changed, but not all pre-processed directories are available for all prepro_border options!
border = 160 # 80 if qc0
# these are just containers to save and later compare the error statistics!
dfserr = {}
pd_rel_error_area = pd.DataFrame(columns=['rel_error_area_in_percent', 'level', 'exp', 'clim_pcp', 'match', 'qc', 'border', 'rgi_reg'])
In pre-processing level 2, we only distinguish between elevation bands and centerlines (see the Flowlines documentation for an explanation about the differences).
plt.figure(figsize=(14,3))
plt.subplot(121)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area, level='L2', subplot=True,
exp='elev_bands', border=border, xlim = 800)
plt.subplot(122)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area, level='L2', subplot=True,
exp='centerlines', border=border, xlim = 800)
plt.tight_layout()
19 19
Here you can analyse the failing glaciers with the largest area and the type of error that has occurred!
dfserr['L2_elev_bands_b160_rgi_all'].head()
rgi_area_km2 | error_task | error_msg | |
---|---|---|---|
rgi_id | |||
RGI60-10.00002 | 48.144 | simple_glacier_masks | GeometryError: RGI60-10.00002 is a nominal glacier. |
RGI60-10.00006 | 12.966 | simple_glacier_masks | GeometryError: RGI60-10.00006 is a nominal glacier. |
RGI60-12.01364 | 9.250 | simple_glacier_masks | GeometryError: RGI60-12.01364 is a nominal glacier. |
RGI60-19.01402 | 7.482 | elevation_band_flowline | InvalidDEMError: (RGI60-19.01402) DEM altidude range too small. |
RGI60-19.01367 | 7.271 | elevation_band_flowline | InvalidDEMError: (RGI60-19.01367) DEM altidude range too small. |
dfserr['L2_centerlines_b160_rgi_all'].head()
rgi_area_km2 | error_task | error_msg | |
---|---|---|---|
rgi_id | |||
RGI60-19.01521 | 94.963 | initialize_flowlines | RuntimeError: Altitude range of main flowline too small: 0.11659260120634544 |
RGI60-10.00002 | 48.144 | glacier_masks | GeometryError: RGI60-10.00002 is a nominal glacier. |
RGI60-04.06184 | 40.934 | initialize_flowlines | AssertionError: |
RGI60-03.04079 | 35.752 | initialize_flowlines | RuntimeError: Altitude range of main flowline too small: 1.2600400626842188e-05 |
RGI60-14.01649 | 30.795 | catchment_area | ValueError: no minimum-cost path was found to the specified end point |
Let's just look at the level 2 errors for only RGI region 12
rgi_reg = '12'
plt.figure(figsize=(14,3))
plt.subplot(121)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area, level='L2', subplot=True,
exp='elev_bands', rgi_reg=rgi_reg,
border=border, xlim = 400)
plt.subplot(122)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area, level='L2', subplot=True,
exp='centerlines', rgi_reg=rgi_reg,
border=border, xlim = 400)
plt.tight_layout()
dfserr['L2_elev_bands_b160_rgi_12']['error_msg']
rgi_id RGI60-12.01364 GeometryError: RGI60-12.01364 is a nominal glacier. RGI60-12.01372 GeometryError: RGI60-12.01372 is a nominal glacier. RGI60-12.01374 GeometryError: RGI60-12.01374 is a nominal glacier. RGI60-12.01500 GeometryError: RGI60-12.01500 is a nominal glacier. RGI60-12.01443 GeometryError: RGI60-12.01443 is a nominal glacier. ... RGI60-12.01628 GeometryError: RGI60-12.01628 is a nominal glacier. RGI60-12.01545 GeometryError: RGI60-12.01545 is a nominal glacier. RGI60-12.01546 GeometryError: RGI60-12.01546 is a nominal glacier. RGI60-12.01547 GeometryError: RGI60-12.01547 is a nominal glacier. RGI60-12.01350 GeometryError: RGI60-12.01350 is a nominal glacier. Name: error_msg, Length: 339, dtype: object
In RGI region 12, ~12% of the glacier area can not run even in pre-processing level 2. The reasons are glacier masks errors because many of the glaciers in RGI region 12 are "nominal" glaciers. Nominal means here that the glaciers don't have a glacier outline but only a location and area and hence can not be run with OGGM.
In pre-processing level 5, we distinguish between:
level = 'L5'
exp = 'elev_bands'
qc = 'qc3'
pcps = ['pcp2.5', 'pcp1.6']
clims = ['CRU', 'ERA5']
# when we compare between ERA5 and CRU, we have to omit region 19 because CRU has no climate date for RGI region 19!
rgi_reg = 'all_without_19'
missing = []
# different match options only available for elev_bands
plt.figure(figsize=(20,18))
n = 1
for match in ['no_match', 'match_geod', 'match_geod_pergla', 'match_geod_pergla_massredis']: #
for pcp, clim in zip(pcps, clims):
plt.subplot(4,2,n)
n += 1
try:
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area,
level=level,exp=exp, pcp =pcp, clim=clim,
qc=qc,border=border, match=match,
rgi_reg = rgi_reg,
subplot=True, xlim=3800)
except:
missing.append(f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}')
plt.title(f'missing: {level}_{exp}_{pcp}_{clim}_{match}_{qc}')
plt.tight_layout()
Let's repeat the same for the centerlines:
exp = 'centerlines'
match = 'no_match'
rgi_reg = 'all_without_19'
plt.figure(figsize=(14,4))
n = 1
for pcp, clim in zip(pcps, clims):
plt.subplot(1,2,n)
n += 1
try:
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area,
level=level,exp=exp, pcp =pcp, clim=clim,
qc=qc,border=border, match=match,
rgi_reg=rgi_reg, subplot=True)
except:
missing.append(f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}')
plt.title(f'missing: {level}_{exp}_{pcp}_{clim}_{match}_{qc}')
plt.tight_layout()
Let's have a quick look at the largest failing glaciers and their errors!
dfserr[f'L5_elev_bands_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19'].head()
rgi_area_km2 | error_task | error_msg | |
---|---|---|---|
rgi_id | |||
RGI60-17.15808 | 721.950 | local_t_star | MassBalanceCalibrationError: RGI60-17.15808: mu* out of specified bounds: 3.30 |
RGI60-17.15897 | 428.505 | local_t_star | MassBalanceCalibrationError: RGI60-17.15897: mu* out of specified bounds: 2.21 |
RGI60-05.10735 | 392.607 | local_t_star | MassBalanceCalibrationError: RGI60-05.10735: mu* out of specified bounds: 0.91 |
RGI60-05.10634 | 388.067 | local_t_star | MassBalanceCalibrationError: RGI60-05.10634: mu* out of specified bounds: 2.07 |
RGI60-05.10617 | 373.290 | local_t_star | MassBalanceCalibrationError: RGI60-05.10617: mu* out of specified bounds: 3.45 |
dfserr[f'L5_elev_bands_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19'].head()
rgi_area_km2 | error_task | error_msg | |
---|---|---|---|
rgi_id | |||
RGI60-10.00002 | 48.144 | simple_glacier_masks | GeometryError: RGI60-10.00002 is a nominal glacier. |
RGI60-15.10055 | 26.727 | local_t_star | MassBalanceCalibrationError: RGI60-15.10055: mu* out of specified bounds: 10290.99 |
RGI60-15.01996 | 16.642 | local_t_star | MassBalanceCalibrationError: RGI60-15.01996: mu* out of specified bounds: 19149.61 |
RGI60-15.04835 | 15.377 | local_t_star | MassBalanceCalibrationError: RGI60-15.04835: mu* out of specified bounds: 12269.57 |
RGI60-10.00006 | 12.966 | simple_glacier_masks | GeometryError: RGI60-10.00006 is a nominal glacier. |
pd_rel_error_area
rel_error_area_in_percent | level | exp | clim_pcp | match | qc | border | rgi_reg | |
---|---|---|---|---|---|---|---|---|
L2_elev_bands_b160_rgi_all | 0.047088 | L2 | elev_bands | _ | 160 | all | ||
L2_centerlines_b160_rgi_all | 0.115339 | L2 | centerlines | _ | 160 | all | ||
L2_elev_bands_b160_rgi_12 | 11.921573 | L2 | elev_bands | _ | 160 | 12 | ||
L2_centerlines_b160_rgi_12 | 11.92272 | L2 | centerlines | _ | 160 | 12 | ||
L5_elev_bands_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19 | 2.460572 | L5 | elev_bands | CRU_pcp2.5 | no_match | qc3 | 160 | all_without_19 |
L5_elev_bands_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19 | 0.081001 | L5 | elev_bands | ERA5_pcp1.6 | no_match | qc3 | 160 | all_without_19 |
L5_elev_bands_pcp2.5_CRU_match_geod_qc3_b160_rgi_all_without_19 | 2.460745 | L5 | elev_bands | CRU_pcp2.5 | match_geod | qc3 | 160 | all_without_19 |
L5_elev_bands_pcp1.6_ERA5_match_geod_qc3_b160_rgi_all_without_19 | 0.081299 | L5 | elev_bands | ERA5_pcp1.6 | match_geod | qc3 | 160 | all_without_19 |
L5_elev_bands_pcp2.5_CRU_match_geod_pergla_qc3_b160_rgi_all_without_19 | 0.549611 | L5 | elev_bands | CRU_pcp2.5 | match_geod_pergla | qc3 | 160 | all_without_19 |
L5_elev_bands_pcp1.6_ERA5_match_geod_pergla_qc3_b160_rgi_all_without_19 | 0.05188 | L5 | elev_bands | ERA5_pcp1.6 | match_geod_pergla | qc3 | 160 | all_without_19 |
L5_elev_bands_pcp1.6_ERA5_match_geod_pergla_massredis_qc3_b160_rgi_all_without_19 | 0.051093 | L5 | elev_bands | ERA5_pcp1.6 | match_geod_pergla_massredis | qc3 | 160 | all_without_19 |
L5_centerlines_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19 | 3.273621 | L5 | centerlines | CRU_pcp2.5 | no_match | qc3 | 160 | all_without_19 |
L5_centerlines_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19 | 0.936732 | L5 | centerlines | ERA5_pcp1.6 | no_match | qc3 | 160 | all_without_19 |
# get only the errors from preprocessing level L5
pd_rel_error_area_L5 = pd_rel_error_area[(pd_rel_error_area.level == 'L5') & (pd_rel_error_area.rgi_reg =='all_without_19')]
pd_rel_error_area_L5
rel_error_area_in_percent | level | exp | clim_pcp | match | qc | border | rgi_reg | |
---|---|---|---|---|---|---|---|---|
L5_elev_bands_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19 | 2.460572 | L5 | elev_bands | CRU_pcp2.5 | no_match | qc3 | 160 | all_without_19 |
L5_elev_bands_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19 | 0.081001 | L5 | elev_bands | ERA5_pcp1.6 | no_match | qc3 | 160 | all_without_19 |
L5_elev_bands_pcp2.5_CRU_match_geod_qc3_b160_rgi_all_without_19 | 2.460745 | L5 | elev_bands | CRU_pcp2.5 | match_geod | qc3 | 160 | all_without_19 |
L5_elev_bands_pcp1.6_ERA5_match_geod_qc3_b160_rgi_all_without_19 | 0.081299 | L5 | elev_bands | ERA5_pcp1.6 | match_geod | qc3 | 160 | all_without_19 |
L5_elev_bands_pcp2.5_CRU_match_geod_pergla_qc3_b160_rgi_all_without_19 | 0.549611 | L5 | elev_bands | CRU_pcp2.5 | match_geod_pergla | qc3 | 160 | all_without_19 |
L5_elev_bands_pcp1.6_ERA5_match_geod_pergla_qc3_b160_rgi_all_without_19 | 0.05188 | L5 | elev_bands | ERA5_pcp1.6 | match_geod_pergla | qc3 | 160 | all_without_19 |
L5_elev_bands_pcp1.6_ERA5_match_geod_pergla_massredis_qc3_b160_rgi_all_without_19 | 0.051093 | L5 | elev_bands | ERA5_pcp1.6 | match_geod_pergla_massredis | qc3 | 160 | all_without_19 |
L5_centerlines_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19 | 3.273621 | L5 | centerlines | CRU_pcp2.5 | no_match | qc3 | 160 | all_without_19 |
L5_centerlines_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19 | 0.936732 | L5 | centerlines | ERA5_pcp1.6 | no_match | qc3 | 160 | all_without_19 |
Let's plot this in a nicer way!
sns.catplot(y="match", x="rel_error_area_in_percent", data=pd_rel_error_area_L5,
hue='clim_pcp', orient='h', col='exp',
kind='bar', palette='Greys') #['Black', 'Grey'])
plt.xlim([0,3.5])
plt.suptitle("Relative glacier area with missing glaciers for preprocessing level L5, qc3, border=160, rgi_reg = 'all_without_19'")
plt.tight_layout()