Example workflow to analyse the type and amount of errors for different preprocessing levels

If you are only interested in the final plots and a summary for preprocessing level 5 go to:

In [31]:
from oggm import cfg, workflow, utils, shop
import pandas as pd
import os, glob
import numpy as np
import xarray as xr
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("whitegrid")

cfg.initialize()

import matplotlib
matplotlib.rcParams['figure.figsize'] = (14, 8)

# makes sure that the pandas output columns are not cropped!
pd.set_option("max_colwidth", None)
2022-07-15 11:10:21: oggm.cfg: Reading default parameters from the OGGM `params.cfg` configuration file.
2022-07-15 11:10:21: oggm.cfg: Multiprocessing switched OFF according to the parameter file.
2022-07-15 11:10:21: oggm.cfg: Multiprocessing: using all available processors (N=32)
In [32]:
def error_analysis_w_plot(dfserr={}, pd_rel_error_area = None,
                          level='L2', exp='elev_bands', pcp ='', clim='', 
                          qc = '', match = '', border = 160,
                          rgi_reg = 'all', 
                          path = '/home/www/oggm/gdirs/oggm_v1.4/', subplot = False, xlim=None, plot=True, rename_match =True):
    """ Estimates for a specific preprocessed gdirs the relative glacier area with errors
    and the relative amount of glaciers with errors (both in %). 
    
    Plots the error counts for the different error types.
    
    Works only if the glacier statistics were estimated!
    
    Parameters
    ----------
    dfserr : dictionary
        dictionary where already errors of largest glaciers from other preprocessed gdirs are saved, default is an empty dictionary
    pd_rel_error_area : pd.DataFrame
        pandas DataFrame with statistics about relative area and amount of errors on glaciers. 
        Default is None, which creates a new pandas DataFrame
    level : str
        the preprocessing level where the errors should be estimated and plotted.
        So far, it works only for 'L2' and 'L5'. Default is 'L2'.
    exp : str
        which glacier flowlines are used. Either 'elev_bands' (default) or 'centerlines'
    pcp : str
        precipitation factor applied (depends on clim). Has to be set!
    clim : str
        which baseline is applied. Either 'ERA5' or 'CRU'. Has to be set!
    qc : str
        whether a glacier climate quality check and correction was applied. If yes, set it to 'qc3', 
        otherwise to 'qc0'. Has to be set!
    match: str
        which calibration & other methods are chosen: Has to be set if above level 2!
        - "no_match" : only direct glaciological WGMS data used
        - "match_geod" : same as no_match, but regionally the geodetic estimates are matched by changing epsilon
        - "match_geod_pergla" : only per-glacier-individual geodetic estimates of Hugonnet et al. (2021) matched
        - "match_geod_pergla_massredis" : same as match_geod_pergla, but instead of the Shallow-Ice Approximation, 
          a mass-redistribution is applied (see: https://docs.oggm.org/en/latest/mass-redistribution.html)
    border : str
        which size of the local glacier map was used (prepro_border). For qc0, e.g., 
        most preprocessing directories are available for border=80. Default is 160
    rgi_reg : str
        default is all RGI regions. But you can also choose a single RGI region (e.g. rgi_reg = '11') or all RGI 
        regions except 19 (i.e. not available for CRU!, rgi_reg='all_without_19')
    path : str
        path to the general gdirs folder
    subplot : bool
        Default is False. If you want to use several plots, set it to True
    xlim : int
        Default is None. Maximum of single type errors in the plot. Useful for subplots
    plot : bool
        If an error type plot should be returned or not!
    
    Returns
    -------
    dfserr : dictionary
        dictionary where for each preprocessed gdir, a pd.DataFrame exists which lists the 
        type of errors and area of the 15 largest glaciers with errors
    pd_rel_error_area : pd.DataFrame
        pandas DataFrame with statistics about relative area and amount of errors on glaciers. 
        If repeated, for several preprocessed gdirs, each row represents the statistics of 
        one preprocessed gdirs.
        
    """
    if clim == 'CRU' and rgi_reg == 'all':
        raise InvalidParamsError("CRU is NOT available for RGI 19, use rgi_reg='all_without_19' instead")
    if pd_rel_error_area is None:
        pd_rel_error_area = pd.DataFrame(columns=['rel_error_area_in_percent', 'level', 'exp', 'pcp_clim', 'match', 'qc'])

    if level == 'L2':
        fd = f'{path}/L1-L2_files/{exp}/RGI62/b_{border:03d}/L2/summary/'
        err_msg = "In preprocessing level 2, climate, precipitation factor and qc should not be defined"
        assert pcp == '' and clim == '' and qc == '', err_msg
        title = f'{level}: {exp}, border: {border}'
        dict_key = f'{level}_{exp}_b{border}_rgi_{rgi_reg}'
    elif level == 'L5':
        if (pcp == 'via_winterprcp') or (pcp == '2.6'):
            fd = f'{path}/L3-L5_files/{clim}/{exp}/{qc}/{match}/RGI62/b_{border:03d}/L5/summary/'
        else:
            fd = f'{path}/L3-L5_files/{clim}/{exp}/{qc}/{pcp}/{match}/RGI62/b_{border:03d}/L5/summary/'
        err_msg = "In preprocessing level 2, need to prescribe climate (clim), match, precipitation factor (pcp) and quality check type (qc)"
        assert pcp != '' and clim != '' and qc != '' and match !='' , err_msg

        title = f'{level}: {exp}, {pcp}, {clim}, {match}, {qc}, border: {border}, rgi_{rgi_reg}'
        dict_key = f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}_b{border}_rgi_{rgi_reg}'
    else:
        raise InvalidParamsError('Only L2 and L5 can be used, but the function can be adapted easily in order that it works for other levels!')
        
    # get the statistics file which lists errors and their type
    fs = glob.glob(fd+'glacier_statistics*.csv')
    
    if rgi_reg == 'all_without_19':
        for f in fs:
            if 'statistics_19' in f:
                fs.remove(f)
        assert len(fs) == 18
    elif rgi_reg == 'all':
        print(len(fs))
        assert len(fs) == 19
    else:
        for f in fs:
            if f'statistics_{rgi_reg}' in f:
                fs = [f]
                break
        assert len(fs) == 1
    
    df = []
    for f in fs:
        df.append(pd.read_csv(f, index_col=0, low_memory=False))
    df = pd.concat(df).sort_index()
    
    rel_error_area = df.loc[~df['error_task'].isnull()].rgi_area_km2.sum() / df.rgi_area_km2.sum() * 100
    
    # plot the amount of errors for each error type
    if plot:
        title = title + '\n relative glacier area with errors: {:.2f}%'.format(rel_error_area)
        if not subplot:
            plt.figure()
        plt.title(title, fontsize=12)
        sns.countplot(y="error_task", data=df.sort_values(by='error_task'));
        if xlim is not None:
            plt.xlim([0,xlim])

    # save the glaciers that result in errors sorted from largest to smallest glacier area
    dfserr[dict_key] = df.loc[~df['error_task'].isnull()].sort_values(by='rgi_area_km2', ascending=False)[['rgi_area_km2', 'error_task', 'error_msg']]
    # .iloc[:15]
    # save statistics about relative area and amount of errors on glaciers
    pd_rel_error_area.loc[dict_key] = np.NaN
    pd_rel_error_area.loc[dict_key]['rel_error_area_in_percent'] = rel_error_area
    pd_rel_error_area.loc[dict_key]['rgi_reg'] = rgi_reg
    pd_rel_error_area.loc[dict_key]['level'] = level
    pd_rel_error_area.loc[dict_key]['border'] = border
    pd_rel_error_area.loc[dict_key]['exp'] = exp
    if rename_match:
        if match == 'match_geod_pergla_2.6':
            pd_rel_error_area.loc[dict_key]['match'] = match[:-4]
        elif match == 'match_geod_pergla_winterprcp':
            pd_rel_error_area.loc[dict_key]['match'] = 'match_geod_pergla'

        else:
            pd_rel_error_area.loc[dict_key]['match'] = match
    else:
        pd_rel_error_area.loc[dict_key]['match'] = match

    pd_rel_error_area.loc[dict_key]['clim_pcp'] = f'{clim}_{pcp}'
    pd_rel_error_area.loc[dict_key]['qc'] = qc
    
    return dfserr, pd_rel_error_area
In [33]:
# could be changed, but not all pre-processed directories are available for all prepro_border options!
border = 160 # 80 if qc0
# these are just containers to save and later compare the error statistics! 
dfserr = {}
pd_rel_error_area = pd.DataFrame(columns=['rel_error_area_in_percent', 'level', 'exp', 'clim_pcp', 'match', 'qc', 'border', 'rgi_reg'])

Analysis for Level 2 pre-processing directories!

In pre-processing level 2, we only distinguish between elevation bands and centerlines (see the Flowlines documentation for an explanation about the differences).

In [34]:
plt.figure(figsize=(14,3))
plt.subplot(121)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area, level='L2', subplot=True,
                                                  exp='elev_bands', border=border, xlim = 800)
plt.subplot(122)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area, level='L2',  subplot=True,
                                                  exp='centerlines', border=border, xlim = 800)
plt.tight_layout()
19
19
  • failing glacier area on pre-processing level 2 is ~0.1% or less
  • less glaciers will fail when using elev_bands than centerlines

Here you can analyse the failing glaciers with the largest area and the type of error that has occurred!

In [35]:
dfserr['L2_elev_bands_b160_rgi_all'].head()
Out[35]:
rgi_area_km2 error_task error_msg
rgi_id
RGI60-10.00002 48.144 simple_glacier_masks GeometryError: RGI60-10.00002 is a nominal glacier.
RGI60-10.00006 12.966 simple_glacier_masks GeometryError: RGI60-10.00006 is a nominal glacier.
RGI60-12.01364 9.250 simple_glacier_masks GeometryError: RGI60-12.01364 is a nominal glacier.
RGI60-19.01402 7.482 elevation_band_flowline InvalidDEMError: (RGI60-19.01402) DEM altidude range too small.
RGI60-19.01367 7.271 elevation_band_flowline InvalidDEMError: (RGI60-19.01367) DEM altidude range too small.
In [36]:
dfserr['L2_centerlines_b160_rgi_all'].head()
Out[36]:
rgi_area_km2 error_task error_msg
rgi_id
RGI60-19.01521 94.963 initialize_flowlines RuntimeError: Altitude range of main flowline too small: 0.11659260120634544
RGI60-10.00002 48.144 glacier_masks GeometryError: RGI60-10.00002 is a nominal glacier.
RGI60-04.06184 40.934 initialize_flowlines AssertionError:
RGI60-03.04079 35.752 initialize_flowlines RuntimeError: Altitude range of main flowline too small: 1.2600400626842188e-05
RGI60-14.01649 30.795 catchment_area ValueError: no minimum-cost path was found to the specified end point

Let's just look at the level 2 errors for only RGI region 12

In [37]:
rgi_reg = '12'
plt.figure(figsize=(14,3))
plt.subplot(121)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area, level='L2', subplot=True,
                                                      exp='elev_bands', rgi_reg=rgi_reg,
                                                      border=border, xlim = 400)
plt.subplot(122)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area, level='L2',  subplot=True,
                                                      exp='centerlines', rgi_reg=rgi_reg,
                                                      border=border, xlim = 400)
plt.tight_layout()
In [38]:
dfserr['L2_elev_bands_b160_rgi_12']['error_msg']
Out[38]:
rgi_id
RGI60-12.01364    GeometryError: RGI60-12.01364 is a nominal glacier.
RGI60-12.01372    GeometryError: RGI60-12.01372 is a nominal glacier.
RGI60-12.01374    GeometryError: RGI60-12.01374 is a nominal glacier.
RGI60-12.01500    GeometryError: RGI60-12.01500 is a nominal glacier.
RGI60-12.01443    GeometryError: RGI60-12.01443 is a nominal glacier.
                                         ...                         
RGI60-12.01628    GeometryError: RGI60-12.01628 is a nominal glacier.
RGI60-12.01545    GeometryError: RGI60-12.01545 is a nominal glacier.
RGI60-12.01546    GeometryError: RGI60-12.01546 is a nominal glacier.
RGI60-12.01547    GeometryError: RGI60-12.01547 is a nominal glacier.
RGI60-12.01350    GeometryError: RGI60-12.01350 is a nominal glacier.
Name: error_msg, Length: 339, dtype: object

In RGI region 12, ~12% of the glacier area can not run even in pre-processing level 2. The reasons are glacier masks errors because many of the glaciers in RGI region 12 are "nominal" glaciers. Nominal means here that the glaciers don't have a glacier outline but only a location and area and hence can not be run with OGGM.

Analysis for Level 5 pre-processing directories!

In pre-processing level 5, we distinguish between:

  • different flowline types (exp = 'elev_bands' or 'centerlines', see the Flowlines documentation)
  • different climate and precipitation factors (here: clim='CRU' with pcp='pcp2.5' or 'ERA5' with 'pcp1.6', or 'GSWP3_W5E5' with 'pcp2.5' or with varying prcp. fac depending on winter prcp.)
  • different ways to calibrate the mass balance (see documentation on available mass-balance calibration methods):
    • match = "no_match" : only direct glaciological WGMS data used
    • match = "match_geod" : same as no_match, but regionally the geodetic estimates are matched by changing epsilon
    • match = "match_geod_pergla" : only per-glacier-individual geodetic estimates of Hugonnet et al. (2021) matched
  • applying the climate quality check and correction (qc='qc3') or not (qc='qc0'), see historical_climate_qc.
    • at the moment we compare match_geod_pergla "qc0" and "qc3" although technically, after the newest OGGM release, there should be no difference in between. When we have preprocessed gdirs with match_geod_pergla with qc0, we will remove the match_geod_pergla qc3 gdirs as they will be outdated and should not be used. We will update the notebook afterwards!
  • using either the default shallow-ice approximation or a simple mass-redistribution (see: https://docs.oggm.org/en/latest/mass-redistribution.html). For the mass-redistribution, we only have one preprocessed glacier directory at the moment. You can check it out under:
    • match = 'match_geod_pergla_massredis' (same as match_geod_pergla but with mass redistribution instead of SIA)
In [39]:
level = 'L5'

Let's start by looking only on elev_bands flowline pre-processed directories with 'qc3':

In [40]:
exp = 'elev_bands'
qc = 'qc3'
pcps = ['pcp2.5', 'pcp1.6']
clims = ['CRU', 'ERA5']
# when we compare between ERA5 and CRU, we have to omit region 19 because CRU has no climate date for RGI region 19!
rgi_reg = 'all_without_19'
missing = []
# different match options only available for elev_bands

plt.figure(figsize=(20,18))
n = 1
for match in ['no_match', 'match_geod', 'match_geod_pergla', 'match_geod_pergla_massredis']:  # 
    for pcp, clim in zip(pcps, clims):
        plt.subplot(4,2,n)
        n += 1
        try:
            dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area,
                                                              level=level,exp=exp, pcp =pcp, clim=clim, 
                                                              qc=qc,border=border, match=match,
                                                              rgi_reg = rgi_reg,
                                                              subplot=True, xlim=3800)
        except:
            missing.append(f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}')
            plt.title(f'missing: {level}_{exp}_{pcp}_{clim}_{match}_{qc}')
plt.tight_layout()
  • the preprocessed directory: 'L5_elev_bands_pcp2.5_CRU_match_geod_pergla_massredis_qc3' does not exist at the moment, but could be produced if necessary!

Let's repeat the same for the centerlines:

  • so far, we can only look at no_match
  • match_geod would also work with centerlines but there are no preprocessed directories for that
  • match_geod_pergla does NOT work with centerlines!!!
In [41]:
exp = 'centerlines'
match = 'no_match'
rgi_reg = 'all_without_19'
plt.figure(figsize=(14,4))
n = 1
for pcp, clim in zip(pcps, clims):
    plt.subplot(1,2,n)
    n += 1
    try:
        dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area,
                                                          level=level,exp=exp, pcp =pcp, clim=clim, 
                                                          qc=qc,border=border, match=match,
                                                          rgi_reg=rgi_reg, subplot=True)
    except:
        missing.append(f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}')
        plt.title(f'missing: {level}_{exp}_{pcp}_{clim}_{match}_{qc}')
plt.tight_layout()

Let's have a quick look at the largest failing glaciers and their errors!

In [42]:
dfserr[f'L5_elev_bands_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19'].head()
Out[42]:
rgi_area_km2 error_task error_msg
rgi_id
RGI60-17.15808 721.950 local_t_star MassBalanceCalibrationError: RGI60-17.15808: mu* out of specified bounds: 3.30
RGI60-17.15897 428.505 local_t_star MassBalanceCalibrationError: RGI60-17.15897: mu* out of specified bounds: 2.21
RGI60-05.10735 392.607 local_t_star MassBalanceCalibrationError: RGI60-05.10735: mu* out of specified bounds: 0.91
RGI60-05.10634 388.067 local_t_star MassBalanceCalibrationError: RGI60-05.10634: mu* out of specified bounds: 2.07
RGI60-05.10617 373.290 local_t_star MassBalanceCalibrationError: RGI60-05.10617: mu* out of specified bounds: 3.45
In [43]:
dfserr[f'L5_elev_bands_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19'].head()
Out[43]:
rgi_area_km2 error_task error_msg
rgi_id
RGI60-10.00002 48.144 simple_glacier_masks GeometryError: RGI60-10.00002 is a nominal glacier.
RGI60-15.10055 26.727 local_t_star MassBalanceCalibrationError: RGI60-15.10055: mu* out of specified bounds: 10290.99
RGI60-15.01996 16.642 local_t_star MassBalanceCalibrationError: RGI60-15.01996: mu* out of specified bounds: 19149.61
RGI60-15.04835 15.377 local_t_star MassBalanceCalibrationError: RGI60-15.04835: mu* out of specified bounds: 12269.57
RGI60-10.00006 12.966 simple_glacier_masks GeometryError: RGI60-10.00006 is a nominal glacier.
  • for a better comparison we will look how only the relative glacier area with errors differs between the experiments
In [44]:
pd_rel_error_area
Out[44]:
rel_error_area_in_percent level exp clim_pcp match qc border rgi_reg
L2_elev_bands_b160_rgi_all 0.047088 L2 elev_bands _ 160 all
L2_centerlines_b160_rgi_all 0.115339 L2 centerlines _ 160 all
L2_elev_bands_b160_rgi_12 11.921573 L2 elev_bands _ 160 12
L2_centerlines_b160_rgi_12 11.92272 L2 centerlines _ 160 12
L5_elev_bands_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19 2.460572 L5 elev_bands CRU_pcp2.5 no_match qc3 160 all_without_19
L5_elev_bands_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19 0.081001 L5 elev_bands ERA5_pcp1.6 no_match qc3 160 all_without_19
L5_elev_bands_pcp2.5_CRU_match_geod_qc3_b160_rgi_all_without_19 2.460745 L5 elev_bands CRU_pcp2.5 match_geod qc3 160 all_without_19
L5_elev_bands_pcp1.6_ERA5_match_geod_qc3_b160_rgi_all_without_19 0.081299 L5 elev_bands ERA5_pcp1.6 match_geod qc3 160 all_without_19
L5_elev_bands_pcp2.5_CRU_match_geod_pergla_qc3_b160_rgi_all_without_19 0.549611 L5 elev_bands CRU_pcp2.5 match_geod_pergla qc3 160 all_without_19
L5_elev_bands_pcp1.6_ERA5_match_geod_pergla_qc3_b160_rgi_all_without_19 0.05188 L5 elev_bands ERA5_pcp1.6 match_geod_pergla qc3 160 all_without_19
L5_elev_bands_pcp1.6_ERA5_match_geod_pergla_massredis_qc3_b160_rgi_all_without_19 0.051093 L5 elev_bands ERA5_pcp1.6 match_geod_pergla_massredis qc3 160 all_without_19
L5_centerlines_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19 3.273621 L5 centerlines CRU_pcp2.5 no_match qc3 160 all_without_19
L5_centerlines_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19 0.936732 L5 centerlines ERA5_pcp1.6 no_match qc3 160 all_without_19
In [45]:
# get only the errors from preprocessing level L5
pd_rel_error_area_L5 = pd_rel_error_area[(pd_rel_error_area.level == 'L5') & (pd_rel_error_area.rgi_reg =='all_without_19')]
pd_rel_error_area_L5
Out[45]:
rel_error_area_in_percent level exp clim_pcp match qc border rgi_reg
L5_elev_bands_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19 2.460572 L5 elev_bands CRU_pcp2.5 no_match qc3 160 all_without_19
L5_elev_bands_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19 0.081001 L5 elev_bands ERA5_pcp1.6 no_match qc3 160 all_without_19
L5_elev_bands_pcp2.5_CRU_match_geod_qc3_b160_rgi_all_without_19 2.460745 L5 elev_bands CRU_pcp2.5 match_geod qc3 160 all_without_19
L5_elev_bands_pcp1.6_ERA5_match_geod_qc3_b160_rgi_all_without_19 0.081299 L5 elev_bands ERA5_pcp1.6 match_geod qc3 160 all_without_19
L5_elev_bands_pcp2.5_CRU_match_geod_pergla_qc3_b160_rgi_all_without_19 0.549611 L5 elev_bands CRU_pcp2.5 match_geod_pergla qc3 160 all_without_19
L5_elev_bands_pcp1.6_ERA5_match_geod_pergla_qc3_b160_rgi_all_without_19 0.05188 L5 elev_bands ERA5_pcp1.6 match_geod_pergla qc3 160 all_without_19
L5_elev_bands_pcp1.6_ERA5_match_geod_pergla_massredis_qc3_b160_rgi_all_without_19 0.051093 L5 elev_bands ERA5_pcp1.6 match_geod_pergla_massredis qc3 160 all_without_19
L5_centerlines_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19 3.273621 L5 centerlines CRU_pcp2.5 no_match qc3 160 all_without_19
L5_centerlines_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19 0.936732 L5 centerlines ERA5_pcp1.6 no_match qc3 160 all_without_19

Let's plot this in a nicer way!

In [46]:
sns.catplot(y="match", x="rel_error_area_in_percent", data=pd_rel_error_area_L5,
            hue='clim_pcp', orient='h', col='exp',
           kind='bar', palette='Greys') #['Black', 'Grey'])
plt.xlim([0,3.5])
plt.suptitle("Relative glacier area with missing glaciers for preprocessing level L5, qc3, border=160, rgi_reg = 'all_without_19'")
plt.tight_layout()
  • there is a short description of all findings right here!
In [ ]:
 

Let's now repeat the same for preprocesed directories without climate quality check and correction (qc0)

  • at the moment: qc0 was mostly computed for CRU and for elev_bands. ERA5 with qc0 only exists for match_geod_pergla, if needed the missing preprocessed directories could be computed!
In [47]:
qc = 'qc0'
level = 'L5'
pcp = 'pcp2.5' 
clim = 'CRU' #, 'ERA5']
exp = 'elev_bands' #, 'centerlines']:
rgi_reg = 'all_without_19'

missing = []
# different match options only available for elev_bands

plt.figure(figsize=(20,18))
n = 1
for match in ['no_match', 'match_geod', 'match_geod_pergla', 'match_geod_pergla_massredis']:  # 
    for border in [80, 160, 240]:
        plt.subplot(4,3,n)
        n += 1
        try:
            if 'match_geod_pergla' in match:
                path = '/home/www/oggm/gdirs/oggm_v1.6/'
            else:
                path = '/home/www/oggm/gdirs/oggm_v1.4/'
            dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area,
                                                              level=level,exp=exp, pcp =pcp, clim=clim, 
                                                              qc=qc,border=border, match=match,path=path,
                                                              rgi_reg=rgi_reg, subplot=True, xlim=8000)

        except:
            missing.append(f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}_b{border}')
            plt.title(f'missing: {level}_{exp}_{pcp}_{clim}_{match}_{qc}_b{border}')
plt.tight_layout()


plt.figure(figsize=(20,4))
plt.subplot(1,2,1)
dfserr_median_prcp_fac, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area = pd_rel_error_area,
                          level=level, exp='elev_bands', pcp = '2.6',
                       match ='match_geod_pergla_2.6', clim='GSWP3_W5E5', 
                          qc = 'qc0', border = 160,xlim=3800,
                          rgi_reg = rgi_reg, 
                          path = '/home/www/fmaussion/runs/new_gdirs/oggm_v1.6/', subplot = True, plot=True)
plt.subplot(1,2,2)
dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area = pd_rel_error_area,
                          level=level, exp='elev_bands', pcp = 'via_winterprcp',
                       match ='match_geod_pergla_winterprcp', clim='GSWP3_W5E5', 
                          qc = 'qc0', border = 160,xlim=3800,
                          rgi_reg = rgi_reg, 
                          path = '/home/www/fmaussion/runs/new_gdirs/oggm_v1.6/', subplot = True, plot=True)
plt.tight_layout()
  • also get the estimates from ERA5 with qc0
In [48]:
qc = 'qc0'
level = 'L5'
pcp = 'pcp1.6' 
clim = 'ERA5' #, 'ERA5']
exp = 'elev_bands' #, 'centerlines']:
rgi_reg = 'all_without_19'

missing = []
# different match options only available for elev_bands

plt.figure(figsize=(20,10))
n = 1
for match in ['match_geod_pergla', 'match_geod_pergla_massredis']:  # 
    for border in [80, 160]:
        plt.subplot(2,2,n)
        n += 1
        try:
            dfserr, pd_rel_error_area = error_analysis_w_plot(dfserr=dfserr, pd_rel_error_area=pd_rel_error_area,
                                                              level=level,exp=exp, pcp =pcp, clim=clim, 
                                                              qc=qc,border=border, match=match,
                                                              rgi_reg=rgi_reg, subplot=True, xlim=8000,
                                                             path = '/home/www/oggm/gdirs/oggm_v1.6/')

        except:
            missing.append(f'{level}_{exp}_{pcp}_{clim}_{match}_{qc}_b{border}')
            plt.title(f'missing: {level}_{exp}_{pcp}_{clim}_{match}_{qc}_b{border}')
plt.tight_layout()
  • a larger domain (border = 240 instead of border=80), does not always result in smaller amount of errors.
    • a https error coming from the DEM being too large could occur if the border is set too large! (see largest errors for b240 below). This is an error that could be solved, but only occurs rarely and is not important on global scales where the border is set to 80 or 160 anyways!
In [49]:
# Let's get all level 5 statistics (for both, qc0 and qc3!)
pd_rel_error_area_L5 = pd_rel_error_area[(pd_rel_error_area.level == 'L5') & (pd_rel_error_area.rgi_reg =='all_without_19')]
condi1 = ((pd_rel_error_area_L5.border == 160) & (pd_rel_error_area_L5.qc=='qc0'))
condi2 = ((pd_rel_error_area_L5.border == 160) & (pd_rel_error_area_L5.qc=='qc3'))
pd_rel_error_area_L5 = pd_rel_error_area_L5[condi1 | condi2]

In [50]:
sns.set(font_scale = 1.4)
sns.set_style('whitegrid')
pd_rel_error_area_L5['clim_pcp_qc_border'] = np.NaN
_hue = pd_rel_error_area_L5['clim_pcp'].values+'_'+pd_rel_error_area_L5['qc'].values+'_b'+ pd_rel_error_area_L5['border'].values.astype(str)
pd_rel_error_area_L5['clim_pcp_qc_border'] = _hue
pd_rel_error_area_L5 = pd_rel_error_area_L5.sort_values('clim_pcp_qc_border')

sns.catplot(y="match", x="rel_error_area_in_percent", data=pd_rel_error_area_L5,
            hue='clim_pcp_qc_border', orient='h', col='exp',
           kind='bar', palette='Greys',
           height=6, aspect=1.3)
plt.suptitle("Relative glacier area with failing glaciers for preprocessing level L5 (all RGI regions except RGI 19)")
ax = plt.gca()
plt.xlim([0,3.5])
plt.tight_layout();
plt.savefig('relative_failing_glacier_area_160.png')
In [51]:
pd_rel_error_area_L5
Out[51]:
rel_error_area_in_percent level exp clim_pcp match qc border rgi_reg clim_pcp_qc_border
L5_elev_bands_pcp2.5_CRU_no_match_qc0_b160_rgi_all_without_19 2.347936 L5 elev_bands CRU_pcp2.5 no_match qc0 160 all_without_19 CRU_pcp2.5_qc0_b160
L5_elev_bands_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19 2.460572 L5 elev_bands CRU_pcp2.5 no_match qc3 160 all_without_19 CRU_pcp2.5_qc3_b160
L5_elev_bands_pcp2.5_CRU_match_geod_qc3_b160_rgi_all_without_19 2.460745 L5 elev_bands CRU_pcp2.5 match_geod qc3 160 all_without_19 CRU_pcp2.5_qc3_b160
L5_elev_bands_pcp2.5_CRU_match_geod_pergla_qc3_b160_rgi_all_without_19 0.549611 L5 elev_bands CRU_pcp2.5 match_geod_pergla qc3 160 all_without_19 CRU_pcp2.5_qc3_b160
L5_centerlines_pcp2.5_CRU_no_match_qc3_b160_rgi_all_without_19 3.273621 L5 centerlines CRU_pcp2.5 no_match qc3 160 all_without_19 CRU_pcp2.5_qc3_b160
L5_elev_bands_pcp1.6_ERA5_match_geod_pergla_qc0_b160_rgi_all_without_19 0.052326 L5 elev_bands ERA5_pcp1.6 match_geod_pergla qc0 160 all_without_19 ERA5_pcp1.6_qc0_b160
L5_elev_bands_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19 0.081001 L5 elev_bands ERA5_pcp1.6 no_match qc3 160 all_without_19 ERA5_pcp1.6_qc3_b160
L5_elev_bands_pcp1.6_ERA5_match_geod_qc3_b160_rgi_all_without_19 0.081299 L5 elev_bands ERA5_pcp1.6 match_geod qc3 160 all_without_19 ERA5_pcp1.6_qc3_b160
L5_elev_bands_pcp1.6_ERA5_match_geod_pergla_qc3_b160_rgi_all_without_19 0.05188 L5 elev_bands ERA5_pcp1.6 match_geod_pergla qc3 160 all_without_19 ERA5_pcp1.6_qc3_b160
L5_elev_bands_pcp1.6_ERA5_match_geod_pergla_massredis_qc3_b160_rgi_all_without_19 0.051093 L5 elev_bands ERA5_pcp1.6 match_geod_pergla_massredis qc3 160 all_without_19 ERA5_pcp1.6_qc3_b160
L5_centerlines_pcp1.6_ERA5_no_match_qc3_b160_rgi_all_without_19 0.936732 L5 centerlines ERA5_pcp1.6 no_match qc3 160 all_without_19 ERA5_pcp1.6_qc3_b160
L5_elev_bands_2.6_GSWP3_W5E5_match_geod_pergla_2.6_qc0_b160_rgi_all_without_19 0.948983 L5 elev_bands GSWP3_W5E5_2.6 match_geod_pergla qc0 160 all_without_19 GSWP3_W5E5_2.6_qc0_b160
L5_elev_bands_via_winterprcp_GSWP3_W5E5_match_geod_pergla_winterprcp_qc0_b160_rgi_all_without_19 0.086321 L5 elev_bands GSWP3_W5E5_via_winterprcp match_geod_pergla qc0 160 all_without_19 GSWP3_W5E5_via_winterprcp_qc0_b160
In [52]:
# Let's get all level 5 statistics (for both, qc0 and qc3!)
pd_rel_error_area_L5 = pd_rel_error_area[(pd_rel_error_area.level == 'L5') & (pd_rel_error_area.rgi_reg =='all_without_19')]
# we only use border = 80 for qc0 as this is the option where most preprocessed directories are available
condi1 = ((pd_rel_error_area_L5.border == 80) & (pd_rel_error_area_L5.qc=='qc0'))
# we only use border = 160 for qc3 as this is the option where most preprocessed directories are available
condi2 = ((pd_rel_error_area_L5.border == 160) & (pd_rel_error_area_L5.qc=='qc3'))
# 
condi3 = (('GSWP3_W5E5_2.6' == pd_rel_error_area['clim_pcp']) | ('GSWP3_W5E5_via_winterprcp' == pd_rel_error_area['clim_pcp']))

pd_rel_error_area_L5 = pd_rel_error_area_L5[(condi1 |condi2|condi3)]
/tmp/ipykernel_466059/2060589157.py:10: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  pd_rel_error_area_L5 = pd_rel_error_area_L5[(condi1 |condi2|condi3)]
In [53]:
sns.set(font_scale = 1.4)
sns.set_style('whitegrid')
pd_rel_error_area_L5['clim_pcp_qc_border'] = np.NaN
_hue = pd_rel_error_area_L5['clim_pcp'].values+'_'+pd_rel_error_area_L5['qc'].values+'_b'+ pd_rel_error_area_L5['border'].values.astype(str)
pd_rel_error_area_L5['clim_pcp_qc_border'] = _hue
pd_rel_error_area_L5 = pd_rel_error_area_L5.sort_values('clim_pcp_qc_border')

sns.catplot(y="match", x="rel_error_area_in_percent", data=pd_rel_error_area_L5,
            hue='clim_pcp_qc_border', orient='h', col='exp',
           kind='bar', palette='Greys',
           height=6, aspect=1.3)
plt.suptitle("Relative glacier area with failing glaciers for preprocessing level L5 (all RGI regions except RGI 19)")
ax = plt.gca()
plt.xlim([0,3.5])
plt.tight_layout();
plt.savefig('relative_failing_glacier_area.png')