The Vistula catchment is the largest catchment in Poland, with an area of 194,424 km2. This notebook shows how a hydrolgoical model can evaluated using ModelSkill.
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import modelskill as ms
fldr = Path("../tests/testdata/Vistula")
df = pd.read_csv(fldr / "stations.csv", index_col=0)
df
Station | Long | Lat | Area | |
---|---|---|---|---|
Id | ||||
6458010 | VISTULA (TCZEW) | 18.80556 | 54.08722 | 193922.9 |
6458500 | VISTULA (WARSAW (WARSZAWA)) | 21.03250 | 52.24750 | 84945.1 |
6458753 | PILICA (PRZEDBORZ) | 19.87528 | 51.08944 | 2550.1 |
6458715 | WIEPRZ (LUBARTOW) | 22.64361 | 51.49806 | 6389.8 |
6458713 | WIEPRZ (KRASNYSTAW) | 23.17667 | 50.98528 | 3010.2 |
6458520 | KAMIENNA (KUNOW) | 21.27889 | 50.96194 | 1110.4 |
6458460 | VISTULA (SANDOMIERZ) | 21.74611 | 50.67250 | 31809.9 |
6458450 | VISTULA (SZCZUCIN) | 21.07722 | 50.32694 | 23869.3 |
6458406 | DUNAJEC (NOWY SACZ) | 20.68722 | 49.62722 | 4337.4 |
6158100 | POPRAD (CHMELNICA) | 20.73023 | 49.28918 | 1262.0 |
6458950 | WISLOK (TRYNCZA) | 22.54722 | 50.16222 | 3523.6 |
6458863 | PISA (PTAKI) | 21.79250 | 53.39306 | 3575.7 |
6458805 | NAREW (SURAZ) | 22.95500 | 52.94889 | 3425.3 |
6458924 | LIWIEC (LOCHOW) | 21.67833 | 52.51000 | 2471.4 |
6458555 | KRZNA (MALOWA GORA) | 23.46750 | 52.10361 | 3041.9 |
def get_comparer(df, mods, id):
"""Helper function to get a Comparer object for a given model and station id"""
q = ms.Quantity(name="Discharge", unit="m3/s")
# create Observation object
dfd = pd.read_csv(fldr / mods[0] / f"{id}.csv", index_col=0, parse_dates=True)
o = ms.PointObservation(dfd, item="Qobs", name=df.loc[id].Station, x=df.loc[id].Long, y=df.loc[id].Lat, quantity=q)
# create ModelResult objects
mm = []
for m in mods:
dfd = pd.read_csv(fldr / m / f"{id}.csv", index_col=0, parse_dates=True)
mm.append(ms.PointModelResult(dfd, item="Qsim", name=m, quantity=q))
return ms.match(obs=o, mod=mm)
# def get_comparer_from_matched(df, mod, id):
# """Helper function to get a Comparer object for a given model and station id"""
# q = ms.Quantity(name="Discharge", unit="m3/s")
# dfd = pd.read_csv(fldr / mod / f"{id}.csv", index_col=0, parse_dates=True)
# dfd = dfd.rename(columns={"Qsim": mod, "Qobs": df.loc[id].Station})
# cmp = ms.from_matched(dfd, obs_item=df.loc[id].Station, mod_items=[mod], quantity=q)
# cmp.data.attrs['x'] = df.loc[id].Long
# cmp.data.attrs['y'] = df.loc[id].Lat
# cmp.data.attrs['area'] = df.loc[id].Area
# return cmp
cmp = get_comparer(df, ["sim1","sim2"], df.index[0])
cmp
<Comparer> Quantity: Discharge [m3/s] Observation: VISTULA (TCZEW), n_points=3653 Model: sim1, rmse=442.686 Model: sim2, rmse=331.543
timeseries, scatter, boxplot, hist, kde, qq, taylor
cmp.plot.timeseries(); # backend="plotly"
cmp.sel(model="sim1").plot.scatter(); # backend="plotly"
# cmp.plot.kde(xlim=[0,5000]);
# cmp.plot.qq();
# set default metrics globally (they can be overridden in the skill/plot methods)
ms.options.metrics.list = ["kge", "cc"]
#ms.reset_option("metrics.list")
cmp.skill().round(3)
observation | n | kge | cc | |
---|---|---|---|---|
model | ||||
sim1 | VISTULA (TCZEW) | 3653 | 0.617 | 0.794 |
sim2 | VISTULA (TCZEW) | 3653 | 0.809 | 0.829 |
cmp.data["month"] = cmp.time.to_series().dt.month
cmp.skill(by=["model","month"]) #["kge"].plot.bar();
observation | n | kge | cc | ||
---|---|---|---|---|---|
model | month | ||||
sim1 | 1 | VISTULA (TCZEW) | 310 | 0.385138 | 0.440905 |
2 | VISTULA (TCZEW) | 283 | 0.578280 | 0.803975 | |
3 | VISTULA (TCZEW) | 310 | 0.454622 | 0.730561 | |
4 | VISTULA (TCZEW) | 300 | 0.509017 | 0.834517 | |
5 | VISTULA (TCZEW) | 310 | 0.575944 | 0.697614 | |
6 | VISTULA (TCZEW) | 300 | 0.344820 | 0.493330 | |
7 | VISTULA (TCZEW) | 310 | 0.052140 | 0.298390 | |
8 | VISTULA (TCZEW) | 310 | 0.219232 | 0.667376 | |
9 | VISTULA (TCZEW) | 300 | 0.376622 | 0.552289 | |
10 | VISTULA (TCZEW) | 310 | 0.626824 | 0.717061 | |
11 | VISTULA (TCZEW) | 300 | 0.643888 | 0.741710 | |
12 | VISTULA (TCZEW) | 310 | 0.288263 | 0.452674 | |
sim2 | 1 | VISTULA (TCZEW) | 310 | 0.580579 | 0.622109 |
2 | VISTULA (TCZEW) | 283 | 0.844779 | 0.887547 | |
3 | VISTULA (TCZEW) | 310 | 0.669936 | 0.767161 | |
4 | VISTULA (TCZEW) | 300 | 0.764530 | 0.809858 | |
5 | VISTULA (TCZEW) | 310 | 0.487875 | 0.655435 | |
6 | VISTULA (TCZEW) | 300 | 0.496136 | 0.695364 | |
7 | VISTULA (TCZEW) | 310 | 0.235510 | 0.540092 | |
8 | VISTULA (TCZEW) | 310 | 0.269188 | 0.773781 | |
9 | VISTULA (TCZEW) | 300 | 0.526274 | 0.741995 | |
10 | VISTULA (TCZEW) | 310 | 0.719504 | 0.863399 | |
11 | VISTULA (TCZEW) | 300 | 0.721616 | 0.840079 | |
12 | VISTULA (TCZEW) | 310 | 0.506460 | 0.560932 |
cmp.skill(by=["model","month"])["kge"].plot.line()
plt.xlabel("Month")
plt.xticks(np.arange(1,13), ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]);
# season
# season_int = cmp.time.to_series().dt.month%12 // 3 + 1
# cmp.data["season"] = season_int.map({1:"Winter", 2:"Spring", 3:"Summer", 4:"Autumn"})
# cmp.skill(by=["model","season"]).plot.grid("kge")
# loop through all stations in df and create a Comparer for each
cmps = []
for id in df.index:
try:
cmps.append(get_comparer(df, ["sim1","sim2"], id))
except ValueError as e:
print("Failed to create comparer for station", id, ":", e)
cc = ms.ComparerCollection(cmps)
cc
Failed to create comparer for station 6458500 : No data left after trimming to 2000-01-01 23:59:59 - 2010-01-01 00:00:01
<ComparerCollection> Comparer: VISTULA (TCZEW) Comparer: PILICA (PRZEDBORZ) Comparer: WIEPRZ (LUBARTOW) Comparer: WIEPRZ (KRASNYSTAW) Comparer: KAMIENNA (KUNOW) Comparer: VISTULA (SANDOMIERZ) Comparer: VISTULA (SZCZUCIN) Comparer: DUNAJEC (NOWY SACZ) Comparer: POPRAD (CHMELNICA) Comparer: WISLOK (TRYNCZA) Comparer: PISA (PTAKI) Comparer: NAREW (SURAZ) Comparer: LIWIEC (LOCHOW) Comparer: KRZNA (MALOWA GORA)
cc.skill(by=["model","observation"], metrics="kge")["kge"].plot.barh();
# Average skill over all stations, weighted by sqrt(area)
area = df.set_index("Station").loc[cc.obs_names].Area
cc.mean_skill(weights=np.sqrt(area)).round(3)
n | kge | cc | |
---|---|---|---|
model | |||
sim1 | 51142 | 0.430 | 0.649 |
sim2 | 51142 | 0.456 | 0.649 |