#!/usr/bin/env python # coding: utf-8 # # Pre-matched data with auxiliary data # # # In[1]: import modelskill as ms import numpy as np import pandas as pd import mikeio # In[ ]: fn = "../tests/testdata/SW/eur_matched.dfs0" mikeio.read(fn) # The function `from_matched()` takes a dataframe, a dfs0 or a mikeio.Dataset of already matched data and returns a Comparer object. # In[3]: cmp = ms.from_matched(fn, obs_item=1, mod_items=0, aux_items=[2,3]) cmp.aux_names # In[4]: # NOTE: we rename data_vars to avoid spaces in names cmp = cmp.rename({"Wind speed": "wind_speed", "Wind Direction": "wind_dir"}) # In[5]: cmp.aux_names # In[6]: cmp # In[7]: cmp.skill() # In[8]: cmp.plot.scatter(quantiles=0, figsize=(6,6)); cmp.plot.timeseries(); # ## Filter # # Filter on auxiliary data using `query()` or `where()`. Below, we consider only wave data when the wind speed is above 15 m/s. # In[9]: cmp.query("wind_speed > 15.0") # In[10]: cmp2 = cmp.where(cmp.data.wind_speed>15.0) cmp2 # In[11]: # notice that the model data is kept, but the observations are filtered cmp2.plot.timeseries(); # More auxiliary data can be added, e.g. as derived data from the original data. # In[12]: cmp.data["residual"] = cmp.data["Hm0, model"] - cmp.data["Observation"] # In[13]: large_residuals = np.abs(cmp.data.residual)>0.1 cmp3 = cmp.where(large_residuals) cmp3.plot.scatter(show_density=False, figsize=(6,6)); cmp3.plot.timeseries(); # In[14]: cmp3.data.data_vars # In[15]: cmp3.data.Observation.values # ## Aggregate # In[ ]: # Let's split the data based on wind direction sector and aggregate the skill calculation of the significant wave height predition for each sector. # In[16]: # Note: in this short example wind direction is between 274 and 353 degrees df = cmp.data.wind_dir.to_dataframe() windsectors = pd.cut(df.wind_dir, [255, 285, 315, 345, 360], labels=["W", "WNW", "NNW", "N"]) # convert categoricals to strings, which is more compatible cmp.data["windsector"] = windsectors.astype(str) # In[17]: cmp.skill(by="windsector") # In[18]: cmp.skill(by="windsector").rmse.plot.bar(title="Hm0 RMSE by wind sector"); # In[19]: cmp.where(cmp.data.windsector=="W").plot.timeseries();