#!/usr/bin/env python
# coding: utf-8

# # Pre-matched data with auxiliary data
#
# Load already-matched model/observation data that also carries auxiliary
# variables, then use those auxiliary variables to filter the comparison
# and to group the skill assessment.

# In[1]:


import modelskill as ms
import numpy as np
import pandas as pd


# In[2]:


matched_file = "../tests/testdata/SW/eur_matched.dfs0"


# The function `from_matched()` takes a dataframe, a dfs0 or a mikeio.Dataset
# of already matched data and returns a Comparer object.

# In[3]:


cmp = ms.from_matched(matched_file, obs_item=1, mod_items=[0])
cmp.aux_names


# In[4]:


# NOTE: the data_vars are renamed so that their names contain no spaces
aux_renames = {"Wind speed": "wind_speed", "Wind Direction": "wind_dir"}
cmp = cmp.rename(aux_renames)


# In[5]:


cmp.aux_names


# In[6]:


cmp


# In[7]:


cmp.data


# In[8]:


cmp.skill()


# In[9]:


# The same scatter settings are reused for a later plot of a filtered subset.
scatter_opts = {"quantiles": 0, "figsize": (6, 6)}
cmp.plot.scatter(**scatter_opts);
cmp.plot.timeseries();


# ## Filter
#
# Filter on auxiliary data using `query()` or `where()`. Below, we consider
# only wave data when the wind speed is above 15 m/s.

# In[10]:


cmp.query("wind_speed > 15.0")


# In[11]:


strong_wind = cmp.data["wind_speed"] > 15.0
cmp_strong_wind = cmp.where(strong_wind)
cmp_strong_wind


# In[12]:


# notice that the model data is kept, but the observations are filtered
cmp_strong_wind.plot.timeseries();


# More auxiliary data can be added, e.g. as derived data from the original
# data.

# In[13]:


model_values = cmp.data["Hm0, model"]
observed_values = cmp.data["Observation"]
cmp.data["residual"] = model_values - observed_values


# In[14]:


# Keep only the points where model and observation differ by more than 0.3.
abs_residual = np.abs(cmp.data["residual"])
large_residuals = abs_residual > 0.3
cmp_large_residuals = cmp.where(large_residuals)
cmp_large_residuals.plot.scatter(**scatter_opts);
cmp_large_residuals.plot.timeseries();


# ## Aggregate
#
# Let's split the data based on wind direction sector and aggregate the skill
# calculation of the significant wave height prediction for each sector.

# In[15]:


# Note: in this short example wind direction is between 274 and 353 degrees
sector_edges = [255, 285, 315, 345, 360]
sector_labels = ["W", "WNW", "NNW", "N"]
wind_dir_df = cmp.data["wind_dir"].to_dataframe()
cmp.data["windsector"] = pd.cut(
    wind_dir_df["wind_dir"],
    sector_edges,
    labels=sector_labels,
)


# In[16]:


sector_skill = cmp.skill(by="windsector")
sector_skill.style()


# In[17]:


sector_skill.plot.bar(field="rmse", title="Hm0 RMSE by wind sector");


# In[18]:


from_west = cmp.data["windsector"] == "W"
cmp.where(from_west).plot.timeseries();


# In[ ]: