#!/usr/bin/env python
# coding: utf-8

# # Pre-matched data with auxiliary data
# 
# 

# In[1]:


import modelskill as ms
import numpy as np
import pandas as pd


# In[2]:


# Relative path to the dfs0 file that is passed to ms.from_matched() below
fn = "../tests/testdata/SW/eur_matched.dfs0"


# The function `from_matched()` takes a dataframe, a dfs0 file, or a mikeio.Dataset of already matched data and returns a Comparer object.

# In[3]:


# Build the Comparer: item 1 is passed as the observation and item 0 as the single model item
cmp = ms.from_matched(fn, obs_item=1, mod_items=[0])
# Names of the auxiliary variables (before renaming)
cmp.aux_names


# In[4]:


# NOTE: the data variables are renamed so their names contain no spaces;
# the new names are used further down in query() strings and as attributes
cmp = cmp.rename(
    {
        "Wind speed": "wind_speed",
        "Wind Direction": "wind_dir",
    }
)


# In[5]:


# Auxiliary variable names after the rename
cmp.aux_names


# In[6]:


# Display the Comparer itself
cmp


# In[7]:


# Display the data container held by the Comparer
cmp.data


# In[8]:


# Compute skill metrics with default arguments
cmp.skill()


# In[9]:


# Scatter plot and time series plot of the full comparison.
# The trailing semicolons keep the notebook from echoing the returned objects.
cmp.plot.scatter(quantiles=0, figsize=(6,6));
cmp.plot.timeseries();


# ## Filter 
# 
# Filter on auxiliary data using `query()` or `where()`. Below, we consider only wave data when the wind speed is above 15 m/s.

# In[10]:


# Filter with a query string that refers to the renamed auxiliary variable
cmp.query("wind_speed > 15.0")


# In[11]:


# Same wind speed condition as above, this time passed to where() as a boolean mask
cmp2 = cmp.where(cmp.data["wind_speed"] > 15.0)
cmp2


# In[12]:


# Notice that the model data is kept, but the observations are filtered
# (the trailing semicolon keeps the notebook from echoing the returned object)
cmp2.plot.timeseries();


# More auxiliary data can be added, e.g. as derived data from the original data. 

# In[13]:


# Derived variable: model minus observation, stored as a new variable on cmp.data
# NOTE(review): the variable is assigned directly on the Comparer's data; confirm whether
# the installed modelskill version expects extra metadata (e.g. attrs["kind"]) on it
cmp.data["residual"] = cmp.data["Hm0, model"] - cmp.data["Observation"]


# In[14]:


# Boolean mask that is True where the absolute residual exceeds 0.3
large_residuals = np.abs(cmp.data["residual"]) > 0.3

# Keep only the points with large residuals and plot them
cmp3 = cmp.where(large_residuals)
cmp3.plot.scatter(quantiles=0, figsize=(6, 6));
cmp3.plot.timeseries();


# ## Aggregate

# Let's split the data based on wind direction sector and aggregate the skill calculation of the significant wave height prediction for each sector.

# In[15]:


# Note: in this short example the wind direction lies between 274 and 353 degrees,
# so four bins covering 255-360 degrees are sufficient
df = cmp.data["wind_dir"].to_dataframe()
cmp.data["windsector"] = pd.cut(
    df["wind_dir"],
    bins=[255, 285, 315, 345, 360],
    labels=["W", "WNW", "NNW", "N"],
)


# In[16]:


# Skill computed separately for each value of the "windsector" variable
ss = cmp.skill(by="windsector")
# Display the result as a styled table
ss.style()


# In[17]:


# Bar chart of the "rmse" field of the per-sector skill result
ss.plot.bar(field="rmse", title="Hm0 RMSE by wind sector");


# In[18]:


# Time series restricted to the points labelled "W" in the windsector variable
cmp.where(cmp.data["windsector"] == "W").plot.timeseries();


# In[ ]: