#!/usr/bin/env python
# coding: utf-8

# # Pre-matched data
# 
# ModelSkill is a general-purpose model skill assessment library for (spatio-)temporal data.
# 
# If your data is in a tabular format, where each row corresponds to a time step and you have at least one column with an observed value and another column with modelled values, then you can use the ModelSkill library to assess the model skill.

# In[1]:


import pandas as pd
import modelskill as ms


# The csv file has 5 columns:
# 
# * datetime
# * Station X (*observed*)
# * linear
# * quadratic
# * random_forest_n10
#     

# In[2]:


# Load the pre-matched table; the `datetime` column becomes the index and is
# parsed as timestamps.
df = pd.read_csv(
    "../tests/testdata/matched_data.csv",
    index_col="datetime",
    parse_dates=True,
)
# Preview the first rows.
df.head()


# To use this dataset for skill assessment, we create a `Comparer` with the `modelskill.from_matched()` function. To get nice labels on the plots, we also define which physical quantity the data represents.

# In[3]:


# Build the comparer from the already-matched table: `Station X` is the
# observation column, and the quantity provides the name and unit used for
# plot labels.
cmp = ms.from_matched(
    df,
    obs_item="Station X",
    quantity=ms.Quantity(name="Some variable", unit="s"),
)
# Display the comparer.
cmp


# In[4]:


# Select only the `linear` model and plot it as a time series.
# The trailing semicolon is a notebook idiom that keeps the returned object
# from being echoed below the figure.
cmp.sel(model='linear').plot.timeseries();


# In[5]:


# Scatter plot for the `linear` model only, with the skill table enabled and
# a custom title.
cmp.sel(model="linear").plot.scatter(
    skill_table=True,
    title="Linear regression",
);


# In[6]:


# Same scatter plot, now for the `random_forest_n10` model, using the default
# plot options (no skill table or title requested).
cmp.sel(model='random_forest_n10').plot.scatter();


# In[7]:


# Compute the skill table for the comparer and round it to 2 decimals for
# display.
cmp.skill().round(2)


# In[8]:


# Time series plot of the whole comparer (no model selection), requesting the
# "plotly" backend instead of the default one.
cmp.plot.timeseries(backend="plotly")


# In[ ]: