#!/usr/bin/env python
# coding: utf-8

# # Dfs0
# 
# See [Dfs0 in MIKE IO Documentation](https://dhi.github.io/mikeio/user-guide/dfs0.html)
# 

# In[ ]:


import pandas as pd
import mikeio


# ## Reading data

# In[ ]:


# Read every item of the dfs0 file. A bare name on the last line of a
# notebook cell displays that object.
ds = mikeio.read("data/Oresund_ts.dfs0")
ds


# In[ ]:


# Inspect the type of the object returned by `mikeio.read`.
type(ds)


# The mikeio read function returns a `Dataset` which is a container of `DataArray`s.
# 
# A `DataArray` can be selected by name or by index.

# In[ ]:


# `da` is taken from the full read above and is used again further down
# (da.item, da.shape, da.values).
da = ds["Drogden: Surface elevation"]   # or ds.Drogden_Surface_elevation or ds[2]
da


# Upon `read`, specific items can be selected with the `items` argument using name or index.

# In[ ]:


# Select items by position. Note that `ds` is rebound to the result of this read.
ds = mikeio.read("data/Oresund_ts.dfs0", items=[0,2,3])
ds


# Wildcards can be used to select multiple items:

# In[ ]:


# Select items with a wildcard pattern. `ds` is rebound once more, so the
# cells that follow operate on this selection.
ds = mikeio.read("data/Oresund_ts.dfs0", items="*Surf*")
ds


# A specific time subset can be selected using .sel:

# In[ ]:


# Label-based selection: the slice bounds are timestamp strings.
# The result is only displayed; `ds` is not reassigned.
ds.sel(time=slice("2018-03-04","2018-03-04 12:00"))


# Or with positional indexing using .isel:

# In[ ]:


# Integer-position slice (start 10, stop 20) along the time axis.
ds.isel(time=slice(10,20))


# The Dataset and DataArray have a number of useful attributes like `time`, `items`, `ndims`, `shape`, `values` (only DataArray) etc

# In[ ]:


ds.time


# In[ ]:


ds.items


# In[ ]:


# `da` is the "Drogden: Surface elevation" DataArray selected earlier.
da.item


# In[ ]:


da.shape


# In[ ]:


da.values


# The time series can be plotted with the plot method.

# In[ ]:


# The trailing semicolon suppresses the textual cell output in the notebook.
ds.plot();


# A simple timeseries Dataset can easily be converted to a Pandas DataFrame.

# In[ ]:


# `df` is reassigned by later cells when csv files are loaded.
df = ds.to_pandas()
df


# ## Writing data
# 
# Often, time series data will come from a csv or an Excel file. Here is an example of how to read a csv file with pandas and then write the pandas DataFrame to a dfs0 file.

# In[ ]:


# Skip the first line of the file, use the first column as the index and
# ask pandas to parse that index as dates.
df = pd.read_csv(
    "data/naples_fl.csv",
    index_col=0,
    parse_dates=True,
    skiprows=1,
)
df


# You will probably need to parse a specific data format many times, in which case it is a good idea to create a function.

# In[ ]:


def read_ncei_obs(filename):
    """Load an NCEI observation csv file and return its renamed columns.

    The first line of the file is skipped, the first column becomes the
    index (pandas is asked to parse it as dates), and only the four columns
    listed in ``column_names`` are kept, under their new short names.

    Parameters
    ----------
    filename
        Path of the csv file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``temperature_avg_f``, ``temperature_max_f``,
        ``temperature_min_f`` and ``prec_in``, in that order.
    """
    # header used in the file -> short column name
    column_names = {
        'TAVG (Degrees Fahrenheit)': 'temperature_avg_f',
        'TMAX (Degrees Fahrenheit)': 'temperature_max_f',
        'TMIN (Degrees Fahrenheit)': 'temperature_min_f',
        'PRCP (Inches)': 'prec_in',
    }

    raw = pd.read_csv(filename, skiprows=1, parse_dates=True, index_col=0)
    renamed = raw.rename(columns=column_names)

    # Reuse the mapping's values so the new names are written only once;
    # selecting by them keeps just the columns carrying the new names.
    return renamed[list(column_names.values())]


# In[ ]:


# Load the csv through the helper, so `df` only holds the four renamed columns.
df = read_ncei_obs("data/naples_fl.csv")
df.head()


# In[ ]:


# Last rows of the same DataFrame.
df.tail()


# In[ ]:


# (number of rows, number of columns)
df.shape


# Convert temperature to Celsius and precipitation to mm.

# In[ ]:


# `assign` returns a new DataFrame with the two metric columns added next to
# the original ones; each callable receives the DataFrame being assigned to.
df_final = df.assign(
    temperature_max_c=lambda frame: (frame['temperature_max_f'] - 32) / 1.8,
    prec_mm=lambda frame: frame['prec_in'] * 25.4,
)

df_final.head()


# In[ ]:


# Select the rows labelled 2021 (partial-string indexing, assuming the index
# was parsed to a DatetimeIndex by read_csv) and plot them. The trailing
# semicolon suppresses the textual cell output.
df_final.loc['2021'].plot();


# Creating a dfs0 file from a dataframe is pretty straightforward.
# 
# 1. Convert the dataframe to a `Dataset`

# In[ ]:


# No `items` argument is given here, so no EUM type/unit is specified.
ds = mikeio.from_pandas(df_final)
ds


# 2. Write the `Dataset` to a dfs0 file.

# In[ ]:


# NOTE(review): presumably requires the "output" directory to exist already — confirm.
ds.to_dfs("output/naples_fl.dfs0")


# Let's read it back in again...

# In[ ]:


# Read the file that was just written, to check what was stored.
saved_ds = mikeio.read("output/naples_fl.dfs0")
saved_ds


# By default, EUM types are undefined, but they can be specified. Let's select a few columns.

# In[ ]:


# Work on two columns only: the maximum temperature in Celsius and the
# precipitation, which is still in inches.
selected_columns = ['temperature_max_c', 'prec_in']
df2 = df_final[selected_columns]
df2.head()


# In[ ]:


from mikeio import ItemInfo, EUMType, EUMUnit

# Two ItemInfo entries for the two columns of df2 (presumably matched by
# position — confirm in the mikeio documentation).
temperature_item = ItemInfo(EUMType.Temperature)
precipitation_item = ItemInfo(EUMType.Precipitation_Rate, EUMUnit.inch_per_day)

ds2 = mikeio.from_pandas(df2, items=[temperature_item, precipitation_item])
ds2


# ## EUM

# In[ ]:


from mikeio.eum import ItemInfo, EUMType, EUMUnit

# Look up EUM types using the search string "wind".
EUMType.search("wind")


# In[ ]:


# Units attached to the Wind_speed type (presumably the units valid for it — confirm).
EUMType.Wind_speed.units


# ### Inline Exercise
# 
# What is the best EUM Type for "peak wave direction"? What is the default unit? 

# In[ ]:


# insert your code here


# ## Precipitation data

# In[ ]:


# Use the first column as the index and ask pandas to parse it as dates.
df = pd.read_csv(
    "data/precipitation.csv",
    index_col=0,
    parse_dates=True,
)
df.head()


# In[ ]:


from mikecore.DfsFile import DataValueType

# A single ItemInfo is passed for the whole DataFrame: precipitation rate in
# mm per hour with the MeanStepBackward data value type.
precipitation_info = ItemInfo(
    EUMType.Precipitation_Rate,
    EUMUnit.mm_per_hour,
    data_value_type=DataValueType.MeanStepBackward,
)
precipitation_ds = mikeio.from_pandas(df, items=precipitation_info)
precipitation_ds.to_dfs("output/precipitation.dfs0")


# ## Selecting 

# In[ ]:


# Read back the precipitation file written earlier, keeping only two of its items.
ds = mikeio.read("output/precipitation.dfs0", items=[1,4]) # select item by item number (starting from zero)
ds


# In[ ]:


# Same file, this time selecting by item name; `ds` is rebound to the new selection.
ds = mikeio.read("output/precipitation.dfs0", items=["Precipitation station 5","Precipitation station 1"]) # or by name (in the order you like it)
ds


# ### Inline Exercise
# 
# Read all items to a variable ds. Select "Precipitation station 3" - which different ways can you select this item?  

# In[ ]:


# insert your code here


# In[ ]:


# `utils` is not part of this file; presumably a helper module that sits next
# to the notebook — confirm it is importable from the working directory.
import utils

# NOTE(review): presumably reports system/package version information — confirm in utils.
utils.sysinfo()


# In[ ]: