#!/usr/bin/env python
# coding: utf-8

# # MIKE IO 1D Pandas DataFrame examples
# 
# Results in MIKE IO 1D are fundamentally extracted as Pandas DataFrames. This notebook shows
# specific methods for working with those DataFrames.

# In[1]:


from mikeio1d import Res1D

# Create a Res1D object for the example river network result file.
# NOTE: the path is relative, so it resolves against the current working directory.
res = Res1D("../tests/testdata/network_river.res1d")


# ### Column modes
# MIKE IO 1D supports different types of column headers. Depending on your use case, one mode may be preferable to another. There are four main modes:
# - string (default)
# - all
# - compact
# - timeseries
# 
# These are illustrated in the following cells.

# In[2]:


# The default column mode ('string') concatenates information about the column with a ':' delimiter.
df = res.reaches.Discharge.read()
df.head(2)  # Preview the first two rows.


# In[3]:


# A hierarchical column mode is also supported. With 'compact', only the relevant levels are included.
# For the full hierarchy, use 'all'.
df = res.reaches.Discharge.read(column_mode="compact")
df.head(2)  # Preview the first two rows.


# In[4]:


# It's also possible to have a TimeSeriesId column index.
# A TimeSeriesId is an object that uniquely identifies each time series.
df = res.reaches.Discharge.read(column_mode="timeseries")
df.head(2)  # Preview the first two rows.


# ### MIKE IO 1D extends Pandas by providing a '.m1d' accessor on all DataFrames.
# The accessor requires MultiIndex columns (e.g. column_mode='all' or 'compact') and provides several useful methods for working with DataFrames within MIKE IO 1D:
# - .m1d.query()
# - .m1d.compact()
# - .m1d.groupby()
# - .m1d.groupby_chainage()
# - .m1d.agg_chainage()
# 
# These methods are illustrated below.

# In[5]:


# To use the .m1d accessor, the DataFrame must have MultiIndex columns (e.g. column_mode='all' or 'compact').
# The .m1d accessor exists on the DataFrame itself.
df = res.read(column_mode="all")
df.m1d  # Evaluate the accessor to confirm it is available on this DataFrame.


# #### .m1d.compact()

# In[6]:


# Let's make some queries on the DataFrame itself with help from the .m1d accessor.
# First, read the entire file into a DataFrame with column_mode='all' (the full column hierarchy).
df = res.read(column_mode="all")
df.head(2)  # Preview the first two rows.


# In[7]:


# That's a lot of detail, which makes the header a bit busy. We can use .m1d.compact() to remove redundant levels.
# Note: df is overwritten here, so the following cells work on the compacted DataFrame.
df = df.m1d.compact()
df.head(2)  # Preview the first two rows.


# #### .m1d.query()

# In[8]:


# Let's get Discharge for the reaches.
# The query string filters on column levels ('group' and 'quantity' here).
# Note: df is overwritten, so the following cells operate on this filtered subset.
df = df.m1d.query("group=='Reach' and quantity=='Discharge'")
df.head(2)  # Preview the first two rows.


# In[9]:


# Let's look at all the reaches with 'trib' in their name.
# This narrows the reach Discharge subset selected in the previous cell, since df is overwritten again.
df = df.m1d.query("name.str.contains('trib')")
df.head(2)  # Preview the first two rows.


# In[10]:


# Let's look at the max discharge for each reach.
# Passing a list to .agg() returns a DataFrame with one row ('max') holding the maximum of each column.
df.agg(["max"])


# In[11]:


# Reshape the table with some standard pandas methods.
# Tip: Chaining methods inside brackets, one call per line, is a great way to explore the data.
# Comment out lines from the bottom up to see the effect of each step.
(
    df.agg(["max"])
    .T
    .droplevel(["group"])
    .unstack()
    # .pipe(lambda df: df * 2)  # Uncomment this line to see the effect of the .pipe() method.
)


# In[12]:


# Let's start from scratch and use bracket chaining to create the same table.
# Each step sits on its own line, so steps can be commented out from the bottom up.
df = (
    res.read(column_mode="all")  # Read everything with the full column hierarchy.
    .m1d.query("name.str.contains('trib')")  # Keep columns whose name contains 'trib'.
    .m1d.compact()  # Remove redundant column levels.
    .m1d.query("quantity=='Discharge'")  # Keep only the Discharge quantity.
    .agg(["max"])  # Maximum of each column.
    .T.droplevel("group")  # Transpose, then drop the 'group' level from the resulting row index.
    .unstack()
)
df


# #### .m1d.agg_chainage()

# In[13]:


# Now let's try something different. We aggregate the max discharge for each reach, then look at descriptive statistics along the time axis.
# Here, 'count' is the number of time steps and 'mean' is the mean over time of the max discharge across all Q-points along a reach.
(
    res.read(column_mode="all")
    .m1d.query("quantity=='Discharge'")
    .m1d.compact()
    .m1d.agg_chainage("max")  # Aggregate along chainage using 'max'.
    .describe()  # Summary statistics computed over the rows (time steps).
)


# #### .m1d.groupby_chainage()

# In[14]:


# Similarly, let's look at descriptive statistics (such as the mean) of the first and last Q points.
(
    res.read(column_mode="all")
    .m1d.query("quantity=='Discharge'")
    .m1d.compact()
    .m1d.groupby_chainage()
    .nth([0, -1])  # First we group with groupby_chainage(), then select the first and last chainages.
    .describe()
    .droplevel(["quantity", "group"], axis=1)  # Drop the 'quantity' and 'group' column levels from the output.
)


# #### .m1d.groupby()

# In[15]:


# The .m1d accessor can also group by dimensions other than chainage.
# Here we describe how the global maximum of each quantity in the system varies with time.
(
    res.read(column_mode="all")
    .m1d.groupby("quantity")
    .agg("max")
    .describe()
)


# In[ ]: