#!/usr/bin/env python
# coding: utf-8

# ### This notebook shows how you can look under the hood of the PyCaret Time Series Module and customize the AutoML flow per your business needs

# ## Import libraries

# In[1]:


from pprint import pprint
import pandas as pd

from pycaret.datasets import get_data
from pycaret.internal.pycaret_experiment import TimeSeriesExperiment

from sktime.utils.plotting import plot_series


# ## Load data

# In[2]:


# Fetch the 'airline' dataset through PyCaret's dataset helper.
# NOTE(review): verbose=False presumably suppresses get_data's own preview
# output — confirm against the PyCaret docs.
y = get_data('airline', verbose=False)
# Plot the raw series. Binding the return value to `_` keeps it from being
# echoed as the cell's output, so only the figure is shown.
_ = plot_series(y)


# ## Setup PyCaret Time Series Experiment 
# 
# * Based on business needs and good data science principles
#   - e.g. Interested in forecasting 12 months out
#   - Use 3 folds to do any cross-validation
#   
# * **Important arguments to setup**
#   - `fh` - forecast-horizon
#   - `fold` - number of cross validation folds to use
#   - `fold_strategy` - 'sliding' or 'expanding'
#   - `seasonal_period` - inferred from the index by default; can be set explicitly

# In[3]:


# Create the experiment object and initialise it on the series.
#   fh=12         -> forecast horizon of 12 points
#   session_id=42 -> presumably the seed that makes runs reproducible
#                    (NOTE(review): confirm against the PyCaret docs)
# No fold-related arguments are passed, so the library defaults apply.
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=12, session_id=42)


# ## Looking under the hood
# 
# ### Train/Test Split
# 
# The data is split internally: the last `len(fh)` points are held out as the test set.

# In[4]:


# Retrieve the train/test partition stored in the experiment's config.
y_train, y_test = (exp.get_config(name) for name in ("y_train", "y_test"))

# First figure: the stored split. Second figure: the full series with its
# last 12 observations overlaid, for visual comparison with the split.
# Results are bound to `_` so only the figures are displayed.
_ = plot_series(y_train, y_test, labels=["Train", "Test"])
_ = plot_series(y, y[-12:], labels=["Full Data", "Last 12 points"])


# ### Seasonality
# 
# Used to define the internal grid for tuning models, etc.

# In[5]:


# Read the seasonality settings from the experiment config first ...
seasonality_present = exp.get_config("seasonality_present")
seasonal_period = exp.get_config("seasonal_period")

# ... then report them.
print(f"\nSeasonality Present: {seasonality_present}")
print(f"Seasonal Period: {seasonal_period}")


# ### Cross Validation / Fold Generation
# 
# * Uses `sktime`'s `ExpandingWindowSplitter` and `SlidingWindowSplitter` to generate folds.
# * Arguments for the splitters are determined based on inputs to `exp.setup`
# 
# **Example** (using `sktime=0.5.3`):
#   - Total Data Points = 144
#   - Train Data Points = 144 - 12 = 132
#   - Folds Requested = 3 (Default)
#   - Type = Expanding (Default)
#   - Initial Window = Computed (132 - 3 * 12 = 96)
#   - Step Size = Window Length = Length of Forecast Horizon = 12

# In[6]:


# Fetch the cross-validation fold generator once, then report the splitter
# itself followed by the attributes that define how folds are cut.
fold_generator = exp.get_config("fold_generator")

print(f"\nFold Generator: {fold_generator}")
print(f"  - Fold Generator Horizon: {fold_generator.fh}")
print(f"  - Fold Generator Initial Window: {fold_generator.initial_window}")
print(f"  - Fold Generator Step Length: {fold_generator.step_length}")
print(f"  - Fold Generator Window Length: {fold_generator.window_length}")


# ## Looking Under the Hood & Customizing the Flow

# ### Model Definitions
# 
# - `Args` - Used in `create_model`
# - `Tune Grid` - Used for fixed grid search
# - `Tune Distributions` - Used in random grid search

# In[7]:


# Show the first rows of the model table. internal=True presumably adds the
# internal definition columns (args, tune grid, tune distributions) —
# NOTE(review): confirm against the PyCaret docs.
exp.models(internal=True).head()


# ### Create Model

# #### Default

# In[8]:


# Create the "naive" model without overriding any model arguments.
model = exp.create_model("naive")
# Bare expression so the notebook displays the returned model's repr.
model


# #### Manually specify model arguments

# In[9]:


# Same "naive" model, but with an explicit model argument: strategy='drift'
# is passed as an extra keyword to create_model.
model = exp.create_model("naive", strategy='drift')
# Bare expression so the notebook displays the returned model's repr.
model


# #### Change number of folds

# In[10]:


# Request 5 cross-validation folds for this call only (fold=5).
# This rebinding of `model` is the one the tuning cells below operate on.
model = exp.create_model("naive", fold=5)
# Bare expression so the notebook displays the returned model's repr.
model


# ### Tune Model

# #### Returning the Tuner

# In[11]:


# With return_tuner=True, tune_model returns a pair that is unpacked here:
# the tuned model and the tuner object used for the search.
tuned_model, tuner = exp.tune_model(model, return_tuner=True)


# #### Check the results of the tuner

# In[12]:


# Grid Results: the tuner's cv_results_ rendered as a DataFrame.
# NOTE(review): presumably follows the scikit-learn cv_results_ layout
# (one row per candidate) — confirm.
pd.DataFrame(tuner.cv_results_)


# In[13]:


# Best Hyperparameters: pretty-print the tuner's best_params_ attribute.
pprint(tuner.best_params_)


# #### Defining a Custom Grid

# In[14]:


# Custom search space for tuning: each key is a model argument, each value
# the list of candidates to try.
# NOTE(review): sp=[12] presumably pins the seasonal period to 12 — confirm.
my_grid = dict(
    sp=[12],
    strategy=["last", "mean", "drift"],
    window_length=[12, 24, None],
)
# Bare expression so the notebook displays the grid.
my_grid


# In[15]:


# Tune the same `model` again, this time searching the user-supplied grid
# passed via custom_grid. Rebinds `tuned_model` from the earlier tuning cell.
tuned_model = exp.tune_model(model, custom_grid=my_grid)


# **The performance improved over the original tuning (we essentially converted the naive model to a seasonal naive model with this new grid)**

# ## Prediction Intervals
# 
# Available for some models only

# In[16]:


# Create a "theta" model; rebinds `model`, replacing the naive model above.
model = exp.create_model("theta")


# In[17]:


# Predict with the theta model; return_pred_int=True asks for prediction
# intervals in the returned result, which the notebook then displays.
exp.predict_model(model, return_pred_int=True)


# **Models that do not provide a prediction interval simply return NA values**

# In[18]:


# Rebind `model` to a "naive" model for the prediction call that follows.
model = exp.create_model("naive")


# In[19]:


# Same prediction call as for theta, now on the naive model, with
# return_pred_int=True again requesting prediction intervals.
exp.predict_model(model, return_pred_int=True)