#!/usr/bin/env python
# coding: utf-8

# Now that we have gone through a manual process of modeling our dataset, let's see if we can replicate this using an automated workflow. As a reminder, our plan of action was as follows:
# 
# 1. Perform EDA on the dataset to extract valuable insight about the process generating the time series **(COMPLETED)**.
# 2. Build a baseline model (univariate model without exogenous variables) for benchmarking purposes **(COMPLETED)**.
# 3. Build a univariate model with all exogenous variables to check best possible performance **(COMPLETED)**.
# 4. Evaluate the model with exogenous variables and discuss any potential issues **(COMPLETED)**.
# 5. Overcome issues identified above **(COMPLETED)**.
# 6. Make future predictions with the best model **(COMPLETED)**.
# 7. Replicate flow with Automated Time Series Modeling (AutoML) **(Covered in this notebook)**

# In[1]:


# Optional: set PyCaret's custom logging level to CRITICAL through its
# environment variable, so only critical messages are logged.
import os
os.environ.update(PYCARET_CUSTOM_LOGGING_LEVEL="CRITICAL")


# In[2]:


def what_is_installed():
    """Print the installed versions reported by ``pycaret.show_versions``.

    The import is done at call time rather than at module level, so a missing
    PyCaret installation surfaces as ``ModuleNotFoundError`` from this call,
    where the caller can catch it.
    """
    from pycaret import show_versions
    show_versions()


try:
    what_is_installed()
except ModuleNotFoundError:
    # PyCaret is not importable: install it and retry once.
    # Use the running interpreter's own pip instead of the IPython-only
    # `get_ipython().system(...)`, so this also works when the file is run as
    # a plain Python script and always targets the active environment.
    import importlib
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", "pycaret"])
    # Make the import system notice the freshly installed package.
    importlib.invalidate_caches()
    what_is_installed()


# In[3]:


import numpy as np
import pandas as pd
from pycaret.datasets import get_data
from pycaret.time_series import TSForecastingExperiment


# In[4]:


# Figure settings shared by every experiment in this notebook; passed to
# `setup` as `fig_kwargs` ----
global_fig_settings = dict(renderer="notebook", width=1000, height=600)


# In[5]:


# Load the air-quality dataset and build a datetime index from its separate
# "Date" and "Time" text columns.
raw = get_data("airquality", verbose=False)
timestamps = pd.to_datetime(raw["Date"] + " " + raw["Time"])
data = (
    raw.assign(index=timestamps)
    .drop(columns=["Date", "Time"])
    # -200 is replaced by NaN (presumably the dataset's missing-value marker)
    .replace(-200, np.nan)
    .set_index("index")
)

# Keep only the forecast target and the exogenous variables used below.
target = "CO(GT)"
exog_vars = ['NOx(GT)', 'PT08.S3(NOx)', 'RH']
include = [target] + exog_vars
data = data[include]
data.head()


# # Step 7: AutoML

# In[6]:


# Forecast horizon handed to `setup` as `fh`.
FH = 48
# Metric used to sort models in `compare_models`.
metric = "mase"
# Models left out of the main model comparison.
exclude = [
    "auto_arima",
    "bats",
    "tbats",
    "lar_cds_dt",
    "par_cds_dt",
]


# # Step 7A: Univariate AutoML with and without Exogenous Variables

# In[7]:


# Single experiment covering models with and without exogenous-variable support.
exp_auto = TSForecastingExperiment()

# enforce_exogenous=False --> models that support exogenous variables use
# them; the remaining models fall back to univariate forecasting.
# Missing values in the target and in the exogenous columns are forward-filled.
exp_auto.setup(
    data=data,
    target=target,
    fh=FH,
    session_id=42,
    fig_kwargs=global_fig_settings,
    enforce_exogenous=False,
    numeric_imputation_target="ffill",
    numeric_imputation_exogenous="ffill",
)


# In[8]:


# # Check available models ----
# exp_auto.models()


# In[9]:


# Compare the candidate models, sorted by `metric`. turbo=False also brings in
# the slower models (e.g. Prophet); everything listed in `exclude` is skipped ----
best = exp_auto.compare_models(
    exclude=exclude,
    turbo=False,
    sort=metric,
)


# In[10]:


# Plot the model returned by the comparison.
exp_auto.plot_model(best)


# In[11]:


# Finalize the selected model so it can be used for future predictions.
final_auto_model = exp_auto.finalize_model(best)


# In[14]:


def safe_predict(exp, model):
    """Prediction wrapper for demo purposes.

    First tries ``exp.predict_model(model)`` directly. If that raises
    ``ValueError`` and the experiment has exogenous variables, the error is
    printed, each exogenous variable is forecast on its own with a separate
    experiment, and those forecasts are supplied as ``X`` to a second
    ``predict_model`` call.

    Parameters
    ----------
    exp : TSForecastingExperiment
        Experiment that ``model`` belongs to. Its ``exogenous_variables``
        attribute decides which columns are forecast in the fallback path.
    model
        Model to predict with (the demo passes a finalized model).

    Returns
    -------
    The object returned by ``exp.predict_model``.

    Raises
    ------
    ValueError
        Re-raised unchanged when the direct prediction fails and the
        experiment has no exogenous variables, since forecasting exogenous
        variables cannot help in that case.

    Notes
    -----
    The fallback path reads the module-level ``data``, ``FH``, ``metric`` and
    ``global_fig_settings``; every name in ``exp.exogenous_variables`` must be
    a column of ``data``.
    """
    try:
        return exp.predict_model(model)
    except ValueError as exception:
        # Use the variables of the experiment that was passed in, not a
        # notebook-level list, so the wrapper matches whatever `exp` contains.
        exo_vars = list(exp.exogenous_variables)
        if not exo_vars:
            # Nothing to forecast: the failure is unrelated to missing X.
            raise
        print(exception)
        print(f"{len(exo_vars)} exogenous variables (X) needed in order to make future predictions:\n{exo_vars}")

    # Step 1: Fit one univariate model per exogenous variable ----
    exog_exps = []
    exog_models = []
    for exog_var in exo_vars:
        exog_exp = TSForecastingExperiment()
        exog_exp.setup(
            data=data[exog_var], fh=FH,
            numeric_imputation_target="ffill", numeric_imputation_exogenous="ffill",
            fig_kwargs=global_fig_settings, session_id=42
        )

        # Users can customize how to model future exogenous variables i.e. add
        # more steps and models to potentially get better models at the expense
        # of higher modeling time.
        best_exog = exog_exp.compare_models(
            sort=metric, include=["arima", "ets", "exp_smooth", "theta", "lightgbm_cds_dt"]
        )

        exog_exps.append(exog_exp)
        exog_models.append(exog_exp.finalize_model(best_exog))

    # Step 2: Get future predictions for exog variables ----
    future_exog = pd.concat(
        [
            exog_exp.predict_model(exog_model)
            for exog_exp, exog_model in zip(exog_exps, exog_models)
        ],
        axis=1,
    )
    # One forecast column per variable, in the same order as the loop above.
    future_exog.columns = exo_vars

    # Step 3: Predict the target using the forecasted exogenous values ----
    return exp.predict_model(model, X=future_exog)


# In[15]:


# Forecast with the finalized AutoML model through the demo wrapper, then
# plot the returned predictions.
future_preds = safe_predict(exp=exp_auto, model=final_auto_model)
future_preds.plot()