#!/usr/bin/env python
# coding: utf-8

# Now that we have gone through a manual process of modeling our dataset, let's
# see if we can replicate this using an Automated workflow. As a reminder, our
# plan of action was as follows:
#
# 1. Perform EDA on the dataset to extract valuable insight about the process generating the time series **(COMPLETED)**.
# 2. Build a baseline model (univariate model without exogenous variables) for benchmarking purposes **(COMPLETED)**.
# 3. Build a univariate model with all exogenous variables to check best possible performance **(COMPLETED)**.
# 4. Evaluate the model with exogenous variables and discuss any potential issues **(COMPLETED)**.
# 5. Overcome issues identified above **(COMPLETED)**.
# 6. Make future predictions with the best model **(COMPLETED)**.
# 7. Replicate flow with Automated Time Series Modeling (AutoML) **(Covered in this notebook)**

# In[1]:

# Only enable critical logging (Optional)
import importlib
import os
import subprocess
import sys

os.environ["PYCARET_CUSTOM_LOGGING_LEVEL"] = "CRITICAL"

# In[2]:


def what_is_installed():
    """Print the versions reported by ``pycaret.show_versions``.

    The import is deliberately local so that a missing ``pycaret`` surfaces
    here as ``ModuleNotFoundError`` and can be handled by the caller.
    """
    from pycaret import show_versions

    show_versions()


try:
    what_is_installed()
except ModuleNotFoundError:
    # Install with the interpreter that is running this file. Unlike
    # ``get_ipython().system('pip install ...')`` this also works outside
    # IPython and always targets the active environment.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "pycaret"])
    # Make the freshly installed package visible to the import system.
    importlib.invalidate_caches()
    what_is_installed()

# In[3]:

import numpy as np
import pandas as pd
from pycaret.datasets import get_data
from pycaret.time_series import TSForecastingExperiment

# In[4]:

# Global Figure Settings for notebook ----
global_fig_settings = {"renderer": "notebook", "width": 1000, "height": 600}

# In[5]:

data = get_data("airquality", verbose=False)
# Combine the separate date and time columns into a single datetime index.
data["index"] = pd.to_datetime(data["Date"] + " " + data["Time"])
data.drop(columns=["Date", "Time"], inplace=True)
# -200 is replaced with NaN so that it is treated as a missing value.
data.replace(-200, np.nan, inplace=True)
data.set_index("index", inplace=True)

target = "CO(GT)"
exog_vars = ["NOx(GT)", "PT08.S3(NOx)", "RH"]
include = [target] + exog_vars
# Keep only the target and the selected exogenous variables.
data = data[include]
data.head()

# # Step 7: AutoML

# In[6]:

FH = 48  # forecast horizon passed as ``fh`` to every experiment setup
metric = "mase"  # metric used to sort models in compare_models
exclude = ["auto_arima", "bats", "tbats", "lar_cds_dt", "par_cds_dt"]

# # Step 7A:
# Univariate AutoML with and without Exogenous Variables

# In[7]:

exp_auto = TSForecastingExperiment()

# enforce_exogenous=False --> Use multivariate forecasting when model supports it, else use univariate forecasting
exp_auto.setup(
    data=data,
    target=target,
    fh=FH,
    enforce_exogenous=False,
    numeric_imputation_target="ffill",
    numeric_imputation_exogenous="ffill",
    fig_kwargs=global_fig_settings,
    session_id=42,
)

# In[8]:

# # Check available models ----
# exp_auto.models()

# In[9]:

# Include slower models like Prophet (turbo=False), but exclude some specific models ----
best = exp_auto.compare_models(sort=metric, turbo=False, exclude=exclude)

# In[10]:

exp_auto.plot_model(best)

# In[11]:

final_auto_model = exp_auto.finalize_model(best)

# In[14]:


def _forecast_exogenous(exog_names):
    """Forecast every variable in *exog_names* with its own AutoML experiment.

    Each variable is taken as a column of the module-level ``data`` frame and
    modeled on its own with the module-level ``FH``, ``metric`` and
    ``global_fig_settings``. Returns a DataFrame with one column of future
    predictions per variable, labeled with the names in *exog_names*.
    """
    exog_names = list(exog_names)

    # Step 1: Build one finalized model per exogenous variable ----
    exog_exps = []
    exog_models = []
    for exog_name in exog_names:
        exog_exp = TSForecastingExperiment()
        exog_exp.setup(
            data=data[exog_name],
            fh=FH,
            numeric_imputation_target="ffill",
            numeric_imputation_exogenous="ffill",
            fig_kwargs=global_fig_settings,
            session_id=42,
        )

        # Users can customize how to model future exogenous variables i.e. add
        # more steps and models to potentially get better models at the expense
        # of higher modeling time.
        best_exog = exog_exp.compare_models(
            sort=metric,
            include=["arima", "ets", "exp_smooth", "theta", "lightgbm_cds_dt"],
        )
        exog_exps.append(exog_exp)
        exog_models.append(exog_exp.finalize_model(best_exog))

    # Step 2: Get future predictions for exog variables ----
    future_exog = pd.concat(
        [
            exog_exp.predict_model(exog_model)
            for exog_exp, exog_model in zip(exog_exps, exog_models)
        ],
        axis=1,
    )
    future_exog.columns = exog_names
    return future_exog


def safe_predict(exp, model):
    """Prediction wrapper for demo purposes.

    First tries ``exp.predict_model(model)`` directly. If that raises
    ``ValueError`` (treated here as "future exogenous values are required"),
    the exogenous variables reported by the experiment itself are forecast
    individually and the prediction is retried with those values as ``X``.

    Parameters
    ----------
    exp : experiment object exposing ``predict_model`` and
        ``exogenous_variables``.
    model : finalized model to predict with.

    Returns
    -------
    The object returned by ``exp.predict_model``.
    """
    try:
        return exp.predict_model(model)
    except ValueError as exception:
        print(exception)
        exo_vars = exp.exogenous_variables
        print(f"{len(exo_vars)} exogenous variables (X) needed in order to make future predictions:\n{exo_vars}")

    # Use the experiment's own variable list (not the module-level
    # ``exog_vars``) so the wrapper stays correct for any experiment.
    future_exog = _forecast_exogenous(exo_vars)
    return exp.predict_model(model, X=future_exog)


# In[15]:

future_preds = safe_predict(exp_auto, final_auto_model)
future_preds.plot()