#!/usr/bin/env python
# coding: utf-8

# ### This notebook shows how you can look under the hood of the PyCaret Time Series Module and customize the AutoML flow per your business needs

# ## Import libraries

# In[1]:


from pprint import pprint
import pandas as pd
from pycaret.datasets import get_data
from pycaret.internal.pycaret_experiment import TimeSeriesExperiment
from sktime.utils.plotting import plot_series


# ## Load data

# In[2]:


y = get_data('airline', verbose=False)
_ = plot_series(y)


# ## Setup PyCaret Time Series Experiment
#
# * Based on business needs and good data science principles
#   - e.g. Interested in forecasting 12 months out
#   - Use 3 folds to do any cross-validation
#
# * **Important arguments to setup**
#   - `fh` - forecast-horizon
#   - `folds` - number of cross validation folds to use
#   - `fold_strategy` - 'sliding' or 'expanding'
#   - `seasonal_period`: Inferred using index; can be explicitly set

# In[3]:


# Forecast horizon (number of periods to forecast). Kept in a single constant
# so the setup call and the "last FH points" comparison plot below cannot
# drift apart.
FH = 12

exp = TimeSeriesExperiment()
exp.setup(data=y, fh=FH, session_id=42)


# ## Looking under the hood
#
# ### Train/Test Split
#
# Internally split: Keep len(fh) as test set

# In[4]:


y_train = exp.get_config("y_train")
y_test = exp.get_config("y_test")

# The second plot overlays the last FH observations on the full series so the
# internal test split shown in the first plot can be compared against it.
_ = plot_series(y_train, y_test, labels=['Train', 'Test'])
_ = plot_series(y, y[-FH:], labels=['Full Data', 'Last 12 points'])


# ### Seasonality
#
# Used to define the internal grid for tuning models, etc.

# In[5]:


print(f"\nSeasonality Present: {exp.get_config('seasonality_present')}")
print(f"Seasonal Period: {exp.get_config('seasonal_period')}")


# ### Cross Validation / Fold Generation
#
# * Uses `sktime`'s `ExpandingWindowSplitter` and `SlidingWindowSplitter` to generate folds.
# * Arguments for the splitters are determined based on inputs to `exp.setup`
#
# **Example** (using `sktime=0.5.3`):
# - Total Data Points = 144
# - Train Data Points = 144 - 12 = 132
# - Folds Requested = 3 (Default)
# - Type = Expanding (Default)
# - Initial Window = Computed (132 - 3 * 12 = 96)
# - Step Size = Window Length = Length of Forecast Horizon = 12

# In[6]:


# Fetch the splitter once instead of once per printed attribute.
fold_generator = exp.get_config('fold_generator')

print(f"\nFold Generator: {fold_generator}")
print(f" - Fold Generator Horizon: {fold_generator.fh}")
print(f" - Fold Generator Initial Window: {fold_generator.initial_window}")
print(f" - Fold Generator Step Length: {fold_generator.step_length}")
print(f" - Fold Generator Window Length: {fold_generator.window_length}")


# ## Looking Under the Hood & Customizing the Flow

# ### Model Definitions
#
# - `Args` - Used in `create_model`
# - `Tune Grid` - Used for fixed grid search
# - `Tune Distributions` - Used in random grid search

# In[7]:


exp.models(internal=True).head()


# ### Create Model

# #### Default

# In[8]:


model = exp.create_model("naive")
model


# #### Manually specify model arguments

# In[9]:


model = exp.create_model("naive", strategy='drift')
model


# #### Change number of folds

# In[10]:


model = exp.create_model("naive", fold=5)
model


# ### Tune Model

# #### Returning the Tuner

# In[11]:


tuned_model, tuner = exp.tune_model(model, return_tuner=True)


# #### Check the results of the tuner

# In[12]:


# Grid Results
pd.DataFrame(tuner.cv_results_)


# In[13]:


# Best Hyperparameters
pprint(tuner.best_params_)


# #### Defining a Custom Grid

# In[14]:


my_grid = {
    'sp': [12],
    'strategy': ['last', 'mean', 'drift'],
    'window_length': [12, 24, None],
}
my_grid


# In[15]:


tuned_model = exp.tune_model(model, custom_grid=my_grid)


# **The performance improved over the original tuning (we essentially converted the naive model to a seasonal naive model with this new grid)**

# ## Prediction Intervals
#
# Available for some models only

# In[16]:


model = exp.create_model("theta")


# In[17]:


exp.predict_model(model, return_pred_int=True)


# **Models that do not provide a prediction interval simply return NA values**

# In[18]:


model = exp.create_model("naive")


# In[19]:


exp.predict_model(model, return_pred_int=True)