In [1]:
import time
import numpy as np
import pandas as pd

from pycaret.datasets import get_data
from pycaret.internal.pycaret_experiment import TimeSeriesExperiment
In [2]:
y = get_data('airline', verbose=False)
In [3]:
fh = 12  # or, alternatively, fh = np.arange(1, 13)
fold = 3

Available Models

In [4]:
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh)
exp.models()
  Description Value
0 session_id 1433
1 Original Data (144, 1)
2 Missing Values False
3 Transformed Train Set (132,)
4 Transformed Test Set (12,)
5 Fold Generator ExpandingWindowSplitter
6 Fold Number 3
7 Enforce Prediction Interval False
8 Seasonal Period Tested 12
9 Seasonality Detected True
10 Target Strictly Positive True
11 Target White Noise No
12 Recommended d 1
13 Recommended Seasonal D 1
14 CPU Jobs -1
15 Use GPU False
16 Log Experiment False
17 Experiment Name ts-default-name
18 USI c930
19 Imputation Type simple
Out[4]:
Name Reference Turbo
ID
naive Naive Forecaster sktime.forecasting.naive.NaiveForecaster True
grand_means Grand Means Forecaster sktime.forecasting.naive.NaiveForecaster True
snaive Seasonal Naive Forecaster sktime.forecasting.naive.NaiveForecaster True
polytrend Polynomial Trend Forecaster sktime.forecasting.trend.PolynomialTrendForeca... True
arima ARIMA sktime.forecasting.arima.ARIMA True
auto_arima Auto ARIMA sktime.forecasting.arima.AutoARIMA True
exp_smooth Exponential Smoothing sktime.forecasting.exp_smoothing.ExponentialSm... True
ets ETS sktime.forecasting.ets.AutoETS True
theta Theta Forecaster sktime.forecasting.theta.ThetaForecaster True
tbats TBATS sktime.forecasting.tbats.TBATS False
bats BATS sktime.forecasting.bats.BATS False
lr_cds_dt Linear w/ Cond. Deseasonalize & Detrending pycaret.containers.models.time_series.BaseCdsD... True
en_cds_dt Elastic Net w/ Cond. Deseasonalize & Detrending pycaret.containers.models.time_series.BaseCdsD... True
ridge_cds_dt Ridge w/ Cond. Deseasonalize & Detrending pycaret.containers.models.time_series.BaseCdsD... True
lasso_cds_dt Lasso w/ Cond. Deseasonalize & Detrending pycaret.containers.models.time_series.BaseCdsD... True
lar_cds_dt Least Angular Regressor w/ Cond. Deseasonalize... pycaret.containers.models.time_series.BaseCdsD... True
llar_cds_dt Lasso Least Angular Regressor w/ Cond. Deseaso... pycaret.containers.models.time_series.BaseCdsD... True
br_cds_dt Bayesian Ridge w/ Cond. Deseasonalize & Detren... pycaret.containers.models.time_series.BaseCdsD... True
huber_cds_dt Huber w/ Cond. Deseasonalize & Detrending pycaret.containers.models.time_series.BaseCdsD... True
par_cds_dt Passive Aggressive w/ Cond. Deseasonalize & De... pycaret.containers.models.time_series.BaseCdsD... True
omp_cds_dt Orthogonal Matching Pursuit w/ Cond. Deseasona... pycaret.containers.models.time_series.BaseCdsD... True
knn_cds_dt K Neighbors w/ Cond. Deseasonalize & Detrending pycaret.containers.models.time_series.BaseCdsD... True
dt_cds_dt Decision Tree w/ Cond. Deseasonalize & Detrending pycaret.containers.models.time_series.BaseCdsD... True
rf_cds_dt Random Forest w/ Cond. Deseasonalize & Detrending pycaret.containers.models.time_series.BaseCdsD... True
et_cds_dt Extra Trees w/ Cond. Deseasonalize & Detrending pycaret.containers.models.time_series.BaseCdsD... True
gbr_cds_dt Gradient Boosting w/ Cond. Deseasonalize & Det... pycaret.containers.models.time_series.BaseCdsD... True
ada_cds_dt AdaBoost w/ Cond. Deseasonalize & Detrending pycaret.containers.models.time_series.BaseCdsD... True
xgboost_cds_dt Extreme Gradient Boosting w/ Cond. Deseasonali... pycaret.containers.models.time_series.BaseCdsD... True
lightgbm_cds_dt Light Gradient Boosting w/ Cond. Deseasonalize... pycaret.containers.models.time_series.BaseCdsD... True
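exp.models() returns the table above as a pandas DataFrame, so it can be filtered like any other frame. A minimal sketch listing the slower models that are excluded when turbo=True:

all_models = exp.models()
# Models flagged Turbo=False (e.g. BATS and TBATS) are skipped by compare_models unless turbo=False
slow_models = all_models[all_models["Turbo"] == False]
print(slow_models["Name"])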

Plotting Data

In [5]:
# Without any argument, this will plot the original dataset
exp.plot_model()
Out[5]:
'Time Series Plot.html'
In [6]:
# Without an estimator argument, this will plot the original dataset
exp.plot_model(plot="ts")
Out[6]:
'Time Series Plot.html'
In [7]:
# ACF and PACF for the original dataset
exp.plot_model(plot="acf")

# NOTE: you can customize the plots with kwargs - e.g. number of lags, figure size (width, height), etc.
# data_kwargs such as `nlags` are passed to the underlying function that computes the ACF values
# fig_kwargs such as `fig_size` & `fig_template` are passed to plotly and can take any value that plotly accepts
exp.plot_model(plot="pacf", data_kwargs={'nlags':36, }, fig_kwargs={'fig_size': [800, 500], 'fig_template': 'simple_white'})
Out[7]:
'Partial Auto Correlation (PACF).html'
In [8]:
exp.plot_model(plot="decomp_classical")
exp.plot_model(plot="decomp_classical", data_kwargs={'type': 'multiplicative'})
exp.plot_model(plot="decomp_stl")
Out[8]:
'Decomposition STL.html'
In [9]:
# Show the train-test splits on the dataset
# Internal split: the last len(fh) observations form the test set; the remaining observations form the train set
exp.plot_model(plot="train_test_split")

# Show the Cross Validation splits inside the train set
exp.plot_model(plot="cv")
Out[9]:
'Cross Validation.html'
In [10]:
# Plot diagnostics
exp.plot_model(plot="diagnostics")
Out[10]:
'Diagnostics Plot.html'
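Since each call returns the file name of a rendered HTML plot, the data-level plots in this section can also be produced in one pass. A sketch looping over the plot names demonstrated above:

# Render all of the dataset-level plots shown in this section
for plot in ["ts", "acf", "pacf", "decomp_classical", "decomp_stl",
             "train_test_split", "cv", "diagnostics"]:
    exp.plot_model(plot=plot)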

Tests

In [11]:
exp.check_stats(alpha = 0.2)
Out[11]:
Test Test Name Property Setting Value
0 Summary Statistics Length 144.0
1 Summary Statistics Mean 280.298611
2 Summary Statistics Median 265.5
3 Summary Statistics Standard Deviation 119.966317
4 Summary Statistics Variance 14391.917201
5 Summary Statistics Kurtosis -0.364942
6 Summary Statistics Skewness 0.58316
7 Summary Statistics # Distinct Values 118.0
8 White Noise Ljung-Box Test Statistic {'alpha': 0.2, 'K': 24} 1606.083817
9 White Noise Ljung-Box Test Statistic {'alpha': 0.2, 'K': 48} 1933.155822
10 White Noise Ljung-Box p-value {'alpha': 0.2, 'K': 24} 0.0
11 White Noise Ljung-Box p-value {'alpha': 0.2, 'K': 48} 0.0
12 White Noise Ljung-Box White Noise {'alpha': 0.2, 'K': 24} False
13 White Noise Ljung-Box White Noise {'alpha': 0.2, 'K': 48} False
14 Stationarity ADF Stationarity {'alpha': 0.2} False
15 Stationarity ADF p-value {'alpha': 0.2} 0.99188
16 Stationarity ADF Test Statistic {'alpha': 0.2} 0.815369
17 Stationarity ADF Critical Value 1% {'alpha': 0.2} -3.481682
18 Stationarity ADF Critical Value 5% {'alpha': 0.2} -2.884042
19 Stationarity ADF Critical Value 10% {'alpha': 0.2} -2.57877
20 Stationarity KPSS Trend Stationarity {'alpha': 0.2} False
21 Stationarity KPSS p-value {'alpha': 0.2} 0.1
22 Stationarity KPSS Test Statistic {'alpha': 0.2} 0.09615
23 Stationarity KPSS Critical Value 10% {'alpha': 0.2} 0.119
24 Stationarity KPSS Critical Value 5% {'alpha': 0.2} 0.146
25 Stationarity KPSS Critical Value 2.5% {'alpha': 0.2} 0.176
26 Stationarity KPSS Critical Value 1% {'alpha': 0.2} 0.216
27 Normality Shapiro Normality {'alpha': 0.2} False
28 Normality Shapiro p-value {'alpha': 0.2} 0.000068
In [12]:
# Options are: 'all', 'summary', 'white_noise', 'stationarity', 'adf', 'kpss', 'normality'
exp.check_stats(test="summary")
Out[12]:
Test Test Name Property Setting Value
0 Summary Statistics Length 144.000000
1 Summary Statistics Mean 280.298611
2 Summary Statistics Median 265.500000
3 Summary Statistics Standard Deviation 119.966317
4 Summary Statistics Variance 14391.917201
5 Summary Statistics Kurtosis -0.364942
6 Summary Statistics Skewness 0.583160
7 Summary Statistics # Distinct Values 118.000000
In [13]:
# The Setting column shows the alpha value used (for most tests); for the white noise test, it also shows the lags used
exp.check_stats(test='stationarity')
Out[13]:
Test Test Name Property Setting Value
0 Stationarity ADF Stationarity {'alpha': 0.05} False
1 Stationarity ADF p-value {'alpha': 0.05} 0.99188
2 Stationarity ADF Test Statistic {'alpha': 0.05} 0.815369
3 Stationarity ADF Critical Value 1% {'alpha': 0.05} -3.481682
4 Stationarity ADF Critical Value 5% {'alpha': 0.05} -2.884042
5 Stationarity ADF Critical Value 10% {'alpha': 0.05} -2.57877
6 Stationarity KPSS Trend Stationarity {'alpha': 0.05} True
7 Stationarity KPSS p-value {'alpha': 0.05} 0.1
8 Stationarity KPSS Test Statistic {'alpha': 0.05} 0.09615
9 Stationarity KPSS Critical Value 10% {'alpha': 0.05} 0.119
10 Stationarity KPSS Critical Value 5% {'alpha': 0.05} 0.146
11 Stationarity KPSS Critical Value 2.5% {'alpha': 0.05} 0.176
12 Stationarity KPSS Critical Value 1% {'alpha': 0.05} 0.216
In [14]:
# For the white noise test, the Setting column also shows the lags (K) used in the test
exp.check_stats(test='white_noise')
Out[14]:
Test Test Name Property Setting Value
0 White Noise Ljung-Box Test Statistic {'alpha': 0.05, 'K': 24} 1606.083817
1 White Noise Ljung-Box Test Statistic {'alpha': 0.05, 'K': 48} 1933.155822
2 White Noise Ljung-Box p-value {'alpha': 0.05, 'K': 24} 0.0
3 White Noise Ljung-Box p-value {'alpha': 0.05, 'K': 48} 0.0
4 White Noise Ljung-Box White Noise {'alpha': 0.05, 'K': 24} False
5 White Noise Ljung-Box White Noise {'alpha': 0.05, 'K': 48} False
In [15]:
# You can change alpha if needed (though this is not generally recommended)
exp.check_stats(test='stationarity', alpha = 0.2)
Out[15]:
Test Test Name Property Setting Value
0 Stationarity ADF Stationarity {'alpha': 0.2} False
1 Stationarity ADF p-value {'alpha': 0.2} 0.99188
2 Stationarity ADF Test Statistic {'alpha': 0.2} 0.815369
3 Stationarity ADF Critical Value 1% {'alpha': 0.2} -3.481682
4 Stationarity ADF Critical Value 5% {'alpha': 0.2} -2.884042
5 Stationarity ADF Critical Value 10% {'alpha': 0.2} -2.57877
6 Stationarity KPSS Trend Stationarity {'alpha': 0.2} False
7 Stationarity KPSS p-value {'alpha': 0.2} 0.1
8 Stationarity KPSS Test Statistic {'alpha': 0.2} 0.09615
9 Stationarity KPSS Critical Value 10% {'alpha': 0.2} 0.119
10 Stationarity KPSS Critical Value 5% {'alpha': 0.2} 0.146
11 Stationarity KPSS Critical Value 2.5% {'alpha': 0.2} 0.176
12 Stationarity KPSS Critical Value 1% {'alpha': 0.2} 0.216
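The test families can also be scripted. A minimal sketch that runs each one in turn (as the cells above show, check_stats displays and returns the results table):

# Run each test family separately; test='all' combines them as in cell [11]
for test in ["summary", "white_noise", "stationarity", "normality"]:
    results = exp.check_stats(test=test)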

Flow Example

Common Setup

In [16]:
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh, fold=fold, session_id=42)
  Description Value
0 session_id 42
1 Original Data (144, 1)
2 Missing Values False
3 Transformed Train Set (132,)
4 Transformed Test Set (12,)
5 Fold Generator ExpandingWindowSplitter
6 Fold Number 3
7 Enforce Prediction Interval False
8 Seasonal Period Tested 12
9 Seasonality Detected True
10 Target Strictly Positive True
11 Target White Noise No
12 Recommended d 1
13 Recommended Seasonal D 1
14 CPU Jobs -1
15 Use GPU False
16 Log Experiment False
17 Experiment Name ts-default-name
18 USI ec65
19 Imputation Type simple
Out[16]:
<pycaret.internal.pycaret_experiment.time_series_experiment.TimeSeriesExperiment at 0x1aeb11a47c8>
In [17]:
y_train = exp.get_config("y_train")
y_test = exp.get_config("y_test")
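A quick sanity check confirms that these match the split reported by setup, i.e. a (132,) train set and a (12,) test set:

print(len(y_train), len(y_test))  # expected: 132 12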

Manual Create

Classical Statistical Models

In [18]:
model = exp.create_model("exp_smooth")
y_predict = exp.predict_model(model)
# Plot the out-of-sample forecasts
exp.plot_model(estimator=model)
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 14.5613 18.7758 0.0366 0.0376 0.8852
1 1957-12 15.5549 18.2244 0.0420 0.0411 0.9130
2 1958-12 20.2153 22.3871 0.0479 0.0494 0.8879
Mean NaT 16.7772 19.7958 0.0422 0.0427 0.8954
SD NaT 2.4647 1.8461 0.0046 0.0049 0.0125
  Model MAE RMSE MAPE SMAPE R2
0 Exponential Smoothing 10.3023 15.8096 0.0221 0.0216 0.9549
Out[18]:
'Out-of-Sample Forecast Plot.html'
In [19]:
## NOTE: Models that support prediction intervals will plot them by default
model_pi = exp.create_model("arima")
exp.plot_model(estimator=model_pi)
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 13.0286 16.1485 0.0327 0.0334 0.9151
1 1957-12 18.2920 20.3442 0.0506 0.0491 0.8916
2 1958-12 28.6999 30.1669 0.0671 0.0697 0.7964
Mean NaT 20.0069 22.2199 0.0501 0.0507 0.8677
SD NaT 6.5117 5.8746 0.0141 0.0148 0.0513
Out[19]:
'Out-of-Sample Forecast Plot.html'
In [20]:
# Check Goodness of Fit
exp.check_stats(model)
Out[20]:
Test Test Name Property Setting Value
0 Summary Statistics Length 132.0
1 Summary Statistics Mean -0.080292
2 Summary Statistics Median -1.384793
3 Summary Statistics Standard Deviation 9.804878
4 Summary Statistics Variance 96.135641
5 Summary Statistics Kurtosis 0.486276
6 Summary Statistics Skewness 0.09929
7 Summary Statistics # Distinct Values 132.0
8 White Noise Ljung-Box Test Statistic {'alpha': 0.05, 'K': 24} 42.000662
9 White Noise Ljung-Box Test Statistic {'alpha': 0.05, 'K': 48} 67.675505
10 White Noise Ljung-Box p-value {'alpha': 0.05, 'K': 24} 0.012903
11 White Noise Ljung-Box p-value {'alpha': 0.05, 'K': 48} 0.032078
12 White Noise Ljung-Box White Noise {'alpha': 0.05, 'K': 24} False
13 White Noise Ljung-Box White Noise {'alpha': 0.05, 'K': 48} False
14 Stationarity ADF Stationarity {'alpha': 0.05} True
15 Stationarity ADF p-value {'alpha': 0.05} 0.0
16 Stationarity ADF Test Statistic {'alpha': 0.05} -6.265436
17 Stationarity ADF Critical Value 1% {'alpha': 0.05} -3.482088
18 Stationarity ADF Critical Value 5% {'alpha': 0.05} -2.884219
19 Stationarity ADF Critical Value 10% {'alpha': 0.05} -2.578864
20 Stationarity KPSS Trend Stationarity {'alpha': 0.05} True
21 Stationarity KPSS p-value {'alpha': 0.05} 0.1
22 Stationarity KPSS Test Statistic {'alpha': 0.05} 0.035309
23 Stationarity KPSS Critical Value 10% {'alpha': 0.05} 0.119
24 Stationarity KPSS Critical Value 5% {'alpha': 0.05} 0.146
25 Stationarity KPSS Critical Value 2.5% {'alpha': 0.05} 0.176
26 Stationarity KPSS Critical Value 1% {'alpha': 0.05} 0.216
27 Normality Shapiro Normality {'alpha': 0.05} True
28 Normality Shapiro p-value {'alpha': 0.05} 0.138945
In [21]:
# Plot goodness of fit (plots are based on the model's in-sample residuals)
exp.plot_model(model, plot='residuals')
exp.plot_model(model, plot='diagnostics')
exp.plot_model(model, plot='insample')
Out[21]:
'In-Sample Forecast Plot.html'
In [22]:
# Compare Model residual ACF/PACF to original Time Series ACF/PACF
# 1. Do you see any visible trend or seasonality component that has not been captured in the model (i.e. still visible in the residual ACF/PACF)?
exp.plot_model(plot='acf')
exp.plot_model(model, plot='acf')

exp.plot_model(plot='pacf')
exp.plot_model(model, plot='pacf')
Out[22]:
'Partial Auto Correlation (PACF).html'
In [23]:
# Check the decomposition of the residuals
# 1. Is the residual in the decomposition the largest component?
# 2. Do you see any visible trend or seasonality component that has not been captured in the model?
exp.plot_model(model, plot="decomp_classical")
exp.plot_model(model, plot="decomp_stl")
Out[23]:
'Decomposition STL.html'
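All of these residual checks take the fitted model as the estimator, so they can be rerun compactly. A sketch looping over the plot names used above:

# Residual diagnostics for the fitted model in one pass
for plot in ["residuals", "diagnostics", "insample", "acf", "pacf",
             "decomp_classical", "decomp_stl"]:
    exp.plot_model(model, plot=plot)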
In [24]:
# Fixed Grid Search
tuned_model = exp.tune_model(model)
print(model)
print(tuned_model)
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 10.5620 13.4978 0.0272 0.0273 0.9407
1 1957-12 26.2572 30.0651 0.0738 0.0704 0.7632
2 1958-12 11.2644 13.4112 0.0261 0.0265 0.9598
Mean NaT 16.0279 18.9914 0.0424 0.0414 0.8879
SD NaT 7.2389 7.8304 0.0222 0.0205 0.0885
ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='mul', sp=12,
                     trend='add', use_boxcox=None)
ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='add', sp=12,
                     trend='add', use_boxcox=True)
In [25]:
# Random Grid Search
tuned_model = exp.tune_model(model, search_algorithm="random")
print(model)
print(tuned_model)
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 10.5620 13.4978 0.0272 0.0273 0.9407
1 1957-12 26.2572 30.0651 0.0738 0.0704 0.7632
2 1958-12 11.2644 13.4112 0.0261 0.0265 0.9598
Mean NaT 16.0279 18.9914 0.0424 0.0414 0.8879
SD NaT 7.2389 7.8304 0.0222 0.0205 0.0885
ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='mul', sp=12,
                     trend='add', use_boxcox=None)
ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='add', sp=12,
                     trend='add', use_boxcox=True)
In [26]:
y_predict = exp.predict_model(tuned_model)
exp.plot_model(estimator=tuned_model)
  Model MAE RMSE MAPE SMAPE R2
0 Exponential Smoothing 17.8363 22.7139 0.0375 0.0364 0.9069
Out[26]:
'Out-of-Sample Forecast Plot.html'
In [27]:
# Random Grid Search with different number of iterations
tuned_model = exp.tune_model(model, search_algorithm="random", n_iter=5)
print(model)
print(tuned_model)
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 10.5620 13.4978 0.0272 0.0273 0.9407
1 1957-12 26.2572 30.0651 0.0738 0.0704 0.7632
2 1958-12 11.2644 13.4112 0.0261 0.0265 0.9598
Mean NaT 16.0279 18.9914 0.0424 0.0414 0.8879
SD NaT 7.2389 7.8304 0.0222 0.0205 0.0885
ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='mul', sp=12,
                     trend='add', use_boxcox=None)
ExponentialSmoothing(damped_trend=False, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', seasonal='add', sp=12,
                     trend='add', use_boxcox=True)
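Besides the fixed and random grids, tune_model also accepts a user-defined search space through its custom_grid argument. A hedged sketch (the keys below are ExponentialSmoothing hyperparameters; the exact grid format may vary across PyCaret versions):

# Hypothetical custom search space for the exponential smoothing model
custom_grid = {
    "trend": ["add", "mul"],
    "seasonal": ["add", "mul"],
    "use_boxcox": [True, False],
}
tuned_custom = exp.tune_model(model, custom_grid=custom_grid)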

Reduced Regression Models: Random Forest (with internal conditional deseasonalizing and detrending)

In [28]:
model = exp.create_model("rf_cds_dt")
y_predict = exp.predict_model(model)
exp.plot_model(estimator=model)
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 28.0638 40.1439 0.0678 0.0721 0.4754
1 1957-12 29.7063 38.0812 0.0738 0.0748 0.6201
2 1958-12 22.8056 35.9245 0.0470 0.0493 0.7113
Mean NaT 26.8586 38.0499 0.0629 0.0654 0.6023
SD NaT 2.9433 1.7227 0.0115 0.0115 0.0971
  Model MAE RMSE MAPE SMAPE R2
0 RandomForestRegressor 32.1796 46.0571 0.0606 0.0637 0.6171
Out[28]:
'Out-of-Sample Forecast Plot.html'
In [29]:
# Fixed Grid Search
tuned_model = exp.tune_model(model)
print(model)
print(tuned_model)
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 20.1918 26.1257 0.0502 0.0521 0.7778
1 1957-12 18.5111 22.5684 0.0496 0.0482 0.8666
2 1958-12 18.1008 22.7550 0.0396 0.0405 0.8842
Mean NaT 18.9346 23.8164 0.0465 0.0470 0.8428
SD NaT 0.9046 1.6347 0.0049 0.0048 0.0465
BaseCdsDtForecaster(degree=1, deseasonal_model='additive',
                    regressor=RandomForestRegressor(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    criterion='mse',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    n_estimators=100, n_jobs=-1,
                                                    oob_score=False,
                                                    random_state=42, verbose=0,
                                                    warm_start=False),
                    sp=1, window_length=10)
BaseCdsDtForecaster(degree=1, deseasonal_model='multiplicative',
                    regressor=RandomForestRegressor(bootstrap=False,
                                                    ccp_alpha=0.0,
                                                    criterion='mse',
                                                    max_depth=8,
                                                    max_features='sqrt',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.21257793724562235,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    n_estimators=63, n_jobs=-1,
                                                    oob_score=False,
                                                    random_state=42, verbose=0,
                                                    warm_start=False),
                    sp=24, window_length=15)
In [30]:
# Random Grid Search
tuned_model = exp.tune_model(model, search_algorithm="random")
print(model)
print(tuned_model)
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 20.1918 26.1257 0.0502 0.0521 0.7778
1 1957-12 18.5111 22.5684 0.0496 0.0482 0.8666
2 1958-12 18.1008 22.7550 0.0396 0.0405 0.8842
Mean NaT 18.9346 23.8164 0.0465 0.0470 0.8428
SD NaT 0.9046 1.6347 0.0049 0.0048 0.0465
BaseCdsDtForecaster(degree=1, deseasonal_model='additive',
                    regressor=RandomForestRegressor(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    criterion='mse',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    n_estimators=100, n_jobs=-1,
                                                    oob_score=False,
                                                    random_state=42, verbose=0,
                                                    warm_start=False),
                    sp=1, window_length=10)
BaseCdsDtForecaster(degree=1, deseasonal_model='multiplicative',
                    regressor=RandomForestRegressor(bootstrap=False,
                                                    ccp_alpha=0.0,
                                                    criterion='mse',
                                                    max_depth=8,
                                                    max_features='sqrt',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.21257793724562235,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    n_estimators=63, n_jobs=-1,
                                                    oob_score=False,
                                                    random_state=42, verbose=0,
                                                    warm_start=False),
                    sp=24, window_length=15)
In [31]:
y_predict = exp.predict_model(tuned_model)
exp.plot_model(estimator=tuned_model)
  Model MAE RMSE MAPE SMAPE R2
0 RandomForestRegressor 26.7246 34.0891 0.0513 0.0532 0.7902
Out[31]:
'Out-of-Sample Forecast Plot.html'

Getting Ready for Productionization

Finalizing Models

In [32]:
model = exp.create_model("ets")
tuned_model = exp.tune_model(model, search_algorithm='grid')

# Train the model with the best hyperparameters on the entire dataset
final_model = exp.finalize_model(tuned_model)
exp.plot_model(final_model)
exp.predict_model(final_model)
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 14.5582 18.7725 0.0366 0.0376 0.8853
1 1957-12 16.8005 19.3265 0.0458 0.0447 0.9021
2 1958-12 20.8908 23.4318 0.0495 0.0512 0.8772
Mean NaT 17.4165 20.5103 0.0440 0.0445 0.8882
SD NaT 2.6217 2.0782 0.0054 0.0055 0.0104
Out[32]:
1961-01    445.4229
1961-02    418.3921
1961-03    464.7036
1961-04    494.5817
1961-05    505.5179
1961-06    573.3778
1961-07    663.6585
1961-08    654.8065
1961-09    546.7023
1961-10    488.2774
1961-11    415.7382
1961-12    460.1488
Freq: M, Name: Number of airline passengers, dtype: float64
In [33]:
## NOTE: You can also choose to predict further out in the horizon (if needed)
# Example here shows forecasting out 36 months instead of the default of 12
exp.plot_model(estimator=final_model, data_kwargs={'fh': 36})
Out[33]:
'Out-of-Sample Forecast Plot.html'

Save Model Pickle File

In [34]:
exp.save_model(final_model, "my_final_model")
Transformation Pipeline and Model Successfully Saved
Out[34]:
(AutoETS(additive_only=False, allow_multiplicative_trend=False, auto=False,
         bounds=None, callback=None, damped_trend=False, dates=None, disp=False,
         error='add', freq=None, full_output=True, ignore_inf_ic=True,
         information_criterion='aic', initial_level=None, initial_seasonal=None,
         initial_trend=None, initialization_method='estimated', maxiter=1000,
         missing='none', n_jobs=None, restrict=True, return_params=False,
         seasonal='mul', sp=12, start_params=None, trend='add'),
 'my_final_model.pkl')

Load Model

This is usually done in another session

In [35]:
exp_load = TimeSeriesExperiment()
loaded_model = exp_load.load_model("my_final_model")
Transformation Pipeline and Model Successfully Loaded
In [36]:
# Should match predictions from before the save and load
exp_load.predict_model(loaded_model)
Out[36]:
1961-01    445.4229
1961-02    418.3921
1961-03    464.7036
1961-04    494.5817
1961-05    505.5179
1961-06    573.3778
1961-07    663.6585
1961-08    654.8065
1961-09    546.7023
1961-10    488.2774
1961-11    415.7382
1961-12    460.1488
Freq: M, dtype: float64
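To verify the round trip programmatically rather than by eye, compare the two prediction series directly. A sketch (values are compared rather than full Series, since the loaded predictions drop the Name attribute, as seen above):

preds_before = exp.predict_model(final_model)
preds_after = exp_load.predict_model(loaded_model)
assert np.allclose(preds_before.values, preds_after.values)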

Auto Create

Compare Models

In [37]:
best_baseline_models = exp.compare_models(fold=fold, sort='smape', n_select=3)
best_baseline_models
  Model MAE RMSE MAPE SMAPE R2 TT (Sec)
exp_smooth Exponential Smoothing 16.7772 19.7958 0.0422 0.0427 0.8954 0.1067
ets ETS 17.4165 20.5103 0.0440 0.0445 0.8882 0.1567
arima ARIMA 20.0069 22.2199 0.0501 0.0507 0.8677 0.0500
auto_arima Auto ARIMA 21.0297 23.4661 0.0525 0.0531 0.8509 2.8300
et_cds_dt Extra Trees w/ Cond. Deseasonalize & Detrending 24.4233 31.4395 0.0584 0.0601 0.7169 0.5167
xgboost_cds_dt Extreme Gradient Boosting w/ Cond. Deseasonalize & Detrending 24.8102 31.3995 0.0613 0.0631 0.6888 0.1533
knn_cds_dt K Neighbors w/ Cond. Deseasonalize & Detrending 25.8293 34.9633 0.0617 0.0641 0.6260 0.4533
rf_cds_dt Random Forest w/ Cond. Deseasonalize & Detrending 26.8586 38.0499 0.0629 0.0654 0.6023 0.5467
ada_cds_dt AdaBoost w/ Cond. Deseasonalize & Detrending 27.8200 37.7910 0.0661 0.0686 0.6015 0.0633
theta Theta Forecaster 28.3192 33.8639 0.0670 0.0700 0.6710 0.0133
gbr_cds_dt Gradient Boosting w/ Cond. Deseasonalize & Detrending 29.1314 38.3080 0.0685 0.0715 0.5855 0.0467
lightgbm_cds_dt Light Gradient Boosting w/ Cond. Deseasonalize & Detrending 28.7738 36.2392 0.0698 0.0722 0.6255 0.0233
br_cds_dt Bayesian Ridge w/ Cond. Deseasonalize & Detrending 32.0341 39.2191 0.0799 0.0818 0.5658 0.0133
lasso_cds_dt Lasso w/ Cond. Deseasonalize & Detrending 32.8026 39.2084 0.0823 0.0841 0.5678 0.0133
en_cds_dt Elastic Net w/ Cond. Deseasonalize & Detrending 32.8556 39.2557 0.0825 0.0843 0.5669 0.0167
ridge_cds_dt Ridge w/ Cond. Deseasonalize & Detrending 32.9702 39.3452 0.0828 0.0846 0.5652 0.0133
lr_cds_dt Linear w/ Cond. Deseasonalize & Detrending 32.9708 39.3456 0.0828 0.0846 0.5652 0.0167
dt_cds_dt Decision Tree w/ Cond. Deseasonalize & Detrending 35.1446 45.8861 0.0826 0.0869 0.4284 0.0167
snaive Seasonal Naive Forecaster 33.3611 35.9139 0.0832 0.0879 0.6072 0.0133
huber_cds_dt Huber w/ Cond. Deseasonalize & Detrending 35.4709 41.1489 0.0910 0.0936 0.5226 0.0233
lar_cds_dt Least Angular Regressor w/ Cond. Deseasonalize & Detrending 36.5285 42.4001 0.0936 0.0945 0.5058 0.0133
llar_cds_dt Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending 46.7239 63.1706 0.1109 0.1165 -0.0733 0.0133
omp_cds_dt Orthogonal Matching Pursuit w/ Cond. Deseasonalize & Detrending 47.2799 64.5891 0.1110 0.1177 -0.1201 0.0133
polytrend Polynomial Trend Forecaster 48.6301 63.4299 0.1170 0.1216 -0.0784 0.0067
naive Naive Forecaster 69.0278 91.0322 0.1569 0.1792 -1.2216 0.0133
par_cds_dt Passive Aggressive w/ Cond. Deseasonalize & Detrending 78.0396 95.4251 0.2137 0.2531 -3.0784 0.0133
grand_means Grand Means Forecaster 162.4117 173.6492 0.4000 0.5075 -7.0462 0.0100
Out[37]:
[ExponentialSmoothing(damped_trend=False, initial_level=None,
                      initial_seasonal=None, initial_trend=None,
                      initialization_method='estimated', seasonal='mul', sp=12,
                      trend='add', use_boxcox=None),
 AutoETS(additive_only=False, allow_multiplicative_trend=False, auto=False,
         bounds=None, callback=None, damped_trend=False, dates=None, disp=False,
         error='add', freq=None, full_output=True, ignore_inf_ic=True,
         information_criterion='aic', initial_level=None, initial_seasonal=None,
         initial_trend=None, initialization_method='estimated', maxiter=1000,
         missing='none', n_jobs=None, restrict=True, return_params=False,
         seasonal='mul', sp=12, start_params=None, trend='add'),
 ARIMA(maxiter=50, method='lbfgs', order=(1, 0, 0), out_of_sample_size=0,
       scoring='mse', scoring_args=None, seasonal_order=(0, 1, 0, 12),
       start_params=None, suppress_warnings=False, trend=None,
       with_intercept=True)]
In [38]:
compare_metrics = exp.pull()
compare_metrics
Out[38]:
Model MAE RMSE MAPE SMAPE R2 TT (Sec)
exp_smooth Exponential Smoothing 16.7772 19.7958 0.0422 0.0427 0.8954 0.1067
ets ETS 17.4165 20.5103 0.044 0.0445 0.8882 0.1567
arima ARIMA 20.0069 22.2199 0.0501 0.0507 0.8677 0.0500
auto_arima Auto ARIMA 21.0297 23.4661 0.0525 0.0531 0.8509 2.8300
et_cds_dt Extra Trees w/ Cond. Deseasonalize & Detrending 24.4233 31.4395 0.0584 0.0601 0.7169 0.5167
xgboost_cds_dt Extreme Gradient Boosting w/ Cond. Deseasonali... 24.8102 31.3995 0.0613 0.0631 0.6888 0.1533
knn_cds_dt K Neighbors w/ Cond. Deseasonalize & Detrending 25.8293 34.9633 0.0617 0.0641 0.626 0.4533
rf_cds_dt Random Forest w/ Cond. Deseasonalize & Detrending 26.8586 38.0499 0.0629 0.0654 0.6023 0.5467
ada_cds_dt AdaBoost w/ Cond. Deseasonalize & Detrending 27.82 37.791 0.0661 0.0686 0.6015 0.0633
theta Theta Forecaster 28.3192 33.8639 0.067 0.07 0.671 0.0133
gbr_cds_dt Gradient Boosting w/ Cond. Deseasonalize & Det... 29.1314 38.308 0.0685 0.0715 0.5855 0.0467
lightgbm_cds_dt Light Gradient Boosting w/ Cond. Deseasonalize... 28.7738 36.2392 0.0698 0.0722 0.6255 0.0233
br_cds_dt Bayesian Ridge w/ Cond. Deseasonalize & Detren... 32.0341 39.2191 0.0799 0.0818 0.5658 0.0133
lasso_cds_dt Lasso w/ Cond. Deseasonalize & Detrending 32.8026 39.2084 0.0823 0.0841 0.5678 0.0133
en_cds_dt Elastic Net w/ Cond. Deseasonalize & Detrending 32.8556 39.2557 0.0825 0.0843 0.5669 0.0167
ridge_cds_dt Ridge w/ Cond. Deseasonalize & Detrending 32.9702 39.3452 0.0828 0.0846 0.5652 0.0133
lr_cds_dt Linear w/ Cond. Deseasonalize & Detrending 32.9708 39.3456 0.0828 0.0846 0.5652 0.0167
dt_cds_dt Decision Tree w/ Cond. Deseasonalize & Detrending 35.1446 45.8861 0.0826 0.0869 0.4284 0.0167
snaive Seasonal Naive Forecaster 33.3611 35.9139 0.0832 0.0879 0.6072 0.0133
huber_cds_dt Huber w/ Cond. Deseasonalize & Detrending 35.4709 41.1489 0.091 0.0936 0.5226 0.0233
lar_cds_dt Least Angular Regressor w/ Cond. Deseasonalize... 36.5285 42.4001 0.0936 0.0945 0.5058 0.0133
llar_cds_dt Lasso Least Angular Regressor w/ Cond. Deseaso... 46.7239 63.1706 0.1109 0.1165 -0.0733 0.0133
omp_cds_dt Orthogonal Matching Pursuit w/ Cond. Deseasona... 47.2799 64.5891 0.111 0.1177 -0.1201 0.0133
polytrend Polynomial Trend Forecaster 48.6301 63.4299 0.117 0.1216 -0.0784 0.0067
naive Naive Forecaster 69.0278 91.0322 0.1569 0.1792 -1.2216 0.0133
par_cds_dt Passive Aggressive w/ Cond. Deseasonalize & De... 78.0396 95.4251 0.2137 0.2531 -3.0784 0.0133
grand_means Grand Means Forecaster 162.4117 173.6492 0.4 0.5075 -7.0462 0.0100
  • Note that some models like BATS and TBATS are disabled by default.
  • You can enable them by setting turbo = False
In [39]:
# _ = exp.compare_models(fold=fold, sort='smape', n_select=3, turbo=False)
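compare_models can also be restricted to a subset of models via its include argument (or the converse exclude). A sketch using model IDs from the table above:

# Compare only a hypothetical subset of the classical statistical models
best_stat_model = exp.compare_models(include=["exp_smooth", "ets", "arima", "theta"],
                                     fold=fold, sort="smape")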

Tune Best Models

In [40]:
best_tuned_models = [exp.tune_model(model) for model in best_baseline_models]
best_tuned_models
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 13.2626 16.6689 0.0331 0.0339 0.9096
1 1957-12 19.1686 21.3384 0.0530 0.0513 0.8807
2 1958-12 21.1925 23.4747 0.0491 0.0506 0.8767
Mean NaT 17.8746 20.4940 0.0451 0.0453 0.8890
SD NaT 3.3642 2.8419 0.0086 0.0080 0.0146
Out[40]:
[ExponentialSmoothing(damped_trend=False, initial_level=None,
                      initial_seasonal=None, initial_trend=None,
                      initialization_method='estimated', seasonal='add', sp=12,
                      trend='add', use_boxcox=True),
 AutoETS(additive_only=False, allow_multiplicative_trend=False, auto=False,
         bounds=None, callback=None, damped_trend=False, dates=None, disp=False,
         error='add', freq=None, full_output=True, ignore_inf_ic=True,
         information_criterion='aic', initial_level=None, initial_seasonal=None,
         initial_trend=None, initialization_method='estimated', maxiter=1000,
         missing='none', n_jobs=None, restrict=True, return_params=False,
         seasonal='mul', sp=12, start_params=None, trend='add'),
 ARIMA(maxiter=50, method='lbfgs', order=(0, 0, 0), out_of_sample_size=0,
       scoring='mse', scoring_args=None, seasonal_order=(1, 1, 0, 12),
       start_params=None, suppress_warnings=False, trend=None,
       with_intercept=True)]

Blend Best Models

Mean Blender

In [41]:
mean_blender = exp.blend_models(best_tuned_models, method='mean')
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 11.4179 15.3424 0.0284 0.0289 0.9234
1 1957-12 20.4720 23.0694 0.0570 0.0550 0.8606
2 1958-12 16.6014 19.2720 0.0386 0.0396 0.9169
Mean NaT 16.1638 19.2280 0.0413 0.0412 0.9003
SD NaT 3.7092 3.1547 0.0119 0.0107 0.0282
In [42]:
y_predict = exp.predict_model(mean_blender)
exp.plot_model(estimator=mean_blender)
  Model MAE RMSE MAPE SMAPE R2
0 EnsembleForecaster 9.9332 13.5997 0.0215 0.0213 0.9666
Out[42]:
'Out-of-Sample Forecast Plot.html'

Median Blender

In [43]:
median_blender = exp.blend_models(best_tuned_models, method='median')
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 12.1115 16.3539 0.0299 0.0306 0.9129
1 1957-12 19.8712 22.1745 0.0553 0.0535 0.8712
2 1958-12 18.2701 21.3093 0.0425 0.0438 0.8984
Mean NaT 16.7509 19.9459 0.0426 0.0426 0.8942
SD NaT 3.3451 2.5643 0.0104 0.0094 0.0173
In [44]:
y_predict = exp.predict_model(median_blender)
exp.plot_model(estimator=median_blender)
  Model MAE RMSE MAPE SMAPE R2
0 EnsembleForecaster 9.5287 15.4598 0.0203 0.0199 0.9569
Out[44]:
'Out-of-Sample Forecast Plot.html'

Voting Blender

In [45]:
top_model_metrics = compare_metrics.iloc[0:3]['SMAPE']
display(top_model_metrics)

top_model_weights = 1 - top_model_metrics/top_model_metrics.sum()
display(top_model_weights)
exp_smooth    0.0427
ets           0.0445
arima         0.0507
Name: SMAPE, dtype: object
exp_smooth    0.690355
ets           0.677302
arima         0.632342
Name: SMAPE, dtype: object
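The weighting scheme inverts each model's share of the total error, so more accurate models receive larger weights. A quick check against the numbers displayed above:

smape = np.array([0.0427, 0.0445, 0.0507])  # top-3 SMAPE values from compare_models
weights = 1 - smape / smape.sum()
print(weights)  # ~[0.690355, 0.677302, 0.632342], matching the display above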
In [46]:
voting_blender = exp.blend_models(best_tuned_models, method='voting', weights=top_model_weights.values)
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 11.3553 15.3045 0.0282 0.0288 0.9238
1 1957-12 20.5390 23.1658 0.0572 0.0552 0.8594
2 1958-12 16.5430 19.1532 0.0385 0.0395 0.9179
Mean NaT 16.1458 19.2078 0.0413 0.0412 0.9004
SD NaT 3.7598 3.2096 0.0120 0.0109 0.0291
In [47]:
y_predict = exp.predict_model(voting_blender)
print(y_predict)
exp.plot_model(estimator=voting_blender)
  Model MAE RMSE MAPE SMAPE R2
0 EnsembleForecaster 9.7840 13.6464 0.0212 0.0209 0.9664
1960-01    410.5338
1960-02    389.8435
1960-03    456.2937
1960-04    444.5850
1960-05    468.1602
1960-06    533.2819
1960-07    612.9218
1960-08    620.7866
1960-09    511.5497
1960-10    450.0216
1960-11    397.7772
1960-12    436.3490
Freq: M, Name: Number of airline passengers, dtype: float64
Out[47]:
'Out-of-Sample Forecast Plot.html'

Save and Load Model

In [48]:
_ = exp.save_model(voting_blender, "my_blender")
Transformation Pipeline and Model Successfully Saved
In [49]:
loaded_exp = TimeSeriesExperiment()
m = loaded_exp.load_model("my_blender")
# Predictions should be the same as before the model was saved and loaded
loaded_exp.predict_model(m)
Transformation Pipeline and Model Successfully Loaded
Out[49]:
1960-01    410.5338
1960-02    389.8435
1960-03    456.2937
1960-04    444.5850
1960-05    468.1602
1960-06    533.2819
1960-07    612.9218
1960-08    620.7866
1960-09    511.5497
1960-10    450.0216
1960-11    397.7772
1960-12    436.3490
Freq: M, dtype: float64

Prediction Customization

In [50]:
model = exp.create_model("auto_arima")
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 12.0769 15.8014 0.0298 0.0305 0.9187
1 1957-12 19.4102 21.4989 0.0538 0.0520 0.8789
2 1958-12 31.6020 33.0981 0.0738 0.0769 0.7549
Mean NaT 21.0297 23.4661 0.0525 0.0531 0.8509
SD NaT 8.0529 7.1970 0.0180 0.0190 0.0698
In [51]:
# Default prediction
exp.predict_model(model)
  Model MAE RMSE MAPE SMAPE R2
0 Auto ARIMA 14.8982 18.5365 0.0310 0.0309 0.9380
Out[51]:
1960-01    419.9672
1960-02    399.8499
1960-03    457.9944
1960-04    444.4742
1960-05    464.7892
1960-06    514.1254
1960-07    587.8042
1960-08    597.0108
1960-09    499.5313
1960-10    442.3597
1960-11    396.4102
1960-12    438.6516
Freq: M, Name: Number of airline passengers, dtype: float64
In [52]:
# With Prediction Interval (default alpha = 0.05)
exp.predict_model(model, return_pred_int=True)
  Model MAE RMSE MAPE SMAPE R2
0 Auto ARIMA 14.8982 18.5365 0.0310 0.0309 0.9380
Out[52]:
y_pred lower upper
1960-01 419.9672 400.2603 439.6740
1960-02 399.8499 375.7392 423.9605
1960-03 457.9944 429.6697 486.3190
1960-04 444.4742 414.0056 474.9427
1960-05 464.7892 432.7993 496.7790
1960-06 514.1254 481.2385 547.0123
1960-07 587.8042 554.3237 621.2846
1960-08 597.0108 563.1606 630.8610
1960-09 499.5313 465.4410 533.6215
1960-10 442.3597 408.1167 476.6027
1960-11 396.4102 362.0686 430.7519
1960-12 438.6516 404.2466 473.0566
In [53]:
# With Prediction Interval (custom alpha = 0.2)
exp.predict_model(model, return_pred_int=True, alpha=0.2)
  Model MAE RMSE MAPE SMAPE R2
0 Auto ARIMA 14.8982 18.5365 0.0310 0.0309 0.9380
Out[53]:
y_pred lower upper
1960-01 419.9672 407.0816 432.8528
1960-02 399.8499 384.0847 415.6150
1960-03 457.9944 439.4739 476.5149
1960-04 444.4742 424.5519 464.3965
1960-05 464.7892 443.8722 485.7062
1960-06 514.1254 492.6219 535.6290
1960-07 587.8042 565.9125 609.6959
1960-08 597.0108 574.8774 619.1443
1960-09 499.5313 477.2408 521.8217
1960-10 442.3597 419.9694 464.7500
1960-11 396.4102 373.9554 418.8650
1960-12 438.6516 416.1554 461.1478
In [54]:
# Increase the forecast horizon to 2 years instead of the original 1 year
exp.predict_model(model, fh = np.arange(1, 25))
Out[54]:
1960-01    419.9672
1960-02    399.8499
1960-03    457.9944
1960-04    444.4742
1960-05    464.7892
1960-06    514.1254
1960-07    587.8042
1960-08    597.0108
1960-09    499.5313
1960-10    442.3597
1960-11    396.4102
1960-12    438.6516
1961-01    453.0077
1961-02    432.4005
1961-03    490.1513
1961-04    476.3150
1961-05    496.3762
1961-06    545.5087
1961-07    619.0237
1961-08    628.0990
1961-09    530.5139
1961-10    473.2576
1961-11    427.2400
1961-12    469.4268
Freq: M, Name: Number of airline passengers, dtype: float64
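The customizations can be combined as well. A sketch (each parameter is demonstrated individually above; combining them in one call is an assumption):

# 24-month horizon with an 80% prediction interval
exp.predict_model(model, fh=np.arange(1, 25), return_pred_int=True, alpha=0.2)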
In [55]:
# Models that do not produce prediction intervals return NA values
model = exp.create_model("lr_cds_dt")
exp.predict_model(model, return_pred_int=True)
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 38.6824 45.0820 0.0998 0.1051 0.3384
1 1957-12 28.0608 34.6867 0.0751 0.0734 0.6848
2 1958-12 32.1693 38.2681 0.0737 0.0753 0.6724
Mean NaT 32.9708 39.3456 0.0828 0.0846 0.5652
SD NaT 4.3731 4.3117 0.0120 0.0145 0.1604
  Model MAE RMSE MAPE SMAPE R2
0 LinearRegression 47.7429 55.0154 0.0965 0.1005 0.4536
Out[55]:
y_pred lower upper
1960-01 433.1925 NaN NaN
1960-02 414.2470 NaN NaN
1960-03 394.9755 NaN NaN
1960-04 374.2084 NaN NaN
1960-05 431.2220 NaN NaN
1960-06 493.9331 NaN NaN
1960-07 527.8974 NaN NaN
1960-08 512.4796 NaN NaN
1960-09 456.4409 NaN NaN
1960-10 436.8202 NaN NaN
1960-11 439.0850 NaN NaN
1960-12 460.3670 NaN NaN

Types of Window Splitters

Sliding Window Splitter

In [56]:
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh, fold=fold, fold_strategy='sliding')
model = exp.create_model("ets")
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 14.5582 18.7725 0.0366 0.0376 0.8853
1 1957-12 16.5507 19.0043 0.0452 0.0441 0.9054
2 1958-12 35.6983 39.1514 0.0808 0.0847 0.6571
Mean NaT 22.2691 25.6428 0.0542 0.0555 0.8159
SD NaT 9.5307 9.5525 0.0191 0.0208 0.1126

Expanding/Rolling Window

  • In PyCaret, the 'expanding' and 'rolling' strategies are identical (note the matching fold metrics below)
In [57]:
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh, fold=fold, fold_strategy='expanding')
model = exp.create_model("ets")
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 14.5582 18.7725 0.0366 0.0376 0.8853
1 1957-12 16.8005 19.3265 0.0458 0.0447 0.9021
2 1958-12 20.8908 23.4318 0.0495 0.0512 0.8772
Mean NaT 17.4165 20.5103 0.0440 0.0445 0.8882
SD NaT 2.6217 2.0782 0.0054 0.0055 0.0104
In [58]:
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=fh, fold=fold, fold_strategy='rolling')
model = exp.create_model("ets")
  cutoff MAE RMSE MAPE SMAPE R2
0 1956-12 14.5582 18.7725 0.0366 0.0376 0.8853
1 1957-12 16.8005 19.3265 0.0458 0.0447 0.9021
2 1958-12 20.8908 23.4318 0.0495 0.0512 0.8772
Mean NaT 17.4165 20.5103 0.0440 0.0445 0.8882
SD NaT 2.6217 2.0782 0.0054 0.0055 0.0104
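A compact way to compare all three strategies side by side is to loop over them and pull the CV table each time. A sketch (same data, horizon, folds, and seed throughout):

for strategy in ["expanding", "rolling", "sliding"]:
    exp = TimeSeriesExperiment()
    exp.setup(data=y, fh=fh, fold=fold, fold_strategy=strategy, session_id=42)
    exp.create_model("ets")
    cv_results = exp.pull()  # per-fold metrics for this strategy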

Error Handling

In [59]:
try:
    exp = TimeSeriesExperiment()
    exp.setup(data=y, fh=17, fold=76, fold_strategy='expanding')
except ValueError as error:
    print(error)
Initiated . . . . . . . . . . . . . . . . . . 17:22:40
Status . . . . . . . . . . . . . . . . . . Preprocessing Data
Not Enough Data Points, set a lower number of folds or fh
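The failure here is a feasibility check: 144 monthly observations cannot support 76 expanding folds with a 17-step horizon. A rough back-of-the-envelope sketch (an approximation of the constraint, not PyCaret's exact rule):

# Approximate requirement: one fh-length window per CV fold plus the final test set (assumed, not exact)
n_obs, fh_, n_folds = 144, 17, 76
approx_needed = fh_ * (n_folds + 1)
print(f"~{approx_needed} points needed vs {n_obs} available")  # ~1309 vs 144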