from google.colab import drive
drive.mount('/content/drive')
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly Enter your authorization code: ·········· Mounted at /content/drive
!pip install pycaret==2.0
from pycaret.utils import enable_colab
enable_colab()
Colab mode activated.
from pycaret.regression import *
# importing the required libraries
import pandas as pd
import numpy as np
# Visualisation libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import folium
from folium import plugins
# Manipulating the default plot size
plt.rcParams['figure.figsize'] = 10, 12
# Disable warnings
import warnings
warnings.filterwarnings('ignore')
# Reading a .csv file by creating a dataframe using pandas
# Reading the datasets
df= pd.read_csv('pakistan.csv', encoding = "ISO-8859-1", parse_dates=["Date"])
dt = df.copy()
dt.head()
Date | Cases | Deaths | Recovered | Travel_history | Province | City | |
---|---|---|---|---|---|---|---|
0 | 2020-02-26 | 1 | 0 | 0 | China | Islamabad Capital Territory | Islamabad |
1 | 2020-02-26 | 2 | 0 | 0 | Iran/Taftan | Sindh | Karachi |
2 | 2020-02-29 | 1 | 0 | 0 | China | Islamabad Capital Territory | Islamabad |
3 | 2020-02-29 | 1 | 0 | 0 | Iran/Taftan | Sindh | Karachi |
4 | 2020-03-02 | 1 | 0 | 0 | Iran/Taftan | Gilgit-Baltistan | Gilgit |
Travel history has less records, we will fill NAs with Unknown
dt['Travel_history'].unique
dt['Travel_history'].fillna('Unknown', inplace=True)
Type casting variables and fixing one Province value
dt = dt.sort_values('Date')
dt['Deaths']=dt['Deaths'].astype(int)
dt['Cases']=dt['Cases'].astype(int)
dt['Recovered']=dt['Recovered'].astype(int)
dt.loc[dt.Province == "khyber Pakhtunkhwa", "Province"] = "Khyber Pakhtunkhwa"
dt.loc[dt.Travel_history == "Tableegi Jamaat", "Travel_history"] = "Tableeghi Jamaat"
Few new features extracted
pdc = dt.groupby('Date')['Cases'].sum().reset_index()
pdd = dt.groupby('Date')['Deaths'].sum().reset_index()#.drop('Date', axis=1)
pdr = dt.groupby('Date')['Recovered'].sum().reset_index()#.reset_index()#.drop('Date', axis=1)
p = pd.DataFrame(pdc)
p['Deaths'] = pdd['Deaths']
p['Recovered'] = pdr['Recovered']
#Cumulative Sum
p['Cum_Cases'] = p['Cases'].cumsum()
p['Cum_Deaths'] = p['Deaths'].cumsum()
p['Cum_Recovered'] = p['Recovered'].cumsum()
del pdc, pdd, pdr
p.head()
Date | Cases | Deaths | Recovered | Cum_Cases | Cum_Deaths | Cum_Recovered | |
---|---|---|---|---|---|---|---|
0 | 2020-02-26 | 3 | 0 | 0 | 3 | 0 | 0 |
1 | 2020-02-29 | 2 | 0 | 0 | 5 | 0 | 0 |
2 | 2020-03-02 | 1 | 0 | 0 | 6 | 0 | 0 |
3 | 2020-03-06 | 0 | 0 | 1 | 6 | 0 | 1 |
4 | 2020-03-07 | 1 | 0 | 0 | 7 | 0 | 1 |
p['Dateofmonth'] = p['Date'].dt.day
p['Month'] = p['Date'].dt.month
p['Week'] = p['Date'].dt.week
p['Dayofweek'] = p['Date'].dt.dayofweek # 0 = monday.
p['Weekdayflg'] = (p['Dayofweek'] // 5 != 1).astype(float)
p['Month'] = p['Date'].dt.month
p['Quarter'] = p['Date'].dt.quarter
p['Dayofyear'] = p['Date'].dt.dayofyear
p.head(10)
Date | Cases | Deaths | Recovered | Cum_Cases | Cum_Deaths | Cum_Recovered | Dateofmonth | Month | Week | Dayofweek | Weekdayflg | Quarter | Dayofyear | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-02-26 | 3 | 0 | 0 | 3 | 0 | 0 | 26 | 2 | 9 | 2 | 1.0 | 1 | 57 |
1 | 2020-02-29 | 2 | 0 | 0 | 5 | 0 | 0 | 29 | 2 | 9 | 5 | 0.0 | 1 | 60 |
2 | 2020-03-02 | 1 | 0 | 0 | 6 | 0 | 0 | 2 | 3 | 10 | 0 | 1.0 | 1 | 62 |
3 | 2020-03-06 | 0 | 0 | 1 | 6 | 0 | 1 | 6 | 3 | 10 | 4 | 1.0 | 1 | 66 |
4 | 2020-03-07 | 1 | 0 | 0 | 7 | 0 | 1 | 7 | 3 | 10 | 5 | 0.0 | 1 | 67 |
5 | 2020-03-09 | 9 | 0 | 0 | 16 | 0 | 1 | 9 | 3 | 11 | 0 | 1.0 | 1 | 69 |
6 | 2020-03-10 | 3 | 0 | 0 | 19 | 0 | 1 | 10 | 3 | 11 | 1 | 1.0 | 1 | 70 |
7 | 2020-03-11 | 1 | 0 | 1 | 20 | 0 | 2 | 11 | 3 | 11 | 2 | 1.0 | 1 | 71 |
8 | 2020-03-12 | 1 | 0 | 0 | 21 | 0 | 2 | 12 | 3 | 11 | 3 | 1.0 | 1 | 72 |
9 | 2020-03-13 | 9 | 0 | 1 | 30 | 0 | 3 | 13 | 3 | 11 | 4 | 1.0 | 1 | 73 |
fig = go.Figure()
# Add traces
fig.add_trace(go.Scatter(x=p['Date'], y=p['Cases'],
mode='lines+markers',
name='Cases'))
fig.add_trace(go.Scatter(x=p['Date'], y=p['Deaths'],
mode='lines+markers',
name='Deaths'))
fig.add_trace(go.Scatter(x=p['Date'], y=p['Recovered'],
mode='lines+markers',
name='Recoveries'))
fig.show()
fig = go.Figure()
# Add traces
fig.add_trace(go.Scatter(x=p['Date'], y=p['Cum_Cases'],
mode='lines+markers',
name='Cases'))
fig.add_trace(go.Scatter(x=p['Date'], y=p['Cum_Deaths'],
mode='lines+markers',
name='Deaths'))
fig.add_trace(go.Scatter(x=p['Date'], y=p['Cum_Recovered'],
mode='lines+markers',
name='Recoveries'))
fig.show()
px.scatter(p, x= 'Date', y = 'Cases', trendline = "ols")
The setup() function initializes the environment in pycaret and creates the transformation pipeline to prepare the data for modeling and deployment.
# the data check
p.head()
Date | Cases | Deaths | Recovered | Cum_Cases | Cum_Deaths | Cum_Recovered | Dateofmonth | Month | Week | Dayofweek | Weekdayflg | Quarter | Dayofyear | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-02-26 | 3 | 0 | 0 | 3 | 0 | 0 | 26 | 2 | 9 | 2 | 1.0 | 1 | 57 |
1 | 2020-02-29 | 2 | 0 | 0 | 5 | 0 | 0 | 29 | 2 | 9 | 5 | 0.0 | 1 | 60 |
2 | 2020-03-02 | 1 | 0 | 0 | 6 | 0 | 0 | 2 | 3 | 10 | 0 | 1.0 | 1 | 62 |
3 | 2020-03-06 | 0 | 0 | 1 | 6 | 0 | 1 | 6 | 3 | 10 | 4 | 1.0 | 1 | 66 |
4 | 2020-03-07 | 1 | 0 | 0 | 7 | 0 | 1 | 7 | 3 | 10 | 5 | 0.0 | 1 | 67 |
session_1 = setup(p, target = 'Cases', session_id=123, log_experiment=False, experiment_name='Cases_1')
Setup Succesfully Completed.
Description | Value | |
---|---|---|
0 | session_id | 123 |
1 | Transform Target | False |
2 | Transform Target Method | None |
3 | Original Data | (67, 14) |
4 | Missing Values | False |
5 | Numeric Features | 7 |
6 | Categorical Features | 5 |
7 | Ordinal Features | False |
8 | High Cardinality Features | False |
9 | High Cardinality Method | None |
10 | Sampled Data | (67, 14) |
11 | Transformed Train Set | (46, 48) |
12 | Transformed Test Set | (21, 48) |
13 | Numeric Imputer | mean |
14 | Categorical Imputer | constant |
15 | Normalize | False |
16 | Normalize Method | None |
17 | Transformation | False |
18 | Transformation Method | None |
19 | PCA | False |
20 | PCA Method | None |
21 | PCA Components | None |
22 | Ignore Low Variance | False |
23 | Combine Rare Levels | False |
24 | Rare Level Threshold | None |
25 | Numeric Binning | False |
26 | Remove Outliers | False |
27 | Outliers Threshold | None |
28 | Remove Multicollinearity | False |
29 | Multicollinearity Threshold | None |
30 | Clustering | False |
31 | Clustering Iteration | None |
32 | Polynomial Features | False |
33 | Polynomial Degree | None |
34 | Trignometry Features | False |
35 | Polynomial Threshold | None |
36 | Group Features | False |
37 | Feature Selection | False |
38 | Features Selection Threshold | None |
39 | Feature Interaction | False |
40 | Feature Ratio | False |
41 | Interaction Threshold | None |
Comparing all models to evaluate performance is the recommended starting point for modeling once the setup is completed
best_model = compare_models()
Model | MAE | MSE | RMSE | R2 | RMSLE | MAPE | TT (Sec) | |
---|---|---|---|---|---|---|---|---|
0 | CatBoost Regressor | 120.5973 | 39922.8215 | 163.9652 | 0.7318 | 0.8684 | 2.8249 | 1.5949 |
1 | Bayesian Ridge | 138.0095 | 53817.4369 | 200.2065 | 0.6728 | 1.1582 | 7.1393 | 0.0078 |
2 | Random Forest | 126.3854 | 43667.3360 | 179.9005 | 0.6714 | 0.7102 | 1.3810 | 0.2456 |
3 | K Neighbors Regressor | 133.5430 | 51243.3982 | 198.9433 | 0.6693 | 0.6344 | 0.6081 | 0.0055 |
4 | Elastic Net | 126.5903 | 51901.8681 | 197.2560 | 0.6403 | 1.1240 | 3.3518 | 0.0118 |
5 | Random Sample Consensus | 142.1350 | 49397.1397 | 192.3590 | 0.6064 | 1.1260 | 7.1215 | 0.0593 |
6 | Extreme Gradient Boosting | 132.3220 | 47822.6505 | 191.3888 | 0.5863 | 0.8160 | 2.8054 | 0.0360 |
7 | Extra Trees Regressor | 150.9493 | 65739.5060 | 224.9121 | 0.5582 | 0.8038 | 1.5087 | 0.1430 |
8 | Gradient Boosting Regressor | 132.1435 | 47311.5032 | 188.0928 | 0.4606 | 0.8670 | 3.3893 | 0.0387 |
9 | Lasso Regression | 163.3910 | 72984.0082 | 235.9398 | 0.4300 | 1.2657 | 7.2305 | 0.0097 |
10 | Lasso Least Angle Regression | 173.7761 | 75041.3241 | 231.2355 | 0.3958 | 1.2193 | 24.6577 | 0.0102 |
11 | Huber Regressor | 174.4851 | 93967.8851 | 268.1964 | 0.3197 | 0.8453 | 1.7084 | 0.0372 |
12 | Orthogonal Matching Pursuit | 180.5522 | 78743.2570 | 262.1956 | 0.2930 | 1.1580 | 6.7201 | 0.0059 |
13 | AdaBoost Regressor | 167.7506 | 75789.9828 | 232.4567 | 0.2724 | 1.2930 | 8.4861 | 0.0647 |
14 | Ridge Regression | 187.3080 | 111690.0200 | 276.8672 | 0.2158 | 1.3528 | 7.6112 | 0.0078 |
15 | Decision Tree | 163.1450 | 75245.9550 | 238.7047 | -0.0140 | 0.9323 | 2.3295 | 0.0057 |
16 | Support Vector Machine | 334.3254 | 289348.8655 | 467.4521 | -0.4175 | 1.8441 | 15.6024 | 0.0054 |
17 | Light Gradient Boosting Machine | 237.4844 | 141728.8941 | 343.5047 | -0.4341 | 1.4472 | 10.0048 | 0.0120 |
18 | Linear Regression | 311.2436 | 340820.3783 | 438.6486 | -1.2965 | 1.5851 | 13.4483 | 0.0057 |
19 | TheilSen Regressor | 311.6104 | 341223.3176 | 438.9717 | -1.2980 | 1.5776 | 13.3206 | 0.1145 |
20 | Passive Aggressive Regressor | 320.5876 | 289142.1801 | 464.0126 | -1.7844 | 1.1175 | 2.2600 | 0.0060 |
21 | Least Angle Regression | 23830.9823 | 4710185127.5582 | 28472.6183 | -11511.9715 | 2.8675 | 329.5907 | 0.0152 |
models() provide list of models in library and their id that can be used in functions
model_metadata = models()
model_metadata['Name'] # with ids
ID lr Linear Regression lasso Lasso Regression ridge Ridge Regression en Elastic Net lar Least Angle Regression llar Lasso Least Angle Regression omp Orthogonal Matching Pursuit br Bayesian Ridge ard Automatic Relevance Determination par Passive Aggressive Regressor ransac Random Sample Consensus tr TheilSen Regressor huber Huber Regressor kr Kernel Ridge svm Support Vector Machine knn K Neighbors Regressor dt Decision Tree rf Random Forest et Extra Trees Regressor ada AdaBoost Regressor gbr Gradient Boosting Regressor mlp Multi Level Perceptron xgboost Extreme Gradient Boosting lightgbm Light Gradient Boosting Machine catboost CatBoost Regressor Name: Name, dtype: object
The function that actually allows you to create a model is called create_model()
Model currently under consideration:
catboost = create_model('catboost')
MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
---|---|---|---|---|---|---|
0 | 86.8656 | 11795.4699 | 108.6070 | 0.8892 | 0.3100 | 0.2822 |
1 | 37.7048 | 2239.2099 | 47.3203 | 0.9648 | 1.1768 | 3.3480 |
2 | 57.2179 | 3525.8265 | 59.3787 | 0.7475 | 1.6256 | 13.1253 |
3 | 222.2197 | 109116.3076 | 330.3276 | 0.7604 | 1.1581 | 3.6042 |
4 | 76.0160 | 9464.7867 | 97.2871 | 0.7607 | 1.1704 | 4.8462 |
5 | 93.0054 | 18934.0392 | 137.6010 | 0.8507 | 1.4946 | 0.9058 |
6 | 80.8137 | 17248.7566 | 131.3345 | 0.9366 | 0.2073 | 0.1604 |
7 | 248.6214 | 146099.2568 | 382.2293 | 0.6860 | 0.5061 | 0.5343 |
8 | 251.0157 | 75893.5438 | 275.4878 | -0.2540 | 0.8739 | 1.3204 |
9 | 52.4930 | 4911.0180 | 70.0787 | 0.9763 | 0.1614 | 0.1220 |
Mean | 120.5973 | 39922.8215 | 163.9652 | 0.7318 | 0.8684 | 2.8249 |
SD | 80.4499 | 48988.9488 | 114.1851 | 0.3421 | 0.5114 | 3.7832 |
br = create_model('br')
MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
---|---|---|---|---|---|---|
0 | 166.4440 | 65124.7956 | 255.1956 | 0.3883 | 0.3284 | 0.3234 |
1 | 101.6727 | 17383.2109 | 131.8454 | 0.7264 | 1.6327 | 6.3110 |
2 | 68.7381 | 5971.4283 | 77.2750 | 0.5723 | 1.7018 | 15.2472 |
3 | 262.1697 | 147177.0043 | 383.6366 | 0.6769 | 1.6177 | 11.8809 |
4 | 110.4996 | 18152.0451 | 134.7295 | 0.5410 | 1.6944 | 15.7180 |
5 | 80.1753 | 8239.3392 | 90.7708 | 0.9350 | 2.5605 | 19.1302 |
6 | 88.0475 | 20770.8422 | 144.1209 | 0.9237 | 0.2970 | 0.1698 |
7 | 231.9348 | 102723.2689 | 320.5047 | 0.7792 | 1.0433 | 2.0009 |
8 | 65.4455 | 6725.3878 | 82.0085 | 0.8889 | 0.4048 | 0.3647 |
9 | 204.9677 | 145907.0469 | 381.9778 | 0.2965 | 0.3011 | 0.2474 |
Mean | 138.0095 | 53817.4369 | 200.2065 | 0.6728 | 1.1582 | 7.1393 |
SD | 68.9288 | 54764.9129 | 117.1956 | 0.2104 | 0.7571 | 7.2188 |
rf = create_model('rf')
MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
---|---|---|---|---|---|---|
0 | 138.8000 | 43890.7436 | 209.5012 | 0.5877 | 0.3380 | 0.3726 |
1 | 55.1580 | 6253.2867 | 79.0777 | 0.9016 | 0.8568 | 1.0939 |
2 | 62.1880 | 5646.7956 | 75.1452 | 0.5956 | 0.6681 | 1.2599 |
3 | 233.9320 | 128158.9723 | 357.9930 | 0.7186 | 1.2280 | 3.3804 |
4 | 123.9600 | 27564.5049 | 166.0256 | 0.3030 | 1.1055 | 3.8871 |
5 | 116.4240 | 31594.7822 | 177.7492 | 0.7508 | 1.4996 | 2.3317 |
6 | 36.9025 | 2303.3466 | 47.9932 | 0.9915 | 0.2505 | 0.1351 |
7 | 231.2000 | 132532.0282 | 364.0495 | 0.7152 | 0.2695 | 0.2112 |
8 | 189.8600 | 48460.5537 | 220.1376 | 0.1993 | 0.7633 | 1.0318 |
9 | 75.4300 | 10268.3460 | 101.3328 | 0.9505 | 0.1224 | 0.1066 |
Mean | 126.3854 | 43667.3360 | 179.9005 | 0.6714 | 0.7102 | 1.3810 |
SD | 68.2927 | 45973.5657 | 106.3163 | 0.2480 | 0.4431 | 1.3024 |
In order to tune hyperparameters, the tune_model() function is used.
tuned_catboost = tune_model(catboost)
MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
---|---|---|---|---|---|---|
0 | 99.2964 | 16159.1016 | 127.1185 | 0.8482 | 0.3145 | 0.2945 |
1 | 30.9697 | 1700.4179 | 41.2361 | 0.9732 | 1.2492 | 3.1743 |
2 | 48.6437 | 2972.8256 | 54.5236 | 0.7871 | 1.6158 | 13.0309 |
3 | 186.8260 | 95631.8335 | 309.2440 | 0.7900 | 1.2284 | 4.0666 |
4 | 98.5554 | 16578.2368 | 128.7565 | 0.5808 | 1.2673 | 5.7825 |
5 | 83.8918 | 9925.0733 | 99.6247 | 0.9217 | 1.7666 | 15.1877 |
6 | 98.0377 | 18230.5982 | 135.0207 | 0.9330 | 0.3275 | 0.3032 |
7 | 225.6414 | 99569.0256 | 315.5456 | 0.7860 | 0.4551 | 0.5269 |
8 | 342.7420 | 155466.9288 | 394.2929 | -1.5687 | 1.0752 | 1.9227 |
9 | 65.5110 | 8084.4531 | 89.9136 | 0.9610 | 0.2690 | 0.1930 |
Mean | 128.0115 | 42431.8494 | 169.5276 | 0.6012 | 0.9569 | 4.4482 |
SD | 91.2468 | 51252.9899 | 117.0138 | 0.7318 | 0.5379 | 5.1657 |
tuned_br = tune_model(br)
MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
---|---|---|---|---|---|---|
0 | 157.6517 | 57996.4003 | 240.8244 | 0.4552 | 0.3550 | 0.3351 |
1 | 88.3138 | 19285.2631 | 138.8714 | 0.6965 | 1.1031 | 1.8531 |
2 | 54.7832 | 4925.4143 | 70.1813 | 0.6472 | 1.2938 | 5.9701 |
3 | 271.0319 | 147991.3881 | 384.6965 | 0.6751 | 1.3278 | 5.4486 |
4 | 104.2271 | 21616.5500 | 147.0257 | 0.4534 | 1.2976 | 6.0247 |
5 | 65.8019 | 6681.6676 | 81.7415 | 0.9473 | 1.9804 | 7.1372 |
6 | 122.2042 | 31770.2547 | 178.2421 | 0.8833 | 0.6046 | 0.3251 |
7 | 220.6695 | 118479.4309 | 344.2084 | 0.7454 | 0.7183 | 0.9432 |
8 | 135.9793 | 25130.6976 | 158.5266 | 0.5848 | 0.5387 | 0.6193 |
9 | 228.7457 | 174881.6313 | 418.1885 | 0.1568 | 0.3551 | 0.3055 |
Mean | 144.9408 | 60875.8698 | 216.2506 | 0.6245 | 0.9574 | 2.8962 |
SD | 69.7649 | 59477.9629 | 118.7920 | 0.2173 | 0.5016 | 2.7149 |
tuned_rf = tune_model(rf)
MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
---|---|---|---|---|---|---|
0 | 168.0929 | 48724.4240 | 220.7361 | 0.5423 | 0.5043 | 0.5183 |
1 | 56.4400 | 4993.3151 | 70.6634 | 0.9214 | 1.4807 | 4.8354 |
2 | 59.0065 | 5307.5602 | 72.8530 | 0.6199 | 1.3251 | 5.9389 |
3 | 298.9888 | 199215.2197 | 446.3353 | 0.5626 | 1.0169 | 2.0351 |
4 | 119.9900 | 30841.3412 | 175.6170 | 0.2201 | 1.2535 | 5.6269 |
5 | 75.7000 | 11464.1594 | 107.0708 | 0.9096 | 1.6788 | 5.6701 |
6 | 103.2537 | 29454.7442 | 171.6238 | 0.8918 | 0.3144 | 0.1923 |
7 | 245.0390 | 187893.3130 | 433.4666 | 0.5962 | 0.3220 | 0.2098 |
8 | 106.0610 | 15752.1232 | 125.5075 | 0.7397 | 0.5433 | 0.6022 |
9 | 99.9044 | 23769.2415 | 154.1728 | 0.8854 | 0.1421 | 0.1222 |
Mean | 133.2476 | 55741.5441 | 197.8046 | 0.6889 | 0.8581 | 2.5751 |
SD | 76.7013 | 70078.4416 | 128.8987 | 0.2135 | 0.5273 | 2.4702 |
plot_model(tuned_br)
plot_model(tuned_br, plot = 'error')
plot_model(tuned_br, plot='feature')
evaluate_model(tuned_br)
interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…
This function can be used to blend specific trained models that can be passed using estimator_list parameter
blend = blend_models(estimator_list = [tuned_catboost, tuned_br, tuned_rf])
MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
---|---|---|---|---|---|---|
0 | 137.1496 | 36409.8435 | 190.8136 | 0.6580 | 0.3234 | 0.3152 |
1 | 57.7033 | 5541.8823 | 74.4438 | 0.9128 | 1.2698 | 3.2347 |
2 | 52.4480 | 3539.4094 | 59.4929 | 0.7465 | 1.4263 | 8.2825 |
3 | 244.0284 | 136377.7637 | 369.2936 | 0.7006 | 1.1894 | 3.8041 |
4 | 106.6504 | 20407.7127 | 142.8556 | 0.4839 | 1.2684 | 5.8028 |
5 | 62.3806 | 6694.5728 | 81.8204 | 0.9472 | 1.2616 | 1.2877 |
6 | 105.4117 | 25248.4432 | 158.8976 | 0.9072 | 0.3248 | 0.2568 |
7 | 214.3713 | 130218.1246 | 360.8575 | 0.7202 | 0.3486 | 0.3107 |
8 | 191.6343 | 46329.4029 | 215.2427 | 0.2345 | 0.7641 | 1.0432 |
9 | 123.1639 | 47230.4371 | 217.3256 | 0.7723 | 0.2219 | 0.1918 |
Mean | 129.4942 | 45799.7592 | 187.1043 | 0.7083 | 0.8398 | 2.4530 |
SD | 64.0144 | 46286.3859 | 103.8832 | 0.2053 | 0.4662 | 2.6473 |
Before finalizing the model, it is advisable to perform one final check by predicting the test/hold-out set and reviewing the evaluation metrics
pred = predict_model(blend);
Model | MAE | MSE | RMSE | R2 | RMSLE | MAPE | |
---|---|---|---|---|---|---|---|
0 | Voting Regressor | 156.9653 | 106635.6227 | 326.5511 | 0.6566 | 0.6664 | 1.1079 |
finalize_model()
function fits the model onto the complete dataset including the test/hold-out sample.final_blend = finalize_model(blend)
plot_model(final_blend, plot = 'error')
save_model()
allows you to save the model along with entire transformation pipeline for later use.
# TIP : It's always good to use date in the filename when saving models, it's good for version control.
save_model(final_blend,'Final Blend Model 03August2020')
Transformation Pipeline and Model Succesfully Saved