# Prerequisite (run once in a shell, or prefix with "!" in a notebook): pip install pycaret
from pycaret.utils import version
# Report the installed PyCaret version (the recorded run shows 1.0.0).
version()
1.0.0
from pycaret.datasets import get_data
# Load the 'insurance' sample dataset. The recorded preview shows the columns
# age, sex, bmi, children, smoker, region and charges.
data = get_data('insurance')
| | age | sex | bmi | children | smoker | region | charges |
---|---|---|---|---|---|---|---|
0 | 19 | female | 27.900 | 0 | yes | southwest | 16884.92400 |
1 | 18 | male | 33.770 | 1 | no | southeast | 1725.55230 |
2 | 28 | male | 33.000 | 3 | no | southeast | 4449.46200 |
3 | 33 | male | 22.705 | 0 | no | northwest | 21984.47061 |
4 | 32 | male | 28.880 | 0 | no | northwest | 3866.85520 |
from pycaret.regression import *
# Initialise the regression experiment on `data` with 'charges' as the target.
# session_id fixes the seed so the run is repeatable; the remaining flags turn
# on the optional preprocessing steps that the setup summary reports as True
# (normalisation, polynomial / trigonometry features, feature interaction)
# and request binning of the two listed numeric columns.
reg1 = setup(
    data,
    target='charges',
    session_id=123,
    normalize=True,
    polynomial_features=True,
    trigonometry_features=True,
    feature_interaction=True,
    bin_numeric_features=['age', 'bmi'],
)
Setup Succesfully Completed!
| | Description | Value |
---|---|---|
0 | session_id | 123 |
1 | Transform Target | False |
2 | Transform Target Method | None |
3 | Original Data | (1338, 7) |
4 | Missing Values | False |
5 | Numeric Features | 2 |
6 | Categorical Features | 4 |
7 | Ordinal Features | False |
8 | High Cardinality Features | False |
9 | High Cardinality Method | None |
10 | Sampled Data | (1338, 7) |
11 | Transformed Train Set | (936, 61) |
12 | Transformed Test Set | (402, 61) |
13 | Numeric Imputer | mean |
14 | Categorical Imputer | constant |
15 | Normalize | True |
16 | Normalize Method | zscore |
17 | Transformation | False |
18 | Transformation Method | None |
19 | PCA | False |
20 | PCA Method | None |
21 | PCA Components | None |
22 | Ignore Low Variance | False |
23 | Combine Rare Levels | False |
24 | Rare Level Threshold | None |
25 | Numeric Binning | True |
26 | Remove Outliers | False |
27 | Outliers Threshold | None |
28 | Remove Multicollinearity | False |
29 | Multicollinearity Threshold | None |
30 | Clustering | False |
31 | Clustering Iteration | None |
32 | Polynomial Features | True |
33 | Polynomial Degree | 2 |
34 | Trignometry Features | True |
35 | Polynomial Threshold | 0.100000 |
36 | Group Features | False |
37 | Feature Selection | False |
38 | Features Selection Threshold | None |
39 | Feature Interaction | True |
40 | Feature Ratio | False |
41 | Interaction Threshold | 0.010000 |
# Train a model using the 'lr' estimator id (the saved pipeline printed later
# in this file shows a LinearRegression estimator). The recorded metrics table
# has ten numbered rows plus Mean and SD — presumably one row per
# cross-validation fold; confirm against the PyCaret docs.
lr = create_model('lr')
| | MAE | MSE | RMSE | R2 | RMSLE | MAPE |
---|---|---|---|---|---|---|
0 | 2815.8431 | 2.048964e+07 | 4526.5478 | 0.8776 | 0.3940 | 0.2995 |
1 | 3607.3637 | 3.481924e+07 | 5900.7828 | 0.7975 | 0.4555 | 0.3529 |
2 | 3021.0207 | 2.326287e+07 | 4823.1601 | 0.7055 | 0.5815 | 0.3948 |
3 | 3193.7966 | 2.591245e+07 | 5090.4272 | 0.7836 | 0.5432 | 0.4356 |
4 | 3172.6050 | 2.788106e+07 | 5280.2517 | 0.7926 | 0.4516 | 0.2867 |
5 | 3222.1065 | 2.310720e+07 | 4806.9945 | 0.8514 | 0.3786 | 0.2861 |
6 | 2865.4398 | 2.438587e+07 | 4938.2051 | 0.8341 | 0.3930 | 0.3251 |
7 | 3400.1889 | 2.942705e+07 | 5424.6705 | 0.8382 | 0.4764 | 0.3169 |
8 | 3003.7402 | 2.194569e+07 | 4684.6232 | 0.8583 | 0.3833 | 0.3208 |
9 | 3154.9844 | 2.829284e+07 | 5319.1016 | 0.8178 | 0.4987 | 0.3544 |
Mean | 3145.7089 | 2.595239e+07 | 5079.4764 | 0.8157 | 0.4556 | 0.3373 |
SD | 225.8115 | 4.044394e+06 | 388.9852 | 0.0467 | 0.0669 | 0.0458 |
# Draw PyCaret's default plot for the fitted model.
plot_model(lr)
# Persist the model under the name 'deployment_28042020'; per the recorded
# status message, the transformation pipeline is saved together with it.
save_model(lr, 'deployment_28042020')
Transformation Pipeline and Model Succesfully Saved
# Reload the saved artefact by the same name it was saved under.
deployment_28042020 = load_model('deployment_28042020')
Transformation Pipeline and Model Sucessfully Loaded
# Bare expression: in a notebook this displays the loaded object's repr (the
# recorded output is a list holding the preprocessing Pipeline, the
# LinearRegression estimator and None). It has no effect in a plain script.
deployment_28042020
[Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], ml_usecase='regression', numerical_features=[], target='charges', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='not_available', numeric_strategy='mean', target_variable=None)), ('new_levels1', New_Catagorical_Levels... ('dummy', Dummify(target='charges')), ('fix_perfect', Remove_100(target='charges')), ('clean_names', Clean_Colum_Names()), ('feature_select', Empty()), ('fix_multi', Empty()), ('dfs', DFS_Classic(interactions=['multiply'], ml_usecase='regression', random_state=123, subclass='binary', target='charges', top_features_to_pick_percentage=None)), ('pca', Empty())], verbose=False), LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False), None]
import requests
# Send one insurance record to the demo prediction endpoint and print the
# JSON body of the response.
url = 'https://pycaret-demo1.herokuapp.com/predict_api'

# Keys mirror the feature columns of the 'insurance' dataset (everything
# except the 'charges' target).
payload = {
    'age': 55,
    'sex': 'male',
    'bmi': 59,
    'children': 1,
    # The dataset encodes 'smoker' as 'yes'/'no'. This request previously sent
    # 'male', which is not a level of that column.
    # NOTE(review): 'yes' is a placeholder for a valid level — confirm the
    # intended value. Any prediction recorded from an earlier run was produced
    # with the old payload and may differ.
    'smoker': 'yes',
    'region': 'northwest',
}

# Without a timeout, requests can block indefinitely on an unresponsive host.
pred = requests.post(url, json=payload, timeout=30)
# Surface HTTP 4xx/5xx as an explicit error instead of failing later while
# decoding a non-JSON error page.
pred.raise_for_status()
print(pred.json())
75714.0