# Record the PyCaret release this walkthrough was executed against
# (the output below shows 2.3.4).
from pycaret.utils import version
version()
'2.3.4'
# Load the bundled US presidential election results sample dataset;
# get_data also displays the first few rows (table shown below).
from pycaret.datasets import get_data
data = get_data('us_presidential_election_results')
| cycle | state | dem_poll_avg | dem_poll_avg_margin | incumbent_party | incumbent_running | party_winner |
---|---|---|---|---|---|---|---|
0 | 1976 | California | 42.797994 | -3.270222 | republican | 1 | republican |
1 | 1976 | Colorado | 42.180101 | -1.373191 | republican | 1 | republican |
2 | 1976 | Connecticut | 41.698014 | -1.469654 | republican | 1 | republican |
3 | 1976 | Delaware | 33.370748 | 2.445322 | republican | 1 | democrat |
4 | 1976 | Georgia | 59.796546 | 29.379760 | republican | 1 | democrat |
# Treat the election-cycle year as a continuous numeric feature and
# visualize the class balance of the target column as a horizontal bar chart.
data['cycle'] = data['cycle'].astype('float64')
data['party_winner'].value_counts().plot.barh()
<AxesSubplot:>
# Initialize the PyCaret classification experiment: infers feature types,
# performs the train/test split, and logs runs and plots to MLflow under
# the experiment name 'us_election_model'.
from pycaret.classification import *
s = setup(
    data,
    target='party_winner',
    session_id=123,
    log_experiment=True,
    log_plots=True,
    experiment_name='us_election_model',
)
| Description | Value |
---|---|---|
0 | session_id | 123 |
1 | Target | party_winner |
2 | Target Type | Binary |
3 | Label Encoded | democrat: 0, republican: 1 |
4 | Original Data | (497, 7) |
5 | Missing Values | False |
6 | Numeric Features | 3 |
7 | Categorical Features | 3 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (347, 56) |
12 | Transformed Test Set | (150, 56) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | True |
20 | Experiment Name | us_election_model |
21 | USI | 581f |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
# Cross-validate every available classifier and return the best model
# by mean accuracy (CatBoost in the run shown below).
best = compare_models()
| Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) |
---|---|---|---|---|---|---|---|---|---|
catboost | CatBoost Classifier | 0.9366 | 0.9907 | 0.9462 | 0.9511 | 0.9465 | 0.8680 | 0.8737 | 0.5080 |
xgboost | Extreme Gradient Boosting | 0.9363 | 0.9857 | 0.9405 | 0.9552 | 0.9445 | 0.8693 | 0.8771 | 0.0640 |
rf | Random Forest Classifier | 0.9307 | 0.9887 | 0.9314 | 0.9543 | 0.9403 | 0.8572 | 0.8634 | 0.0680 |
gbc | Gradient Boosting Classifier | 0.9219 | 0.9823 | 0.9169 | 0.9525 | 0.9318 | 0.8403 | 0.8461 | 0.0280 |
lightgbm | Light Gradient Boosting Machine | 0.9219 | 0.9752 | 0.9260 | 0.9461 | 0.9327 | 0.8390 | 0.8466 | 0.0450 |
dt | Decision Tree Classifier | 0.9191 | 0.9171 | 0.9267 | 0.9419 | 0.9314 | 0.8321 | 0.8388 | 0.0040 |
knn | K Neighbors Classifier | 0.9166 | 0.9776 | 0.9512 | 0.9193 | 0.9318 | 0.8235 | 0.8344 | 0.0090 |
et | Extra Trees Classifier | 0.9135 | 0.9773 | 0.9174 | 0.9400 | 0.9267 | 0.8209 | 0.8262 | 0.0620 |
lr | Logistic Regression | 0.9108 | 0.9842 | 0.9319 | 0.9244 | 0.9254 | 0.8139 | 0.8216 | 0.0100 |
ada | Ada Boost Classifier | 0.9021 | 0.9513 | 0.9171 | 0.9237 | 0.9168 | 0.7968 | 0.8060 | 0.0260 |
lda | Linear Discriminant Analysis | 0.8934 | 0.9455 | 0.8971 | 0.9273 | 0.9087 | 0.7793 | 0.7881 | 0.0050 |
ridge | Ridge Classifier | 0.8933 | 0.0000 | 0.9019 | 0.9220 | 0.9088 | 0.7790 | 0.7870 | 0.0040 |
nb | Naive Bayes | 0.6741 | 0.8407 | 0.4895 | 0.9340 | 0.6353 | 0.3921 | 0.4623 | 0.0040 |
svm | SVM - Linear Kernel | 0.6690 | 0.0000 | 0.7857 | 0.5749 | 0.6567 | 0.3088 | 0.3635 | 0.0050 |
qda | Quadratic Discriminant Analysis | 0.6626 | 0.7844 | 0.4700 | 0.9315 | 0.6158 | 0.3735 | 0.4464 | 0.0050 |
# Launch the interactive widget for inspecting diagnostic plots of the best model.
evaluate_model(best)
interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…
# Train a LightGBM classifier with the default 10-fold cross-validation.
lightgbm = create_model('lightgbm')
| Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.9429 | 0.9796 | 0.9524 | 0.9524 | 0.9524 | 0.8810 | 0.8810 |
1 | 0.8857 | 0.9864 | 0.9524 | 0.8696 | 0.9091 | 0.7561 | 0.7618 |
2 | 0.9714 | 0.9932 | 1.0000 | 0.9545 | 0.9767 | 0.9398 | 0.9415 |
3 | 0.9143 | 0.9320 | 1.0000 | 0.8750 | 0.9333 | 0.8148 | 0.8292 |
4 | 0.9143 | 0.9728 | 0.9048 | 0.9500 | 0.9268 | 0.8235 | 0.8250 |
5 | 0.9714 | 0.9456 | 1.0000 | 0.9545 | 0.9767 | 0.9398 | 0.9415 |
6 | 0.9429 | 1.0000 | 0.9000 | 1.0000 | 0.9474 | 0.8852 | 0.8911 |
7 | 0.8824 | 0.9821 | 0.8000 | 1.0000 | 0.8889 | 0.7671 | 0.7888 |
8 | 0.8824 | 0.9821 | 0.8000 | 1.0000 | 0.8889 | 0.7671 | 0.7888 |
9 | 0.9118 | 0.9786 | 0.9500 | 0.9048 | 0.9268 | 0.8159 | 0.8174 |
Mean | 0.9219 | 0.9752 | 0.9260 | 0.9461 | 0.9327 | 0.8390 | 0.8466 |
SD | 0.0323 | 0.0199 | 0.0716 | 0.0464 | 0.0299 | 0.0654 | 0.0605 |
# Train a single decision tree for comparison against the boosted models.
dt = create_model('dt')
| Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
1 | 0.8571 | 0.8333 | 0.9524 | 0.8333 | 0.8889 | 0.6914 | 0.7035 |
2 | 0.9714 | 0.9762 | 0.9524 | 1.0000 | 0.9756 | 0.9412 | 0.9428 |
3 | 0.8857 | 0.8690 | 0.9524 | 0.8696 | 0.9091 | 0.7561 | 0.7618 |
4 | 0.8857 | 0.8929 | 0.8571 | 0.9474 | 0.9000 | 0.7674 | 0.7727 |
5 | 0.9429 | 0.9405 | 0.9524 | 0.9524 | 0.9524 | 0.8810 | 0.8810 |
6 | 0.9714 | 0.9667 | 1.0000 | 0.9524 | 0.9756 | 0.9412 | 0.9428 |
7 | 0.8824 | 0.9000 | 0.8000 | 1.0000 | 0.8889 | 0.7671 | 0.7888 |
8 | 0.9118 | 0.9250 | 0.8500 | 1.0000 | 0.9189 | 0.8235 | 0.8367 |
9 | 0.8824 | 0.8679 | 0.9500 | 0.8636 | 0.9048 | 0.7518 | 0.7577 |
Mean | 0.9191 | 0.9171 | 0.9267 | 0.9419 | 0.9314 | 0.8321 | 0.8388 |
SD | 0.0463 | 0.0510 | 0.0638 | 0.0607 | 0.0388 | 0.0974 | 0.0933 |
# Hyperparameter-tune the decision tree; note the tuned mean accuracy
# (0.9106) is slightly below the untuned tree's (0.9191) in this run.
tuned_dt = tune_model(dt)
| Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.9714 | 0.9949 | 0.9524 | 1.0000 | 0.9756 | 0.9412 | 0.9428 |
1 | 0.8857 | 0.9847 | 0.9524 | 0.8696 | 0.9091 | 0.7561 | 0.7618 |
2 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
3 | 0.8857 | 0.8827 | 0.9524 | 0.8696 | 0.9091 | 0.7561 | 0.7618 |
4 | 0.8571 | 0.9320 | 0.8095 | 0.9444 | 0.8718 | 0.7126 | 0.7235 |
5 | 0.9429 | 0.9609 | 0.9524 | 0.9524 | 0.9524 | 0.8810 | 0.8810 |
6 | 0.8571 | 0.9517 | 0.9000 | 0.8571 | 0.8780 | 0.7059 | 0.7071 |
7 | 0.9118 | 0.9518 | 0.8500 | 1.0000 | 0.9189 | 0.8235 | 0.8367 |
8 | 0.9118 | 0.9250 | 0.8500 | 1.0000 | 0.9189 | 0.8235 | 0.8367 |
9 | 0.8824 | 0.9839 | 0.9500 | 0.8636 | 0.9048 | 0.7518 | 0.7577 |
Mean | 0.9106 | 0.9567 | 0.9169 | 0.9357 | 0.9239 | 0.8152 | 0.8209 |
SD | 0.0453 | 0.0347 | 0.0581 | 0.0608 | 0.0387 | 0.0938 | 0.0919 |
# Train one LightGBM model per candidate learning rate for comparison.
candidate_rates = [0.1, 0.2, 0.3, 0.4, 0.5]
lgbs = [create_model('lightgbm', learning_rate=lr) for lr in candidate_rates]
| Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.8857 | 0.9694 | 0.9048 | 0.9048 | 0.9048 | 0.7619 | 0.7619 |
1 | 0.8571 | 0.9320 | 0.9524 | 0.8333 | 0.8889 | 0.6914 | 0.7035 |
2 | 0.9429 | 0.9966 | 0.9524 | 0.9524 | 0.9524 | 0.8810 | 0.8810 |
3 | 0.9143 | 0.9490 | 1.0000 | 0.8750 | 0.9333 | 0.8148 | 0.8292 |
4 | 0.9143 | 0.9728 | 0.9048 | 0.9500 | 0.9268 | 0.8235 | 0.8250 |
5 | 0.9714 | 0.9354 | 1.0000 | 0.9545 | 0.9767 | 0.9398 | 0.9415 |
6 | 0.9429 | 1.0000 | 0.9000 | 1.0000 | 0.9474 | 0.8852 | 0.8911 |
7 | 0.9118 | 0.9857 | 0.8500 | 1.0000 | 0.9189 | 0.8235 | 0.8367 |
8 | 0.8824 | 0.9786 | 0.8000 | 1.0000 | 0.8889 | 0.7671 | 0.7888 |
9 | 0.9412 | 0.9607 | 1.0000 | 0.9091 | 0.9524 | 0.8759 | 0.8827 |
Mean | 0.9164 | 0.9680 | 0.9264 | 0.9379 | 0.9290 | 0.8264 | 0.8341 |
SD | 0.0327 | 0.0225 | 0.0641 | 0.0539 | 0.0276 | 0.0691 | 0.0662 |
!mlflow ui
^C