This is a simple notebook demo illustrating how OptimalFlow's autoCV modules typically work on a classification problem.
# Install external packages in binder environment.
!pip install xgboost
# Classification Demo
import pandas as pd
from optimalflow.autoCV import dynaClassifier,evaluate_model
import joblib
# Read the train / validation / test splits from CSV, one (features, labels)
# pair per split.
tr_features, tr_labels = (
    pd.read_csv('./data/classification/train_features.csv'),
    pd.read_csv('./data/classification/train_labels.csv'),
)
val_features, val_labels = (
    pd.read_csv('./data/classification/val_features.csv'),
    pd.read_csv('./data/classification/val_labels.csv'),
)
te_features, te_labels = (
    pd.read_csv('./data/classification/test_features.csv'),
    pd.read_csv('./data/classification/test_labels.csv'),
)
# Short names of the estimators handed to dynaClassifier for model selection.
custom_ml = [
    'lgr', 'svm', 'mlp', 'rf', 'ada', 'gb',
    'xgb', 'lsvc', 'sgd', 'hgboost', 'rgcv',
]

# Fixed random_state keeps the run repeatable; cv_num is the fold count
# (the notebook output reports "CrossValidation with 5 folds").
clf_cv_demo = dynaClassifier(
    custom_estimators=custom_ml,
    random_state=13,
    cv_num=5,
)
clf_cv_demo.fit(tr_features, tr_labels)
# Reload every estimator pickled under ./pkl and score it on the validation
# split. NOTE(review): the pickles are presumably written by the preceding
# clf_cv_demo.fit(...) call -- confirm against the OptimalFlow docs.
models = {}
for name in custom_ml:  # reuse the list given to the classifier so the two cannot drift
    try:
        models[name] = joblib.load('./pkl/{}_clf_model.pkl'.format(name))
    except FileNotFoundError as err:
        # A missing pickle should not abort the whole comparison.
        print(f"Failed to load the {name} model: {err}")

for name, mdl in models.items():
    try:
        ml_evl = evaluate_model(model_type="cls")
        ml_evl.fit(name, mdl, val_features, val_labels)
    except Exception as err:
        # Report the estimator's short name (not the estimator object) and the
        # cause; `except Exception` lets Ctrl-C / SystemExit propagate, unlike
        # a bare `except:`.
        print(f"Failed to evaluate the {name} model: {err}")
Now in Progress - Model Selection w/ Cross-validation: Estimate about 0.0 minutes left [####################] 100.0% *optimalflow* autoCV Module ===> rgcv_CrossValidation with 5 folds: Best Parameters: {'fit_intercept': 'False'} Best CV Score: 0.803456180567801 lgr -- Accuracy: 0.775 / Precision: 0.712 / Recall: 0.646 / Latency: 1.0s svm -- Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / Latency: 4.5s mlp -- Accuracy: 0.787 / Precision: 0.745 / Recall: 0.631 / Latency: 1.0s rf -- Accuracy: 0.809 / Precision: 0.83 / Recall: 0.6 / Latency: 45.6s ada -- Accuracy: 0.792 / Precision: 0.759 / Recall: 0.631 / Latency: 20.6s gb -- Accuracy: 0.815 / Precision: 0.796 / Recall: 0.662 / Latency: 3.2s xgb -- Accuracy: 0.815 / Precision: 0.786 / Recall: 0.677 / Latency: 2.0s lsvc -- Accuracy: 0.753 / Precision: 0.667 / Recall: 0.646 / Latency: 4.1s sgd -- Accuracy: 0.775 / Precision: 0.658 / Recall: 0.8 / Latency: 0.0s hgboost -- Accuracy: 0.815 / Precision: 0.82 / Recall: 0.631 / Latency: 9.1s rgcv -- Accuracy: 0.753 / Precision: 0.678 / Recall: 0.615 / Latency: 1.0s
# Fast Classification Demo
import pandas as pd
from optimalflow.autoCV import fastClassifier,evaluate_model
import joblib
# Read the train / validation / test splits from CSV, one (features, labels)
# pair per split.
tr_features, tr_labels = (
    pd.read_csv('./data/classification/train_features.csv'),
    pd.read_csv('./data/classification/train_labels.csv'),
)
val_features, val_labels = (
    pd.read_csv('./data/classification/val_features.csv'),
    pd.read_csv('./data/classification/val_labels.csv'),
)
te_features, te_labels = (
    pd.read_csv('./data/classification/test_features.csv'),
    pd.read_csv('./data/classification/test_labels.csv'),
)
# Short names of the estimators handed to fastClassifier for model selection.
custom_ml = [
    'lgr', 'svm', 'mlp', 'rf', 'ada', 'gb',
    'xgb', 'lsvc', 'sgd', 'hgboost', 'rgcv',
]

# Fixed random_state keeps the run repeatable; cv_num is the fold count.
# NOTE(review): n_comb presumably caps the number of hyper-parameter
# combinations tried per estimator -- confirm against the OptimalFlow docs.
clf_cv_demo = fastClassifier(
    custom_estimators=custom_ml,
    random_state=13,
    cv_num=5,
    n_comb=12,
)
clf_cv_demo.fit(tr_features, tr_labels)
# Reload every estimator pickled under ./pkl and score it on the validation
# split. NOTE(review): the pickles are presumably written by the preceding
# clf_cv_demo.fit(...) call -- confirm against the OptimalFlow docs.
models = {}
for name in custom_ml:  # reuse the list given to the classifier so the two cannot drift
    try:
        models[name] = joblib.load('./pkl/{}_clf_model.pkl'.format(name))
    except FileNotFoundError as err:
        # A missing pickle should not abort the whole comparison.
        print(f"Failed to load the {name} model: {err}")

for name, mdl in models.items():
    try:
        ml_evl = evaluate_model(model_type="cls")
        ml_evl.fit(name, mdl, val_features, val_labels)
    except Exception as err:
        # Report the estimator's short name (not the estimator object) and the
        # cause; `except Exception` lets Ctrl-C / SystemExit propagate, unlike
        # a bare `except:`.
        print(f"Failed to evaluate the {name} model: {err}")
Now in Progress - Model Selection w/ Cross-validation: Estimate about 0.0 minutes left [####################] 100.0% *optimalflow* autoCV Module ===> rgcv_CrossValidation with 5 folds: Best Parameters: {'fit_intercept': 'False'} Best CV Score: 0.803456180567801 lgr -- Accuracy: 0.775 / Precision: 0.712 / Recall: 0.646 / Latency: 3.0s svm -- Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / Latency: 4.4s mlp -- Accuracy: 0.77 / Precision: 0.7 / Recall: 0.646 / Latency: 5.6s rf -- Accuracy: 0.809 / Precision: 0.83 / Recall: 0.6 / Latency: 38.2s ada -- Accuracy: 0.792 / Precision: 0.759 / Recall: 0.631 / Latency: 21.1s gb -- Accuracy: 0.815 / Precision: 0.796 / Recall: 0.662 / Latency: 4.0s xgb -- Accuracy: 0.82 / Precision: 0.811 / Recall: 0.662 / Latency: 2.0s lsvc -- Accuracy: 0.747 / Precision: 0.661 / Recall: 0.631 / Latency: 5.2s sgd -- Accuracy: 0.64 / Precision: 0.6 / Recall: 0.046 / Latency: 0.0s hgboost -- Accuracy: 0.815 / Precision: 0.82 / Recall: 0.631 / Latency: 5.3s rgcv -- Accuracy: 0.753 / Precision: 0.678 / Recall: 0.615 / Latency: 6.3s