This is a simple notebook demo illustrating how OptimalFlow's autoCV module typically works on a regression problem.
# Install external packages in binder environment.
!pip install xgboost
# Regression Demo
import pandas as pd
from optimalflow.autoCV import evaluate_model,dynaRegressor
import joblib
from optimalflow.utilis_func import pipeline_splitting_rule, update_parameters,reset_parameters
# Start from a clean parameter state before configuring the run.
# NOTE(review): presumably this restores OptimalFlow's default settings - confirm.
reset_parameters()

# Load each split as a (features, labels) pair so matching files stay side by side.
tr_features, tr_labels = (
    pd.read_csv('./data/regression/train_features.csv'),
    pd.read_csv('./data/regression/train_labels.csv'),
)
val_features, val_labels = (
    pd.read_csv('./data/regression/val_features.csv'),
    pd.read_csv('./data/regression/val_labels.csv'),
)
te_features, te_labels = (
    pd.read_csv('./data/regression/test_features.csv'),
    pd.read_csv('./data/regression/test_labels.csv'),
)

# Build the dynaRegressor with a fixed seed and cv_num=5, then fit it on the
# training split only; the validation split is used for evaluation afterwards.
reg_cv_demo = dynaRegressor(random_state=13, cv_num=5)
reg_cv_demo.fit(tr_features, tr_labels)
# Reload every persisted regressor from ./pkl, keyed by its short estimator name.
# NOTE(review): joblib.load unpickles arbitrary objects. These files are
# presumably the ones produced by the fit() call above - only load pickles you trust.
models = {
    mdl: joblib.load('./pkl/{}_reg_model.pkl'.format(mdl))
    for mdl in ['lr', 'knn', 'tree', 'svm', 'mlp', 'rf', 'gb', 'ada', 'xgb',
                'hgboost', 'huber', 'rgcv', 'cvlasso', 'sgd']
}

# Evaluate each reloaded model against the validation data. A failure in one
# model is reported and skipped so the remaining models are still evaluated.
for name, mdl in models.items():
    try:
        ml_evl = evaluate_model(model_type="reg")
        ml_evl.fit(name, mdl, val_features, val_labels)
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C / kernel interrupts
        # still stop the cell. Report the estimator's short name and the
        # underlying error instead of dumping the model object's repr.
        print(f"Failed to evaluate the {name} model: {exc}")
Now in Progress - Model Selection w/ Cross-validation: Estimate about 0.0 minutes left [####################] 100.0% lr -- R^2 Score: 0.684 / Mean Absolute Error: 3.674 / Mean Squared Error: 24.037 / Root Mean Squared Error: 24.037 / Latency: 6.4s knn -- R^2 Score: 0.307 / Mean Absolute Error: 4.639 / Mean Squared Error: 52.794 / Root Mean Squared Error: 52.794 / Latency: 3.3s tree -- R^2 Score: 0.64 / Mean Absolute Error: 3.356 / Mean Squared Error: 27.392 / Root Mean Squared Error: 27.392 / Latency: 1.1s svm -- R^2 Score: 0.649 / Mean Absolute Error: 3.466 / Mean Squared Error: 26.746 / Root Mean Squared Error: 26.746 / Latency: 1.0s mlp -- R^2 Score: 0.629 / Mean Absolute Error: 3.56 / Mean Squared Error: 28.244 / Root Mean Squared Error: 28.244 / Latency: 3.7s rf -- R^2 Score: 0.769 / Mean Absolute Error: 2.614 / Mean Squared Error: 17.601 / Root Mean Squared Error: 17.601 / Latency: 4.7s gb -- R^2 Score: 0.779 / Mean Absolute Error: 2.542 / Mean Squared Error: 16.819 / Root Mean Squared Error: 16.819 / Latency: 4.4s ada -- R^2 Score: 0.749 / Mean Absolute Error: 2.933 / Mean Squared Error: 19.09 / Root Mean Squared Error: 19.09 / Latency: 14.9s xgb -- R^2 Score: 0.776 / Mean Absolute Error: 2.66 / Mean Squared Error: 17.02 / Root Mean Squared Error: 17.02 / Latency: 2.0s hgboost -- R^2 Score: 0.758 / Mean Absolute Error: 2.98 / Mean Squared Error: 18.412 / Root Mean Squared Error: 18.412 / Latency: 8.0s huber -- R^2 Score: 0.591 / Mean Absolute Error: 3.711 / Mean Squared Error: 31.145 / Root Mean Squared Error: 31.145 / Latency: 4.0s rgcv -- R^2 Score: 0.672 / Mean Absolute Error: 3.757 / Mean Squared Error: 24.983 / Root Mean Squared Error: 24.983 / Latency: 5.1s cvlasso -- R^2 Score: 0.661 / Mean Absolute Error: 3.741 / Mean Squared Error: 25.821 / Root Mean Squared Error: 25.821 / Latency: 4.0s sgd -- R^2 Score: -7.6819521340367e+26 / Mean Absolute Error: 239048363331832.62 / Mean Squared Error: 5.849722584020232e+28 / Root Mean Squared Error: 
5.849722584020232e+28 / Latency: 4.1s
# Fast Regression Demo
import pandas as pd
from optimalflow.autoCV import evaluate_model,fastRegressor
import joblib
from optimalflow.utilis_func import pipeline_splitting_rule, update_parameters,reset_parameters
# Start from a clean parameter state before configuring the run.
# NOTE(review): presumably this restores OptimalFlow's default settings - confirm.
reset_parameters()

# Load each split as a (features, labels) pair so matching files stay side by side.
tr_features, tr_labels = (
    pd.read_csv('./data/regression/train_features.csv'),
    pd.read_csv('./data/regression/train_labels.csv'),
)
val_features, val_labels = (
    pd.read_csv('./data/regression/val_features.csv'),
    pd.read_csv('./data/regression/val_labels.csv'),
)
te_features, te_labels = (
    pd.read_csv('./data/regression/test_features.csv'),
    pd.read_csv('./data/regression/test_labels.csv'),
)

# Short names of the estimators handed to fastRegressor via custom_estimators.
custom_ml = [
    'lr', 'knn', 'tree', 'svm', 'mlp', 'rf', 'gb',
    'ada', 'xgb', 'hgboost', 'huber', 'rgcv', 'cvlasso', 'sgd',
]

# Build the fastRegressor with a fixed seed, cv_num=5 and n_comb=12, then fit
# it on the training split only.
reg_cv_demo = fastRegressor(
    custom_estimators=custom_ml,
    random_state=13,
    cv_num=5,
    n_comb=12,
)
reg_cv_demo.fit(tr_features, tr_labels)
# Reload every persisted regressor from ./pkl, keyed by its short estimator name.
# The names come from custom_ml (the list passed to fastRegressor), so the
# models evaluated here always match the estimators that were requested.
# NOTE(review): joblib.load unpickles arbitrary objects. These files are
# presumably the ones produced by the fit() call above - only load pickles you trust.
models = {
    mdl: joblib.load('./pkl/{}_reg_model.pkl'.format(mdl))
    for mdl in custom_ml
}

# Evaluate each reloaded model against the validation data. A failure in one
# model is reported and skipped so the remaining models are still evaluated.
for name, mdl in models.items():
    try:
        ml_evl = evaluate_model(model_type="reg")
        ml_evl.fit(name, mdl, val_features, val_labels)
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C / kernel interrupts
        # still stop the cell. Report the estimator's short name and the
        # underlying error instead of dumping the model object's repr.
        print(f"Failed to evaluate the {name} model: {exc}")
Now in Progress - Model Selection w/ Cross-validation: Estimate about 0.0 minutes left [####################] 100.0% lr -- R^2 Score: 0.684 / Mean Absolute Error: 3.674 / Mean Squared Error: 24.037 / Root Mean Squared Error: 24.037 / Latency: 4.0s knn -- R^2 Score: 0.307 / Mean Absolute Error: 4.639 / Mean Squared Error: 52.794 / Root Mean Squared Error: 52.794 / Latency: 4.7s tree -- R^2 Score: 0.653 / Mean Absolute Error: 3.264 / Mean Squared Error: 26.386 / Root Mean Squared Error: 26.386 / Latency: 2.8s svm -- R^2 Score: 0.649 / Mean Absolute Error: 3.466 / Mean Squared Error: 26.746 / Root Mean Squared Error: 26.746 / Latency: 2.3s mlp -- R^2 Score: 0.629 / Mean Absolute Error: 3.56 / Mean Squared Error: 28.244 / Root Mean Squared Error: 28.244 / Latency: 1.7s rf -- R^2 Score: 0.774 / Mean Absolute Error: 2.606 / Mean Squared Error: 17.231 / Root Mean Squared Error: 17.231 / Latency: 20.9s gb -- R^2 Score: 0.782 / Mean Absolute Error: 2.594 / Mean Squared Error: 16.571 / Root Mean Squared Error: 16.571 / Latency: 2.3s ada -- R^2 Score: 0.756 / Mean Absolute Error: 2.84 / Mean Squared Error: 18.572 / Root Mean Squared Error: 18.572 / Latency: 8.5s xgb -- R^2 Score: 0.791 / Mean Absolute Error: 2.634 / Mean Squared Error: 15.911 / Root Mean Squared Error: 15.911 / Latency: 2.1s hgboost -- R^2 Score: 0.758 / Mean Absolute Error: 2.98 / Mean Squared Error: 18.412 / Root Mean Squared Error: 18.412 / Latency: 9.1s huber -- R^2 Score: 0.591 / Mean Absolute Error: 3.711 / Mean Squared Error: 31.145 / Root Mean Squared Error: 31.145 / Latency: 4.0s rgcv -- R^2 Score: 0.672 / Mean Absolute Error: 3.757 / Mean Squared Error: 24.983 / Root Mean Squared Error: 24.983 / Latency: 4.1s cvlasso -- R^2 Score: 0.661 / Mean Absolute Error: 3.741 / Mean Squared Error: 25.821 / Root Mean Squared Error: 25.821 / Latency: 5.1s sgd -- R^2 Score: -7.6819521340367e+26 / Mean Absolute Error: 239048363331832.62 / Mean Squared Error: 5.849722584020232e+28 / Root Mean Squared Error: 
5.849722584020232e+28 / Latency: 4.0s