# Required for accessing OpenML datasets from Lale.
!pip install 'liac-arff>=2.4.0'
Requirement already satisfied: liac-arff>=2.4.0 in /home/hirzel/python3.6venv/lib/python3.6/site-packages (2.4.0)
import lale.datasets.openml
import pandas as pd
# Load the 'credit-g' classification dataset through Lale's OpenML helper.
# The call returns a (train, test) pair, each of which unpacks into
# features (X) and labels (y).
(train_X, train_y), (test_X, test_y) = lale.datasets.openml.fetch(
    'credit-g',
    'classification',
    preprocess=True,
)
from sklearn.preprocessing import StandardScaler as Standard, MinMaxScaler as MinMax
from sklearn.decomposition import PCA
from sklearn.kernel_approximation import Nystroem as Nys
from lale.lib.lale import NoOp
from sklearn.linear_model import LogisticRegression as LR
from sklearn.ensemble import RandomForestClassifier as RF
from xgboost import XGBClassifier as XGBoost
from lale.lib.lale import TopKVotingClassifier
from sklearn.metrics import accuracy_score
from lale.lib.lale import Hyperopt
from sklearn.ensemble import VotingClassifier
# Wrap the operator classes imported above as Lale operators. This is
# presumably what lets them be combined with `|` and `>>` when the planned
# pipeline is built below -- confirm against the Lale documentation.
lale.wrap_imported_operators()
Currently, the only supported optimizer is Hyperopt. `args_to_optimizer` is a dictionary of arguments that Hyperopt accepts, as documented at https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.hyperopt.html
# Search space with three stages chained by `>>`; within each stage the
# alternatives are separated by `|`:
#   scaler (or none) >> feature transformer (or none) >> classifier
planned_pipeline = (
    (NoOp | Standard | MinMax)
    >> (NoOp | PCA | Nys)
    >> (LR | RF | XGBoost)
)

# Configure the top-k voting ensemble over that search space, using Hyperopt
# as the optimizer with 25 evaluations scored by accuracy.
ensemble = TopKVotingClassifier(
    estimator=planned_pipeline,
    k=3,
    optimizer=Hyperopt,
    args_to_optimizer=dict(max_evals=25, scoring='accuracy'),
)
ensemble.visualize()

# Fitting returns the trained ensemble; `ensemble` itself is left as configured.
trained_ensemble = ensemble.fit(train_X, train_y)
100%|███████| 25/25 [03:15<00:00, 4.04s/trial, best loss: -0.7448038005461415] 100%|█████████| 1/1 [00:06<00:00, 6.63s/trial, best loss: -0.7508298939720779]
# Note that you could also pass just the planned pipeline, as below; Hyperopt would then be used with its default settings.
# NOTE: this rebinds `ensemble` to a new, not-yet-fitted TopKVotingClassifier;
# it is shown only as an example and is not fitted or used for prediction here.
ensemble = TopKVotingClassifier(estimator=planned_pipeline)
# Predict with `trained_ensemble` (the result of the earlier `fit` call),
# not with the `ensemble` object that was just rebound.
predictions = trained_ensemble.predict(test_X)
# Print the accuracy of those predictions against the test labels.
print(accuracy_score(test_y, predictions))
0.7515151515151515
# Retrieve the pipeline held by the trained ensemble and render it.
best_pipeline = trained_ensemble.get_pipeline()
best_pipeline.visualize()
# Bare trailing expression: presumably left unassigned so the notebook displays
# the JSON form. The captured output that follows describes a VotingClassifier
# with 'voting': 'soft' over three member pipelines.
best_pipeline.to_json()
{'class': 'lale.lib.sklearn.voting_classifier.VotingClassifierImpl', 'state': 'trained', 'operator': 'VotingClassifier', 'label': 'VotingClassifier', 'documentation_url': 'https://scikit-learn.org/0.20/modules/generated/sklearn.ensemble.VotingClassifier.html#sklearn-ensemble-votingclassifier', 'hyperparams': {'estimators': [('p14', {'$ref': '../steps/pipeline'}), ('p21', {'$ref': '../steps/pipeline_0'}), ('p11', {'$ref': '../steps/pipeline_1'})], 'voting': 'soft'}, 'steps': {'pipeline': {'class': 'lale.operators.TrainablePipeline', 'state': 'trainable', 'edges': [['no_op', 'pca'], ['pca', 'lr']], 'steps': {'no_op': {'class': 'lale.lib.lale.no_op.NoOpImpl', 'state': 'trained', 'operator': 'NoOp', 'label': 'NoOp', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html', 'hyperparams': {}, 'is_frozen_trainable': True, 'coefs': None, 'is_frozen_trained': True}, 'pca': {'class': 'lale.lib.sklearn.pca.PCAImpl', 'state': 'trainable', 'operator': 'PCA', 'label': 'PCA', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html', 'hyperparams': {'svd_solver': 'randomized'}, 'is_frozen_trainable': True}, 'lr': {'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl', 'state': 'trainable', 'operator': 'LR', 'label': 'LR', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html', 'hyperparams': {'C': 32591.329013327737, 'penalty': 'l1', 'tol': 0.04931166736770484}, 'is_frozen_trainable': True}}}, 'pipeline_0': {'class': 'lale.operators.TrainablePipeline', 'state': 'trainable', 'edges': [['no_op_0', 'no_op_1'], ['no_op_1', 'lr_0']], 'steps': {'no_op_0': {'class': 'lale.lib.lale.no_op.NoOpImpl', 'state': 'trained', 'operator': 'NoOp', 'label': 'NoOp', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html', 'hyperparams': {}, 'is_frozen_trainable': True, 'coefs': None, 'is_frozen_trained': True}, 
'no_op_1': {'class': 'lale.lib.lale.no_op.NoOpImpl', 'state': 'trained', 'operator': 'NoOp', 'label': 'NoOp', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html', 'hyperparams': {}, 'is_frozen_trainable': True, 'coefs': None, 'is_frozen_trained': True}, 'lr_0': {'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl', 'state': 'trainable', 'operator': 'LR', 'label': 'LR', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html', 'hyperparams': {'C': 32762.313084225098, 'penalty': 'l1', 'tol': 0.04485831858516044}, 'is_frozen_trainable': True}}}, 'pipeline_1': {'class': 'lale.operators.TrainablePipeline', 'state': 'trainable', 'edges': [['min_max', 'no_op_2'], ['no_op_2', 'lr_1']], 'steps': {'min_max': {'class': 'lale.lib.sklearn.min_max_scaler.MinMaxScalerImpl', 'state': 'trainable', 'operator': 'MinMax', 'label': 'MinMax', 'documentation_url': 'https://scikit-learn.org/0.20/modules/generated/sklearn.preprocessing.MinMaxScaler.html#sklearn-preprocessing-minmaxscaler', 'hyperparams': {}, 'is_frozen_trainable': True}, 'no_op_2': {'class': 'lale.lib.lale.no_op.NoOpImpl', 'state': 'trained', 'operator': 'NoOp', 'label': 'NoOp', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html', 'hyperparams': {}, 'is_frozen_trainable': True, 'coefs': None, 'is_frozen_trained': True}, 'lr_1': {'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl', 'state': 'trainable', 'operator': 'LR', 'label': 'LR', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html', 'hyperparams': {'C': 20782.604482916624, 'solver': 'lbfgs', 'tol': 0.051260844412032186}, 'is_frozen_trainable': True}}}}, 'is_frozen_trainable': True, 'coefs': 'coefs_not_available', 'is_frozen_trained': False}