# Required for accessing OpenML datasets from Lale.
!pip install 'liac-arff>=2.4.0'
Requirement already satisfied: liac-arff>=2.4.0 in /home/hirzel/python3.6venv/lib/python3.6/site-packages (2.4.0)
WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
You should consider upgrading via the '/home/hirzel/python3.6venv/bin/python3.6 -m pip install --upgrade pip' command.
import lale.datasets.openml
import pandas as pd
# Fetch the OpenML 'credit-g' classification dataset with preprocessing
# requested, then unpack the returned (train, test) pair into features and
# labels for each split.
train_split, test_split = lale.datasets.openml.fetch(
    'credit-g', 'classification', preprocess=True
)
train_X, train_y = train_split
test_X, test_y = test_split
from sklearn.preprocessing import StandardScaler as Standard, MinMaxScaler as MinMax
from sklearn.decomposition import PCA
from sklearn.kernel_approximation import Nystroem as Nys
from lale.lib.lale import NoOp
from sklearn.linear_model import LogisticRegression as LR
from sklearn.ensemble import RandomForestClassifier as RF
from xgboost import XGBClassifier as XGBoost
from lale.lib.lale import TopKVotingClassifier
from sklearn.metrics import accuracy_score
from lale.lib.lale import Hyperopt
from sklearn.ensemble import VotingClassifier
# Wrap the operators imported above as Lale operators. Presumably this is what
# lets the `|` and `>>` combinators be applied to the scikit-learn and XGBoost
# names when the planned pipeline is built -- confirm against the Lale docs.
lale.wrap_imported_operators()
Hyperopt is the only optimizer supported as of now. `args_to_optimizer` is a dictionary of the arguments that Hyperopt accepts, as documented at https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.hyperopt.html
# Search space, built stage by stage: a scaler choice, then a feature
# transformer choice, then a classifier choice. NoOp in the first two stages
# makes that stage optional.
scaler_choice = NoOp | Standard | MinMax
transformer_choice = NoOp | PCA | Nys
classifier_choice = LR | RF | XGBoost
planned_pipeline = scaler_choice >> transformer_choice >> classifier_choice

# Arguments forwarded to the optimizer: 25 evaluations, scored by accuracy.
optimizer_args = {'max_evals': 25, 'scoring': 'accuracy'}
ensemble = TopKVotingClassifier(
    estimator=planned_pipeline,
    k=3,
    optimizer=Hyperopt,
    args_to_optimizer=optimizer_args,
)
ensemble.visualize()

# Run the search and fit the ensemble on the training split.
trained_ensemble = ensemble.fit(train_X, train_y)
100%|██████████████████████████████| 25/25 [02:51<00:00, 6.87s/trial, best loss: -0.7417910447761193] 100%|█████████████████████████████████| 1/1 [01:23<00:00, 83.42s/trial, best loss: -0.755223880597015]
# Note: you can also pass only the planned pipeline, as below; Hyperopt is then
# used with its default settings.
# NOTE(review): this rebinds `ensemble` to a new configuration that has not
# been fitted; the statements that follow keep using `trained_ensemble` from
# the earlier fit.
ensemble = TopKVotingClassifier(estimator=planned_pipeline)
# Predict on the test split with the trained ensemble and report accuracy.
predictions = trained_ensemble.predict(test_X)
test_accuracy = accuracy_score(test_y, predictions)
print(test_accuracy)
0.7696969696969697
# Retrieve the pipeline held by the trained ensemble. In the recorded output
# below this is a VotingClassifier combining three pipelines.
best_pipeline = trained_ensemble.get_pipeline()
# Display the pipeline's structure.
best_pipeline.visualize()
# Serialize the pipeline, including the chosen hyperparameters. Kept as the
# last bare expression so the notebook echoes the result.
best_pipeline.to_json()
{'class': 'lale.lib.sklearn.voting_classifier.VotingClassifierImpl', 'state': 'trained', 'operator': 'VotingClassifier', 'label': 'VotingClassifier', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.voting_classifier.html', 'hyperparams': {'estimators': [('p17', {'$ref': '../steps/pipeline'}), ('p9', {'$ref': '../steps/pipeline_0'}), ('p16', {'$ref': '../steps/pipeline_1'})], 'voting': 'soft'}, 'steps': {'pipeline': {'class': 'lale.operators.TrainablePipeline', 'state': 'trainable', 'edges': [['min_max', 'no_op'], ['no_op', 'lr']], 'steps': {'min_max': {'class': 'lale.lib.sklearn.min_max_scaler.MinMaxScalerImpl', 'state': 'trainable', 'operator': 'MinMax', 'label': 'MinMax', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.min_max_scaler.html', 'hyperparams': {}, 'is_frozen_trainable': True}, 'no_op': {'class': 'lale.lib.lale.no_op.NoOpImpl', 'state': 'trained', 'operator': 'NoOp', 'label': 'NoOp', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html', 'hyperparams': {}, 'is_frozen_trainable': True, 'coefs': None, 'is_frozen_trained': True}, 'lr': {'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl', 'state': 'trainable', 'operator': 'LR', 'label': 'LR', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html', 'hyperparams': {'C': 22334.598583769228, 'fit_intercept': False, 'tol': 0.012327172789706938}, 'is_frozen_trainable': True}}}, 'pipeline_0': {'class': 'lale.operators.TrainablePipeline', 'state': 'trainable', 'edges': [['standard', 'pca'], ['pca', 'lr_0']], 'steps': {'standard': {'class': 'lale.lib.sklearn.standard_scaler.StandardScalerImpl', 'state': 'trainable', 'operator': 'Standard', 'label': 'Standard', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.standard_scaler.html', 'hyperparams': {'with_mean': False, 'with_std': False}, 
'is_frozen_trainable': True}, 'pca': {'class': 'lale.lib.sklearn.pca.PCAImpl', 'state': 'trainable', 'operator': 'PCA', 'label': 'PCA', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html', 'hyperparams': {'svd_solver': 'randomized'}, 'is_frozen_trainable': True}, 'lr_0': {'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl', 'state': 'trainable', 'operator': 'LR', 'label': 'LR', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html', 'hyperparams': {'C': 31347.843540495694, 'penalty': 'l1', 'tol': 0.03811266583210131}, 'is_frozen_trainable': True}}}, 'pipeline_1': {'class': 'lale.operators.TrainablePipeline', 'state': 'trainable', 'edges': [['min_max_0', 'no_op_0'], ['no_op_0', 'xg_boost']], 'steps': {'min_max_0': {'class': 'lale.lib.sklearn.min_max_scaler.MinMaxScalerImpl', 'state': 'trainable', 'operator': 'MinMax', 'label': 'MinMax', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.min_max_scaler.html', 'hyperparams': {}, 'is_frozen_trainable': True}, 'no_op_0': {'class': 'lale.lib.lale.no_op.NoOpImpl', 'state': 'trained', 'operator': 'NoOp', 'label': 'NoOp', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html', 'hyperparams': {}, 'is_frozen_trainable': True, 'coefs': None, 'is_frozen_trained': True}, 'xg_boost': {'class': 'lale.lib.xgboost.xgb_classifier.XGBClassifierImpl', 'state': 'trainable', 'operator': 'XGBoost', 'label': 'XGBoost', 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.xgboost.XGBClassifier.html', 'hyperparams': {'booster': 'dart', 'colsample_bylevel': 0.47096071538468853, 'colsample_bytree': 0.7593792234753081, 'learning_rate': 0.20970693320349945, 'max_depth': 18, 'min_child_weight': 2, 'n_estimators': 1063, 'reg_alpha': 0.31844178826811975, 'reg_lambda': 0.7797218315351517, 'subsample': 0.64136510248406}, 
'is_frozen_trainable': True}}}}, 'is_frozen_trainable': True, 'coefs': 'coefs_not_available', 'is_frozen_trained': False}