Notebook

In [1]:

#Required for accessing openml datasets from Lale
!pip install 'liac-arff>=2.4.0'

Requirement already satisfied: liac-arff>=2.4.0 in /home/hirzel/python3.6venv/lib/python3.6/site-packages (2.4.0)
WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
You should consider upgrading via the '/home/hirzel/python3.6venv/bin/python3.6 -m pip install --upgrade pip' command.

In [2]:

import lale.datasets.openml
import pandas as pd
# Load the 'credit-g' classification dataset through Lale's OpenML helper.
# fetch() returns a (train, test) pair, each of which is an (X, y) pair.
train_split, test_split = lale.datasets.openml.fetch(
    'credit-g', 'classification', preprocess=True
)
train_X, train_y = train_split
test_X, test_y = test_split

In [3]:

from sklearn.preprocessing import StandardScaler as Standard, MinMaxScaler as MinMax
from sklearn.decomposition import PCA
from sklearn.kernel_approximation import Nystroem as Nys
from lale.lib.lale import NoOp
from sklearn.linear_model import LogisticRegression as LR
from sklearn.ensemble import RandomForestClassifier as RF
from xgboost import XGBClassifier as XGBoost
from lale.lib.lale import TopKVotingClassifier
from sklearn.metrics import accuracy_score
from lale.lib.lale import Hyperopt
from sklearn.ensemble import VotingClassifier
# Wrap the operators imported in this cell as Lale operators; the cells below
# rely on this to combine them with `|` and `>>`. Keep this call after all
# operator imports.
lale.wrap_imported_operators()

Currently, the only supported optimizer is Hyperopt. `args_to_optimizer` is a dictionary of the arguments that Hyperopt accepts, as documented at https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.hyperopt.html

In [4]:

# Search space: one choice of scaler, then one choice of feature transform,
# then one choice of classifier. NoOp stands for "skip this step".
planned_pipeline = (
    (NoOp | Standard | MinMax)
    >> (NoOp | PCA | Nys)
    >> (LR | RF | XGBoost)
)

# Build the top-k voting ensemble over that search space, explicitly selecting
# Hyperopt as the optimizer and passing its settings via args_to_optimizer.
ensemble = TopKVotingClassifier(
    estimator=planned_pipeline,
    k=3,
    optimizer=Hyperopt,
    args_to_optimizer=dict(max_evals=25, scoring='accuracy'),
)
ensemble.visualize()

In [5]:

# Fit the ensemble on the training split. In the recorded run this took several
# minutes (see the progress output below). The returned trained operator is
# what the later cells use for prediction and inspection.
trained_ensemble = ensemble.fit(train_X, train_y)

100%|██████████████████████████████| 25/25 [02:51<00:00,  6.87s/trial, best loss: -0.7417910447761193]
100%|█████████████████████████████████| 1/1 [01:23<00:00, 83.42s/trial, best loss: -0.755223880597015]

In [6]:

# Note: you can also pass just the planned pipeline, as below; Hyperopt with
# its default settings is then used as the optimizer.
# This example is bound to its own name so it does not overwrite the configured
# `ensemble` defined earlier; re-running the fit cell therefore still trains
# the configured ensemble rather than this default one.
default_ensemble = TopKVotingClassifier(estimator=planned_pipeline)

In [7]:

# Score the trained ensemble on the held-out test split and report accuracy.
predictions = trained_ensemble.predict(test_X)
print(accuracy_score(y_true=test_y, y_pred=predictions))

0.7696969696969697

In [8]:

# Retrieve the pipeline behind the trained ensemble and render it as a graph.
# The JSON dump in the next cell shows it is a VotingClassifier over three
# sub-pipelines.
best_pipeline = trained_ensemble.get_pipeline()
best_pipeline.visualize()

In [9]:

# Show the JSON representation of the pipeline: its steps, edges, and the
# hyperparameter values of each step.
best_pipeline.to_json()

Out[9]:

{'class': 'lale.lib.sklearn.voting_classifier.VotingClassifierImpl',
 'state': 'trained',
 'operator': 'VotingClassifier',
 'label': 'VotingClassifier',
 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.voting_classifier.html',
 'hyperparams': {'estimators': [('p17', {'$ref': '../steps/pipeline'}),
   ('p9', {'$ref': '../steps/pipeline_0'}),
   ('p16', {'$ref': '../steps/pipeline_1'})],
  'voting': 'soft'},
 'steps': {'pipeline': {'class': 'lale.operators.TrainablePipeline',
   'state': 'trainable',
   'edges': [['min_max', 'no_op'], ['no_op', 'lr']],
   'steps': {'min_max': {'class': 'lale.lib.sklearn.min_max_scaler.MinMaxScalerImpl',
     'state': 'trainable',
     'operator': 'MinMax',
     'label': 'MinMax',
     'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.min_max_scaler.html',
     'hyperparams': {},
     'is_frozen_trainable': True},
    'no_op': {'class': 'lale.lib.lale.no_op.NoOpImpl',
     'state': 'trained',
     'operator': 'NoOp',
     'label': 'NoOp',
     'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html',
     'hyperparams': {},
     'is_frozen_trainable': True,
     'coefs': None,
     'is_frozen_trained': True},
    'lr': {'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl',
     'state': 'trainable',
     'operator': 'LR',
     'label': 'LR',
     'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html',
     'hyperparams': {'C': 22334.598583769228,
      'fit_intercept': False,
      'tol': 0.012327172789706938},
     'is_frozen_trainable': True}}},
  'pipeline_0': {'class': 'lale.operators.TrainablePipeline',
   'state': 'trainable',
   'edges': [['standard', 'pca'], ['pca', 'lr_0']],
   'steps': {'standard': {'class': 'lale.lib.sklearn.standard_scaler.StandardScalerImpl',
     'state': 'trainable',
     'operator': 'Standard',
     'label': 'Standard',
     'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.standard_scaler.html',
     'hyperparams': {'with_mean': False, 'with_std': False},
     'is_frozen_trainable': True},
    'pca': {'class': 'lale.lib.sklearn.pca.PCAImpl',
     'state': 'trainable',
     'operator': 'PCA',
     'label': 'PCA',
     'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html',
     'hyperparams': {'svd_solver': 'randomized'},
     'is_frozen_trainable': True},
    'lr_0': {'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl',
     'state': 'trainable',
     'operator': 'LR',
     'label': 'LR',
     'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html',
     'hyperparams': {'C': 31347.843540495694,
      'penalty': 'l1',
      'tol': 0.03811266583210131},
     'is_frozen_trainable': True}}},
  'pipeline_1': {'class': 'lale.operators.TrainablePipeline',
   'state': 'trainable',
   'edges': [['min_max_0', 'no_op_0'], ['no_op_0', 'xg_boost']],
   'steps': {'min_max_0': {'class': 'lale.lib.sklearn.min_max_scaler.MinMaxScalerImpl',
     'state': 'trainable',
     'operator': 'MinMax',
     'label': 'MinMax',
     'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.min_max_scaler.html',
     'hyperparams': {},
     'is_frozen_trainable': True},
    'no_op_0': {'class': 'lale.lib.lale.no_op.NoOpImpl',
     'state': 'trained',
     'operator': 'NoOp',
     'label': 'NoOp',
     'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html',
     'hyperparams': {},
     'is_frozen_trainable': True,
     'coefs': None,
     'is_frozen_trained': True},
    'xg_boost': {'class': 'lale.lib.xgboost.xgb_classifier.XGBClassifierImpl',
     'state': 'trainable',
     'operator': 'XGBoost',
     'label': 'XGBoost',
     'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.xgboost.XGBClassifier.html',
     'hyperparams': {'booster': 'dart',
      'colsample_bylevel': 0.47096071538468853,
      'colsample_bytree': 0.7593792234753081,
      'learning_rate': 0.20970693320349945,
      'max_depth': 18,
      'min_child_weight': 2,
      'n_estimators': 1063,
      'reg_alpha': 0.31844178826811975,
      'reg_lambda': 0.7797218315351517,
      'subsample': 0.64136510248406},
     'is_frozen_trainable': True}}}},
 'is_frozen_trainable': True,
 'coefs': 'coefs_not_available',
 'is_frozen_trained': False}