#!/usr/bin/env python
# coding: utf-8

# In[1]:

import warnings

# Silence warnings up front so the notebook output stays readable.
warnings.filterwarnings("ignore")

from lale.lib.lale import NoOp
from lale.lib.sklearn import (
    KNeighborsClassifier,
    LogisticRegression,
    Nystroem,
    PCA,
)
from lale.operators import make_union, make_choice, make_pipeline

# #### Lale provides an `|` combinator or a function make_choice() to allow
# only one of its arguments to be applied at once in the overall pipeline.
# In this example, the first step of the pipeline is a choice between
# Nystroem and NoOp. This means that the data will either be transformed
# using Nystroem or will be left as is (NoOp is a transformer that does
# nothing). The second step in the pipeline is a PCA, and the third step is
# again a choice between two popular classifiers.

# In[2]:

# Step 1: either apply a Nystroem kernel approximation or pass data through.
kernel_tfm_or_not = NoOp | Nystroem
kernel_tfm_or_not.visualize()

# In[3]:

# Step 2: PCA is always applied.
tfm = PCA

# In[4]:

# Step 3: choose one of the two classifiers, written with make_choice()
# instead of the `|` combinator.
clf = make_choice(LogisticRegression, KNeighborsClassifier)
clf.visualize()

# In[5]:

# Chain the three steps into a single pipeline graph with `>>`.
optimizable = kernel_tfm_or_not >> tfm >> clf
optimizable.visualize()

# #### Use the graph to select the best performing model for a dataset.
# We use the Iris dataset from sklearn for this demonstration. Hyperopt is
# used to scan the hyperparameter search space and select the best
# performing path from the above graph.

# In[6]:

from lale.lib.lale import Hyperopt
from lale.datasets import load_iris_df

# load_iris_df() yields a (train, test) pair, each an (X, y) pair.
train_split, test_split = load_iris_df()
X_train, y_train = train_split
X_test, y_test = test_split

# In[7]:

# max_evals=3 keeps the demonstration search short.
hpo_trainable = Hyperopt(estimator=optimizable, max_evals=3)
hpo_trained = hpo_trainable.fit(X_train, y_train)

# In[8]:

# With no arguments, get_pipeline() gives the best pipeline found.
best_estimator = hpo_trained.get_pipeline()
best_estimator.visualize()

# #### Look at the results from all trials and retrieve pipelines of other
# names or types.
# In[9]:

# Show the per-trial results table (rendered as cell output in a notebook).
hpo_trained.summary()

# In[10]:

# Pick the trial with the largest loss. `Series.idxmax()` returns the index
# *label* on every pandas version, whereas `Series.argmax()` returned the
# label on old pandas and the integer position on newer ones. Using idxmax
# removes the need for an isinstance() workaround and a second summary() call.
# NOTE(review): the summary index labels are presumably the pipeline names
# accepted by get_pipeline() below -- confirm against the Lale Hyperopt docs.
worst_name = hpo_trained.summary().loss.idxmax()
print(worst_name)

# In[11]:

# Retrieve the pipeline from that worst-scoring trial by name.
worst_estimator = hpo_trained.get_pipeline(worst_name)
worst_estimator.visualize()

# In[12]:

# Same pipeline, requested with astype='sklearn'.
worst_estimator_in_sklearn_format = hpo_trained.get_pipeline(worst_name, astype='sklearn')