#!/usr/bin/env python
# coding: utf-8

# # Demo Import from Sklearn with Schemas from Lale
#
# This notebook shows how to use Lale directly with sklearn operators.
# The function `lale.wrap_imported_operators()` will automatically wrap
# known sklearn operators into Lale operators.

# ## Usability
#
# To make Lale easy to learn and use, its APIs imitate those of
# [sklearn](https://scikit-learn.org/), with init, fit, and predict,
# and with pipelines.

# In[1]:


import sklearn.datasets
import sklearn.model_selection

digits = sklearn.datasets.load_digits()
# Fixed random_state keeps the 80/20 split reproducible across runs.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    digits.data, digits.target, test_size=0.2, random_state=42)
print(f'truth  {y_test.tolist()[:20]}')


# In[2]:


import lale
from sklearn.linear_model import LogisticRegression as LR

# Called after the sklearn import above so that `LR` is already in scope
# when the imported operators get wrapped.
lale.wrap_imported_operators()

# The solver is passed positionally as an enum value; C is a keyword argument.
trainable_lr = LR(LR.enum.solver.lbfgs, C=0.0001)
trained_lr = trainable_lr.fit(X_train, y_train)
predictions = trained_lr.predict(X_test)
print(f'actual {predictions.tolist()[:20]}')


# In[3]:


from sklearn.metrics import accuracy_score

print(f'accuracy {accuracy_score(y_test, predictions):.1%}')


# ## Correctness
#
# Lale uses [JSON Schema](https://json-schema.org/) to check for valid
# hyperparameters. These schemas enable not just validation but also
# interactive documentation. Thanks to using a single source of truth, the
# documentation is correct by construction.

# In[4]:


from jsonschema import ValidationError

# Deliberately invalid configuration ('adam' is not one of sklearn's
# LogisticRegression solvers): only the validation message is printed.
try:
    lale_lr = LR(solver='adam', C=0.01)
except ValidationError as e:
    print(e.message)


# In[5]:


# Bare expression: displayed as the cell result in a notebook; produces no
# output when this file is run as a plain script.
LR.hyperparam_schema('C')


# In[6]:


LR.get_defaults()


# ## Automation
#
# Lale includes a compiler that converts types (expressed as JSON
# Schema) to optimizer search spaces. It currently has back-ends for
# [hyperopt](http://hyperopt.github.io/hyperopt/),
# [GridSearchCV](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html), and
# [SMAC](http://www.automl.org/smac/).
# We are also actively working towards various other forms of AI
# automation using various other tools.

# In[7]:


from lale.search.op2hp import hyperopt_search_space
from hyperopt import STATUS_OK, Trials, fmin, tpe, space_eval
import lale.helpers
import warnings

# Silences every warning for the rest of the session, not only for the search.
warnings.filterwarnings("ignore")


def objective(hyperparams):
    """Train `LR` with one sampled configuration and return its hyperopt result.

    The 'name' entry of `hyperparams` is dropped and the remaining entries
    are passed to `LR` as keyword arguments. The returned dict carries the
    negated accuracy as 'loss' (hyperopt's `fmin` minimizes) together with
    `STATUS_OK`.

    NOTE(review): the accuracy is measured on (X_test, y_test), so the
    configuration is selected on the same split that is reported below.
    """
    trainable = LR(**lale.helpers.dict_without(hyperparams, 'name'))
    trained = trainable.fit(X_train, y_train)
    predictions = trained.predict(X_test)
    return {'loss': -accuracy_score(y_test, predictions), 'status': STATUS_OK}


search_space = hyperopt_search_space(LR)
trials = Trials()
fmin(objective, search_space, algo=tpe.suggest, max_evals=10, trials=trials)
# Map the best trial's raw parameter assignment back to concrete values.
best_hps = space_eval(search_space, trials.argmin)
print(f'best hyperparams {lale.helpers.dict_without(best_hps, "name")}\n')
# Losses are negated accuracies, so the smallest loss is the best accuracy.
print(f'accuracy {-min(trials.losses()):.1%}')


# ## Composition
#
# Lale supports composite models, which resemble sklearn pipelines but are
# more expressive.
#
# | Symbol | Name | Description  | Sklearn feature |
# | ------ | ---- | ------------ | --------------- |
# | >>     | pipe | Feed to next | `make_pipeline` |
# | &      | and  | Run both     | `make_union`, includes concat |
# | \|     | or   | Choose one   | (missing) |

# In[8]:


from sklearn.decomposition import PCA
from sklearn.svm import SVC
from lale.lib.lale import ConcatFeatures as Cat
from lale.lib.lale import NoOp

# Called again so that the operators imported in this cell get wrapped too.
lale.wrap_imported_operators()

optimizable = (PCA & NoOp) >> Cat >> (LR | SVC)
optimizable.visualize()


# In[9]:


from lale.operators import make_pipeline, make_union, make_choice

# The same pipeline, built with functions instead of the combinator symbols.
optimizable = make_pipeline(make_union(PCA, NoOp), make_choice(LR, SVC))
optimizable.visualize()


# In[10]:


import lale.lib.lale.hyperopt

Optimizer = lale.lib.lale.hyperopt.Hyperopt
trained = optimizable.auto_configure(X_train, y_train,
                                     optimizer=Optimizer, max_evals=10)


# In[11]:


predictions = trained.predict(X_test)
print(f'accuracy {accuracy_score(y_test, predictions):.1%}')
trained.visualize()


# ## Input and Output Schemas
#
# Besides
# schemas for hyperparameters, Lale also provides operator tags
# and schemas for input and output data of operators.

# In[12]:


# Bare expressions below are displayed as cell results in a notebook; they
# produce no output when this file is run as a plain script.
LR.get_tags()


# In[13]:


LR.get_schema('input_fit')


# In[14]:


LR.get_schema('output_predict')