#!/usr/bin/env python
# coding: utf-8

# Script exported from a notebook: trains a fixed pipeline on the OpenML
# 'credit-g' dataset, then lets an optimizer pick among several planned
# alternatives. It relies on `get_ipython()`, so it has to be run under
# IPython/Jupyter.

# In[1]:

# Bind the interactive shell once; it is used for the pip install and for
# the `%%time` cell magics further down.
shell = get_ipython()
shell.system("pip install 'liac-arff>=2.4.0'")


# In[2]:

import lale.datasets.openml
import pandas as pd

# fetch() yields a (train, test) pair, each being an (X, y) pair.
train_split, test_split = lale.datasets.openml.fetch(
    'credit-g', 'classification', preprocess=False)
train_X, train_y = train_split
test_X, test_y = test_split

# Last rows of labels next to features; as the final expression of the
# notebook cell this was the cell's displayed output.
pd.concat([train_y.tail(), train_X.tail()], axis=1)


# In[3]:

from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression as LR
from sklearn.preprocessing import Normalizer as Norm
from sklearn.preprocessing import OneHotEncoder as OneHot
from sklearn.svm import LinearSVC
from xgboost import XGBClassifier as XGBoost

from lale.lib.lale import NoOp
from lale.operators import make_pipeline

# Must run after every operator import above so that those names are
# already bound when lale wraps them.
lale.wrap_imported_operators()


# In[4]:

import numpy as np


def _is_numeric_column(col):
    """Return True when column `col` of train_X has a numeric dtype."""
    return np.issubdtype(train_X.dtypes[col], np.number)


# Numeric columns go to the normalizer, every remaining column to the
# one-hot encoder.
num_cols = [col for col in train_X.columns if _is_numeric_column(col)]
cat_cols = [col for col in train_X.columns if col not in num_cols]


# In[5]:

# Fully configured preprocessing step, piped (`>>`) into a logistic
# regression.
column_prep = ColumnTransformer(
    transformers=[
        ('num_tfm', Norm(norm='l1'), num_cols),
        ('cat_tfm', OneHot(), cat_cols),
    ])
pipeline_trainable = column_prep >> LR()
pipeline_trainable.visualize()


# In[6]:

# `%%time` cell: fits the trainable pipeline and reports the elapsed time.
shell.run_cell_magic('time', '', 'pipeline_trained = pipeline_trainable.fit(train_X, train_y)\n')


# In[7]:

import sklearn.metrics

predictions = pipeline_trained.predict(test_X)
print(f'accuracy {sklearn.metrics.accuracy_score(test_y, predictions):.1%}')


# In[8]:

# Planned pipeline: `|` expresses a choice between operators, and the
# operators are passed unconfigured (except LinearSVC's `dual=False`) so
# the optimizer in the next cell can decide among them.
prep_choices = ColumnTransformer(transformers=[
    ('num_tfm', Norm | NoOp, num_cols),
    ('cat_tfm', OneHot, cat_cols),
])
classifier_choices = LR | LinearSVC(dual=False) | XGBoost
pipeline_planned = make_pipeline(prep_choices, classifier_choices)
pipeline_planned.visualize()


# In[9]:

# `%%time` cell: auto-configures the planned pipeline with Hyperopt
# (cv=3, max_evals=5) and rebinds `pipeline_trained` to the result.
shell.run_cell_magic('time', '', 'from lale.lib.lale import Hyperopt\npipeline_trained = pipeline_planned.auto_configure(\n train_X, train_y, Hyperopt, cv=3, max_evals=5)\n')


# In[10]:

# Show the pipeline selected by the search, as a graph and as code.
pipeline_trained.visualize()
pipeline_trained.pretty_print(ipython_display=True, show_imports=False)


# In[11]:

predictions = pipeline_trained.predict(test_X)
print(f'accuracy {sklearn.metrics.accuracy_score(test_y, predictions):.1%}')