#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')


# # StackingRegressor: a simple stacking implementation for regression

# An ensemble-learning meta-regressor for stacking regression

# > from mlxtend.regressor import StackingRegressor

# ## Overview

# Stacking regression is an ensemble learning technique that combines multiple regression models via a meta-regressor. The individual regression models are trained on the complete training set; the meta-regressor is then fitted on the outputs -- the meta-features -- of the individual regression models in the ensemble.

# ![](./StackingRegressor_files/stackingregression_overview.png)

# ### References
#
# - Breiman, Leo. "[Stacked regressions.](https://link.springer.com/article/10.1023/A:1018046112532#page-1)" Machine Learning 24.1 (1996): 49-64.

# ## Example 1 - Simple Stacked Regression

# In[2]:


from mlxtend.regressor import StackingRegressor
from mlxtend.data import boston_housing_data
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
import matplotlib.pyplot as plt
import numpy as np
import warnings

warnings.simplefilter('ignore')

# Generating a sample dataset: a noisy sine curve
np.random.seed(1)
X = np.sort(5 * np.random.rand(40, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - np.random.rand(8))


# In[3]:


# Initializing models
lr = LinearRegression()
svr_lin = SVR(kernel='linear')
ridge = Ridge(random_state=1)
svr_rbf = SVR(kernel='rbf')

stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                           meta_regressor=svr_rbf)

# Training the stacking regressor
stregr.fit(X, y)
stregr.predict(X)

# Evaluate and visualize the fit
print("Mean Squared Error: %.4f" % np.mean((stregr.predict(X) - y) ** 2))
print('Variance Score: %.4f' % stregr.score(X, y))

with plt.style.context(('seaborn-whitegrid')):
    plt.scatter(X, y, c='lightgray')
    plt.plot(X, stregr.predict(X), c='darkgreen', lw=2)

plt.show()


# In[4]:


stregr


# ## Example 2 - Stacked Regression and GridSearch

# In this second example we demonstrate how `StackingRegressor` works in combination with `GridSearchCV`. The stack still allows tuning the hyperparameters of both the base and meta models!
#
# For instance, we can use `estimator.get_params().keys()` to get a full list of tunable parameters.
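# As a quick illustration, the following cell is a minimal sketch that prints the tunable parameter names of the stack fitted in Example 1; the exact keys depend on which regressors are stacked.

# In[ ]:


# List the tunable hyperparameter names of the stacked regressor from Example 1
for name in sorted(stregr.get_params().keys()):
    print(name)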
# In[5]:


from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso

# Initializing models
lr = LinearRegression()
svr_lin = SVR(kernel='linear')
ridge = Ridge(random_state=1)
lasso = Lasso(random_state=1)
svr_rbf = SVR(kernel='rbf')
regressors = [svr_lin, lr, ridge, lasso]
stregr = StackingRegressor(regressors=regressors,
                           meta_regressor=svr_rbf)

params = {'lasso__alpha': [0.1, 1.0, 10.0],
          'ridge__alpha': [0.1, 1.0, 10.0],
          'svr__C': [0.1, 1.0, 10.0],
          'meta_regressor__C': [0.1, 1.0, 10.0, 100.0],
          'meta_regressor__gamma': [0.1, 1.0, 10.0]}

grid = GridSearchCV(estimator=stregr,
                    param_grid=params,
                    cv=5,
                    refit=True)
grid.fit(X, y)

print("Best: %f using %s" % (grid.best_score_, grid.best_params_))


# In[6]:


cv_keys = ('mean_test_score', 'std_test_score', 'params')

for r, _ in enumerate(grid.cv_results_['mean_test_score']):
    print("%0.3f +/- %0.2f %r"
          % (grid.cv_results_[cv_keys[0]][r],
             grid.cv_results_[cv_keys[1]][r] / 2.0,
             grid.cv_results_[cv_keys[2]][r]))
    if r > 10:
        break
print('...')

print('Best parameters: %s' % grid.best_params_)
print('Best R2 score: %.2f' % grid.best_score_)


# In[7]:


# Evaluate and visualize the fit
print("Mean Squared Error: %.4f" % np.mean((grid.predict(X) - y) ** 2))
print('Variance Score: %.4f' % grid.score(X, y))

with plt.style.context(('seaborn-whitegrid')):
    plt.scatter(X, y, c='lightgray')
    plt.plot(X, grid.predict(X), c='darkgreen', lw=2)

plt.show()


# **Note**
#
# The `StackingRegressor` also enables grid search over the `regressors` argument and even over a single base regressor. When there are level-mixed hyperparameters, `GridSearchCV` will try to replace hyperparameters in a top-down order, i.e., `regressors` -> single base regressor -> regressor hyperparameter. For instance, given a hyperparameter grid such as
#
#     params = {'randomforestregressor__n_estimators': [1, 100],
#               'regressors': [(regr1, regr1, regr1), (regr2, regr3)]}
#
# it will first use the instance settings of either `(regr1, regr1, regr1)` or `(regr2, regr3)`. Then it will replace the `'n_estimators'` setting for a matching regressor based on `'randomforestregressor__n_estimators': [1, 100]`. A minimal sketch of searching over the `regressors` parameter is shown after the API section below.

# ## API

# In[8]:


with open('../../api_modules/mlxtend.regressor/StackingRegressor.md', 'r') as f:
    print(f.read())


# In[ ]:
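# Following up on the note above, here is a minimal sketch of grid searching over the `regressors` parameter itself. The candidate base regressors (`regr1`, `regr2`, `rf`) are illustrative choices, and the sketch reuses `X`, `y`, and the imports from the cells above.

# In[ ]:


from sklearn.ensemble import RandomForestRegressor

# Illustrative candidate base regressors
regr1 = LinearRegression()
regr2 = Ridge(random_state=1)
rf = RandomForestRegressor(n_estimators=10, random_state=1)

stack = StackingRegressor(regressors=[regr1, regr2],
                          meta_regressor=SVR(kernel='rbf'))

# The set of base regressors is itself treated as a hyperparameter here
params = {'regressors': [(regr1, regr2), (regr1, rf)],
          'meta_regressor__C': [1.0, 10.0]}

grid = GridSearchCV(estimator=stack, param_grid=params, cv=5, refit=True)
grid.fit(X, y)

print('Best parameters: %s' % grid.best_params_)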