#!/usr/bin/env python
# coding: utf-8

# In[1]:

import numpy as np
# NOTE: load_boston was removed in scikit-learn 1.2, so the comparison at the
# bottom of this file needs an older scikit-learn release to run.
from sklearn.datasets import load_boston
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor as skAdaBoostRegressor


# In[2]:

class AdaBoostRegressor:
    """Minimal AdaBoost.R2 regressor built on depth-3 decision trees.

    Uses the linear loss and a learning rate of 1. The demo at the bottom of
    this file checks the results against scikit-learn's ``AdaBoostRegressor``.

    Parameters
    ----------
    n_estimators : int, default=50
        Maximum number of boosting rounds.
    random_state : int or None, default=0
        Seed for the ``numpy.random.RandomState`` that drives both the
        weighted bootstrap and the per-tree seeds.

    Attributes (set by ``fit``)
    ---------------------------
    estimators_ : list of fitted DecisionTreeRegressor
    estimator_weights_ : ndarray of shape (n_estimators,)
        ``log(1 / beta)`` per round; rounds that never ran stay at 0.
    estimator_errors_ : ndarray of shape (n_estimators,)
        Weighted average loss per round; rounds that never ran stay at 1.
    """

    def __init__(self, n_estimators=50, random_state=0):
        self.n_estimators = n_estimators
        # Store the caller's seed (previously hard-coded to 0, which silently
        # ignored the argument).
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the boosted ensemble on ``X`` (2-D array) and ``y`` (1-D array).

        Returns ``self``.
        """
        n_samples = X.shape[0]
        sample_weight = np.full(n_samples, 1 / n_samples)
        self.estimators_ = []
        self.estimator_weights_ = np.zeros(self.n_estimators)
        self.estimator_errors_ = np.ones(self.n_estimators)
        MAX_INT = np.iinfo(np.int32).max
        rng = np.random.RandomState(self.random_state)
        for i in range(self.n_estimators):
            # Draw the tree seed first, then the bootstrap samples: the order
            # of RNG calls determines the results.
            est = DecisionTreeRegressor(
                max_depth=3, random_state=rng.randint(MAX_INT)
            )
            # Weighted bootstrap by inverting the CDF of the sample weights.
            cdf = np.cumsum(sample_weight)
            cdf /= cdf[-1]
            uniform_samples = rng.random_sample(n_samples)
            bootstrap_idx = cdf.searchsorted(uniform_samples, side='right')
            est.fit(X[bootstrap_idx], y[bootstrap_idx])

            # Linear loss, scaled to [0, 1] by the largest absolute error.
            y_predict = est.predict(X)
            error_vect = np.abs(y_predict - y)
            error_max = error_vect.max()
            if error_max != 0:
                # Skip the scaling on a perfect fit to avoid 0 / 0 -> NaN.
                error_vect /= error_max
            estimator_error = (sample_weight * error_vect).sum()

            if estimator_error <= 0:
                # Perfect fit: keep this learner and stop boosting.
                self.estimators_.append(est)
                self.estimator_errors_[i] = 0.0
                self.estimator_weights_[i] = 1.0
                break
            if estimator_error >= 0.5:
                # beta would be >= 1 (non-positive weight). Stop boosting and
                # discard this learner unless it is the only one we have.
                if not self.estimators_:
                    self.estimators_.append(est)
                break

            beta = estimator_error / (1 - estimator_error)
            estimator_weight = np.log(1 / beta)
            # Well-predicted samples (small error) are down-weighted the most.
            sample_weight *= np.power(beta, 1 - error_vect)
            sample_weight /= np.sum(sample_weight)

            self.estimators_.append(est)
            self.estimator_errors_[i] = estimator_error
            self.estimator_weights_[i] = estimator_weight
        return self

    def predict(self, X):
        """Return the weighted median of the learners' predictions for ``X``."""
        # Shape (n_samples, n_fitted_estimators).
        predictions = np.array([est.predict(X) for est in self.estimators_]).T
        row_idx = np.arange(X.shape[0])
        # Per sample: order learners by predicted value, then accumulate
        # their weights in that order.
        sorted_idx = np.argsort(predictions, axis=1)
        weight_cdf = np.cumsum(self.estimator_weights_[sorted_idx], axis=1)
        # First learner whose cumulative weight reaches half of the total.
        median_or_above = weight_cdf >= 0.5 * weight_cdf[:, -1][:, np.newaxis]
        median_idx = median_or_above.argmax(axis=1)
        median_estimators = sorted_idx[row_idx, median_idx]
        return predictions[row_idx, median_estimators]


# In[3]:

if __name__ == "__main__":
    # Sanity check: this implementation should reproduce scikit-learn's
    # AdaBoostRegressor on the Boston housing data.
    X, y = load_boston(return_X_y=True)
    clf1 = AdaBoostRegressor(random_state=0).fit(X, y)
    clf2 = skAdaBoostRegressor(random_state=0).fit(X, y)
    assert np.allclose(clf1.estimator_errors_, clf2.estimator_errors_)
    assert np.allclose(clf1.estimator_weights_, clf2.estimator_weights_)
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert np.array_equal(pred1, pred2)