import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.model_selection import cross_val_predict
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier as skStackingClassifier
class StackingClassifier:
    """Minimal stacking ensemble for classification.

    Every base estimator produces class probabilities; these are concatenated
    column-wise into meta-features on which the final estimator is trained.

    Parameters
    ----------
    estimators : list of estimator objects
        Unfitted base classifiers. Each one is cloned before fitting and must
        provide ``fit`` and ``predict_proba``.
    final_estimator : estimator object
        Unfitted classifier that is cloned and trained on the meta-features.

    Attributes
    ----------
    estimators_ : list
        Clones of ``estimators`` fitted on the full training data.
    final_estimator_ : estimator
        Clone of ``final_estimator`` fitted on the cross-validated
        meta-features.
    """

    def __init__(self, estimators, final_estimator):
        self.estimators = estimators
        self.final_estimator = final_estimator

    @staticmethod
    def _stack_probabilities(probabilities):
        """Concatenate per-estimator probability matrices column-wise.

        A matrix with exactly two columns is reduced to its second column
        (kept 2-D), so such an estimator contributes a single feature. Any
        other matrix is used unchanged. ``fit`` and ``transform`` both go
        through this helper so the training and inference meta-features are
        always laid out the same way.
        """
        columns = [
            proba[:, [1]] if proba.shape[1] == 2 else proba
            for proba in probabilities
        ]
        return np.hstack(columns)

    def fit(self, X, y):
        """Fit the base estimators and the final estimator; return ``self``."""
        # Full-data fits: these are the models transform()/predict() use later.
        self.estimators_ = [clone(est).fit(X, y) for est in self.estimators]
        # The final estimator is trained on cross-validated probabilities
        # rather than on the output of the full-data fits above.
        cv_probabilities = [
            cross_val_predict(est, X, y, method="predict_proba")
            for est in self.estimators
        ]
        X_meta = self._stack_probabilities(cv_probabilities)
        self.final_estimator_ = clone(self.final_estimator)
        self.final_estimator_.fit(X_meta, y)
        return self

    def transform(self, X):
        """Return the meta-features for ``X`` from the fitted base estimators."""
        return self._stack_probabilities(
            [est.predict_proba(X) for est in self.estimators_]
        )

    def predict(self, X):
        """Predict labels by applying the final estimator to the meta-features."""
        return self.final_estimator_.predict(self.transform(X))

    def predict_proba(self, X):
        """Return the final estimator's ``predict_proba`` on the meta-features."""
        return self.final_estimator_.predict_proba(self.transform(X))
# Binary check: keep only iris classes 0 and 1, so every base learner
# contributes a single probability column to the meta-features, then verify
# that the custom StackingClassifier agrees with scikit-learn's.
X, y = load_iris(return_X_y=True)
binary_mask = y != 2
X, y = X[binary_mask], y[binary_mask]

clf1 = StackingClassifier(
    estimators=[
        RandomForestClassifier(random_state=0),
        GradientBoostingClassifier(random_state=0),
        SVC(random_state=0, probability=True),
    ],
    final_estimator=LogisticRegression(random_state=0),
)
clf1 = clf1.fit(X, y)

clf2 = skStackingClassifier(
    estimators=[
        ("rf", RandomForestClassifier(random_state=0)),
        ("gbdt", GradientBoostingClassifier(random_state=0)),
        ("svc", SVC(random_state=0, probability=True)),
    ],
    final_estimator=LogisticRegression(random_state=0),
)
clf2 = clf2.fit(X, y)

# Meta-features produced by the fitted base learners must agree.
trans1 = clf1.transform(X)
trans2 = clf2.transform(X)
assert np.allclose(trans1, trans2)

# Predicted labels must be exactly equal.
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)

# Final-estimator probabilities must agree up to floating-point tolerance.
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
# Multiclass check: repeat the comparison on the full three-class iris data,
# where each base learner contributes its complete probability matrix.
X, y = load_iris(return_X_y=True)

clf1 = StackingClassifier(
    estimators=[
        RandomForestClassifier(random_state=0),
        GradientBoostingClassifier(random_state=0),
        SVC(random_state=0, probability=True),
    ],
    final_estimator=LogisticRegression(random_state=0),
)
clf1 = clf1.fit(X, y)

clf2 = skStackingClassifier(
    estimators=[
        ("rf", RandomForestClassifier(random_state=0)),
        ("gbdt", GradientBoostingClassifier(random_state=0)),
        ("svc", SVC(random_state=0, probability=True)),
    ],
    final_estimator=LogisticRegression(random_state=0),
)
clf2 = clf2.fit(X, y)

# Meta-features produced by the fitted base learners must agree.
trans1 = clf1.transform(X)
trans2 = clf2.transform(X)
assert np.allclose(trans1, trans2)

# Predicted labels must be exactly equal.
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)

# Final-estimator probabilities must agree up to floating-point tolerance.
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)