import numpy as np
from sklearn.base import clone
from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel as skSelectFromModel
class SelectFromModel:
    """Select features whose importance is at least the mean importance.

    A clone of ``estimator`` is fitted, a per-feature importance is read
    from it, and the mean of those importances is used as the selection
    threshold.

    Parameters
    ----------
    estimator : object
        Unfitted estimator. After fitting, its clone must expose either
        ``feature_importances_`` or ``coef_``.

    Attributes
    ----------
    estimator_ : object
        Fitted clone of ``estimator``.
    importances_ : ndarray
        One importance value per feature.
    threshold_ : float
        Mean of ``importances_``.
    """

    def __init__(self, estimator):
        self.estimator = estimator

    @staticmethod
    def _extract_importances(estimator):
        """Return the per-feature importances of a fitted estimator.

        ``feature_importances_`` takes precedence. Otherwise ``coef_`` is
        used: its absolute value when 1-D, or the L1 norm taken along
        axis 0 when it has more dimensions.

        Raises
        ------
        ValueError
            If the estimator has neither ``feature_importances_`` nor
            ``coef_``.
        """
        if hasattr(estimator, "feature_importances_"):
            return estimator.feature_importances_
        if hasattr(estimator, "coef_"):
            coef = estimator.coef_
            if coef.ndim == 1:
                return np.abs(coef)
            # Collapse the leading axis so one value remains per column.
            return np.linalg.norm(coef, ord=1, axis=0)
        # Fail loudly here instead of hitting an AttributeError on
        # ``importances_`` further down in ``fit``.
        raise ValueError(
            "The fitted estimator %s exposes neither 'feature_importances_' "
            "nor 'coef_'; cannot derive feature importances."
            % type(estimator).__name__
        )

    def fit(self, X, y):
        """Fit a clone of the estimator and compute the threshold.

        Parameters
        ----------
        X, y
            Training data and targets, passed unchanged to the clone's
            ``fit`` method.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the fitted clone has neither ``feature_importances_`` nor
            ``coef_``.
        """
        # Work on a clone so the estimator passed by the caller is left as is.
        self.estimator_ = clone(self.estimator)
        self.estimator_.fit(X, y)
        self.importances_ = self._extract_importances(self.estimator_)
        self.threshold_ = np.mean(self.importances_)
        return self

    def transform(self, X):
        """Return the columns of ``X`` whose importance is >= the threshold.

        ``X`` must support 2-D boolean-mask indexing (``X[:, mask]``), and
        ``fit`` must have been called first.
        """
        return X[:, self.importances_ >= self.threshold_]
# Cross-check the local SelectFromModel against scikit-learn's version on
# several dataset / estimator pairings. Each entry is
# (dataset loader, estimator class, estimator keyword arguments).
_CASES = (
    (load_breast_cancer, RandomForestClassifier, {"random_state": 0}),
    (load_breast_cancer, LogisticRegression,
     {"max_iter": 15000, "random_state": 0}),
    (load_iris, LogisticRegression,
     {"max_iter": 15000, "random_state": 0}),
)

for load_data, estimator_cls, estimator_kwargs in _CASES:
    X, y = load_data(return_X_y=True)
    clf = estimator_cls(**estimator_kwargs)

    # Both selectors receive the same unfitted estimator.
    est1 = SelectFromModel(estimator=clf).fit(X, y)
    est2 = skSelectFromModel(estimator=clf).fit(X, y)

    # The thresholds must agree ...
    assert np.allclose(est1.threshold_, est2.threshold_)

    # ... and so must the reduced feature matrices.
    Xt1 = est1.transform(X)
    Xt2 = est2.transform(X)
    assert np.allclose(Xt1, Xt2)