import numpy as np
from sklearn.datasets import load_iris
from sklearn.dummy import DummyClassifier as skDummyClassifier
class DummyClassifier:
    """Baseline classifier that ignores the input features.

    Predictions depend only on the label distribution seen in ``fit`` (or on
    a user-supplied constant), never on the values in ``X``; only the number
    of rows of ``X`` is used.

    Parameters
    ----------
    strategy : str, default="stratified"
        One of ``"most_frequent"``, ``"prior"``, ``"stratified"``,
        ``"uniform"`` or ``"constant"``.
    random_state : int, default=0
        Seed handed to ``np.random.RandomState`` for the ``"uniform"`` and
        ``"stratified"`` strategies. A fresh generator is created on every
        call, so repeated calls return the same draws.
    constant : label, default=None
        Label to predict when ``strategy="constant"``; it must be one of the
        labels present in the training data.

    Attributes
    ----------
    classes_ : ndarray
        Sorted unique labels found in ``y``.
    n_classes_ : int
        Number of unique labels.
    class_prior_ : ndarray
        Relative frequency of each entry of ``classes_`` in ``y``.
    """

    _STRATEGIES = ("most_frequent", "prior", "stratified", "uniform", "constant")

    def __init__(self, strategy="stratified", random_state=0, constant=None):
        self.strategy = strategy
        self.constant = constant
        self.random_state = random_state

    def _check_strategy(self):
        """Raise ``ValueError`` if ``self.strategy`` is not a known strategy."""
        if self.strategy not in self._STRATEGIES:
            raise ValueError(
                f"Unknown strategy {self.strategy!r}; "
                f"expected one of {self._STRATEGIES}."
            )

    def fit(self, X, y):
        """Record the label distribution of ``y``.

        Parameters
        ----------
        X : array-like
            Ignored; accepted for API compatibility.
        y : 1-D array-like
            Training labels.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the strategy is unknown, ``y`` is empty, or the strategy is
            ``"constant"`` and ``constant`` is missing or not a training label.
        """
        self._check_strategy()
        y = np.asarray(y)
        if y.shape[0] == 0:
            raise ValueError("y must contain at least one sample.")
        classes, y_train = np.unique(y, return_inverse=True)
        if self.strategy == "constant":
            if self.constant is None:
                raise ValueError(
                    "constant must be specified when strategy='constant'."
                )
            # Without this check predict_proba would emit all-zero rows.
            if not np.any(classes == self.constant):
                raise ValueError(
                    f"constant={self.constant!r} is not among the training "
                    f"labels {classes.tolist()}."
                )
        self.classes_ = classes
        self.n_classes_ = classes.shape[0]
        self.class_prior_ = np.bincount(y_train) / y.shape[0]
        return self

    def predict(self, X):
        """Return one predicted label per row of ``X``.

        Raises
        ------
        ValueError
            If ``self.strategy`` is not a known strategy.
        """
        self._check_strategy()
        n_samples = np.shape(X)[0]
        if self.strategy in ("most_frequent", "prior"):
            # Both strategies predict the modal class; they differ only in
            # predict_proba.
            return np.full(n_samples, self.classes_[np.argmax(self.class_prior_)])
        if self.strategy == "constant":
            return np.full(n_samples, self.constant)
        if self.strategy == "uniform":
            rng = np.random.RandomState(self.random_state)
            return self.classes_[rng.randint(self.n_classes_, size=n_samples)]
        # "stratified": predict_proba yields one-hot rows drawn from the
        # class prior, so argmax recovers the sampled class.
        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]

    def predict_proba(self, X):
        """Return an ``(n_samples, n_classes_)`` array of class probabilities.

        Columns follow the order of ``classes_``.

        Raises
        ------
        ValueError
            If ``self.strategy`` is not a known strategy.
        """
        self._check_strategy()
        n_samples = np.shape(X)[0]
        if self.strategy == "most_frequent":
            p = np.zeros((n_samples, self.n_classes_))
            p[:, np.argmax(self.class_prior_)] = 1
            return p
        if self.strategy == "prior":
            return np.tile(self.class_prior_, (n_samples, 1))
        if self.strategy == "constant":
            p = np.zeros((n_samples, self.n_classes_))
            p[:, self.classes_ == self.constant] = 1
            return p
        if self.strategy == "uniform":
            return np.full((n_samples, self.n_classes_), 1 / self.n_classes_)
        # "stratified": one multinomial draw per sample gives integer
        # one-hot rows distributed according to the class prior.
        rng = np.random.RandomState(self.random_state)
        return rng.multinomial(1, self.class_prior_, size=n_samples)
# Check the local DummyClassifier against scikit-learn's implementation on
# the iris data, one strategy at a time. Each entry pairs the constructor
# keyword arguments with a flag telling whether predicted labels are compared.
X, y = load_iris(return_X_y=True)
for params, compare_predictions in (
    ({"strategy": "most_frequent"}, True),
    ({"strategy": "prior"}, True),
    ({"strategy": "constant", "constant": 0}, True),
    ({"strategy": "uniform", "random_state": 0}, True),
    # Predicted labels are computed but not compared for "stratified".
    ({"strategy": "stratified", "random_state": 0}, False),
):
    clf1 = DummyClassifier(**params).fit(X, y)
    clf2 = skDummyClassifier(**params).fit(X, y)

    # Fitted attributes must agree exactly.
    assert clf1.n_classes_ == clf2.n_classes_
    assert np.array_equal(clf1.classes_, clf2.classes_)
    assert np.array_equal(clf1.class_prior_, clf2.class_prior_)

    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    if compare_predictions:
        assert np.array_equal(pred1, pred2)

    prob1 = clf1.predict_proba(X)
    prob2 = clf2.predict_proba(X)
    assert np.array_equal(prob1, prob2)