#!/usr/bin/env python
# coding: utf-8
"""Minimal NumPy re-implementation of scikit-learn's ``DummyClassifier``.

When executed as a script (or notebook), the implementation is compared
against ``sklearn.dummy.DummyClassifier`` on the iris data set.
"""

# In[1]:

import numpy as np


# In[2]:

class DummyClassifier:
    """Baseline classifier that ignores the input features.

    Parameters
    ----------
    strategy : {"most_frequent", "prior", "stratified", "uniform", "constant"}
        How predictions are generated:

        * ``"most_frequent"`` -- always predict the most frequent training
          label; ``predict_proba`` is one-hot on that label.
        * ``"prior"`` -- ``predict`` as for ``"most_frequent"``;
          ``predict_proba`` returns the empirical class distribution.
        * ``"stratified"`` -- draw one label per sample from the empirical
          class distribution.
        * ``"uniform"`` -- draw labels uniformly at random.
        * ``"constant"`` -- always predict ``constant``.
    random_state : int or None
        Seed handed to ``np.random.RandomState`` on every call of a random
        strategy, so repeated calls with the same seed give the same output.
    constant : label or None
        Label predicted by the ``"constant"`` strategy. It must be one of
        the labels seen in ``fit``.

    Attributes
    ----------
    classes_ : ndarray
        Sorted unique labels seen in ``fit``.
    n_classes_ : int
        Number of unique labels.
    class_prior_ : ndarray
        Relative frequency of each entry of ``classes_`` in the training
        labels.
    """

    _STRATEGIES = ("most_frequent", "prior", "stratified", "uniform",
                   "constant")

    def __init__(self, strategy="stratified", random_state=0, constant=None):
        self.strategy = strategy
        self.constant = constant
        self.random_state = random_state

    def _check_strategy(self):
        """Raise ``ValueError`` if ``self.strategy`` is not supported."""
        if self.strategy not in self._STRATEGIES:
            raise ValueError(
                f"Unknown strategy {self.strategy!r}; "
                f"expected one of {self._STRATEGIES}."
            )

    def fit(self, X, y):
        """Record the label set and the empirical class distribution.

        Parameters
        ----------
        X : array-like
            Training samples. Not used; accepted for API compatibility.
        y : array-like of shape (n_samples,)
            Training labels.

        Returns
        -------
        self : DummyClassifier

        Raises
        ------
        ValueError
            If the strategy is unknown, ``y`` is empty, or the strategy is
            ``"constant"`` and ``constant`` is ``None`` or is not one of
            the training labels.
        """
        self._check_strategy()
        y = np.asarray(y)  # accept lists and other array-likes
        if y.size == 0:
            raise ValueError("y must contain at least one sample.")

        self.classes_, y_train = np.unique(y, return_inverse=True)
        self.n_classes_ = self.classes_.shape[0]
        self.class_prior_ = np.bincount(y_train) / y.shape[0]

        if self.strategy == "constant":
            if self.constant is None:
                raise ValueError(
                    "Constant target value has to be specified when the "
                    "constant strategy is used."
                )
            # Without this check predict_proba would silently return
            # all-zero rows for a label that was never seen.
            if not np.any(self.classes_ == self.constant):
                raise ValueError(
                    "The constant target value must be present in the "
                    f"training data. You provided constant={self.constant!r}."
                    f" Possible values are: {list(self.classes_)}."
                )
        return self

    def predict(self, X):
        """Predict one label per sample of ``X``.

        Parameters
        ----------
        X : array-like
            Samples; only the number of samples, ``len(X)``, is used.

        Returns
        -------
        ndarray of shape (n_samples,)

        Raises
        ------
        ValueError
            If ``self.strategy`` is not a supported strategy.
        """
        self._check_strategy()
        n_samples = len(X)

        if self.strategy in ("most_frequent", "prior"):
            majority = self.classes_[np.argmax(self.class_prior_)]
            return np.full(n_samples, majority)
        if self.strategy == "constant":
            return np.full(n_samples, self.constant)
        if self.strategy == "uniform":
            rng = np.random.RandomState(self.random_state)
            return self.classes_[rng.randint(self.n_classes_, size=n_samples)]
        # "stratified": the one-hot rows produced by predict_proba are the
        # random draws themselves, so argmax recovers the sampled class.
        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]

    def predict_proba(self, X):
        """Return class probabilities for each sample of ``X``.

        Parameters
        ----------
        X : array-like
            Samples; only the number of samples, ``len(X)``, is used.

        Returns
        -------
        ndarray of shape (n_samples, n_classes_), dtype float
            Columns follow the order of ``classes_``.

        Raises
        ------
        ValueError
            If ``self.strategy`` is not a supported strategy.
        """
        self._check_strategy()
        shape = (len(X), self.n_classes_)

        if self.strategy == "most_frequent":
            proba = np.zeros(shape)
            proba[:, np.argmax(self.class_prior_)] = 1
        elif self.strategy == "prior":
            proba = np.tile(self.class_prior_, (shape[0], 1))
        elif self.strategy == "constant":
            proba = np.zeros(shape)
            proba[:, self.classes_ == self.constant] = 1
        elif self.strategy == "uniform":
            proba = np.full(shape, 1 / self.n_classes_)
        else:  # "stratified"
            rng = np.random.RandomState(self.random_state)
            draws = rng.multinomial(1, self.class_prior_, size=shape[0])
            # multinomial yields integers; cast so that every strategy
            # returns a float array.
            proba = draws.astype(np.float64)
        return proba


if __name__ == "__main__":
    # scikit-learn is only needed for the reference comparison below, so it
    # is imported here; the class above works with NumPy alone.
    from sklearn.datasets import load_iris
    from sklearn.dummy import DummyClassifier as skDummyClassifier

    # In[3]:

    X, y = load_iris(return_X_y=True)

    # In[4]-In[8]: one comparison per strategy.
    # Each entry is (strategy, extra constructor arguments, whether the
    # predict() outputs are compared).
    # NOTE(review): the predict() comparison for "stratified" was commented
    # out in the original notebook; the reason is not recorded, so it stays
    # disabled here.
    checks = (
        ("most_frequent", {}, True),
        ("prior", {}, True),
        ("constant", {"constant": 0}, True),
        ("uniform", {"random_state": 0}, True),
        ("stratified", {"random_state": 0}, False),
    )
    for strategy, params, compare_predictions in checks:
        clf1 = DummyClassifier(strategy=strategy, **params).fit(X, y)
        clf2 = skDummyClassifier(strategy=strategy, **params).fit(X, y)
        assert clf1.n_classes_ == clf2.n_classes_
        assert np.array_equal(clf1.classes_, clf2.classes_)
        assert np.array_equal(clf1.class_prior_, clf2.class_prior_)
        pred1 = clf1.predict(X)
        pred2 = clf2.predict(X)
        if compare_predictions:
            assert np.array_equal(pred1, pred2)
        prob1 = clf1.predict_proba(X)
        prob2 = clf2.predict_proba(X)
        assert np.array_equal(prob1, prob2)