#!/usr/bin/env python
# coding: utf-8
"""One-vs-rest classification built on a minimal binary logistic regression.

The module defines two small estimators that mirror the scikit-learn API
(``fit`` / ``decision_function`` / ``predict``):

* ``OneVsRestClassifier`` trains one copy of a binary estimator per class.
* ``BinaryLogisticRegression`` is an L2-regularised logistic regression
  solved with SciPy's L-BFGS-B.

Running the file as a script compares both against scikit-learn on the
iris data set.
"""

# In[1]:

from copy import deepcopy

import numpy as np
from scipy.optimize import minimize
from scipy.special import expit


# In[2]:

class OneVsRestClassifier:
    """Multiclass classifier that fits one binary estimator per class.

    Parameters
    ----------
    estimator : object
        Binary estimator exposing ``fit(X, y)`` and ``decision_function(X)``.
        It is deep-copied once per class, so the instance passed in is never
        fitted itself.

    Attributes
    ----------
    classes_ : ndarray
        Sorted unique labels seen in ``fit``.
    estimators_ : list
        Fitted copies of ``estimator``; entry ``i`` separates ``classes_[i]``
        from all other classes.
    """

    def __init__(self, estimator):
        self.estimator = estimator

    def _encode(self, y):
        """Return ``(classes, indicator)`` for the label vector ``y``.

        ``indicator`` is a float array of shape ``(len(y), len(classes))``
        whose column ``i`` is 1 where ``y == classes[i]`` and 0 elsewhere.
        """
        # asarray lets callers pass plain Python sequences as well as arrays.
        y = np.asarray(y)
        classes = np.unique(y)
        indicator = (y[:, np.newaxis] == classes).astype(float)
        return classes, indicator

    def fit(self, X, y):
        """Fit one independent copy of the base estimator per class.

        Returns
        -------
        self
        """
        self.classes_, indicator = self._encode(y)
        self.estimators_ = []
        for i in range(indicator.shape[1]):
            clf = deepcopy(self.estimator)
            clf.fit(X, indicator[:, i])
            self.estimators_.append(clf)
        return self

    def decision_function(self, X):
        """Return per-class scores, shape ``(n_samples, n_classes)``."""
        # np.shape accepts both arrays and nested sequences.
        scores = np.zeros((np.shape(X)[0], len(self.classes_)))
        for i, est in enumerate(self.estimators_):
            scores[:, i] = est.decision_function(X)
        return scores

    def predict(self, X):
        """Return, for each sample, the class with the highest score."""
        best = np.argmax(self.decision_function(X), axis=1)
        return self.classes_[best]


# In[3]:

class BinaryLogisticRegression:
    """Simplified logistic regression that only works for binary problems.

    Samples whose label equals 1 form the positive class; every other label
    is treated as negative.

    Parameters
    ----------
    C : float, default=1.0
        Inverse regularisation strength; the L2 penalty weight is ``1 / C``
        (``np.inf`` therefore disables the penalty).

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,)
        Fitted feature weights.
    intercept_ : float
        Fitted bias term (not penalised).
    """

    def __init__(self, C=1.0):
        self.C = C

    @staticmethod
    def _cost_grad(w, X, y, alpha):
        """Return the penalised logistic loss and its gradient at ``w``.

        Parameters
        ----------
        w : ndarray of shape (n_features + 1,)
            Feature weights followed by the intercept.
        X : ndarray of shape (n_samples, n_features)
        y : ndarray of shape (n_samples,)
            Labels encoded as -1 / +1.
        alpha : float
            L2 penalty weight applied to the feature weights only.

        Returns
        -------
        cost : float
        grad : ndarray of shape (n_features + 1,)
        """
        coef, intercept = w[:-1], w[-1]
        yz = y * (np.dot(X, coef) + intercept)
        # log(sigmoid(yz)) without overflow: exp() only ever sees a
        # non-positive argument.  This is the array form of the two-branch
        # formula  -log(1 + exp(-x)) if x > 0 else x - log(1 + exp(x)),
        # evaluated in one shot instead of one Python call per sample.
        log_sigmoid = np.minimum(yz, 0) - np.log(1 + np.exp(-np.abs(yz)))
        cost = -np.sum(log_sigmoid) + 0.5 * alpha * np.dot(coef, coef)

        t = (expit(yz) - 1) * y
        grad = np.zeros(len(w))
        grad[:-1] = np.dot(X.T, t) + alpha * coef
        grad[-1] = np.sum(t)  # the intercept carries no penalty term
        return cost, grad

    def _solve_lbfgs(self, X, y):
        """Minimise the loss with L-BFGS-B; return ``(coef, intercept)``."""
        # Without asarray a plain list ``y`` would make ``y == 1`` a scalar
        # ``False`` and silently label every sample as negative.
        X = np.asarray(X)
        y = np.asarray(y)
        y_signed = np.full(X.shape[0], -1)
        y_signed[y == 1] = 1
        w0 = np.zeros(X.shape[1] + 1)
        res = minimize(fun=self._cost_grad, jac=True, x0=w0,
                       args=(X, y_signed, 1 / self.C), method='L-BFGS-B')
        return res.x[:-1], res.x[-1]

    def fit(self, X, y):
        """Fit the model and return ``self``."""
        self.coef_, self.intercept_ = self._solve_lbfgs(X, y)
        return self

    def decision_function(self, X):
        """Return the signed score ``X @ coef_ + intercept_`` per sample."""
        return np.dot(X, self.coef_) + self.intercept_

    def predict(self, X):
        """Return 1 where the score is positive and 0 otherwise."""
        return (self.decision_function(X) > 0).astype(int)


# In[4]:

def main():
    """Check both estimators against scikit-learn on the iris data set.

    Raises
    ------
    AssertionError
        If decision values or predictions differ from scikit-learn's.
    """
    # scikit-learn is only needed for this reference comparison, so it is
    # imported here; the classes above stay importable without it.
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression as skLogisticRegression
    from sklearn.multiclass import OneVsRestClassifier as skOneVsRestClassifier

    X, y = load_iris(return_X_y=True)
    for C in [0.1, 1, 10, np.inf]:
        clf1 = OneVsRestClassifier(BinaryLogisticRegression(C=C)).fit(X, y)
        # Each one-vs-rest sub-problem is binary, so no ``multi_class``
        # argument is passed (it is deprecated in recent scikit-learn).
        clf2 = skOneVsRestClassifier(
            skLogisticRegression(C=C, solver="lbfgs",
                                 # keep consistent with scipy default
                                 tol=1e-5, max_iter=15000)).fit(X, y)
        prob1 = clf1.decision_function(X)
        prob2 = clf2.decision_function(X)
        pred1 = clf1.predict(X)
        pred2 = clf2.predict(X)
        assert np.allclose(prob1, prob2)
        assert np.array_equal(pred1, pred2)


if __name__ == "__main__":
    main()