#!/usr/bin/env python
# coding: utf-8

# In[1]:

import warnings

import numpy as np
from scipy.optimize import minimize
from sklearn.datasets import load_iris
from sklearn.svm import LinearSVC as skLinearSVC


# In[2]:

class LinearSVC:
    """Linear support vector classifier trained in the primal with L-BFGS-B.

    For each binary sub-problem (labels recoded to -1/+1) the minimised
    objective is::

        0.5 * w.w + C * sum(max(0, 1 - y_i * z_i) ** 2)

    where ``z = [X, 1] . w``.  The intercept is the weight of an appended
    constant-1 feature, so it is part of ``w`` and is regularised together
    with the other coefficients.

    With more than two classes, one independent binary problem is solved
    per class (that class against all the others).

    Parameters
    ----------
    C : float, default=1.0
        Weight of the squared-hinge loss term relative to the L2 penalty.

    Attributes
    ----------
    classes_ : ndarray
        Sorted unique labels seen by ``fit``.
    coef_ : ndarray of shape (n_problems, n_features)
        Feature weights; ``n_problems`` is 1 for two classes, otherwise the
        number of classes.
    intercept_ : ndarray of shape (n_problems,)
        Bias term of each binary problem.
    """

    def __init__(self, C=1.0):
        self.C = C

    def _encode(self, y):
        """Recode labels as a -1/+1 indicator matrix.

        Returns
        -------
        classes : ndarray
            Sorted unique labels.
        y_train : ndarray of int
            Shape ``(n_samples, n_classes)``, or ``(n_samples, 1)`` in the
            two-class case, where +1 marks the larger label.

        Raises
        ------
        ValueError
            If ``y`` holds fewer than two distinct labels.
        """
        y = np.asarray(y)
        classes = np.unique(y)
        if len(classes) < 2:
            # A one-class fit would leave predict() indexing classes_[1],
            # which does not exist; refuse it up front instead.
            raise ValueError(
                "LinearSVC needs samples of at least 2 classes, "
                f"got {len(classes)}"
            )
        # Column i is +1 where the sample belongs to classes[i], else -1.
        y_train = np.where(y[:, np.newaxis] == classes, 1, -1)
        if len(classes) == 2:
            # One decision function is enough to separate two classes.
            y_train = y_train[:, 1].reshape(-1, 1)
        return classes, y_train

    @staticmethod
    def _objective(w, X_aug, y, C):
        """Return ``(cost, gradient)`` for an already bias-augmented matrix.

        ``X_aug`` must carry the trailing column of ones; ``y`` holds -1/+1.
        """
        z = np.dot(X_aug, w)
        yz = y * z
        # Only samples inside or on the margin contribute to the loss.
        mask = yz <= 1
        cost = C * np.sum(np.square(1 - yz[mask])) + 0.5 * np.dot(w, w)
        # d/dw (1 - y*z)^2 = 2 * (z - y) * x, because y**2 == 1.
        grad = w + 2 * C * np.dot(X_aug[mask].T, z[mask] - y[mask])
        return cost, grad

    @staticmethod
    def _cost_grad(w, X, y, C):
        """Return ``(cost, gradient)`` of the objective at ``w``.

        ``X`` is the raw feature matrix; the bias column is appended here.
        Kept for callers that pass un-augmented data; the solver itself
        uses ``_objective`` so the augmentation is not redone per call.
        """
        X_train = np.c_[X, np.ones(X.shape[0])]
        return LinearSVC._objective(w, X_train, y, C)

    def _solve_lbfgs(self, X, y):
        """Fit one weight vector per column of ``y`` with L-BFGS-B.

        Returns
        -------
        coef : ndarray of shape (n_problems, n_features)
        intercept : ndarray of shape (n_problems,)

        Warns
        -----
        RuntimeWarning
            If the optimiser reports that it did not converge.
        """
        n_samples, n_features = X.shape
        # Build the bias-augmented matrix once; it is the same for every
        # objective evaluation and every binary sub-problem.
        X_aug = np.c_[X, np.ones(n_samples)]
        result = np.zeros((y.shape[1], n_features + 1))
        for i in range(y.shape[1]):
            res = minimize(
                fun=self._objective,
                jac=True,
                x0=np.zeros(n_features + 1),
                args=(X_aug, y[:, i], self.C),
                method='L-BFGS-B',
            )
            if not res.success:
                warnings.warn(
                    f"L-BFGS-B did not converge for problem {i}: "
                    f"{res.message}",
                    RuntimeWarning,
                    stacklevel=3,
                )
            result[i] = res.x
        # The last weight belongs to the constant-1 feature: the intercept.
        return result[:, :-1], result[:, -1]

    def fit(self, X, y):
        """Learn ``classes_``, ``coef_`` and ``intercept_``; return ``self``.

        Raises
        ------
        ValueError
            If ``y`` holds fewer than two distinct labels.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        self.classes_, y_train = self._encode(y)
        self.coef_, self.intercept_ = self._solve_lbfgs(X, y_train)
        return self

    def decision_function(self, X):
        """Return the signed scores ``X . coef_.T + intercept_``.

        The result has shape ``(n_samples,)`` for a two-class model and
        ``(n_samples, n_classes)`` otherwise.
        """
        scores = np.dot(np.asarray(X), self.coef_.T) + self.intercept_
        if scores.shape[1] == 1:
            return scores.ravel()
        return scores

    def predict(self, X):
        """Return the predicted label of every row of ``X``."""
        scores = self.decision_function(X)
        if scores.ndim == 1:
            # Two classes: a positive score selects classes_[1].
            indices = (scores > 0).astype(int)
        else:
            indices = np.argmax(scores, axis=1)
        return self.classes_[indices]


# In[3]:

# Binary check: keep iris classes 0 and 1 only and compare the fitted
# model with scikit-learn's primal (dual=False) LinearSVC.
X, y = load_iris(return_X_y=True)
X, y = X[y != 2], y[y != 2]
clf1 = LinearSVC().fit(X, y)
clf2 = skLinearSVC(dual=False).fit(X, y)
assert np.allclose(clf1.coef_, clf2.coef_, atol=1e-2)
assert np.allclose(clf1.intercept_, clf2.intercept_, atol=1e-3)
prob1 = clf1.decision_function(X)
prob2 = clf2.decision_function(X)
assert np.allclose(prob1, prob2, atol=1e-2)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)
# In[4]:

# Multiclass check: fit both implementations on the full three-class iris
# data, then compare parameters, decision scores and predicted labels.
# Tolerances are looser here than in the binary cell above.
X, y = load_iris(return_X_y=True)
clf1 = LinearSVC().fit(X, y)
clf2 = skLinearSVC(dual=False).fit(X, y)

prob1 = clf1.decision_function(X)
prob2 = clf2.decision_function(X)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)

# Learned parameters agree within the stated absolute tolerances.
assert np.allclose(clf1.coef_, clf2.coef_, atol=1e-1)
assert np.allclose(clf1.intercept_, clf2.intercept_, atol=1e-2)
# Per-class scores agree, and so do the resulting labels.
assert np.allclose(prob1, prob2, atol=1e-1)
assert np.array_equal(pred1, pred2)