#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
from scipy.linalg import inv, lstsq, svd
from scipy.optimize import minimize
# load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so running this notebook as-is requires scikit-learn < 1.2
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression as skLinearRegression


# ### Implementation 1
# - based on scipy.linalg.lstsq
# - prepend a dummy feature (a column of ones) so the intercept is fitted together with the coefficients

# In[2]:


class LinearRegression():
    def fit(self, X, y):
        # the coefficient of the all-ones column is the intercept
        X_train = np.hstack((np.ones((X.shape[0], 1)), X))
        coef, _, _, _ = lstsq(X_train, y)
        self.coef_ = coef[1:]
        self.intercept_ = coef[0]
        return self

    def predict(self, X):
        y_pred = np.dot(X, self.coef_) + self.intercept_
        return y_pred


# In[3]:


X, y = load_boston(return_X_y=True)
reg1 = LinearRegression().fit(X, y)
reg2 = skLinearRegression().fit(X, y)
assert np.allclose(reg1.coef_, reg2.coef_)
assert np.isclose(reg1.intercept_, reg2.intercept_)
pred1 = reg1.predict(X)
pred2 = reg2.predict(X)
assert np.allclose(pred1, pred2)


# ### Implementation 2
# - based on scipy.linalg.lstsq
# - center the dataset and calculate the intercept manually
# - similar to what scikit-learn does internally

# In[4]:


class LinearRegression():
    def fit(self, X, y):
        X_mean = np.mean(X, axis=0)
        y_mean = np.mean(y)
        X_train = X - X_mean
        y_train = y - y_mean
        self.coef_, _, _, _ = lstsq(X_train, y_train)
        # recover the intercept from the means of the uncentered data
        self.intercept_ = y_mean - np.dot(X_mean, self.coef_)
        return self

    def predict(self, X):
        y_pred = np.dot(X, self.coef_) + self.intercept_
        return y_pred


# In[5]:


X, y = load_boston(return_X_y=True)
reg1 = LinearRegression().fit(X, y)
reg2 = skLinearRegression().fit(X, y)
assert np.allclose(reg1.coef_, reg2.coef_)
assert np.isclose(reg1.intercept_, reg2.intercept_)
pred1 = reg1.predict(X)
pred2 = reg2.predict(X)
assert np.allclose(pred1, pred2)


# ### Implementation 3
# - based on the normal equations
# - center the dataset and calculate the intercept manually

# In[6]:


class LinearRegression():
    def fit(self, X, y):
        X_mean = np.mean(X, axis=0)
        y_mean = np.mean(y)
        X_train = X - X_mean
        y_train = y - y_mean
        # solve (X^T X) coef = X^T y; explicit inversion works on this small,
        # well-conditioned problem, though solving the linear system directly
        # is numerically safer
        A = np.dot(X_train.T, X_train)
        Xy = np.dot(X_train.T, y_train)
        self.coef_ = np.dot(inv(A), Xy).ravel()
        self.intercept_ = y_mean - np.dot(X_mean, self.coef_)
        return self

    def predict(self, X):
        y_pred = np.dot(X, self.coef_) + self.intercept_
        return y_pred


# In[7]:


X, y = load_boston(return_X_y=True)
reg1 = LinearRegression().fit(X, y)
reg2 = skLinearRegression().fit(X, y)
assert np.allclose(reg1.coef_, reg2.coef_)
assert np.isclose(reg1.intercept_, reg2.intercept_)
pred1 = reg1.predict(X)
pred2 = reg2.predict(X)
assert np.allclose(pred1, pred2)


# ### Implementation 4
# - based on the singular value decomposition (pseudoinverse)
# - center the dataset and calculate the intercept manually

# In[8]:


class LinearRegression():
    def fit(self, X, y):
        X_mean = np.mean(X, axis=0)
        y_mean = np.mean(y)
        X_train = X - X_mean
        y_train = y - y_mean
        U, S, VT = svd(X_train, full_matrices=False)
        # coef = V diag(1/S) U^T y_train, i.e. the pseudoinverse of X_train
        # applied to the centered targets
        self.coef_ = np.dot(np.dot(np.dot(VT.T, np.diag(1 / S)), U.T), y_train).ravel()
        self.intercept_ = y_mean - np.dot(X_mean, self.coef_)
        return self

    def predict(self, X):
        y_pred = np.dot(X, self.coef_) + self.intercept_
        return y_pred


# In[9]:


X, y = load_boston(return_X_y=True)
reg1 = LinearRegression().fit(X, y)
reg2 = skLinearRegression().fit(X, y)
assert np.allclose(reg1.coef_, reg2.coef_)
assert np.isclose(reg1.intercept_, reg2.intercept_)
pred1 = reg1.predict(X)
pred2 = reg2.predict(X)
assert np.allclose(pred1, pred2)


# ### Implementation 5
# - based on gradient descent (L-BFGS-B via scipy.optimize.minimize)
# - center the dataset and calculate the intercept manually
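# For reference, `cost_gradient` below implements the ordinary least squares
# cost and its analytic gradient on the centered data, with $m$ the number of
# samples:
#
# $$J(\theta) = \frac{1}{2m}\lVert X\theta - y\rVert^2, \qquad
# \nabla J(\theta) = \frac{1}{m} X^T (X\theta - y)$$
#
# Because `jac=True` is passed to `minimize`, the returned pair is treated as
# (cost, gradient), so no numerical differentiation is needed.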
# In[10]:


def cost_gradient(theta, X, y):
    # residual vector h = X @ theta - y
    h = np.dot(X, theta) - y
    J = np.dot(h, h) / (2 * X.shape[0])
    grad = np.dot(X.T, h) / X.shape[0]
    return J, grad


class LinearRegression():
    def fit(self, X, y):
        X_mean = np.mean(X, axis=0)
        y_mean = np.mean(y)
        X_train = X - X_mean
        y_train = y - y_mean
        theta = np.zeros(X_train.shape[1])
        res = minimize(fun=cost_gradient, jac=True, x0=theta,
                       args=(X_train, y_train), method='L-BFGS-B')
        self.coef_ = res.x
        self.intercept_ = y_mean - np.dot(X_mean, self.coef_)
        return self

    def predict(self, X):
        y_pred = np.dot(X, self.coef_) + self.intercept_
        return y_pred


# In[11]:


X, y = load_boston(return_X_y=True)
# standardize the features so the optimizer converges quickly
X = (X - X.mean(axis=0)) / X.std(axis=0)
reg1 = LinearRegression().fit(X, y)
reg2 = skLinearRegression().fit(X, y)
# looser tolerances here: the iterative solver only matches the exact
# least squares solution up to its convergence tolerance
assert np.allclose(reg1.coef_, reg2.coef_, atol=1e-4)
assert np.isclose(reg1.intercept_, reg2.intercept_)
pred1 = reg1.predict(X)
pred2 = reg2.predict(X)
assert np.allclose(pred1, pred2, atol=1e-3)
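# ### Cross-check: pseudoinverse
# A minimal extra sanity check, reusing the standardized `X` and `y` from the
# previous cell: the same coefficients fall out of the Moore-Penrose
# pseudoinverse of the centered data in one step, which is what Implementation 4
# builds by hand from the SVD factors.

# In[12]:


coef_pinv = np.dot(np.linalg.pinv(X - X.mean(axis=0)), y - np.mean(y))
assert np.allclose(coef_pinv, reg2.coef_)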