This notebook contains an example of a regression problem, using the Boston house-price dataset.
# Setup: load the Boston house-price data and report its dimensions.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
# NOTE(review): load_boston was deprecated and then removed in scikit-learn 1.2,
# so this cell only runs on older versions — confirm the installed version.
np.set_printoptions(suppress=True)  # print floats without scientific notation
data = load_boston(return_X_y=False)  # bunch exposing data/target/feature_names/DESCR
X = data['data']            # feature matrix, one row per example
y = data['target']          # regression target (house prices per the notebook intro)
feature_names = data['feature_names']
DSC = data['DESCR']         # long-form dataset description
#print(DSC)
print('Number of features:',X.shape[1])
print('Number of examples:',X.shape[0])
Number of features: 13 Number of examples: 506
# Print one training example (index n) as a feature-name/value table,
# followed by its regression target.
n = 0
# FIX(idiom): pair names with values via zip instead of a C-style index loop;
# feature_names and X[n] have the same length (13), so output is unchanged.
for name, value in zip(feature_names, X[n]):
    print(name, value, sep='\t')
print('--------')
print('target : ', y[n])
CRIM 0.00632 ZN 18.0 INDUS 2.31 CHAS 0.0 NOX 0.538 RM 6.575 AGE 65.2 DIS 4.09 RAD 1.0 TAX 296.0 PTRATIO 15.3 B 396.9 LSTAT 4.98 -------- target : 24.0
# Reduce X to two columns — indices 5 (RM) and 12 (LSTAT) per the feature
# table printed above — and show the first ten (x1, x2) -> y rows.
X = X[:, [5, 12]]
print(' x1 \t x2 \t| y')
print('_' * 30)
for (x1_val, x2_val), target in zip(X[:10], y[:10]):
    print(x1_val, '\t', x2_val, '\t|', target)
x1 x2 | y ______________________________ 6.575 4.98 | 24.0 6.421 9.14 | 21.6 7.185 4.03 | 34.7 6.998 2.94 | 33.4 7.147 5.33 | 36.2 6.43 5.21 | 28.7 6.012 12.43 | 22.9 6.172 19.15 | 27.1 5.631 29.93 | 16.5 6.004 17.1 | 18.9
# Scatter plot of the first selected feature (x1) against the target y.
plt.plot(X[:,0],y,'.',alpha=0.5)  # dots, semi-transparent to show density
plt.xlabel('input: x')
plt.ylabel('output: y')
plt.show()
#y = x1w1 + x2w2 + ..
$$\min_w \|y - Xw\|_2^2$$
$$w = (X^{T}X)^{-1}X^{T}y$$
# Rebuild X and y from the original bunch: same two feature columns (5 and 12).
X = data['data'][:,[5,12]]
y = data['target']
print(X.shape, y.shape)
(506, 2) (506,)
# Prepend a column of ones so the intercept folds into the weight vector.
bias_column = np.ones((len(X), 1))
X1 = np.hstack([bias_column, X])
X1.shape, y.shape
((506, 3), (506,))
X1
array([[1. , 6.575, 4.98 ], [1. , 6.421, 9.14 ], [1. , 7.185, 4.03 ], ..., [1. , 6.976, 5.64 ], [1. , 6.794, 6.48 ], [1. , 6.03 , 7.88 ]])
# Closed-form normal equations: w = (X^T X)^{-1} X^T y.
# BUG FIX: the original wrote (X1.T@X1)**(-1), which is NumPy's ELEMENTWISE
# reciprocal, not a matrix inverse — that is why its result disagreed with the
# pinv solution in the next cell. Use a real matrix inverse instead.
np.linalg.inv(X1.T @ X1) @ X1.T @ y
array([64.49064447, 10.33577397, 4.81400466])
np.linalg.pinv(X1)@y  # least-squares weights via the Moore-Penrose pseudoinverse
array([-1.35827281, 5.09478798, -0.64235833])
# Store the least-squares weight vector: [bias, w1, w2].
w = np.linalg.pinv(X1)@y
w
array([-1.35827281, 5.09478798, -0.64235833])
X1
array([[1. , 6.575, 4.98 ], [1. , 6.421, 9.14 ], [1. , 7.185, 4.03 ], ..., [1. , 6.976, 5.64 ], [1. , 6.794, 6.48 ], [1. , 6.03 , 7.88 ]])
w
array([-1.35827281, 5.09478798, -0.64235833])
y[0]
24.0
# Predictions of the linear model on all examples; inspect the first one.
yp = X1@w
yp[0]
28.941013680602513
np.mean((y - yp)**2)  # mean squared error (MSE) on the training data
30.51246877729947
np.mean(np.abs(y - yp))  # mean absolute error (MAE) on the training data
3.952580067119268
# Repeat the same fit with scikit-learn instead of the manual pseudoinverse.
from sklearn import linear_model
X = data['data'][:,[5,12]]  # same two feature columns as before
y = data['target']
print(X.shape, y.shape)
(506, 2) (506,)
# Ordinary least squares; fit_intercept=True by default (see the printed
# repr below), so no explicit ones column is needed here.
model = linear_model.LinearRegression()
#help(model)
model.fit(X,y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
model.intercept_, model.coef_  # matches the pinv solution computed above
(-1.3582728118744818, array([ 5.09478798, -0.64235833]))
# Training-set MSE for the sklearn model (same value as the manual solution).
yp = model.predict(X)
np.mean((y-yp)**2)
30.51246877729947
np.mean(np.abs((y-yp)))  # training-set MAE
3.952580067119271
# Now use all 13 features instead of just two.
X = data['data']
y = data['target']
print(X.shape, y.shape)
(506, 13) (506,)
# Fit ordinary least squares on the full feature set.
model = linear_model.LinearRegression()
model.fit(X,y)
model.intercept_, model.coef_
(36.45948838509001, array([ -0.10801136, 0.04642046, 0.02055863, 2.68673382, -17.76661123, 3.80986521, 0.00069222, -1.47556685, 0.30604948, -0.01233459, -0.95274723, 0.00931168, -0.52475838]))
# Training-set MSE with all features (lower than the two-feature model's).
yp = model.predict(X)
np.mean((y-yp)**2)
21.894831181729206
np.mean(np.abs((y-yp)))  # training-set MAE with all features
3.270862810900317
# Hold out 30% of the data as a test set.
from sklearn.model_selection import train_test_split
# FIX: pin random_state so the split — and every error number computed from it
# below — is reproducible across notebook runs.
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3, random_state=0)
Xt.shape, yt.shape, Xs.shape, ys.shape
((354, 13), (354,), (152, 13), (152,))
# Fit on the training split only.
model = linear_model.LinearRegression()
model.fit(Xt,yt)
#model.intercept_, model.coef_
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
# Predict on both splits to compare training vs. test error.
ytp = model.predict(Xt)
ysp = model.predict(Xs)
np.mean((yt-ytp)**2)  # training MSE
22.039530375706953
np.mean((ys-ysp)**2)  # test MSE (slightly higher than the training MSE above)
22.550610525430503
yt[0]
50.0
ytp[0]
40.489586293051204
ys[0]
24.3
ysp[0]
29.198709476914402