"""Least-squares fitting examples.

Part 1 fits a polynomial with ``d = 6`` coefficients (degree 5) to a
two-column data set and plots the resulting predictor.

Part 2 fits an affine model to the diabetes data set, reports the mean
squared error, and evaluates the model on one extra hand-entered sample.

Both data sets are downloaded from the web when the script runs.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# Part 1: polynomial least-squares fit
# ---------------------------------------------------------------------------

data = np.loadtxt(
    'https://jonghank.github.io/ee370/files/fit_data.csv',
    delimiter=',',
)
x, y = data[:, 0], data[:, 1]

plt.figure(figsize=(6, 6), dpi=100)
plt.plot(x, y, 'o', alpha=0.5)
plt.grid()
plt.axis('square')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$x$')
plt.ylabel(r'$y$')
plt.title('Raw data')
plt.show()

n = len(x)
d = 6  # number of polynomial coefficients, i.e. powers x**0 .. x**(d-1)

# Column i of the design matrix holds x**i.
X = np.vander(x, d, increasing=True)

theta_opt = np.linalg.lstsq(X, y, rcond=None)[0]
residual = y - X @ theta_opt

print(f'Optimal theta: {theta_opt}')
print(f'RMSE: {np.linalg.norm(residual)/np.sqrt(n)}')

# Evaluate the fitted polynomial on a grid over [0, 1] for plotting.
vp = np.linspace(0, 1, 100)
X_vp = np.vander(vp, d, increasing=True)

plt.figure(figsize=(6, 6), dpi=100)
plt.plot(x, y, 'o', alpha=0.5, label='Raw data')
plt.plot(vp, np.dot(X_vp, theta_opt), label='Predictor')
plt.grid()
plt.axis('square')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$x$')
plt.ylabel(r'$y$')
plt.title("Polynomial predictor")
plt.legend()
plt.show()

# ---------------------------------------------------------------------------
# Part 2: affine least-squares model on the diabetes data
# ---------------------------------------------------------------------------

df = pd.read_csv(
    'https://web.stanford.edu/~hastie/Papers/LARS/diabetes.data',
    delimiter='\t',
)
df  # bare expression kept from the original; it has no effect in a script

# NOTE: d here counts every column of the table, including the last one,
# which is used as the target below.
n, d = df.shape

# Design matrix: a leading column of ones (offset term) followed by every
# column except the last; the last column is the response.
X = np.hstack((np.ones((n, 1)), df.values[:, :-1]))
y = df.values[:, -1]

theta_opt = np.linalg.lstsq(X, y, rcond=None)[0]

MSE = np.linalg.norm(X.dot(theta_opt) - y)**2/n
print(f'MSE: {MSE}')
theta_opt  # bare expression kept from the original; no effect in a script

# Predicted versus actual response; the black line is the diagonal y_hat = y.
plt.figure(figsize=(6, 6), dpi=100)
plt.plot(y, y, 'k')
plt.plot(y, X.dot(theta_opt), 'o', alpha=0.5)
plt.xlabel('y')
plt.ylabel(r'$\hat{y}$')
plt.axis('square')
plt.grid()
plt.show()  # shown explicitly, like the figures in part 1

# note that each feature represents
#    u1: age
#    u2: sex
#    u3: bmi body mass index
#    u4: map mean arterial pressure
#    u5: s1 (tc) : total cholesterol
#    u6: s2 (ldl): low density lipoprotein
#    u7: s3 (hdl): high density lipoprotein
#    u8: s4 (tch):
#    u9: s5 (ltg):
#   u10: s6 (glu):

# features:        age sex  bmi  map   tc   ldl   hdl tch   ltg   glu
X_JHK = np.array([41, 1, 18.3, 90, 171, 80.0, 74.9, 2, 4.75, 90.0])
X_JHK = np.hstack((1, X_JHK))  # prepend 1 to match the offset column of X

y_JHK = X_JHK.dot(theta_opt)
print(y_JHK)

# Same predicted-versus-actual plot, with the extra sample marked in red.
# No measured response is given for that sample, so it is drawn on the
# diagonal at its predicted value.
plt.figure(figsize=(6, 6), dpi=100)
plt.plot(y, y, 'k')
plt.plot(y, X.dot(theta_opt), 'o', alpha=0.5)
plt.plot(y_JHK, y_JHK, 'ro', label='JHK')
plt.xlabel('y')
plt.ylabel(r'$\hat{y}$')
plt.legend()
plt.axis('square')
plt.grid()
plt.show()  # shown explicitly, like the figures in part 1