We solve the following problem, known as ridge regression, for various values of $\gamma$: \begin{align*} \underset{x}{\text{minimize}} \quad & \|Ax-b\|_2^2 + \gamma\|x\|_2^2 \end{align*}
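The regularized objective can be folded into a single ordinary least squares problem by stacking, which is exactly what the code below exploits: \begin{align*} \|Ax-b\|_2^2 + \gamma\|x\|_2^2 = \left\| \begin{bmatrix} A \\ \sqrt{\gamma}\,I \end{bmatrix} x - \begin{bmatrix} b \\ 0 \end{bmatrix} \right\|_2^2 = \|\tilde{A}x-\tilde{b}\|_2^2 \end{align*} so $\hat{x}$ is obtained by applying a standard least squares solver to $(\tilde{A}, \tilde{b})$.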
import numpy as np
import matplotlib.pyplot as plt
# Problem data.
m = 15
n = 10
np.random.seed(2010)
A = np.random.randn(m, n)
b = np.random.randn(m)
gamma_vals = np.logspace(-3, 3)   # 50 values of gamma, from 1e-3 to 1e3
sq_penalty = []
l2_penalty = []
xhat_values = []
for gamma in gamma_vals:
    # Stack A with sqrt(gamma)*I and pad b with zeros, then solve the
    # equivalent ordinary least squares problem.
    A_tilde = np.vstack((A, np.sqrt(gamma)*np.eye(n)))
    b_tilde = np.hstack((b, np.zeros(n)))
    xhat = np.linalg.lstsq(A_tilde, b_tilde, rcond=None)[0]
    # Record both terms of the objective for the trade-off curve.
    sq_val = np.linalg.norm(A@xhat - b)**2
    l2_val = np.linalg.norm(xhat)**2
    xhat_values.append(xhat)
    sq_penalty.append(sq_val)
    l2_penalty.append(l2_val)
# Plot trade-off curve.
plt.figure(figsize=(12,9), dpi=100)
plt.plot(l2_penalty, sq_penalty)
plt.xlabel(r'$||x||_2^2$')
plt.ylabel(r'$||Ax-b||_2^2$')
plt.title('Trade-off curve for ridge regression')
plt.grid()
plt.show()
# Plot entries of x vs. gamma.
plt.figure(figsize=(12,9), dpi=100)
for i in range(n):
    plt.semilogx(gamma_vals, [xi[i] for xi in xhat_values])
plt.xlabel(r'$\gamma$')
plt.ylabel(r'$x_{i}$')
plt.title(r'Entries of $x$ vs. $\gamma$')
plt.grid()
plt.show()
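As a sanity check, the stacked solution can be compared against the closed-form ridge solution $\hat{x} = (A^TA+\gamma I)^{-1}A^Tb$ from the regularized normal equations. A minimal sketch, reusing $A$, $b$, and $n$ from above (the value of gamma here is an arbitrary choice for illustration):

gamma = 1.0
A_tilde = np.vstack((A, np.sqrt(gamma)*np.eye(n)))
b_tilde = np.hstack((b, np.zeros(n)))
xhat_stacked = np.linalg.lstsq(A_tilde, b_tilde, rcond=None)[0]
# Closed-form solution of (A^T A + gamma I) x = A^T b.
xhat_closed = np.linalg.solve(A.T@A + gamma*np.eye(n), A.T@b)
print(np.allclose(xhat_stacked, xhat_closed))   # expect True

We now apply the same regularization idea to a polynomial data fitting problem.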
import numpy as np
import matplotlib.pyplot as plt
data = np.loadtxt('https://jonghank.github.io/ee370/files/fit_data.csv',
                  delimiter=',')
x, y = data[:,0], data[:,1]
plt.figure(figsize=(6,6), dpi=100)
plt.plot(x, y, 'o', alpha=0.5)
plt.grid()
plt.axis('square')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$x$')
plt.ylabel(r'$y$')
plt.title('Raw data')
plt.show()
n = len(x)
d = 6
# Monomial feature matrix: column i holds x**i (a degree d-1 polynomial fit).
X = np.zeros((n,d))
for i in range(d):
    X[:,i] = x**i
theta_opt = np.linalg.lstsq(X, y, rcond=None)[0]
residual = y - X@theta_opt
print(f'Optimal theta: {theta_opt}')
print(f'RMSE: {np.linalg.norm(residual)/np.sqrt(n)}')
vp = np.linspace(0, 1, 100)
X_vp = np.zeros((len(vp),d))
for i in range(d):
    X_vp[:,i] = vp**i
plt.figure(figsize=(6,6), dpi=100)
plt.plot(x, y, 'o', alpha=0.5, label='Raw data')
plt.plot(vp, np.dot(X_vp, theta_opt), label='Predictor')
plt.grid()
plt.axis('square')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$x$')
plt.ylabel(r'$y$')
plt.title("Polynomial predictor")
plt.legend()
plt.show()
Optimal theta: [-1.08775601e-01 1.51579304e+01 -9.49190641e+01 2.39806312e+02 -2.64399727e+02 1.05545151e+02]
RMSE: 0.053104652653532106
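As an aside, the monomial feature matrix can also be built without the explicit loop, using NumPy's built-in Vandermonde constructor; the following one-liner is equivalent to the loop above:

X = np.vander(x, d, increasing=True)   # columns x**0, x**1, ..., x**(d-1)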
n = len(x)
d = 51
# Same monomial features, now up to degree 50.
X = np.zeros((n,d))
for i in range(d):
    X[:,i] = x**i
theta_opt = np.linalg.lstsq(X, y, rcond=None)[0]
residual = y - X@theta_opt
print(f'Optimal theta: {theta_opt}')
print(f'RMSE: {np.linalg.norm(residual)/np.sqrt(n)}')
vp = np.linspace(0, 1, 100)
X_vp = np.zeros((len(vp),d))
for i in range(d):
    X_vp[:,i] = vp**i
plt.figure(figsize=(6,6), dpi=100)
plt.plot(x, y, 'o', alpha=0.5, label='Raw data')
plt.plot(vp, np.dot(X_vp, theta_opt), label='Predictor')
plt.grid()
plt.axis('square')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$x$')
plt.ylabel(r'$y$')
plt.title("Polynomial predictor")
plt.legend()
plt.show()
Optimal theta: [ 6.43577083e-02 -2.02108339e+01 1.99729935e+03 -5.91693997e+04 9.89312386e+05 -1.06502731e+07 7.78589027e+07 -3.97022448e+08 1.42954383e+09 -3.63239559e+09 6.38255976e+09 -7.20782990e+09 3.69199237e+09 2.52178189e+09 -4.79928710e+09 -7.88394758e+08 4.99997756e+09 1.77119699e+09 -4.71617994e+09 -4.75028673e+09 1.59952814e+09 6.56862088e+09 4.67921762e+09 -2.10138031e+09 -7.38656904e+09 -6.56885634e+09 -3.36901418e+08 6.50540018e+09 8.96942492e+09 5.33092230e+09 -2.03616334e+09 -8.45725209e+09 -9.94471097e+09 -5.48470337e+09 2.44073294e+09 9.35636397e+09 1.12389225e+10 6.74997616e+09 -1.91536112e+09 -1.00367233e+10 -1.27424546e+10 -7.82265002e+09 2.57424728e+09 1.22943410e+10 1.41944210e+10 4.97398563e+09 -1.03789008e+10 -1.80222230e+10 -3.60841369e+09 2.44566204e+10 -9.66325817e+09]
RMSE: 0.04485132736350962
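The coefficients on the order of $10^{10}$ are a symptom of the ill-conditioning of the degree-50 monomial basis on $[0,1]$: the training RMSE is slightly lower than for $d=6$, but the fitted polynomial typically oscillates wildly between data points. A quick diagnostic, reusing the X built above:

# The condition number of the monomial feature matrix grows rapidly with
# the degree; for d = 51 it is far too large for reliable float64 solves.
print(f'cond(X) = {np.linalg.cond(X):.3e}')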
n = len(x)
d = 51
# Monomial features up to degree 50, as before.
X = np.zeros((n,d))
for i in range(d):
    X[:,i] = x**i
# Ridge regularization via stacking, with a small gamma.
gamma = 1.0e-7
X_tilde = np.vstack((X, np.sqrt(gamma)*np.eye(d)))
y_tilde = np.hstack((y, np.zeros(d)))
theta_opt = np.linalg.lstsq(X_tilde, y_tilde, rcond=None)[0]
residual = y - X@theta_opt
print(f'Optimal theta: {theta_opt}')
print(f'RMSE: {np.linalg.norm(residual)/np.sqrt(n)}')
vp = np.linspace(0, 1, 100)
X_vp = np.zeros((len(vp),d))
for i in range(d):
    X_vp[:,i] = vp**i
plt.figure(figsize=(6,6), dpi=100)
plt.plot(x, y, 'o', alpha=0.5, label='Raw data')
plt.plot(vp, np.dot(X_vp, theta_opt), label='Predictor')
plt.grid()
plt.axis('square')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel(r'$x$')
plt.ylabel(r'$y$')
plt.title("Polynomial predictor")
plt.legend()
plt.show()
Optimal theta: [-8.03641759e-02 1.35126051e+01 -7.50531024e+01 1.44630037e+02 -5.75202141e+01 -7.11956321e+01 -8.19583420e+00 3.36832888e+01 3.83447947e+01 2.15979440e+01 8.12066580e-01 -1.41952536e+01 -2.06544477e+01 -1.98506646e+01 -1.45267726e+01 -7.35336407e+00 -2.91664959e-01 5.50506086e+00 9.54310662e+00 1.17728153e+01 1.23946369e+01 1.17178075e+01 1.00726077e+01 7.76482650e+00 5.05845940e+00 2.17452189e+00 -7.02854927e-01 -3.41865938e+00 -5.84060488e+00 -7.85534625e+00 -9.36742526e+00 -1.03004597e+01 -1.05998416e+01 -1.02361836e+01 -9.20884707e+00 -7.54903597e+00 -5.32210261e+00 -2.62885922e+00 3.94184393e-01 3.57564076e+00 6.71103702e+00 9.56544890e+00 1.18768213e+01 1.33598110e+01 1.37099886e+01 1.26082492e+01 9.72529275e+00 4.72605832e+00 -2.72598877e+00 -1.29647955e+01 -2.63179476e+01]
RMSE: 0.051647907396211355
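With even a tiny $\gamma$, the coefficients shrink from order $10^{10}$ to order $10^2$ at the cost of a slightly larger training RMSE. In practice $\gamma$ is chosen by validation rather than by eye; below is a minimal holdout sketch (the 80/20 split, the seed, and the candidate grid are illustrative choices, not part of the original example):

rng = np.random.default_rng(0)            # hypothetical fixed seed
idx = rng.permutation(n)
train, test = idx[:int(0.8*n)], idx[int(0.8*n):]
best_gamma, best_rmse = None, np.inf
for gamma in np.logspace(-10, 0, 21):     # illustrative candidate grid
    # Ridge fit on the training rows only, via the same stacking trick.
    X_tilde = np.vstack((X[train], np.sqrt(gamma)*np.eye(d)))
    y_tilde = np.hstack((y[train], np.zeros(d)))
    theta = np.linalg.lstsq(X_tilde, y_tilde, rcond=None)[0]
    # Score on the held-out rows.
    rmse = np.linalg.norm(y[test] - X[test]@theta)/np.sqrt(len(test))
    if rmse < best_rmse:
        best_gamma, best_rmse = gamma, rmse
print(f'best gamma: {best_gamma:.1e}, holdout RMSE: {best_rmse:.4f}')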