Sascha Spors, Professorship Signal Theory and Digital Signal Processing, Institute of Communications Engineering (INT), Faculty of Computer Science and Electrical Engineering (IEF), University of Rostock, Germany
Winter Semester 2024/25 (Master Course #24512)
Feel free to contact the lecturer: frank.schultz@uni-rostock.de
When no assumptions about an underlying data-generating process are made, model parameters are obtained by pure linear algebra. Hence, we link back to the very same playground with the following simple toy example.
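For tall/thin $X$ with full column rank, the least squares solution has the closed form given by the normal equations, $\hat{\theta} = (X^T X)^{-1} X^T y = X^+ y$, with $X^+$ denoting the Moore-Penrose (left) inverse; the cells below verify this numerically, also via the SVD.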
import matplotlib.pyplot as plt
import numpy as np
from scipy.linalg import svd, diagsvd, inv, pinv, norm
from numpy.linalg import matrix_rank
np.set_printoptions(precision=3,
                    floatmode='maxprec',
                    suppress=True)
X = np.array([[1, 1],
              [1, 2],
              [1, 3],
              [1, 4]])
print(X, X.shape, matrix_rank(X))
y_col = np.array([[1],
                  [3],
                  [5],
                  [7]])
print(y_col, y_col.shape)
U, s, Vh = svd(X)  # full SVD: U is (4,4), Vh is (2,2)
V = Vh.T
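Quick sanity check: the SVD factors reproduce X.
np.allclose(U @ diagsvd(s, *X.shape) @ Vh, X)  # X = U S V^T -> True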
y_left_null = (-U[:,2]+U[:,3])[:, None]  # U[:,2], U[:,3] span the left null space; [:, None] makes it a (4,1) array
print(y_left_null, y_left_null.shape)
y = y_col + y_left_null
print(y, y.shape)
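Sanity check: y_left_null is orthogonal to all columns of X, i.e. it indeed lies in the left null space.
X.T @ y_left_null  # ≈ zero vector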
M, N = X.shape
print(M, N)
y_col.T @ y_left_null # column space is ortho to left null space
# magnitudes of vectors
np.sqrt(y_col.T @ y_col), np.sqrt(y_left_null.T @ y_left_null)
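Since the two components are orthogonal, their squared magnitudes add up (Pythagoras in R^4):
np.allclose(norm(y)**2, norm(y_col)**2 + norm(y_left_null)**2)  # True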
X.T @ X # this is full rank -> invertible
inv(X.T @ X)
# left inverse for tall/thin, full column rank X
Xli = inv(X.T @ X) @ X.T
Xli
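Side note: forming X.T @ X squares the condition number, which is why the SVD-based routes below are numerically preferable for ill-conditioned X; our toy X is benign.
np.linalg.cond(X), np.linalg.cond(X.T @ X)  # cond(X.T @ X) == cond(X)**2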
# left inverse via SVD option 1 -> invert singular values & reverse space mapping: U -> V
S = diagsvd(s, M, N)
Sli = inv(S.T @ S) @ S.T
Xli_svd_1 = V @ Sli @ U.T
# left inverse via SVD option 2 -> invert singular values & reverse space mapping: U -> V
# writing s / s**2 = 1 / s makes the inversion of the singular values easier to see
Xli_svd_2 = V @ diagsvd(s / s**2, N, M) @ U.T
np.allclose(Xli_svd_2, Xli_svd_1), np.allclose(Xli, Xli_svd_1), np.allclose(Xli, pinv(X))
theta_hat = Xli @ y  # rarely called that in this context, but: this operation actually trains the model
theta_hat # fitted / trained model parameters
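Cross-check: NumPy's dedicated least squares solver returns the same parameters.
np.allclose(np.linalg.lstsq(X, y, rcond=None)[0], theta_hat)  # True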
Xli @ y_col # we get same theta_hat if using only column space stuff of y
Xli @ y_left_null # this must yield zero, as X cannot bring left null to row space
y_hat = X @ theta_hat
y_hat # == y_col
e = y - y_hat # e == y_left_null
e, e.T @ e
# recap: y_hat = y_col, e = y_left_null
# y = y_col + y_left_null = y_hat + e
# hence
y_hat.T @ e # column space is ortho to left null space
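Equivalently, the residual is orthogonal to every column of X, which is exactly the normal equations X.T @ (y - X @ theta_hat) = 0:
X.T @ e  # ≈ zero vector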
# projection matrices:
P_col = X @ Xli
P_col, P_col @ y, y_col
# check P_col projection in terms of SVD
S @ Sli, np.allclose(U @ (S @ Sli) @ U.T, P_col)
P_left_null = np.eye(M) - P_col
P_left_null, P_left_null @ y, e
# check P_left_null projection in terms of SVD
np.eye(M) - S @ Sli, np.allclose(U @ (np.eye(M) - S @ Sli) @ U.T, P_left_null)
P_row = Xli @ X # == always identity matrix for full column rank X
P_row, P_row @ theta_hat
# check P_row projection in terms of SVD
Sli @ S, np.allclose(V @ (Sli @ S) @ V.T, P_row)
P_null = np.eye(N) - P_row # == always zero matrix for full column rank X
P_null # null space is spanned only by zero vector
# check P_null projection in terms of SVD
np.allclose(V @ (np.eye(N) - Sli @ S) @ V.T, P_null)
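All four matrices are orthogonal projectors, so each must be idempotent (P @ P == P) and symmetric (P.T == P):
for P in (P_col, P_left_null, P_row, P_null):
    print(np.allclose(P @ P, P), np.allclose(P.T, P))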
plt.figure(figsize=(8,4))
# residuals
for m in range(M):
    plt.plot([X[m, 1], X[m, 1]],
             [y[m, 0], y_col[m, 0]], lw=3, label='error '+str(m+1))
# data
plt.plot(X[:, 1], y, 'C4x',
         ms=10, mew=3,
         label='data')
# fitted line
plt.plot(X[:,1], theta_hat[0] * X[:,0] + theta_hat[1] * X[:,1], 'k', label='least squares fit (interpolation)')
x = np.linspace(0, 1, 10)
plt.plot(x, theta_hat[0] + theta_hat[1] * x, 'C7:', label='least squares fit (extrapolation)')
x = np.linspace(4, 5, 10)
plt.plot(x, theta_hat[0] + theta_hat[1] * x, 'C7:')
plt.xticks(np.arange(6))
plt.yticks(np.arange(11)-1)
plt.xlim(0, 5)
plt.ylim(-1, 9)
plt.xlabel('feature x1')
plt.ylabel('y')
plt.title(r'minimizing the sum of squared errors yields $\hat{\theta}=[-1,2]^T$ -> intercept: -1, slope: +2')
plt.legend()
plt.grid(True)
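Since y_col = X @ [-1, 2]^T holds exactly, the fit recovers these parameters up to numerical precision:
np.allclose(theta_hat, np.array([[-1], [2]]))  # True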