Sascha Spors, Professorship Signal Theory and Digital Signal Processing, Institute of Communications Engineering (INT), Faculty of Computer Science and Electrical Engineering (IEF), University of Rostock, Germany
Winter Semester 2023/24 (Master Course #24512)
Feel free to contact lecturer frank.schultz@uni-rostock.de
with a matrix $\textbf{X}$ that has full column rank but a rather high condition number.
Using toy data, we calculate and plot the L-curve and find an optimal regularization parameter $\lambda_{opt}$, i.e. the point where the L-curve has maximum curvature. For this $\lambda_{opt}$ we obtain a suitable trade-off between the prediction error and the magnitude of the model parameter vector. The L-curve is typically plotted log-log because of the rather large range of the numbers involved.
import numpy as np
import matplotlib.pyplot as plt
from numpy.linalg import inv, matrix_rank, cond
from scipy.linalg import svd, diagsvd
# --- toy data: two almost collinear features -> ill-conditioned X ---
rng = np.random.default_rng(1234)
mu = np.array([0, 0])
# off-diagonal correlation very close to 1 makes the two columns of X
# nearly linearly dependent, i.e. full rank but high condition number
rho = 1 - 1e-4
cov = [[1, rho], [rho, 1]]
M, N = 2**7, mu.shape[0]
X = rng.multivariate_normal(mu, cov, size=M, method='cholesky')
# SVD bookkeeping: X = U S Vh, with Hermitian transposes for later reuse
U, s, Vh = svd(X)
Uh, V = U.T.conj(), Vh.T.conj()
S = diagsvd(s, M, N)
I = np.eye(N)
# noisy targets generated from true parameters theta = (2, 1)
n = rng.standard_normal(size=M)
y = (2 * X[:, 0] + X[:, 1] + n)[:, None]
# sanity check (notebook cell output): full column rank, large condition
matrix_rank(X), s.shape[0], cond(X), s[0] / s[-1]
# Sweep lambda over a log grid; for each lambda record the squared
# residual norm (x-axis of the L-curve) and the squared parameter norm
# (y-axis of the L-curve).
lmb = np.logspace(-2.2, 1.9, 2**7)
lcurve = np.zeros([2, lmb.shape[0]])
for cnt, val in enumerate(lmb):
    # regularized (Tikhonov) left inverse via SVD:
    # V (S^T S + lambda I)^-1 S^T U^H
    X_reg_left_inv = V @ inv(S.T @ S + val * I) @ S.T @ Uh
    theta_hat = X_reg_left_inv @ y
    residual = y - X @ theta_hat
    lcurve[0, cnt] = residual.T.conj() @ residual
    lcurve[1, cnt] = theta_hat.T.conj() @ theta_hat
# The L-curve corner is the point of MAXIMUM curvature of the curve as
# plotted, i.e. in log-log coordinates. The previous heuristic
# argmin(|d2/di2 lcurve[1,:]|) on the raw y-values finds an inflection
# (a straight region), not the corner, so compute the parametric
# curvature kappa = (x'y'' - y'x'') / (x'^2 + y'^2)^(3/2) of the
# log-log curve and take the index of its largest magnitude.
log_x = np.log10(lcurve[0, :])
log_y = np.log10(lcurve[1, :])
dx, dy = np.gradient(log_x), np.gradient(log_y)
ddx, ddy = np.gradient(dx), np.gradient(dy)
curvature = (dx * ddy - dy * ddx) / (dx**2 + dy**2) ** 1.5
lmb_opt_idx = np.argmax(np.abs(curvature))
# Render the L-curve in log-log coordinates; annotate the two endpoints
# (lambda -> 0 and lambda -> infinity) and mark the chosen optimum red.
err2 = lcurve[0, :]
norm2 = lcurve[1, :]
plt.loglog(err2, norm2, 'o:', ms=2)
plt.loglog(err2[lmb_opt_idx], norm2[lmb_opt_idx], 'C3o')
plt.text(err2[0], norm2[0], r'$\lambda\rightarrow 0$')
plt.text(err2[-1], norm2[-1], r'$\lambda\rightarrow \infty$')
plt.text(err2[lmb_opt_idx], norm2[lmb_opt_idx],
         r'$\lambda_\mathrm{opt}$')
plt.xlabel(r'$||\bf{y} - \bf{X \hat{\theta}}||_2^2$')
plt.ylabel(r'$||\bf{\hat{\theta}}||_2^2$')
plt.title(r'L-Curve for $\hat{\bf{\theta}}(\lambda)$')
plt.xlim(100, 200)
plt.ylim(1, 100)
plt.grid(True, which='both', ls=':', lw=0.5)
def _print_tikz_coords(indices):
    """Print one tikz 'coordinates {...};' list of log10 L-curve points.

    indices: iterable of column indices into the module-level `lcurve`
    array; each point is emitted as
    (log10(residual norm^2), log10(parameter norm^2)),
    matching the axes of the log-log L-curve plot. No trailing newline,
    so the output can be pasted directly into a tikzpicture.
    """
    print('coordinates {', end='')
    for i in indices:
        print('(', np.log10(lcurve[0, i]), ',', np.log10(
            lcurve[1, i]), ')', sep='', end='')
    print('};', end='')


# l-curve for tikzpicture in ddasp_exercise_slides.tex
_print_tikz_coords(range(lmb.shape[0]))
# l-curve values of opt lambda for tikzpicture in ddasp_exercise_slides.tex
_print_tikz_coords([lmb_opt_idx])
# l-curve values for small lambda for tikzpicture in ddasp_exercise_slides.tex
_print_tikz_coords([0])
# l-curve values for large lambda for tikzpicture in ddasp_exercise_slides.tex
_print_tikz_coords([-1])