#!/usr/bin/env python
# coding: utf-8

# Sascha Spors,
# Professorship Signal Theory and Digital Signal Processing,
# Institute of Communications Engineering (INT),
# Faculty of Computer Science and Electrical Engineering (IEF),
# University of Rostock,
# Germany
#
# # Data Driven Audio Signal Processing - A Tutorial with Computational Examples
#
# Winter Semester 2024/25 (Master Course #24512)
#
# - lecture: https://github.com/spatialaudio/data-driven-audio-signal-processing-lecture
# - tutorial: https://github.com/spatialaudio/data-driven-audio-signal-processing-exercise
#
# Feel free to contact lecturer frank.schultz@uni-rostock.de
#
# # Exercise 5: Linear Regression Toy Example
#
# ## Objectives
#
# When no assumption on an underlying data generation process is made, pure linear algebra is used to solve for the model parameters. Hence, we should link
# - the linear regression model (simple line fit)
# - the left inverse of a tall/thin, full column rank (feature) matrix
# - (residual) least squares
# - the projection matrices onto the 4 fundamental subspaces
#
# to the very same playground using the following simple toy example.

# In[ ]:


import matplotlib.pyplot as plt
import numpy as np
from scipy.linalg import svd, diagsvd, inv, pinv, norm
from numpy.linalg import matrix_rank

np.set_printoptions(precision=3, floatmode='maxprec', suppress=True)


# In[ ]:


X = np.array([[1, 1], [1, 2], [1, 3], [1, 4]])
print(X, X.shape, matrix_rank(X))

y_col = np.array([[1], [3], [5], [7]])
print(y_col, y_col.shape)

[U, s, Vh] = svd(X)
V = Vh.T
y_left_null = (-U[:, 2] + U[:, 3])[:, None]  # [:, None] makes it a (4, 1) array
print(y_left_null, y_left_null.shape)

y = y_col + y_left_null
print(y, y.shape)

M, N = X.shape
print(M, N)


# In[ ]:


y_col.T @ y_left_null  # column space is orthogonal to left null space


# In[ ]:


# magnitudes of the vectors
np.sqrt(y_col.T @ y_col), np.sqrt(y_left_null.T @ y_left_null)


# In[ ]:


X.T @ X  # this is full rank -> invertible


# In[ ]:


inv(X.T @ X)


# In[ ]:


# left inverse for a tall/thin, full column rank X
Xli = inv(X.T @ X) @ X.T
Xli


# In[ ]:


# left inverse via SVD, option 1 -> invert singular values & reverse the space mapping: U -> V
S = diagsvd(s, M, N)
Sli = inv(S.T @ S) @ S.T
Xli_svd_1 = V @ Sli @ U.T


# In[ ]:


# left inverse via SVD, option 2 -> invert singular values & reverse the space mapping: U -> V
# s / s**2 = 1 / s might be nicer to see here
Xli_svd_2 = V @ diagsvd(s / s**2, N, M) @ U.T
np.allclose(Xli_svd_2, Xli_svd_1), np.allclose(Xli, Xli_svd_1), np.allclose(Xli, pinv(X))


# In[ ]:


# it is rarely called that way in this context, but: we actually train a model with this operation
theta_hat = Xli @ y
theta_hat  # fitted / trained model parameters


# In[ ]:


Xli @ y_col  # we get the same theta_hat when using only the column space part of y


# In[ ]:


Xli @ y_left_null  # this must yield zero, as the left inverse maps the left null space to the zero vector


# In[ ]:


y_hat = X @ theta_hat
y_hat  # == y_col


# In[ ]:


e = y - y_hat  # e == y_left_null
e, e.T @ e


# In[ ]:


# recap: y_hat = y_col, e = y_left_null
# y = y_col + y_left_null = y_hat + e, hence:
y_hat.T @ e  # column space is orthogonal to left null space


# In[ ]:


# projection matrices:
P_col = X @ Xli
P_col, P_col @ y, y_col


# In[ ]:


# check the P_col projection in terms of the SVD
S @ Sli, np.allclose(U @ (S @ Sli) @ U.T, P_col)


# In[ ]:


P_left_null = np.eye(M) - P_col
P_left_null, P_left_null @ y, e


# In[ ]:


# check the P_left_null projection in terms of the SVD
np.eye(M) - S @ Sli, np.allclose(U @ (np.eye(M) - S @ Sli) @ U.T, P_left_null)
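# The projectors above should be symmetric and idempotent, `P_col` and `P_left_null` should sum to the identity, and `theta_hat` should agree with NumPy's least squares solver. A minimal sanity check sketch, assuming the variables defined in the cells above:

# In[ ]:


# sanity checks (sketch): orthogonal projectors are symmetric and idempotent,
# P_col and P_left_null sum to the identity, and the left-inverse solution
# matches the least squares solution from np.linalg.lstsq
print(np.allclose(P_col, P_col.T), np.allclose(P_col @ P_col, P_col))
print(np.allclose(P_left_null, P_left_null.T),
      np.allclose(P_left_null @ P_left_null, P_left_null))
print(np.allclose(P_col + P_left_null, np.eye(M)))
print(np.allclose(np.linalg.lstsq(X, y, rcond=None)[0], theta_hat))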
# In[ ]:


P_row = Xli @ X  # == always the identity matrix for full column rank X
P_row, P_row @ theta_hat


# In[ ]:


# check the P_row projection in terms of the SVD
Sli @ S, np.allclose(V @ (Sli @ S) @ V.T, P_row)


# In[ ]:


P_null = np.eye(N) - P_row  # == always the zero matrix for full column rank X
P_null  # the null space is spanned only by the zero vector


# In[ ]:


# check the P_null projection in terms of the SVD
np.allclose(V @ (np.eye(N) - Sli @ S) @ V.T, P_null)


# In[ ]:


plt.figure(figsize=(8, 4))
# residuals
for m in range(M):
    plt.plot([X[m, 1], X[m, 1]], [y[m, 0], y_col[m, 0]],
             lw=3, label='error ' + str(m + 1))
# data
plt.plot(X[:, 1], y, 'C4x', ms=10, mew=3, label='data')
# fitted line
plt.plot(X[:, 1], theta_hat[0] * X[:, 0] + theta_hat[1] * X[:, 1],
         'k', label='least squares fit (interpolation)')
x = np.linspace(0, 1, 10)
plt.plot(x, theta_hat[0] + theta_hat[1] * x, 'C7:',
         label='least squares fit (extrapolation)')
x = np.linspace(4, 5, 10)
plt.plot(x, theta_hat[0] + theta_hat[1] * x, 'C7:')
plt.xticks(np.arange(6))
plt.yticks(np.arange(11) - 1)
plt.xlim(0, 5)
plt.ylim(-1, 9)
plt.xlabel('feature x1')
plt.ylabel('y')
plt.title(r'min the sum of squared errors solves for $\hat{\theta}=[-1,2]^T$ -> intercept: -1, slope: +2')
plt.legend()
plt.grid(True)


# ## Copyright
#
# - the notebooks are provided as [Open Educational Resources](https://en.wikipedia.org/wiki/Open_educational_resources)
# - the text is licensed under [Creative Commons Attribution 4.0](https://creativecommons.org/licenses/by/4.0/)
# - the code of the IPython examples is licensed under the [MIT license](https://opensource.org/licenses/MIT)
# - feel free to use the notebooks for your own purposes
# - please attribute the work as follows: *Frank Schultz, Data Driven Audio Signal Processing - A Tutorial Featuring Computational Examples, University of Rostock* ideally with relevant file(s), github URL https://github.com/spatialaudio/data-driven-audio-signal-processing-exercise, commit number and/or version tag, year.