Sascha Spors, Professorship Signal Theory and Digital Signal Processing, Institute of Communications Engineering (INT), Faculty of Computer Science and Electrical Engineering (IEF), University of Rostock, Germany
Master Course #24512
Feel free to contact lecturer frank.schultz@uni-rostock.de
import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import svd, diagsvd
from statsmodels.multivariate.pca import PCA
# flag to enable the interactive matplotlib widget backend (Jupyter notebooks)
matplotlib_widget_flag = True
if matplotlib_widget_flag:
    # IPython/Jupyter line magic — only valid when executed inside a notebook
    %matplotlib widget
# folder with the precomputed audio feature data (note the trailing slash)
audiofolder = "./audio_ex12/"
# audiofolder already ends with "/", so do not prepend another one to the
# file name (the original produced a harmless but wrong "//" in the path)
with np.load(audiofolder + "_raw_data_large.npz") as data:
    X = data["Xdata"]  # feature matrix, one row per audio item
    Y = data["Ydata"]  # target data (unused in this excerpt)
# column legend of the raw feature matrix X:
# 0...true_peak_lin
# 1...true_peak_lin2
# 2...true_peak_db
# 3...rms_lin2
# 4...rms_lin
# 5...rms_db
# 6...lufs_lin
# 7...lufs_lin2
# 8...lufs_db
# 9...crest_lin
# 10...crest_db
# 11...low_high_ratio
# pick three of the twelve features: true_peak_lin, rms_db, lufs_db
X = np.squeeze([X[:, 0], X[:, 5], X[:, 8]]).T
# feel free to play around with other feature matrices;
# a PCA on the full X yields very high variance explanation from the PC 1
# score alone, so the simple features 0...11 appear to be somewhat redundant
# PCA assumes mean-free columns, hence we explicitly center X column-wise
X = X - X.mean(axis=0)
# additionally normalize by the sample standard deviation, so that every
# column of X has unit variance and the total variance of X equals 3
if True:
    X = X / np.std(X, axis=0, ddof=1)
    col_var = np.var(X, axis=0, ddof=1)
    print(col_var, np.sum(col_var))
N, F = X.shape  # number of samples, number of features
N, F
# economy-size SVD: X = U @ S @ Vh
U, s, Vh = svd(X, full_matrices=False)
S = diagsvd(s, F, F)   # singular values as an F x F diagonal matrix
V = Vh.conj().T        # right singular vectors as columns
pcs = U @ S            # PC scores (data projected onto the PC axes)
pcl = V                # PC loadings
var_pcs = pcs.var(axis=0, ddof=1)
std_pcs = pcs.std(axis=0, ddof=1)
# PC1 explains about 95% of the variance, so this example is straightforward
print(var_pcs / np.sum(var_pcs) * 100)
print(np.cumsum(var_pcs) / np.sum(var_pcs) * 100)
# pairwise 2D scatter plots of the three chosen (centered/normalized) features
feat_names = ("true_peak_lin", "rms_db", "lufs_db")
fig = plt.figure()
fig.set_size_inches(9, 3)
for plot_idx, (c1, c2) in enumerate(((0, 1), (0, 2), (1, 2))):
    ax = fig.add_subplot(1, 3, plot_idx + 1)
    ax.plot(X[:, c1], X[:, c2], ".", color="gray", ms=1)
    ax.axis("square")
    ax.set_xlabel(f"original feature {c1 + 1}: {feat_names[c1]}")
    ax.set_ylabel(f"original feature {c2 + 1}: {feat_names[c2]}")
    ax.grid(True)
plt.tight_layout()
# 3D scatter of the data cloud in the original feature coordinate system
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
ax.plot(X[:, 0], X[:, 1], X[:, 2], ".", color="gray", ms=1)
ax.axis("square")
ax.set_xlabel("original feature 1: true_peak_lin")
ax.set_ylabel("original feature 2: rms_db")
ax.set_zlabel("original feature 3: lufs_db")
ax.set_title("data cloud in original coordinate system")
ax.grid(True)
# fixed viewing angle for reproducible figures
ax.azim = -44
ax.elev = 28
# data cloud with the PC directions (columns of V) overlaid
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
ax.plot(X[:, 0], X[:, 1], X[:, 2], ".", color="gray", ms=1)
# axis length follows the 3*sigma rule for normally distributed data;
# note that V does not necessarily span a right-hand system
col = ['C3', 'C2', 'C0']
for i in range(F):
    endpoint = 3 * std_pcs[i] * V[:, i]
    ax.plot(
        [0, endpoint[0]],
        [0, endpoint[1]],
        [0, endpoint[2]],
        col[i], lw=3,
        label=f"direction of PC {i+1} score, col {i+1} in V, right sing vec {i+1}")
ax.axis("square")
ax.set_xlabel("original feature 1: true_peak_lin")
ax.set_ylabel("original feature 2: rms_db")
ax.set_zlabel("original feature 3: lufs_db")
ax.set_title("data cloud in original coordinate system")
ax.legend()
ax.grid(True)
# fixed viewing angle for reproducible figures
ax.azim = -44
ax.elev = 28
# SVD of the PC scores themselves
U_pcs, s_pcs, Vh_pcs = svd(pcs, full_matrices=False)
S_pcs = diagsvd(s_pcs, F, F)
V_pcs = Vh_pcs.conj().T
# the SVD sign ambiguity may return U/V column pairs with flipped signs;
# flip both members of a mismatched pair so that U_pcs @ S_pcs reproduces
# pcs column by column — then V_pcs indicates the correct directions in the
# PC coordinate system
for k in range(F):
    if not np.allclose((U_pcs @ S_pcs)[:, k], pcs[:, k]):
        U_pcs[:, k] = -U_pcs[:, k]
        V_pcs[:, k] = -V_pcs[:, k]
print(np.allclose(U_pcs @ S_pcs, pcs))
# data cloud re-expressed in the PC coordinate system
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
ax.plot(pcs[:, 0], pcs[:, 1], pcs[:, 2], "x", color="gray", ms=1)
# axis length follows the 3*sigma rule for normally distributed data
col = ['C3', 'C2', 'C0']
for i in range(F):
    endpoint = 3 * std_pcs[i] * V_pcs[:, i]
    ax.plot(
        [0, endpoint[0]],
        [0, endpoint[1]],
        [0, endpoint[2]],
        col[i], lw=3,
        label=f"direction of PC {i+1}, length = 3 std(PC {i+1})")
ax.axis("square")
ax.set_xlabel("PC 1")
ax.set_ylabel("PC 2")
ax.set_zlabel("PC 3")
ax.set_title("data cloud in PC coordinate system")
ax.legend()
ax.grid(True)
# fixed viewing angle for reproducible figures
ax.azim = -37
ax.elev = 28
r_des = 2  # desired rank of the approximation, 1 or 2
# SVD mindset: truncated sum of outer products, i.e. sum of rank-1 matrices
X_rank_red = np.zeros((N, F))
for i in range(r_des):
    X_rank_red += s[i] * np.outer(U[:, i], V[:, i])
# PCA mindset: keep the PC scores and zero out the discarded PC loadings
# (the original pre-allocated X_rank_red2 with np.zeros, which was dead code
# since the name is immediately rebound below)
pcl_rank_red = np.copy(pcl)
pcl_rank_red[:, r_des:] = 0
X_rank_red2 = pcs @ pcl_rank_red.conj().T
# both mindsets must produce the same rank-reduced data matrix
print(np.allclose(X_rank_red, X_rank_red2))
pcl_rank_red
# plot the rank-reduced data cloud in the original coordinate system
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
ax.plot(X_rank_red2[:, 0],
        X_rank_red2[:, 1],
        X_rank_red2[:, 2], "x", color="gray", ms=1)
# overlay PC directions; length follows the 3*sigma rule for normal data
# note that V does not necessarily span a right-hand system
col = ['C3', 'C2', 'C0']
for i in range(F):
    endpoint = 3 * std_pcs[i] * V[:, i]
    ax.plot(
        [0, endpoint[0]],
        [0, endpoint[1]],
        [0, endpoint[2]],
        col[i], lw=1,
        label=f"direction of PC {i+1} score, col {i+1} in V, right sing vec {i+1}")
ax.axis("square")
ax.set_xlim(-6, 6)
ax.set_ylim(-6, 6)
ax.set_zlim(-6, 6)
ax.set_xlabel("new feature 1 after rank reduction")
ax.set_ylabel("new feature 2")
ax.set_zlabel("new feature 3")
ax.set_title("rank-{0:d} approximation of data".format(r_des))
ax.legend()
ax.grid(True)
# fixed viewing angle for reproducible figures
ax.azim = -44
ax.elev = 28
dim_des = 2  # number of dimensions to keep, 1 or 2
# PCA mindset: dimensionality reduction keeps only the first dim_des PC scores
# (the original pre-allocated X_dim_red with np.zeros, which was dead code
# since the name is immediately rebound on the next line)
X_dim_red = pcs[:, :dim_des]
print('original matrix shape ', X.shape)
print('matrix shape after dim red', X_dim_red.shape)
# zero-padded copy used only for the 3D plot below
X_dim_red_plot = np.zeros((N, F))
X_dim_red_plot[:, :dim_des] = pcs[:, :dim_des]
# check with SVD mindset: first dim_des columns of U S, equivalently X V
print(np.allclose((U @ S)[:, :dim_des], X_dim_red))
print(np.allclose(X @ V[:, :dim_des], X_dim_red))
V[:, :dim_des]
# the dimensionality reduction actually yields a matrix with fewer columns,
# cf. the shape of X_dim_red; for convenience we plot the zero-padded
# version here in a 3D plot
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
ax.plot(
    X_dim_red_plot[:, 0],
    X_dim_red_plot[:, 1],
    X_dim_red_plot[:, 2],
    "x", color="gray", ms=1)
# overlay PC directions; length follows the 3*sigma rule for normal data
col = ['C3', 'C2', 'C0']
for i in range(F):
    endpoint = 3 * std_pcs[i] * V_pcs[:, i]
    ax.plot(
        [0, endpoint[0]],
        [0, endpoint[1]],
        [0, endpoint[2]],
        col[i], lw=1,
        label=f"direction of PC {i+1}, length = 3 std(PC {i+1})")
ax.axis("square")
# ax.set_xlim(-6, 6)
# ax.set_ylim(-6, 6)
# ax.set_zlim(-6, 6)
ax.set_xlabel("PC 1")
ax.set_ylabel("PC 2")
ax.set_zlabel("PC 3")
plt.title("PCA data dimensionality reduction to {0:d}D".format(dim_des))
ax.legend()
ax.grid(True)
# fixed viewing angle for reproducible figures
ax.azim = -37
ax.elev = 28
print("reduced to dimension", X_dim_red.shape)
pcl
# note: the PC loadings pcl displayed above refer to the V matrix, not to V_pcs
# column legend of the raw feature matrix (cf. the construction of X above):
# 0...true_peak_lin
# 1...true_peak_lin2
# 2...true_peak_db
# 3...rms_lin2
# 4...rms_lin
# 5...rms_db
# 6...lufs_lin
# 7...lufs_lin2
# 8...lufs_db
# 9...crest_lin
# 10...crest_db
# 11...low_high_ratio
# X = np.squeeze([X[:,0], X[:,5], X[:,8]]).T
# rows of tmp = pcl^H tell how each original feature mixes the PC scores
tmp = np.conj(pcl).T
tmp
# possible interpretation of the PC features:
# PC1 could be something like 'inverted technical loudness' — the hotter the
#     music, the higher all original features
# PC2 could explain the relation between dB RMS / dB dynamics vs. linear
#     peak, or just dB vs. linear
# PC3 has almost no impact on the data; interpreting its meaning is not
#     straightforward
# explain the true_peak_lin signal:
tmp[:, 0][:, None], np.allclose(pcs @ tmp[:, 0], X[:, 0])
# explain the rms_db signal:
tmp[:, 1][:, None], np.allclose(pcs @ tmp[:, 1], X[:, 1])
# explain the lufs_db signal:
tmp[:, 2][:, None], np.allclose(pcs @ tmp[:, 2], X[:, 2])