import matplotlib.pyplot as plt
import scipy as sc
import numpy as np
# Load signal used to test stuff
from scipy.io import wavfile
# Read the training recording; wavfile.read yields (sample rate, samples).
fs, data = wavfile.read("/home/jfsantos/Documents/speech_training.wav")
# Rescale the samples by 1/2**15. Presumably the file is 16-bit PCM, in
# which case the result lies in [-1, 1) -- TODO confirm against the file.
data = data / 32768.0
%matplotlib inline
# Quick look at the first five seconds of the recording.
plt.plot(data[:5 * fs])
# Out: [<matplotlib.lines.Line2D at 0x31c9490>]
from sparse_coding.sparse_coding_gammatone import gammatone_matrix, erb_space
from scipy.signal import hamming
# Frame and filterbank settings.
resolution = 160   # samples per analysis frame; also handed to gammatone_matrix
step = 8           # handed to gammatone_matrix -- presumably the shift between atoms, TODO confirm
b = 1.019          # handed to gammatone_matrix -- presumably a bandwidth factor, TODO confirm
n_channels = 50    # number of values requested from erb_space
win = hamming(resolution)

# Compute a Gammatone dictionary: one sub-dictionary per value returned by
# erb_space(150, fs/2, n_channels), joined along the first axis.
centre_freqs = erb_space(150, fs/2, n_channels)
sub_dictionaries = tuple(
    gammatone_matrix(b, fc, resolution, step) for fc in centre_freqs
)
D_multi = np.concatenate(sub_dictionaries, axis=0)
import itertools
# Show the first 36 rows of the dictionary on a 6x6 grid, filled row by row.
fig, axes = plt.subplots(6, 6)
fig.set_size_inches(16, 12)
for atom_index, panel in enumerate(axes.flat):
    panel.plot(D_multi[atom_index])
from scikits.talkbox import segment_axis
# Take the excerpt between seconds 200 and 210 of the recording, cut it
# into frames of `resolution` samples with 50% overlap, and apply the
# window to every frame.
test_data = win*segment_axis(data[fs*200:fs*210], resolution, overlap=int(.5*resolution))
# print() with a single argument produces the same output on Python 2 and 3.
print(test_data.shape)
# Sparse-code every frame against the Gammatone dictionary with OMP; the
# codes are used afterwards to rebuild the signal frame by frame.
from sklearn.decomposition import SparseCoder
coder = SparseCoder(dictionary=D_multi, transform_n_nonzero_coefs=None,
                    transform_alpha=10, transform_algorithm="omp")
result = coder.transform(test_data)
# Out: (1999, 160)
# Overlap-add the sparse approximation of every frame into a signal that is
# aligned with the original excerpt.
orig = data[fs*200:fs*210]
out = np.zeros(orig.shape)
# Hop between consecutive frames: frame length minus the overlap, using the
# same overlap expression as the framing call, so the positions stay
# consistent with the framing even when `resolution` is odd.
hop = resolution - int(.5*resolution)
# A single matrix product synthesises all frames at once; row n equals
# sum_k result[n, k] * D_multi[k], with no large temporary per frame.
frames = np.dot(result, D_multi)
for n, frame in enumerate(frames):
    idx0 = n*hop
    out[idx0:idx0+resolution] += frame
# Count, for each dictionary entry, the frames in which its coefficient is
# non-zero (the count runs along axis 0, as the builtin sum() did), then
# average those counts over the entries.
(result != 0).sum(axis=0).mean()
# Out: 33.667368421052629
# Three stacked panels: the excerpt, its reconstruction (green), and the
# per-sample squared error between the two (red).
fig, axes = plt.subplots(3)
fig.set_size_inches(10, 8)
squared_error = (out - orig) ** 2
panel_args = [(orig,), (out, 'g'), (squared_error, 'r')]
for panel, args in zip(axes, panel_args):
    panel.plot(*args)
# Out: [<matplotlib.lines.Line2D at 0x82291d0>]
# Save the reconstruction as 32-bit float audio. The sample rate is the one
# read from the source recording (`fs`) rather than a hard-coded 16000, so
# playback speed stays correct for recordings at other rates.
wavfile.write('reconst_gammatone.wav', fs, np.asarray(out, dtype=np.float32))
#wavfile.write('orig.wav', fs, np.asarray(orig, dtype=np.float32))
# Test different sparsity values and the corresponding outputs: column k of
# out_k holds the reconstruction obtained with
# transform_n_nonzero_coefs=K[k].
K = [1, 2, 5, 10, 20, 50]
out_k = np.zeros((len(orig), len(K)))
# Hop between consecutive frames: frame length minus the overlap, using the
# same overlap expression as the framing call, so the positions stay
# consistent with the framing even when `resolution` is odd.
hop = resolution - int(.5*resolution)
for k, n_nonzero in enumerate(K):
    coder = SparseCoder(dictionary=D_multi, transform_n_nonzero_coefs=n_nonzero,
                        transform_alpha=None, transform_algorithm="omp")
    result = coder.transform(test_data)
    # Synthesise all frames with one matrix product instead of building a
    # (frame length x dictionary size) temporary for every frame.
    frames = np.dot(result, D_multi)
    for n, frame in enumerate(frames):
        idx0 = n*hop
        out_k[idx0:idx0+resolution, k] += frame
# Original excerpt in the top panel, then one panel per sparsity level with
# the matching reconstruction drawn in green.
fig, axes = plt.subplots(len(K)+1)
fig.set_size_inches(10, 15)
axes[0].plot(orig)
for panel, reconstruction in zip(axes[1:], out_k.T):
    panel.plot(reconstruction, 'g')
# Spectrogram of the excerpt, followed by one per sparsity level. Every
# spectrogram gets its own figure: without the first plt.figure() the
# excerpt's spectrogram is drawn into whatever axes happen to be current
# (for instance the last panel of a previously created figure when this is
# run outside a notebook).
plt.figure()
plt.specgram(orig)
for reconstruction in out_k.T:
    plt.figure()
    plt.specgram(reconstruction)
# Save the first column of out_k (the K[0] reconstruction) as 32-bit float
# audio, at the sample rate read from the source recording (`fs`) rather
# than a hard-coded 16000.
wavfile.write('reconst_gammatone_k1.wav', fs, np.asarray(out_k[:,0], dtype=np.float32))