!wget -q https://raw.githubusercontent.com/Mmiglio/SpeechRecognition/Attention-Mechanism-and-Memory-Networks/src/getData.py
!wget -q https://raw.githubusercontent.com/Mmiglio/SpeechRecognition/Attention-Mechanism-and-Memory-Networks/src/constants.py
!wget -q https://raw.githubusercontent.com/Mmiglio/SpeechRecognition/Attention-Mechanism-and-Memory-Networks/src/trainingUtils.py
!wget -q https://raw.githubusercontent.com/Mmiglio/SpeechRecognition/Attention-Mechanism-and-Memory-Networks/src/models.py
!wget -q https://raw.githubusercontent.com/Mmiglio/SpeechRecognition/Attention-Mechanism-and-Memory-Networks/src/analysisFunctions.py
from getData import downloadSpeechData, getDataDict
# Download data
downloadSpeechData(data_path='speechData/')
# Get dict with files and labels
dataDict = getDataDict(data_path='speechData/')
Downloading http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz into /content/speechData/train.tar.gz
Extracting /content/speechData/train.tar.gz into /content/speechData/train
Downloading http://download.tensorflow.org/data/speech_commands_test_set_v0.01.tar.gz into /content/speechData/test.tar.gz
Extracting /content/speechData/test.tar.gz into /content/speechData/test
from getData import getDataframe
trainDF = getDataframe(dataDict['train'])
valDF = getDataframe(dataDict['val'])
testDF = getDataframe(dataDict['test'])
print("Train files: {}".format(trainDF.shape[0]))
print("Validation files: {}".format(valDF.shape[0]))
print("Test files: {}".format(testDF.shape[0]))
Train files: 51088
Validation files: 6798
Test files: 6835
trainDF.head()
|   | files                                        | labels | category |
|---|----------------------------------------------|--------|----------|
| 0 | speechData/train/dog/33f60c62_nohash_1.wav   | 8      | dog      |
| 1 | speechData/train/bird/30065f33_nohash_0.wav  | 7      | bird     |
| 2 | speechData/train/down/f5c3de1b_nohash_0.wav  | 21     | down     |
| 3 | speechData/train/go/2bd2cad5_nohash_0.wav    | 27     | go       |
| 4 | speechData/train/seven/aac5b7c1_nohash_1.wav | 11     | seven    |
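Before building the input pipeline it is worth checking how the clips are spread over the 30 keywords. A quick pandas count over the category column does the job (a small sanity check, not part of the original pipeline):
# Number of training clips per keyword; the Speech Commands classes are roughly balanced
print(trainDF['category'].value_counts())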
!pip install -q tensorflow-gpu==2.0.0-beta1
import tensorflow as tf
print("GPU Available: ", tf.test.is_gpu_available())
print("Version: ", tf.__version__)
GPU Available: True
Version: 2.0.0-beta1
!pip install -q python_speech_features
Building wheel for python-speech-features (setup.py) ... done
from trainingUtils import getDataset
BATCH_SIZE = 32
# Use a reduced training set (first NUM_EXAMPLES files of trainDF)
NUM_EXAMPLES = 30000
train_data, train_steps = getDataset(
    df=trainDF[:NUM_EXAMPLES],
    batch_size=BATCH_SIZE,
    cache_file='train_cache',
    shuffle=True
)
val_data, val_steps = getDataset(
    df=valDF,
    batch_size=BATCH_SIZE,
    cache_file='val_cache',
    shuffle=False
)
test_data, test_steps = getDataset(
    df=testDF,
    batch_size=BATCH_SIZE,
    cache_file='test_cache',
    shuffle=False
)
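getDataset lives in trainingUtils.py and is not reproduced in this post. As a rough sketch of what such a pipeline can look like, assuming 16 kHz, 1-second clips turned into 99x40 log-Mel filterbank features with python_speech_features (function and argument names below are illustrative, not the actual implementation):
import numpy as np
import tensorflow as tf
from scipy.io import wavfile
from python_speech_features import logfbank

def _log_mel_features(path, label):
    # Read a wav file, pad/crop it to exactly 1 second, compute 40 log-Mel filterbanks
    rate, wave = wavfile.read(path.numpy().decode())
    wave = np.pad(wave, (0, max(0, rate - len(wave))), mode='constant')[:rate]
    feats = logfbank(wave, samplerate=rate, winlen=0.025, winstep=0.01, nfilt=40)
    return feats.astype(np.float32), np.int32(label.numpy())

def make_dataset(df, batch_size, cache_file=None, shuffle=False):
    ds = tf.data.Dataset.from_tensor_slices((df['files'].values, df['labels'].values))
    ds = ds.map(lambda f, l: tf.py_function(_log_mel_features, [f, l], [tf.float32, tf.int32]),
                num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # Fix the static shapes so Keras sees inputs of shape (99, 40)
    ds = ds.map(lambda x, y: (tf.reshape(x, [99, 40]), tf.reshape(y, [])))
    if cache_file:
        ds = ds.cache(cache_file)
    if shuffle:
        ds = ds.shuffle(buffer_size=1000)
    ds = ds.batch(batch_size).prefetch(1)
    return ds, int(np.ceil(len(df) / batch_size))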
from models import rnn_att_model
model_lstm = rnn_att_model()
model_gru = rnn_att_model(rnn='GRU')
model_lstm.compile(loss='sparse_categorical_crossentropy',
                   optimizer=tf.keras.optimizers.Adam(),
                   metrics=["sparse_categorical_accuracy"])
model_lstm.summary()
model_gru.compile(loss='sparse_categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=["sparse_categorical_accuracy"])
model_gru.summary()
Model: "model"  (model_lstm)
Layer (type)                        Output Shape                      Param #   Connected to
=============================================================================================
input_1 (InputLayer)                [(None, 99, 40)]                  0
reshape (Reshape)                   (None, 99, 40, 1)                 0         input_1[0][0]
batch_normalization (BatchNorm)     (None, 99, 40, 1)                 4         reshape[0][0]
conv2d (Conv2D)                     (None, 99, 40, 30)                300       batch_normalization[0][0]
batch_normalization_1 (BatchNorm)   (None, 99, 40, 30)                120       conv2d[0][0]
conv2d_1 (Conv2D)                   (None, 99, 40, 1)                 271       batch_normalization_1[0][0]
batch_normalization_2 (BatchNorm)   (None, 99, 40, 1)                 4         conv2d_1[0][0]
squeeze_dim (Lambda)                (None, 99, 40)                    0         batch_normalization_2[0][0]
bidirectional (Bidirectional)       (None, 99, 120)                   48480     squeeze_dim[0][0]
bidirectional_1 (Bidirectional)     (None, 99, 120)                   86880     bidirectional[0][0]
lambda (Lambda)                     [(None, 99, 60), (None, 99, 60)]  0         bidirectional_1[0][0]
Attention (Attention)               (None, 99, 60)                    0         lambda[0][0], lambda[0][1]
flatten (Flatten)                   (None, 5940)                      0         Attention[0][0]
dense (Dense)                       (None, 512)                       3041792   flatten[0][0]
dense_1 (Dense)                     (None, 30)                        15390     dense[0][0]
=============================================================================================
Total params: 3,193,241
Trainable params: 3,193,177
Non-trainable params: 64

Model: "model_1"  (model_gru)
Layer (type)                        Output Shape                      Param #   Connected to
=============================================================================================
input_2 (InputLayer)                [(None, 99, 40)]                  0
reshape_1 (Reshape)                 (None, 99, 40, 1)                 0         input_2[0][0]
batch_normalization_3 (BatchNorm)   (None, 99, 40, 1)                 4         reshape_1[0][0]
conv2d_2 (Conv2D)                   (None, 99, 40, 30)                300       batch_normalization_3[0][0]
batch_normalization_4 (BatchNorm)   (None, 99, 40, 30)                120       conv2d_2[0][0]
conv2d_3 (Conv2D)                   (None, 99, 40, 1)                 271       batch_normalization_4[0][0]
batch_normalization_5 (BatchNorm)   (None, 99, 40, 1)                 4         conv2d_3[0][0]
squeeze_dim (Lambda)                (None, 99, 40)                    0         batch_normalization_5[0][0]
bidirectional_2 (Bidirectional)     (None, 99, 120)                   36720     squeeze_dim[0][0]
bidirectional_3 (Bidirectional)     (None, 99, 120)                   65520     bidirectional_2[0][0]
lambda_1 (Lambda)                   [(None, 99, 60), (None, 99, 60)]  0         bidirectional_3[0][0]
Attention (Attention)               (None, 99, 60)                    0         lambda_1[0][0], lambda_1[0][1]
flatten_1 (Flatten)                 (None, 5940)                      0         Attention[0][0]
dense_2 (Dense)                     (None, 512)                       3041792   flatten_1[0][0]
dense_3 (Dense)                     (None, 30)                        15390     dense_2[0][0]
=============================================================================================
Total params: 3,160,121
Trainable params: 3,160,057
Non-trainable params: 64
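The Attention layer itself is defined in models.py and is not reproduced here. Reading the summaries above, it takes the two (None, 99, 60) halves produced by the Lambda split of the bidirectional output and returns a (None, 99, 60) tensor. Purely as an illustration of a layer with that interface (an assumption about the mechanism, not the actual code), a scaled dot-product attention could be written as:
import tensorflow as tf

class ScaledDotProductAttention(tf.keras.layers.Layer):
    # weights = softmax(Q K^T / sqrt(d)); output = weights V, with K = V here
    def call(self, inputs):
        query, value = inputs                                   # both (batch, 99, 60)
        d = tf.cast(tf.shape(value)[-1], tf.float32)
        scores = tf.matmul(query, value, transpose_b=True) / tf.math.sqrt(d)
        weights = tf.nn.softmax(scores, axis=-1)                # (batch, 99, 99)
        return tf.matmul(weights, value)                        # (batch, 99, 60)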
EPOCHS = 25
# Stop if the validation loss doesn't improve for 5 epochs
earlyStopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1)
# Reduce the learning rate when the validation loss plateaus for 3 epochs
reduceLR = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1)
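Both callback objects are reused for the two fit() calls below; tf.keras resets their internal state at the start of each run, so that is fine. If you prefer to end up with the weights from the best validation epoch instead of the last one, EarlyStopping also accepts a restore_best_weights flag (an optional variant, not used in this run):
# Optional: roll back to the weights of the epoch with the lowest validation loss when stopping
earlyStopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5,
                                                 verbose=1, restore_best_weights=True)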
history_lstm = model_lstm.fit(train_data.repeat(),
                              steps_per_epoch=train_steps,
                              validation_data=val_data.repeat(),
                              validation_steps=val_steps,
                              epochs=EPOCHS,
                              callbacks=[earlyStopping, reduceLR])
history_gru = model_gru.fit(train_data.repeat(),
                            steps_per_epoch=train_steps,
                            validation_data=val_data.repeat(),
                            validation_steps=val_steps,
                            epochs=EPOCHS,
                            callbacks=[earlyStopping, reduceLR])
LSTM model:
Epoch 1/25: 937/937 - 202s 216ms/step - loss: 1.3229 - sparse_categorical_accuracy: 0.6127 - val_loss: 0.6924 - val_sparse_categorical_accuracy: 0.7988
Epoch 2/25: 937/937 - 35s 37ms/step - loss: 0.4067 - sparse_categorical_accuracy: 0.8774 - val_loss: 0.3573 - val_sparse_categorical_accuracy: 0.8962
Epoch 3/25: 937/937 - 33s 35ms/step - loss: 0.2919 - sparse_categorical_accuracy: 0.9128 - val_loss: 0.3980 - val_sparse_categorical_accuracy: 0.8850
Epoch 4/25: 937/937 - 33s 35ms/step - loss: 0.2420 - sparse_categorical_accuracy: 0.9264 - val_loss: 0.3517 - val_sparse_categorical_accuracy: 0.9008
Epoch 5/25: 937/937 - 33s 35ms/step - loss: 0.2094 - sparse_categorical_accuracy: 0.9372 - val_loss: 0.3107 - val_sparse_categorical_accuracy: 0.9204
Epoch 6/25: 937/937 - 33s 35ms/step - loss: 0.1851 - sparse_categorical_accuracy: 0.9439 - val_loss: 0.4418 - val_sparse_categorical_accuracy: 0.8861
Epoch 7/25: 937/937 - 33s 35ms/step - loss: 0.1745 - sparse_categorical_accuracy: 0.9480 - val_loss: 0.2865 - val_sparse_categorical_accuracy: 0.9248
Epoch 8/25: 937/937 - 33s 35ms/step - loss: 0.1601 - sparse_categorical_accuracy: 0.9510 - val_loss: 0.3374 - val_sparse_categorical_accuracy: 0.9161
Epoch 9/25: 937/937 - 33s 35ms/step - loss: 0.1451 - sparse_categorical_accuracy: 0.9566 - val_loss: 0.3629 - val_sparse_categorical_accuracy: 0.9192
Epoch 10/25: 937/937 - 33s 35ms/step - loss: 0.1379 - sparse_categorical_accuracy: 0.9595 - val_loss: 0.2808 - val_sparse_categorical_accuracy: 0.9316
Epoch 11/25: 937/937 - 34s 36ms/step - loss: 0.1279 - sparse_categorical_accuracy: 0.9610 - val_loss: 0.4144 - val_sparse_categorical_accuracy: 0.9085
Epoch 12/25: 937/937 - 34s 36ms/step - loss: 0.1192 - sparse_categorical_accuracy: 0.9640 - val_loss: 0.3341 - val_sparse_categorical_accuracy: 0.9285
Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 13/25: 937/937 - 34s 36ms/step - loss: 0.1192 - sparse_categorical_accuracy: 0.9640 - val_loss: 0.3319 - val_sparse_categorical_accuracy: 0.9310
Epoch 14/25: 937/937 - 33s 36ms/step - loss: 0.0689 - sparse_categorical_accuracy: 0.9790 - val_loss: 0.2813 - val_sparse_categorical_accuracy: 0.9421
Epoch 15/25: 937/937 - 34s 36ms/step - loss: 0.0550 - sparse_categorical_accuracy: 0.9832 - val_loss: 0.2903 - val_sparse_categorical_accuracy: 0.9431
Epoch 00015: early stopping

GRU model:
Epoch 1/25: 937/937 - 40s 42ms/step - loss: 1.2130 - sparse_categorical_accuracy: 0.6491 - val_loss: 0.7879 - val_sparse_categorical_accuracy: 0.8065
Epoch 2/25: 937/937 - 34s 37ms/step - loss: 0.4272 - sparse_categorical_accuracy: 0.8720 - val_loss: 0.5981 - val_sparse_categorical_accuracy: 0.8361
Epoch 3/25: 937/937 - 33s 36ms/step - loss: 0.3232 - sparse_categorical_accuracy: 0.9043 - val_loss: 0.4231 - val_sparse_categorical_accuracy: 0.8889
Epoch 4/25: 937/937 - 33s 35ms/step - loss: 0.2641 - sparse_categorical_accuracy: 0.9226 - val_loss: 0.3812 - val_sparse_categorical_accuracy: 0.8968
Epoch 5/25: 937/937 - 33s 36ms/step - loss: 0.2397 - sparse_categorical_accuracy: 0.9306 - val_loss: 0.3607 - val_sparse_categorical_accuracy: 0.9107
Epoch 6/25: 937/937 - 33s 35ms/step - loss: 0.2184 - sparse_categorical_accuracy: 0.9372 - val_loss: 0.3218 - val_sparse_categorical_accuracy: 0.9170
Epoch 7/25: 937/937 - 33s 36ms/step - loss: 0.1946 - sparse_categorical_accuracy: 0.9432 - val_loss: 0.3593 - val_sparse_categorical_accuracy: 0.9067
Epoch 8/25: 937/937 - 33s 35ms/step - loss: 0.1791 - sparse_categorical_accuracy: 0.9485 - val_loss: 0.3151 - val_sparse_categorical_accuracy: 0.9276
Epoch 9/25: 937/937 - 33s 35ms/step - loss: 0.1778 - sparse_categorical_accuracy: 0.9501 - val_loss: 0.3913 - val_sparse_categorical_accuracy: 0.9117
Epoch 10/25: 937/937 - 33s 35ms/step - loss: 0.1722 - sparse_categorical_accuracy: 0.9507 - val_loss: 0.3450 - val_sparse_categorical_accuracy: 0.9250
Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 11/25: 937/937 - 33s 35ms/step - loss: 0.1518 - sparse_categorical_accuracy: 0.9558 - val_loss: 0.3421 - val_sparse_categorical_accuracy: 0.9198
Epoch 12/25: 937/937 - 33s 35ms/step - loss: 0.0823 - sparse_categorical_accuracy: 0.9758 - val_loss: 0.2716 - val_sparse_categorical_accuracy: 0.9394
Epoch 13/25: 937/937 - 33s 35ms/step - loss: 0.0638 - sparse_categorical_accuracy: 0.9815 - val_loss: 0.2723 - val_sparse_categorical_accuracy: 0.9402
Epoch 14/25: 937/937 - 34s 36ms/step - loss: 0.0565 - sparse_categorical_accuracy: 0.9829 - val_loss: 0.2747 - val_sparse_categorical_accuracy: 0.9412
Epoch 00015: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 15/25: 937/937 - 33s 35ms/step - loss: 0.0529 - sparse_categorical_accuracy: 0.9838 - val_loss: 0.2766 - val_sparse_categorical_accuracy: 0.9415
Epoch 16/25: 937/937 - 33s 35ms/step - loss: 0.0470 - sparse_categorical_accuracy: 0.9869 - val_loss: 0.2801 - val_sparse_categorical_accuracy: 0.9418
Epoch 17/25: 937/937 - 33s 35ms/step - loss: 0.0465 - sparse_categorical_accuracy: 0.9869 - val_loss: 0.2817 - val_sparse_categorical_accuracy: 0.9416
Epoch 00017: early stopping
from google.colab import drive
drive.mount('/content/gdrive')
Mounted at /content/gdrive
!mkdir /content/gdrive/My\ Drive/Colab\ Notebooks/rnnModel
mkdir: cannot create directory ‘/content/gdrive/My Drive/Colab Notebooks/rnnModel’: File exists
GDRIVE_PATH = "/content/gdrive/My Drive/Colab Notebooks/rnnModel/"
## Save models to Google Drive
model_lstm_json = model_lstm.to_json()
with open(GDRIVE_PATH + "model_lstm.json", "w") as file:
    file.write(model_lstm_json)
# serialize weights to HDF5
model_lstm.save_weights(GDRIVE_PATH + "model_lstm_weights.h5")
print("Saved lstm model to drive")
model_gru_json = model_gru.to_json()
with open(GDRIVE_PATH + "model_gru.json", "w") as file:
    file.write(model_gru_json)
# serialize weights to HDF5
model_gru.save_weights(GDRIVE_PATH + "model_gru_weights.h5")
print("Saved gru model to drive")
Saved lstm model to drive
Saved gru model to drive
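For later sessions the models can be rebuilt from these files; because the architecture contains the custom Attention layer, it has to be supplied through custom_objects when deserialising. A sketch, assuming the Attention class can be imported from models.py:
import tensorflow as tf
from models import Attention  # custom layer used inside rnn_att_model (assumed importable)

with open(GDRIVE_PATH + "model_lstm.json") as json_file:
    restored_lstm = tf.keras.models.model_from_json(json_file.read(),
                                                    custom_objects={'Attention': Attention})
restored_lstm.load_weights(GDRIVE_PATH + "model_lstm_weights.h5")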
import pickle
## Save history data
with open(GDRIVE_PATH + "lstm_train_results.pickle", "wb") as handle:
    pickle.dump(history_lstm.history, handle, protocol=pickle.HIGHEST_PROTOCOL)
print("Saved lstm training history to drive")
with open(GDRIVE_PATH + "gru_train_results.pickle", "wb") as handle:
    pickle.dump(history_gru.history, handle, protocol=pickle.HIGHEST_PROTOCOL)
print("Saved gru training history to drive")
Saved lstm training history to drive
Saved gru training history to drive
## test load
with open(GDRIVE_PATH + "lstm_train_results.pickle", 'rb') as handle:
    lstm_hist = pickle.load(handle)
with open(GDRIVE_PATH + "gru_train_results.pickle", 'rb') as handle:
    gru_hist = pickle.load(handle)
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
lstm_acc = lstm_hist['sparse_categorical_accuracy']
lstm_val_acc = lstm_hist['val_sparse_categorical_accuracy']
lstm_loss = lstm_hist['loss']
lstm_val_loss = lstm_hist['val_loss']
gru_acc = gru_hist['sparse_categorical_accuracy']
gru_val_acc = gru_hist['val_sparse_categorical_accuracy']
gru_loss = gru_hist['loss']
gru_val_loss = gru_hist['val_loss']
fig, (ax1,ax2) = plt.subplots(2,2, figsize=(16,8))
ax1[0].plot(lstm_loss, label='lstm train')
ax1[0].plot(lstm_val_loss, label='lstm validation')
ax1[0].set_title('Model loss')
ax1[0].set_xlabel('Epoch')
ax1[0].set_ylabel('Loss')
ax1[0].legend()
ax1[1].plot(gru_loss, label='gru train')
ax1[1].plot(gru_val_loss, label='gru validation')
ax1[1].set_title('Model loss')
ax1[1].set_xlabel('Epoch')
ax1[1].set_ylabel('Loss')
ax1[1].legend()
ax2[0].plot(lstm_acc, label='lstm train')
ax2[0].plot(lstm_val_acc, label='lstm validation')
ax2[0].set_title('Model accuracy')
ax2[0].set_xlabel('Epoch')
ax2[0].set_ylabel('Accuracy')
ax2[0].legend()
ax2[1].plot(gru_acc, label='gru train')
ax2[1].plot(gru_val_acc, label='gru validation')
ax2[1].set_title('Model accuracy')
ax2[1].set_xlabel('Epoch')
ax2[1].set_ylabel('Accuracy')
ax2[1].legend()
plt.tight_layout()
fig.show()
lstm_out = model_lstm.predict(test_data, verbose=1)
gru_out = model_gru.predict(test_data, verbose=1)
214/214 [==============================] - 40s 186ms/step
214/214 [==============================] - 4s 19ms/step
import numpy as np
lstm_y_pred = np.argmax(lstm_out, axis=1)
gru_y_pred = np.argmax(gru_out, axis=1)
y_true = testDF['labels'].tolist()
from sklearn.metrics import confusion_matrix, accuracy_score
print('LSTM Accuracy: {:.4f}'.format(accuracy_score(y_true, lstm_y_pred)))
print('GRU Accuracy: {:.4f}'.format(accuracy_score(y_true, gru_y_pred)))
# sklearn's convention is (y_true, y_pred): rows of the matrix are true labels, columns are predictions
lstm_cm = confusion_matrix(y_true, lstm_y_pred)
gru_cm = confusion_matrix(y_true, gru_y_pred)
LSTM Accuracy: 0.9469
GRU Accuracy: 0.9462
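A per-keyword breakdown makes it easier to see which words each network struggles with; sklearn's classification_report prints precision and recall per class. The snippet below assumes inv_categories maps the label index to the keyword name, as its use for the confusion-matrix axes below suggests:
from sklearn.metrics import classification_report
from constants import inv_categories

# Precision/recall per keyword for the LSTM model, restricted to labels present in the test set
labels_present = sorted(set(y_true))
print(classification_report(y_true, lstm_y_pred, labels=labels_present,
                            target_names=[inv_categories[i] for i in labels_present]))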
from analysisFunctions import plot_confusion_matrix
from constants import inv_categories
plot_confusion_matrix(lstm_cm, target_names=list(inv_categories.values())[:-1], normalize=True)
plot_confusion_matrix(gru_cm, target_names=list(inv_categories.values())[:-1], normalize=True)
layer_out = tf.keras.Model(inputs=model_lstm.input, outputs=model_lstm.get_layer('Attention').output)
final_out = tf.keras.Model(inputs=model_lstm.input, outputs=model_lstm.get_layer('dense_1').output)
attention = layer_out.predict(test_data, verbose=1)
out = final_out.predict(test_data, verbose=1)
214/214 [==============================] - 4s 20ms/step
214/214 [==============================] - 4s 19ms/step
print(out.shape)
y_p = np.argmax(out,axis=1)
y_t = testDF['labels'].tolist()
print(accuracy_score(y_p,y_t))
(6835, 30)
0.9468910021945867
from scipy.io import wavfile
# Collapse the attention output over its 60 features: keep the maximum activation per time frame
attention_avg = np.apply_along_axis(np.max, 2, attention)
print(attention_avg.shape)
fig, (ax1, ax2) = plt.subplots(2,4,figsize=(24,8))
for index, i in enumerate(np.random.randint(0, 6835, 4)):
    f = testDF['files'][i]
    _, wave = wavfile.read(f)
    ax1[index].plot(attention_avg[i])
    ax2[index].plot(range(len(wave)), wave)
    ax2[index].set_title('Raw wave of ' + f)
    ax2[index].set_ylabel('Amplitude')
    ax2[index].set_xlabel('Sample')
fig.show()
(6835, 99)
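Because the attention profile has 99 time steps while the raw clip has up to 16,000 samples, overlaying the two on a shared time axis makes the comparison easier to read. Assuming the features were computed with a 25 ms window and a 10 ms hop at 16 kHz (as in the pipeline sketch earlier), frame i is centred near sample 160*i + 200:
# Overlay the per-frame attention maximum on the waveform of one random test clip
frame_centres = np.arange(attention_avg.shape[1]) * 160 + 200  # approximate frame centres in samples

i = np.random.randint(0, len(testDF))
_, wave = wavfile.read(testDF['files'][i])

fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(wave, color='lightgray', label='waveform')
ax_att = ax.twinx()
ax_att.plot(frame_centres, attention_avg[i], color='red', label='max attention per frame')
ax.set_xlabel('Sample')
ax.set_title('Attention profile over ' + testDF['files'][i])
plt.show()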