%matplotlib inline

import glob
import numpy as np
from scipy.io import wavfile
import matplotlib.pyplot as plt
from sklearn.svm import SVC

# Collect every recording that matches the naming scheme; the file name is
# what later code inspects for the substrings 'weak' / 'strong'.
filelist = glob.glob('./ohayou_*.wav')
filelist

# Load each recording, keeping one channel together with its sample rate.
snd_array = []
for fname in filelist:
    fs, snd = wavfile.read(fname)
    # wavfile.read returns a 1-D array for mono files and an
    # (n_samples, n_channels) array otherwise; use channel 1 when it exists.
    channel = snd if snd.ndim == 1 else snd[:, 1]
    snd_array.append({'data': channel, 'sample_rate': fs, 'fname': fname})

# Preview the first second of two recordings, each in its own figure.
# `plot_data` is left bound to the last previewed recording.
for plot_data in (snd_array[0], snd_array[7]):
    seconds = np.arange(0.0, np.size(plot_data['data']), 1) / plot_data['sample_rate']
    plt.figure()
    plt.plot(seconds, plot_data['data'])
    plt.xlim([0, 1])
    plt.ylim([-30000, 30000])
    plt.title(plot_data['fname'])
    # Sample index divided by the sample rate is a time in seconds.
    plt.xlabel('[second]')

def time_index(data):
    """Return the time stamp of every sample in a recording.

    `data` is a mapping with a 'data' entry (the samples) and a
    'sample_rate' entry. The result is an array with one value per sample:
    the sample position divided by the sample rate.
    """
    import numpy as np

    n_samples = np.size(data['data'])
    sample_positions = np.arange(0.0, n_samples, 1)
    return sample_positions / data['sample_rate']

# Time axis of the first recording (shown when run interactively).
time_index(snd_array[0])

# Plot every waveform in its own figure, coloured by the utterance type
# found in the file name.
for snd_data in snd_array:
    if 'weak' in snd_data['fname']:
        col = 'b'
    elif 'strong' in snd_data['fname']:
        col = 'r'
    else:
        # Neither marker in the file name: use a neutral colour instead of
        # failing on an unbound `col` or reusing the previous file's colour.
        col = 'k'
    plt.figure()
    plt.plot(time_index(snd_data), snd_data['data'], color=col)
    plt.xlim([0, 1])
    # Axis limits of +/- 2**15.
    plt.ylim([-2.**15, 2.**15])
    # Title each figure with the recording it actually shows.
    plt.title(snd_data['fname'])
    plt.xlabel('[second]')

# Length (in samples) of the window used to smooth the absolute amplitude.
n_window = 500

# Attach a smoothed amplitude envelope ('amp') to every recording and plot it.
for snd_data in snd_array:
    samples = snd_data['data']
    n_samples = samples.shape[0]

    # Left-pad with zeros so that the window for sample i covers the
    # n_window samples preceding it; the envelope keeps the signal's length.
    abs_padded = np.abs(np.r_[np.zeros(n_window), samples])

    # Windowed means via a cumulative sum: the sum over
    # abs_padded[i:i + n_window] is running[i + n_window] - running[i].
    # This replaces a Python-level loop that called np.mean once per sample.
    running = np.cumsum(np.r_[0.0, abs_padded])
    window_sums = running[n_window:n_window + n_samples] - running[:n_samples]
    snd_data['amp'] = window_sums / n_window

    if 'weak' in snd_data['fname']:
        col = 'b'
    elif 'strong' in snd_data['fname']:
        col = 'r'
    else:
        # Neither marker in the file name: use a neutral colour instead of
        # failing on an unbound `col` or reusing the previous file's colour.
        col = 'k'
    plt.figure()
    plt.plot(time_index(snd_data), snd_data['amp'], color=col)

def extract_speak(snd, th_amp=1000, th_time=0.1):
    """Return a boolean mask selecting the sustained loud parts of a recording.

    A sample is "loud" when its smoothed amplitude ``snd['amp']`` exceeds
    ``th_amp``. A run of consecutive loud samples is selected only when it is
    longer than ``th_time`` seconds, i.e. longer than
    ``th_time * snd['sample_rate']`` samples; shorter bursts stay unselected.

    Parameters:
        snd: mapping with an 'amp' sequence and a 'sample_rate' value.
        th_amp: amplitude threshold a sample must exceed to count as loud.
        th_time: minimum run duration in seconds (exclusive).

    Returns:
        Boolean NumPy array with one entry per element of ``snd['amp']``.
    """
    import numpy as np

    above = th_amp < np.asarray(snd['amp'])
    min_run = th_time * snd['sample_rate']
    n_samples = above.shape[0]
    index_snd = np.zeros(n_samples, dtype=bool)

    run_length = 0
    for i, is_loud in enumerate(above):
        if is_loud:
            run_length += 1
            continue
        # A quiet sample closes the current run; keep it if long enough.
        if min_run < run_length:
            index_snd[i - run_length:i] = True
        run_length = 0

    # A run that is still open when the signal ends has no closing quiet
    # sample, so it must be flushed here or it would be dropped.
    if min_run < run_length:
        index_snd[n_samples - run_length:] = True
    return index_snd

# Plot, per recording, the amplitude envelope restricted to the samples that
# extract_speak selects.
for snd_data in snd_array:
    plot_data = snd_data['amp'][extract_speak(snd_data)]
    if 'weak' in snd_data['fname']:
        col = 'b'
    elif 'strong' in snd_data['fname']:
        col = 'r'
    else:
        # Neither marker in the file name: use a neutral colour instead of
        # failing on an unbound `col` or reusing the previous file's colour.
        col = 'k'
    plt.figure()
    # The selected samples are plotted back to back, so the x axis is just the
    # first len(plot_data) time stamps rather than the original positions.
    plt.plot(time_index(snd_data)[:plot_data.shape[0]], plot_data, color=col)

def autocorr(dt):
    """Return the autocorrelation of `dt` for non-negative lags.

    The full correlation of an n-sample signal with itself has 2n - 1 values
    with lag 0 in the middle; only the half starting at lag 0 is returned,
    so the result has n values and element k is the correlation at lag k.
    """
    import scipy.signal as sig

    cor = sig.correlate(dt, dt, mode="full")
    # Floor division keeps the index an integer on Python 3 as well, where
    # `/` would yield a float that cannot be used as a slice bound.
    return cor[cor.size // 2:]

# Sanity check of autocorr on a synthetic sine wave: the signal itself ...
sin_wave = np.sin(np.linspace(0, 20, 100))
plt.figure()
plt.plot(sin_wave)

# ... and its autocorrelation, each in its own figure.
plt.figure()
plt.plot(autocorr(sin_wave))

# Autocorrelation of the samples extract_speak selects in the first recording.
snd_data = snd_array[0]
snd_index = extract_speak(snd_data)
# Convert to float64 before correlating (np.float_ no longer exists in
# NumPy 2.0, so the dtype is requested explicitly).
pitch = autocorr(snd_data['data'][snd_index].astype(np.float64))

# Lag axis in seconds: one entry per selected sample.
lag_seconds = np.arange(0.0, np.count_nonzero(snd_index), 1) / snd_data['sample_rate']

# Full range of lags.
plt.figure()
plt.plot(lag_seconds, pitch)

# Same curve, zoomed in on lags up to 0.02 s.
plt.figure()
plt.plot(lag_seconds, pitch)
plt.xlim([0, 0.02])

def extract_speak_mid(snd, extract_length=0.05):
    """Return a short snippet taken from the middle of the selected samples.

    The samples selected by ``extract_speak(snd)`` are concatenated, and a
    slice starting at their midpoint and lasting ``extract_length`` seconds
    is returned. The slice is shorter when fewer samples remain after the
    midpoint.

    Parameters:
        snd: mapping with 'data' (NumPy array of samples), 'amp' and
            'sample_rate' entries.
        extract_length: snippet duration in seconds.

    Returns:
        NumPy array holding the extracted samples.
    """
    # Operate on the recording passed in, not on whatever the module-level
    # `snd_data` variable happens to reference at call time.
    snd_index = extract_speak(snd)
    voiced = snd['data'][snd_index]

    # Slice bounds must be integers; the duration is rounded to whole samples.
    extract_start = voiced.shape[0] // 2
    extract_end = extract_start + int(round(extract_length * snd['sample_rate']))
    return voiced[extract_start:extract_end]

# Autocorrelation of a short snippet from the middle of the first recording.
snd_data = snd_array[0]
corr_data = extract_speak_mid(snd_data)
# Convert to float64 before correlating (np.float_ no longer exists in
# NumPy 2.0, so the dtype is requested explicitly).
pitch = autocorr(np.asarray(corr_data, dtype=np.float64))

# The raw snippet ...
plt.figure()
plt.plot(corr_data)

# ... and its autocorrelation against lag in seconds, zoomed to 0.02 s.
plt.figure()
plt.plot(np.arange(0.0, pitch.shape[0], 1) / snd_data['sample_rate'], pitch)
plt.xlim([0, 0.02])

def extract_pitch(corr, fs, min_pitch=0.002, max_pitch=0.01):
    """Return the lag, in seconds, of the autocorrelation peak in a lag window.

    Only lags from ``min_pitch`` (inclusive) to ``max_pitch`` (exclusive)
    seconds are searched, which skips the peak at lag 0.

    Parameters:
        corr: autocorrelation values indexed by lag in samples.
        fs: sample rate in samples per second.
        min_pitch: lower bound of the searched lag window, in seconds.
        max_pitch: upper bound of the searched lag window, in seconds.

    Returns:
        The lag of the largest value inside the window, in seconds.

    Raises:
        ValueError: if the window contains no samples (np.argmax of an
            empty sequence).
    """
    import numpy as np

    # Slice bounds must be integers, so convert the window to whole samples.
    first_lag = int(round(min_pitch * fs))
    last_lag = int(round(max_pitch * fs))

    # argmax is relative to the window; add the integer offset actually used
    # for slicing to get the absolute lag.
    peak_lag = first_lag + int(np.argmax(corr[first_lag:last_lag]))
    # float() keeps this a true division on Python 2 as well.
    return peak_lag / float(fs)

# Peak lag of the autocorrelation currently held in `pitch`
# (shown when run interactively).
extract_pitch(pitch, snd_data['sample_rate'])

# Compute the two classification features for every recording:
#   'amp_mean' - mean of the amplitude envelope over the selected samples
#   'pitch'    - peak lag of the autocorrelation of the mid-utterance snippet
for snd_data in snd_array:
    use_index = extract_speak(snd_data)
    snd_data['amp_mean'] = np.mean(snd_data['amp'][use_index])
    # np.float_ no longer exists in NumPy 2.0; request float64 explicitly.
    pitch = autocorr(np.asarray(extract_speak_mid(snd_data), dtype=np.float64))
    snd_data['pitch'] = extract_pitch(pitch, snd_data['sample_rate'])

# Scatter plot of the raw features in a fresh figure. Successive plt.plot
# calls draw onto the same axes by default, so the removed plt.hold toggles
# are not needed.
plt.figure()
for snd_data in snd_array:
    if 'weak' in snd_data['fname']:
        col = 'b'
    elif 'strong' in snd_data['fname']:
        col = 'r'
    else:
        # Neither marker in the file name: use a neutral colour instead of
        # failing on an unbound `col` or reusing the previous file's colour.
        col = 'k'
    plt.plot(snd_data['amp_mean'], snd_data['pitch'], '.', color=col)
plt.xlabel('amp_mean')
plt.ylabel('pitch')

# Gather the features into arrays; label 1 marks 'weak' recordings and every
# other recording keeps label 0.
vol_array = np.zeros(len(snd_array))
peak_array = np.zeros(len(snd_array))
label = np.zeros(len(snd_array))
for i, snd_data in enumerate(snd_array):
    vol_array[i] = snd_data['amp_mean']
    peak_array[i] = snd_data['pitch']
    if 'weak' in snd_data['fname']:
        label[i] = 1

# Rescale both features (amplitude / 1000, pitch * 1000) so that they have a
# comparable order of magnitude.
data_training = [[x/1000, y*1000] for (x, y) in zip(vol_array, peak_array)]
label_training = [int(x) for x in label]

data_training

label_training

# Fit a linear SVM on the rescaled features.
estimator = SVC(C=10.0, kernel='linear')
estimator.fit(data_training, label_training)

# Predictions on the same samples the model was fitted on.
label_prediction = estimator.predict(data_training)
print(label_prediction)

# Evaluate the classifier on a grid spanning the data (margin 1, step h) and
# draw the predicted regions in a fresh figure.
X = np.array(data_training)
h = .02
x_min, x_max = X[:, 0].min()-1, X[:, 0].max()+1
y_min, y_max = X[:, 1].min()-1, X[:, 1].max()+1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure()
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)

# Overlay the training points on the regions: label 0 in red, label 1 in blue.
plot_data = np.array(data_training)
label = np.array(label_training)
plt.plot(plot_data[label==0, 0], plot_data[label==0, 1], '.', color='r')
plt.plot(plot_data[label==1, 0], plot_data[label==1, 1], '.', color='b')
plt.xlabel('amp')
plt.ylabel('pitch')