"""Tell "weak" from "strong" recordings of "ohayou" using loudness and pitch.

Every ``./ohayou_*.wav`` file is loaded, a short-time amplitude envelope is
computed, the voiced part is located by thresholding that envelope, and the
pitch period is estimated from the autocorrelation of a short excerpt taken
from the middle of the voiced part.  The two resulting features (mean
amplitude, pitch period) are used to fit a linear SVM whose decision regions
are then plotted.

A recording is represented by a dict with the keys ``'data'`` (1-D sample
array), ``'sample_rate'`` (Hz) and ``'fname'``; the analysis adds ``'amp'``,
``'amp_mean'`` and ``'pitch'``.
"""
import glob

import numpy as np
from scipy import signal
from scipy.io import wavfile


def load_recordings(paths):
    """Read every WAV file in *paths* into a recording dict.

    Multi-channel files contribute their second channel (index 1), which is
    the channel the analysis has always used; mono files are used as they are.
    """
    recordings = []
    for fname in paths:
        fs, snd = wavfile.read(fname)
        if snd.ndim == 1:
            samples = snd
        else:
            # Fall back to the last channel if there is no channel 1.
            samples = snd[:, min(1, snd.shape[1] - 1)]
        recordings.append({'data': samples, 'sample_rate': fs, 'fname': fname})
    return recordings


def label_color(fname):
    """Return the plot colour for a file name.

    ``'b'`` when the name contains "weak", ``'r'`` when it contains "strong"
    and ``'k'`` otherwise, so that an unlabelled file can never silently
    reuse the colour of the previously plotted recording.
    """
    if 'weak' in fname:
        return 'b'
    if 'strong' in fname:
        return 'r'
    return 'k'


def time_index(data):
    """Return the time axis, in seconds, of the samples in ``data['data']``."""
    return np.arange(0.0, np.size(data['data']), 1) / data['sample_rate']


def moving_amplitude(samples, n_window=500):
    """Return the mean absolute amplitude over the preceding *n_window* samples.

    Element ``i`` of the result averages ``|samples[i - n_window:i]|``, with
    zeros assumed before the start of the signal, so the output has the same
    length as *samples*.

    Raises:
        ValueError: if *n_window* is smaller than 1.
    """
    if n_window < 1:
        raise ValueError("n_window must be at least 1")
    # Convert before abs(): abs(int16(-32768)) overflows in integer arithmetic.
    magnitude = np.abs(np.asarray(samples, dtype=np.float64))
    padded = np.concatenate((np.zeros(n_window), magnitude))
    # Window sums as differences of a running total: one pass instead of a
    # Python-level loop over every sample.
    running = np.concatenate(([0.0], np.cumsum(padded)))
    n_samples = magnitude.shape[0]
    window_sum = running[n_window:n_window + n_samples] - running[:n_samples]
    return window_sum / n_window


def extract_speak(snd, th_amp=1000, th_time=0.1):
    """Return a boolean mask selecting the voiced samples of a recording.

    A sample is loud when ``snd['amp']`` exceeds *th_amp*.  A run of
    consecutive loud samples is kept only when it is longer than *th_time*
    seconds, which discards short clicks.  A run that lasts until the end of
    the recording is treated like any other run.
    """
    loud = np.asarray(snd['amp']) > th_amp
    min_samples = th_time * snd['sample_rate']
    mask = np.zeros(loud.shape[0], dtype=bool)

    # Pad with silence on both sides so every run has a rising and a falling
    # edge, including runs touching the first or the last sample.
    padded = np.concatenate(([False], loud, [False]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])
    for start, stop in zip(edges[::2], edges[1::2]):
        if stop - start > min_samples:
            mask[start:stop] = True
    return mask


def autocorr(dt):
    """Return the autocorrelation of *dt* for the non-negative lags only."""
    cor = signal.correlate(dt, dt, mode="full")
    # The full correlation is symmetric around its middle element (lag 0).
    return cor[cor.size // 2:]


def extract_speak_mid(snd, extract_length=0.05):
    """Return *extract_length* seconds from the middle of the voiced samples.

    The voiced samples are those selected by ``extract_speak(snd)``; the
    excerpt starts at their midpoint and may be shorter than requested when
    fewer samples remain.
    """
    voiced = snd['data'][extract_speak(snd)]
    extract_start = voiced.shape[0] // 2
    extract_end = extract_start + int(round(extract_length * snd['sample_rate']))
    return voiced[extract_start:extract_end]


def extract_pitch(corr, fs, min_pitch=0.002, max_pitch=0.01):
    """Return the pitch period in seconds found in an autocorrelation.

    The period is the lag of the largest value of *corr* among the lags
    between *min_pitch* and *max_pitch* seconds, where *fs* is the sample
    rate in Hz.

    Raises:
        ValueError: if *corr* holds no value in that lag range.
    """
    first_lag = int(round(min_pitch * fs))
    last_lag = int(round(max_pitch * fs))
    return (np.argmax(corr[first_lag:last_lag]) + first_lag) / fs


if __name__ == "__main__":
    # NOTE: inside a Jupyter notebook, run ``%matplotlib inline`` in its own
    # cell first; the magic is IPython syntax and is not valid in a .py file.
    # The plotting and learning packages are imported here so that the signal
    # helpers above can be imported without them.
    import matplotlib.pyplot as plt
    from sklearn.svm import SVC

    filelist = glob.glob('./ohayou_*.wav')
    print(filelist)
    if not filelist:
        raise FileNotFoundError("no recordings match './ohayou_*.wav'")

    snd_array = load_recordings(filelist)

    # Raw waveform of two sample recordings (the second only if it exists).
    for i_sample in (0, 7):
        if i_sample >= len(snd_array):
            continue
        plot_data = snd_array[i_sample]
        plt.figure()
        plt.plot(time_index(plot_data), plot_data['data'])
        plt.xlim([0, 1])
        plt.ylim([-30000, 30000])
        plt.title(plot_data['fname'])
        plt.xlabel('[second]')

    print(time_index(snd_array[0]))

    # Waveform of every recording, coloured by its label.
    for snd_data in snd_array:
        plt.figure()
        plt.plot(time_index(snd_data), snd_data['data'],
                 color=label_color(snd_data['fname']))
        plt.xlim([0, 1])
        plt.ylim([-2.**15, 2.**15])
        plt.title(snd_data['fname'])
        plt.xlabel('[second]')

    # Short-time amplitude envelope of every recording.
    for snd_data in snd_array:
        snd_data['amp'] = moving_amplitude(snd_data['data'], n_window=500)
        plt.figure()
        plt.plot(time_index(snd_data), snd_data['amp'],
                 color=label_color(snd_data['fname']))

    # Envelope restricted to the voiced samples.
    for snd_data in snd_array:
        plot_data = snd_data['amp'][extract_speak(snd_data)]
        plt.figure()
        plt.plot(time_index(snd_data)[:plot_data.shape[0]], plot_data,
                 color=label_color(snd_data['fname']))

    # Sanity check of autocorr() on a sine wave.
    sin_wave = np.sin(np.linspace(0, 20, 100))
    plt.figure()
    plt.plot(sin_wave)
    plt.figure()
    plt.plot(autocorr(sin_wave))

    # Autocorrelation of all voiced samples of the first recording,
    # full range and zoomed in on the first 20 ms of lag.
    snd_data = snd_array[0]
    snd_index = extract_speak(snd_data)
    pitch = autocorr(snd_data['data'][snd_index].astype(np.float64))
    lag_seconds = np.arange(0.0, pitch.shape[0], 1) / snd_data['sample_rate']
    plt.figure()
    plt.plot(lag_seconds, pitch)
    plt.figure()
    plt.plot(lag_seconds, pitch)
    plt.xlim([0, 0.02])

    # Same, but using only a short excerpt from the middle of the speech.
    snd_data = snd_array[0]
    corr_data = extract_speak_mid(snd_data)
    pitch = autocorr(corr_data.astype(np.float64))
    plt.figure()
    plt.plot(corr_data)
    plt.figure()
    plt.plot(np.arange(0.0, pitch.shape[0], 1) / snd_data['sample_rate'], pitch)
    plt.xlim([0, 0.02])

    print(extract_pitch(pitch, snd_data['sample_rate']))

    # Features of every recording: mean voiced amplitude and pitch period.
    for snd_data in snd_array:
        use_index = extract_speak(snd_data)
        snd_data['amp_mean'] = np.mean(snd_data['amp'][use_index])
        pitch = autocorr(extract_speak_mid(snd_data).astype(np.float64))
        snd_data['pitch'] = extract_pitch(pitch, snd_data['sample_rate'])

    # Successive plot() calls draw on the same axes by default, so the
    # plt.hold() calls (removed in matplotlib 3.0) are not needed.
    plt.figure()
    for snd_data in snd_array:
        plt.plot(snd_data['amp_mean'], snd_data['pitch'], '.',
                 color=label_color(snd_data['fname']))
    plt.xlabel('amp_mean')
    plt.ylabel('pitch')

    # Training set: label 1 marks "weak" recordings, label 0 everything else.
    vol_array = np.array([snd_data['amp_mean'] for snd_data in snd_array])
    peak_array = np.array([snd_data['pitch'] for snd_data in snd_array])
    label = np.array([1.0 if 'weak' in snd_data['fname'] else 0.0
                      for snd_data in snd_array])

    # Rescale so both features have a comparable order of magnitude.
    data_training = [[x / 1000, y * 1000] for (x, y) in zip(vol_array, peak_array)]
    label_training = [int(x) for x in label]
    print(data_training)
    print(label_training)

    estimator = SVC(C=10.0, kernel='linear')
    estimator.fit(data_training, label_training)

    label_prediction = estimator.predict(data_training)
    print(label_prediction)

    # Decision regions evaluated on a grid padded by 1 around the data.
    X = np.array(data_training)
    h = .02
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.figure()
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)

    plot_data = np.array(data_training)
    label = np.array(label_training)
    plt.plot(plot_data[label == 0, 0], plot_data[label == 0, 1], '.', color='r')
    plt.plot(plot_data[label == 1, 0], plot_data[label == 1, 1], '.', color='b')
    plt.xlabel('amp')
    plt.ylabel('pitch')

    plt.show()