import os
import re
import tempfile;
import ipywidgets as widgets;
from ipywebrtc import AudioRecorder, CameraStream, AudioStream;
from onsei.speech_record import SpeechRecord, AlignmentMethod;
from onsei.utils import segment_speech;
from onsei.figures import ViewRecordFigure, CompareFigure;
from onsei.sentence import Sentence;
from onsei.widgets import SampleSelector, UploadSample;
# Globals
def get_jsut_samples():
    """Index the bundled JSUT corpus sample by sentence text.

    Reads ``transcript_utf8.txt`` (one ``basename:sentence`` entry per
    line) and maps each sentence to its corresponding WAV file.

    Returns:
        dict: sentence -> {"filename": <wav path>, "sentence": sentence}
    """
    samples = {}
    basepath = "data/jsut_basic5000_sample"
    with open(os.path.join(basepath, "transcript_utf8.txt")) as f:
        for line in f:
            # Split only on the first ':' so sentences that themselves
            # contain a colon are kept intact.
            basename, sentence = line.rstrip().split(':', 1)
            filename = os.path.join(basepath, f"{basename}.wav")
            samples[sentence] = {
                "filename": filename,
                "sentence": sentence,
            }
    return samples
def get_forvo_samples():
    """Index the bundled Forvo greetings/apologies samples by sentence.

    Matches files named ``pronunciation_ja_<sentence>.wav`` and maps
    each extracted sentence to its file path.

    Returns:
        dict: sentence -> {"filename": <wav path>, "sentence": sentence}
    """
    samples = {}
    basepath = "data/forvo/everyday_phrases/greetings_and_apologies"
    # Escape the '.' before "wav": the original unescaped '.' matched
    # any character, so e.g. "pronunciation_ja_abXwav" slipped through.
    pattern = re.compile(r"pronunciation_ja_([^.]+)\.wav")
    for fname in sorted(os.listdir(basepath)):
        m = pattern.fullmatch(fname)
        if m:
            sentence = m.group(1)
            samples[sentence] = {
                "filename": os.path.join(basepath, fname),
                "sentence": sentence,
            }
    return samples
# Sample collections, keyed by collection name, then by sentence text.
samples = {
    "Forvo": get_forvo_samples(),
    "JSUT Basic 5000 corpus": get_jsut_samples(),
    "My samples": {}, # Special collection for user's samples
}
# Current teacher/student recordings and the sentence being practiced;
# populated later by update_sample() and run_compare().
teacher_rec = None
student_rec = None
sentence = None
# Default values for the option checkboxes below.
default_autoplay = True
show_spaces_between_segments = False
default_crop_vad = True
# Scratch directory holding the user's uploaded samples; cleaned up
# automatically when the object is garbage-collected / on exit.
my_samples_dir = tempfile.TemporaryDirectory();
# Create widgets
# Sample picker and upload form (project-local widgets).
w_sample_selector = SampleSelector(samples)
w_upload_sample = UploadSample(samples, my_samples_dir.name)
# Option checkboxes.
w_autoplay_tick = widgets.Checkbox(
    value=default_autoplay,
    description='Autoplay',
    disabled=False,
    indent=False
)
w_show_spaces_tick = widgets.Checkbox(
    value=show_spaces_between_segments,
    description='Show spaces between sentence segments',
    disabled=False,
    indent=False
)
w_crop_vad_tick = widgets.Checkbox(
    value=default_crop_vad,
    description='Crop graphs to detected speech',
    disabled=False,
    indent=False
)
w_alignment_method_dropdown = widgets.Dropdown(
    options=[a.value for a in AlignmentMethod],
    description='Alignment method:',
    disabled=False,
)
# Collapsible "Options" panel grouping the settings above;
# selected_index=None starts it collapsed.
w_options_accordion = widgets.Accordion(
    children=[widgets.VBox([
        w_autoplay_tick,
        w_show_spaces_tick,
        w_crop_vad_tick,
        w_alignment_method_dropdown,
    ])],
    selected_index=None,
)
w_options_accordion.set_title(0, "Options")
# Teacher audio player (filled in by update_sample) and sentence display.
w_audio = widgets.Audio(value=b'', format='wav', autoplay=default_autoplay, loop=False)
w_sentence = widgets.HTML(value='')
# Microphone capture (audio only) feeding the in-browser recorder.
camera = CameraStream(constraints={'audio': True, 'video': False})
w_recorder = AudioRecorder(stream=camera)
# Comparison trigger and result label.
w_compare_btn = widgets.Button(description="Compare")
w_cmp_result = widgets.Label(value='')
# Figures showing the teacher/student recordings and their comparison.
fig_teacher = ViewRecordFigure(title="Teacher's recording")
fig_student = ViewRecordFigure(title="Your recording")
fig_cmp = CompareFigure()
# Callbacks
def add_uploaded_sample(change):
    """Callback: register a freshly uploaded sample and select it.

    Stores the uploaded sample in the "My samples" collection, then
    switches the sample selector to it.
    """
    # No `global samples` needed: the existing dict is mutated in place,
    # never rebound.
    sample = change["new"]
    samples['My samples'][sample['sentence']] = sample
    # Switch to this sample
    w_sample_selector.set_selection('My samples', sample['sentence'])
w_upload_sample.observe(add_uploaded_sample, 'value')
def update_autoplay(change):
    """Callback: propagate the Autoplay checkbox to the audio widget."""
    w_audio.autoplay = change['new']
w_autoplay_tick.observe(update_autoplay, 'value')
def update_show_spaces(change):
    """Callback: toggle segment spacing and re-render the sentence."""
    global show_spaces_between_segments
    show_spaces_between_segments = change['new']
    update_sentence()
w_show_spaces_tick.observe(update_show_spaces, 'value')
# Keep both figures' crop setting in sync with the checkbox
# (jslink: synced client-side, no kernel round-trip).
widgets.jslink((w_crop_vad_tick, 'value'), (fig_teacher, 'crop_vad'))
widgets.jslink((w_crop_vad_tick, 'value'), (fig_student, 'crop_vad'))
def get_sample_audio_data(sample):
    """Return the raw audio bytes of a sample's WAV file.

    Args:
        sample: dict with a "filename" key pointing at an audio file.

    Returns:
        bytes: the file's full contents.
    """
    # Use a context manager so the file handle is closed promptly;
    # the original relied on the garbage collector to close it.
    with open(sample['filename'], 'rb') as f:
        return f.read()
def update_sentence():
    """Refresh the sentence display from the current global `sentence`.

    Renders the sentence as large HTML, optionally removing the spaces
    that separate sentence segments; clears the widget when no sentence
    is selected.
    """
    if not sentence:
        w_sentence.value = ''
        return
    html = sentence.to_html()
    if not show_spaces_between_segments:
        html = html.replace(" ", "")
    w_sentence.value = f'<p style="font-size: xx-large">{html}</p>'
def update_sample(sample):
    """Load a teacher sample: update the sentence, audio and figures.

    Also clears any previous student recording and comparison result.
    All widget updates are batched inside hold_sync() blocks so the
    front-end repaints once instead of flickering per update.
    """
    global teacher_rec
    global sentence
    sentence = Sentence(sample["sentence"])
    with w_sentence.hold_sync(), w_audio.hold_sync(), fig_teacher.hold_sync(), fig_student.hold_sync(), fig_cmp.hold_sync(), w_cmp_result.hold_sync():
        update_sentence()
        teacher_rec = SpeechRecord(sample['filename'], sentence, name="Teacher");
        w_audio.value = get_sample_audio_data(sample);
        fig_teacher.update_data(teacher_rec);
        fig_student.clear();
        fig_cmp.clear();
        w_cmp_result.value = ""
# Load the initially selected sample right away.
update_sample(w_sample_selector.selected_sample())

def sample_changed(change):
    """Callback: load the newly selected sample into the UI."""
    new_sample = dict(change["new"])
    update_sample(new_sample)

w_sample_selector.observe(sample_changed, 'value')
def get_student_wav_filename():
try:
w_recorder.save('test.webm')
except ValueError as exc:
if str(exc).startswith('No data'):
w_cmp_result.value = f"Record something first !"
raise exc
!ffmpeg -hide_banner -loglevel error -y -i test.webm -ar 16000 -ac 1 test.wav
return 'test.wav'
def run_compare(_):
    """Compare the student's recording against the teacher's.

    Converts the student's recording to WAV, aligns it with the teacher
    recording using the selected alignment method, and reports the mean
    pitch distance. Wired both to the Compare button and to the
    alignment-method dropdown, so the argument may be a Button or an
    observer change dict — it is unused either way.
    """
    global student_rec
    student_wav_filename = get_student_wav_filename()
    # Alternatively, here is a sample:
    #student_wav_filename = "data/itsu_ga_ii_ka_wakarimasen.wav"
    student_rec = SpeechRecord(student_wav_filename, sentence, name="Student")
    fig_student.update_data(student_rec)
    alignment_method = w_alignment_method_dropdown.value
    try:
        student_rec.align_with(teacher_rec, method=alignment_method)
        mean_distance = student_rec.compare_pitch()
        w_cmp_result.value = f"Success !\nMean distance = {mean_distance:.2f}"
    except Exception:
        w_cmp_result.value = "FAILED !"
        # Bare raise preserves the original traceback so the error
        # still surfaces in the notebook output.
        raise
    fig_cmp.update_data(teacher_rec, student_rec)
# Run the comparison when the button is clicked.
w_compare_btn.on_click(run_compare)
# Update the comparison if we change the alignment method
w_alignment_method_dropdown.observe(run_compare, 'value')
# Layout
# Tabs: pick an existing sample or upload a new one.
w_tab = widgets.Tab()
w_tab.children = [w_sample_selector, w_upload_sample]
w_tab.set_title(0, "Samples")
w_tab.set_title(1, "Upload new samples")
# Vertical column layout: selector/options, sentence, audio-controls
# row, then the three figures.
box = widgets.Box([
    widgets.Box([
        w_tab,
        w_options_accordion
    ]),
    w_sentence,
    # Three equal-width columns: teacher audio, recorder, compare/result.
    widgets.Box([
        widgets.VBox([widgets.Label(value="Teacher's recording:"), w_audio], layout=widgets.Layout(width='33%')),
        widgets.VBox([widgets.Label(value="Your recording:"), w_recorder], layout=widgets.Layout(width='33%')),
        widgets.VBox([w_compare_btn, w_cmp_result], layout=widgets.Layout(width='33%')),
    ]),
    fig_cmp,
    fig_student,
    fig_teacher,
], layout=widgets.Layout(display="flex", flex_flow="column", align_items="stretch", align_content="center")
)
display(box)