Install the Transformers, Datasets, and Evaluate libraries to run this notebook.
!pip install datasets evaluate transformers[sentencepiece]
!pip install gradio
import numpy as np
import gradio as gr
def reverse_audio(audio):
sr, data = audio
reversed_audio = (sr, np.flipud(data))
return reversed_audio
mic = gr.Audio(source="microphone", type="numpy", label="Speak here...")
gr.Interface(reverse_audio, mic, "audio").launch()
import numpy as np
import gradio as gr
notes = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
def generate_tone(note, octave, duration):
sr = 48000
a4_freq, tones_from_a4 = 440, 12 * (octave - 4) + (note - 9)
frequency = a4_freq * 2 ** (tones_from_a4 / 12)
duration = int(duration)
audio = np.linspace(0, duration, duration * sr)
audio = (20000 * np.sin(audio * (2 * np.pi * frequency))).astype(np.int16)
return (sr, audio)
gr.Interface(
generate_tone,
[
gr.Dropdown(notes, type="index"),
gr.Slider(minimum=4, maximum=6, step=1),
gr.Textbox(type="number", value=1, label="Duration in seconds"),
],
"audio",
).launch()
from transformers import pipeline
import gradio as gr
model = pipeline("automatic-speech-recognition")
def transcribe_audio(mic=None, file=None):
if mic is not None:
audio = mic
elif file is not None:
audio = file
else:
return "You must either provide a mic recording or a file"
transcription = model(audio)["text"]
return transcription
gr.Interface(
fn=transcribe_audio,
inputs=[
gr.Audio(source="microphone", type="filepath", optional=True),
gr.Audio(source="upload", type="filepath", optional=True),
],
outputs="text",
).launch()