Skip to content

Instantly share code, notes, and snippets.

@msly
Last active March 21, 2023 13:26
Show Gist options
  • Select an option

  • Save msly/6961bc4eda56811ef17cfba0caaaa88d to your computer and use it in GitHub Desktop.

Select an option

Save msly/6961bc4eda56811ef17cfba0caaaa88d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import gradio as gr
import whisper
from datetime import datetime
last_model_name = "small"
model = whisper.load_model(last_model_name)
def sendToWhisper(audio_record, audio_upload, audio_url, task, model_name, language_selected, results):
start, result = datetime.now(), [model_name, None, language_selected, None, None]
audio = audio_url or audio_record or audio_upload
if audio is None:
return [["No input"]*5]
print(f"audio not none: {audio}")
global last_model_name
if model_name != last_model_name:
global model
model = whisper.load_model(model_name)
last_model_name = model_name
print(f"load model, device:{model.device}")
mel = whisper.log_mel_spectrogram(whisper.pad_or_trim(whisper.load_audio(audio))).to(model.device)
print(f"mel shape: {mel.shape}")
results.append(result)
prob, language = 0, language_selected
if language == "none":
langugage = None
elif language == "detect":
_, probs = model.detect_language(mel)
language = max(probs, key=probs.get)
prob = probs[language]
print(f"detected language: {model_name} - {language}")
result[2:4] = [language, str(prob)]
print(f"using model: {model_name} - {language} - {task}")
t1 = datetime.now()
options = whisper.DecodingOptions(task=task, language=language)
output_text = whisper.decode(model, mel, options)
t2 = datetime.now()
result[1], result[4] = output_text.text, str((datetime.now() - start).total_seconds())
print(result, (t2 - t1).total_seconds())
return results
CSS = """
#audio_inputs {
height:100px;
max-height:100px;
}
"""
LANGUAGES = [ "en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl", "ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi", "iw", "uk", "el", "ms", "cs", "ro", "da", "hu", "ta", "no", "th", "ur", "hr", "bg", "lt", "la", "mi", "ml", "cy", "sk", "te", "fa", "lv", "bn", "sr", "az", "sl", "kn", "et", "mk", "br", "eu", "is", "hy", "ne", "mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km", "sn", "yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi", "lo", "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my", "bo", "tl", "mg", "as", "tt", "haw", "ln", "ha", "ba", "jw", "su" ]
all_models = whisper.available_models()
with gr.Blocks(css=CSS) as demo:
gr.Markdown("### [OpenAI Whisper](https://openai.com/blog/whisper/) Demo")
gr.Markdown("A [Gradio](https://gradio.app) based Speech-to-Text (aka ASR) Demo of the [Open AI Whisper Model](https://github.com/openai/whisper)")
gr.Markdown('<p align="right"><a href="https://huggingface.co/spaces/davidtsong/whisper-demo">inspired</a> and built like <a href="https://gist.github.com/kpe/6a70395ce171ffee43d927eaf90b81b6/#file-openai-whisper-demo-md">so</a></p>')
results = gr.State([])
with gr.Column():
gr.Markdown("### Record or upload audio or audio url")
with gr.Row():
audio_record = gr.Audio(source="microphone", type="filepath", elem_id="audio_inputs", label="Recorded Audio")
audio_upload = gr.Audio(source="upload", type="filepath", interactive=True, elem_id="audio_inputs")
audio_url = gr.Textbox(label="Audio URL")
gr.Markdown("### Select a model and language")
with gr.Row():
models_selected = gr.Dropdown(all_models, label="Whisper model to use", value="tiny")
language = gr.Dropdown(["detect", "none"] + LANGUAGES, label="Language", value="detect")
task = gr.Dropdown(["transcribe", "translate"], label="Task", value="transcribe")
submit = gr.Button(value="Transcribe")
gr.Markdown("### Result")
output = gr.Dataframe(headers=["Model", "Transcription", "Language", "Language Confidence", "Time [s]"], label="Results", wrap=True)
submit.click(fn=sendToWhisper, inputs=[audio_record, audio_upload, audio_url, task, models_selected, language, results], outputs=output)
demo.launch(server_name='0.0.0.0', share=False)
@msly
Copy link
Author

msly commented Mar 21, 2023

https://gist.github.com/kpe/6a70395ce171ffee43d927eaf90b81b6

1 add audio_url
2 listen 0.0.0.0, share=False
3 load_model once when same model name

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment