Rachid Ammari committed
Commit 5621130 · Parent(s): 99a5348
Added spanish model and example
app.py
CHANGED
@@ -4,6 +4,7 @@ import whisper
 
 wav2vec_en_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
 wav2vec_fr_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-french")
+wav2vec_es_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-spanish")
 whisper_model = whisper.load_model("base")
 
 def transcribe_audio(language=None, mic=None, file=None):
@@ -23,6 +24,8 @@ def load_models(lang):
         return wav2vec_en_model
     elif lang == 'fr':
         return wav2vec_fr_model
+    elif lang == 'es':
+        return wav2vec_es_model
     else:
         # default english
         return wav2vec_en_model
@@ -33,12 +36,16 @@ This Space allows easy comparisons for transcribed texts between Facebook's Wav2
 (Even if Whisper includes a language detection and even an automatic translation, here we have decided to select the language to speed up the transcription and to focus only on the quality of the transcriptions. The default language is english)
 """
 article = "Check out [the OpenAI Whisper model](https://github.com/openai/whisper) and [the Facebook Wav2vec model](https://ai.facebook.com/blog/wav2vec-20-learning-the-structure-of-speech-from-raw-audio/) that this demo is based off of."
-examples = [["en", None, "english_sentence.flac"],
+examples = [["en", None, "english_sentence.flac"],
+            ["en", None, "6_Steps_To_Hit_ANY_Goal.mp3000.mp3"],
+            ["fr", None, "2022-a-Droite-un-fauteuil-pour-trois-3034044.mp3000.mp3"],
+            ["fr", None, "podcast-bdl-episode-5-mix-v2.mp3000.mp3"],
+            ["es", None, "momiasartesecretodelantiguoegipto-nationalgeographicespana-ivoox73191074.mp3000.mp3"]]
 
 gr.Interface(
     fn=transcribe_audio,
     inputs=[
-        gr.Radio(label="Language", choices=["en", "fr"], value="en"),
+        gr.Radio(label="Language", choices=["en", "fr", "es"], value="en"),
         gr.Audio(source="microphone", type="filepath", optional=True),
         gr.Audio(source="upload", type="filepath", optional=True),
     ],
momiasartesecretodelantiguoegipto-nationalgeographicespana-ivoox73191074.mp3000.mp3
ADDED · Binary file (480 kB)
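For context, here is a minimal sketch of how the pieces touched by this commit fit together after the change: the three Wav2Vec2 pipelines, the load_models switch extended with the Spanish branch, and a side-by-side transcription call. The compare helper and its transcribe arguments are assumptions for illustration only; the Space's actual transcribe_audio body and gr.Interface wiring live in app.py and are not reproduced in full here.

import whisper
from transformers import pipeline

# The three Wav2Vec2 pipelines and the Whisper model, as declared in app.py after this commit.
wav2vec_en_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
wav2vec_fr_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-french")
wav2vec_es_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-spanish")
whisper_model = whisper.load_model("base")

def load_models(lang):
    # Pick the Wav2Vec2 pipeline for the selected language; English is the fallback.
    if lang == 'en':
        return wav2vec_en_model
    elif lang == 'fr':
        return wav2vec_fr_model
    elif lang == 'es':
        return wav2vec_es_model
    else:
        # default english
        return wav2vec_en_model

def compare(lang, audio_path):
    # Hypothetical helper (not part of app.py): transcribe the same file with
    # both models so the two outputs can be compared side by side.
    wav2vec_text = load_models(lang)(audio_path)["text"]
    whisper_text = whisper_model.transcribe(audio_path, language=lang)["text"]
    return wav2vec_text, whisper_text

For example, compare("es", "momiasartesecretodelantiguoegipto-nationalgeographicespana-ivoox73191074.mp3000.mp3") would return the Wav2Vec2 and Whisper transcriptions of the newly added Spanish sample.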