XLS-R-2B-EN-15 / app.py
patrickvonplaten's picture
Update app.py
23eacd8
raw
history blame
2.39 kB
import gradio as gr
import librosa
from transformers import AutoFeatureExtractor, AutoTokenizer, SpeechEncoderDecoderModel
# Checkpoint identifier shared by every from_pretrained call below.
model_name = "facebook/wav2vec2-xls-r-2b-en-to-15"
# Turns raw audio samples into the model's input_values (used in process_audio_file).
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
# use_fast=False asks AutoTokenizer not to load the "fast" tokenizer variant.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
# Speech encoder-decoder model; transcribe() calls its generate() method.
model = SpeechEncoderDecoderModel.from_pretrained(model_name)
def process_audio_file(file, *, sampling_rate=16000):
    """Load an audio file and convert it into model input values.

    Args:
        file: Path to the audio file (anything ``librosa.load`` accepts).
        sampling_rate: Rate in Hz the audio is resampled to while loading and
            that is reported to the feature extractor. Defaults to 16000,
            the rate this function has always produced.

    Returns:
        The ``input_values`` tensor (``return_tensors="pt"``) produced by the
        module-level ``feature_extractor``.
    """
    # Resample during loading instead of loading at librosa's default rate
    # and resampling afterwards. The previous positional call
    # ``librosa.resample(data, sr, 16000)`` fails on librosa >= 0.10, where
    # ``orig_sr`` / ``target_sr`` are keyword-only.
    data, _ = librosa.load(file, sr=sampling_rate)
    print(data.shape)

    # Telling the extractor the rate lets it reject audio whose rate does not
    # match the one it was configured with, rather than silently accepting it.
    input_values = feature_extractor(
        data,
        sampling_rate=sampling_rate,
        return_tensors="pt",
    ).input_values
    return input_values
def transcribe(file, target_language):
    """Run the speech model on an audio file for the chosen target language.

    Args:
        file: Path to the audio file, forwarded to ``process_audio_file``.
        target_language: Dropdown label such as ``"German (de)"``; the text
            between the last ``(`` and the following ``)`` is the key looked
            up in ``MAPPING``.

    Returns:
        The first decoded string for the generated sequence.

    Raises:
        KeyError: If the extracted language code is not a key of ``MAPPING``.
    """
    # "German (de)" -> "de)" -> "de"
    tail = target_language.rpartition("(")[2]
    language_code = tail.partition(")")[0]

    # Token id the decoder is forced to emit first for this language.
    bos_token_id = MAPPING[language_code]

    features = process_audio_file(file)
    generated = model.generate(features, forced_bos_token_id=bos_token_id)
    decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
    return decoded[0]
# Dropdown choices in "Name (code)" form; transcribe() extracts the code in
# parentheses and uses it as the key into MAPPING.
target_language = [
    "English (en)",
    "German (de)",
    "Turkish (tr)",
    "Persian (fa)",
    "Swedish (sv)",
    "Mongolian (mn)",
    "Chinese (zh)",
    "Welsh (cy)",
    "Catalan (ca)",
    "Slovenian (sl)",
    "Estonian (et)",
    "Indonesian (id)",
    "Arabic (ar)",
    "Tamil (ta)",
    "Latvian (lv)",
    "Japanese (ja)",
]
# Language code -> token id that transcribe() passes to model.generate() as
# forced_bos_token_id. Keys match the codes in the target_language labels.
MAPPING = {
    "en": 250004,
    "de": 250003,
    "tr": 250023,
    "fa": 250029,
    "sv": 250042,
    "mn": 250037,
    "zh": 250025,
    "cy": 250007,
    "ca": 250005,
    "sl": 250052,
    "et": 250006,
    "id": 250032,
    "ar": 250001,
    "ta": 250044,
    "lv": 250017,
    "ja": 250012,
}
# Gradio UI: a microphone recording plus a language dropdown go into
# transcribe(), and the returned string is shown as text.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        # type='filepath' hands transcribe() a path on disk rather than raw samples.
        gr.inputs.Audio(source="microphone", type='filepath'),
        gr.inputs.Dropdown(target_language),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="XLS-R 2B EN-to-15 Speech Translation",
    description="A simple interface to translate from spoken English to 15 written languages.",
    # The emoji in this footer had been mis-decoded into mojibake; restored to
    # the intended microphone emoji.
    article = "<p style='text-align: center'><a href='https://huggingface.co/facebook/wav2vec2-xls-r-2b-en-to-15' target='_blank'>Click to learn more about XLS-R-2B-EN-15 </a> | <a href='https://arxiv.org/abs/2111.09296' target='_blank'> With 🎙️ from Facebook XLS-R </a></p>",
)
# Start the web server for the interface.
iface.launch()