Spaces:
Sleeping
Sleeping
import gradio as gr | |
import torchaudio | |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, MarianMTModel, MarianTokenizer | |
# Load the translation model and tokenizer | |
translation_model_name = "Helsinki-NLP/opus-mt-en-ml" | |
translation_tokenizer = MarianTokenizer.from_pretrained(translation_model_name) | |
translation_model = MarianMTModel.from_pretrained(translation_model_name) | |
# Load the speech recognition model and tokenizer | |
asr_model_name = "facebook/wav2vec2-large-960h" | |
asr_processor = Wav2Vec2Processor.from_pretrained(asr_model_name) | |
asr_model = Wav2Vec2ForCTC.from_pretrained(asr_model_name) | |
# Translation function | |
def translate_text(text): | |
inputs = translation_tokenizer(text, return_tensors="pt", padding=True) | |
outputs = translation_model.generate(**inputs, max_length=128, num_beams=4, early_stopping=True) | |
translated_text = translation_tokenizer.decode(outputs[0], skip_special_tokens=True) | |
return translated_text | |
# Speech-to-text function | |
def speech_to_text(audio_path): | |
speech, rate = torchaudio.load(audio_path) | |
input_values = asr_processor(speech.squeeze(), sampling_rate=rate, return_tensors="pt").input_values | |
logits = asr_model(input_values).logits | |
predicted_ids = logits.argmax(dim=-1) | |
transcription = asr_processor.batch_decode(predicted_ids)[0] | |
return transcription | |
# Combined function for Gradio interface | |
def translate_speech(audio_path): | |
text = speech_to_text(audio_path) | |
translation = translate_text(text) | |
return translation | |
# Gradio interface | |
iface = gr.Interface( | |
fn=lambda text, audio_path: (translate_text(text), translate_speech(audio_path) if audio_path else None), | |
inputs=[gr.Textbox(label="Input English Text"), gr.Audio(type="filepath")], | |
outputs=[gr.Textbox(label="Translated Malayalam Text (from Text)"), gr.Textbox(label="Translated Malayalam Text (from Speech)")], | |
title="English to Malayalam Translator", | |
description="Translate English text or speech to Malayalam. Either enter text or speak into the microphone." | |
) | |
if __name__ == "__main__": | |
iface.launch() | |