Spaces:
Runtime error
Runtime error
File size: 3,239 Bytes
43da485 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import gradio as gr
from transformers import pipeline
# Load Hugging Face pipelines for speech-to-text (ASR) and grammar correction.
# NOTE(review): all three language slots currently point at the SAME English
# checkpoints (LibriSpeech ASR + English GEC). Loading each checkpoint once
# and aliasing it preserves the original behavior while avoiding three
# identical copies of each model in memory. Swap in real French/Urdu
# checkpoints (e.g. a multilingual Whisper/MMS ASR model) when available.
_asr = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-librispeech-asr")
s2t_en = _asr
s2t_fr = _asr  # TODO: replace with an actual French ASR model
s2t_ur = _asr  # TODO: replace with an actual Urdu ASR model

_gec = pipeline("text2text-generation", model="prithivida/grammar_error_correcter_v1")
grammar_en = _gec
grammar_fr = _gec  # TODO: replace with an actual French grammar-correction model
grammar_ur = _gec  # TODO: replace with an actual Urdu grammar-correction model
def out(audio1, audio2, input_lang, output_lang):
    """Transcribe one audio clip, grammar-correct the transcript, and return it.

    Args:
        audio1: Filepath of an uploaded audio file, or None.
        audio2: Filepath of a microphone recording, or None. Used only when
            ``audio1`` is None (the uploaded file takes precedence).
        input_lang: "English", "French", or "Urdu" — selects the ASR and
            grammar pipelines (any other value falls back to the Urdu pair).
        output_lang: Target language for the second output. Translation is
            currently a placeholder, so the "translated" text is always
            identical to the corrected text regardless of this value.

    Returns:
        (corrected_text, translated_text) — or a pair of "No audio uploaded"
        messages when neither audio input was provided.
    """
    # Dispatch table instead of an if/elif chain; unknown languages fall
    # back to the Urdu pair, matching the original `else` branch.
    pipelines = {
        "English": (s2t_en, grammar_en),
        "French": (s2t_fr, grammar_fr),
    }
    s2t_model, grammar_model = pipelines.get(input_lang, (s2t_ur, grammar_ur))

    # Prefer the uploaded file; fall back to the microphone recording.
    audio = audio1 if audio1 is not None else audio2
    if audio is None:
        return "No audio uploaded", "No audio uploaded"

    transcript = s2t_model(audio)["text"]
    corrected = grammar_model(transcript)[0]["generated_text"]

    # TODO: plug in a real translation pipeline keyed on `output_lang`.
    # All three original branches were identical placeholders, so the
    # "translation" is simply the corrected text for now.
    translated = corrected
    return corrected, translated
# Define the Gradio interface.
# NOTE(review): `gr.inputs.*`, the `optional=` kwarg, `default=` on Dropdown,
# and `launch(enable_queue=...)` were all removed in Gradio 3.x/4.x — using
# them raises at startup, which matches the Space's "Runtime error" banner.
# The top-level component classes below are the current equivalents.
iface = gr.Interface(
    fn=out,
    title="Speech-to-Text with Grammar Correction and Translation",
    description="Select input and output language. Upload an audio file or use the microphone to convert speech to text, correct the grammar, and optionally translate it.",
    inputs=[
        # Components are optional by default in modern Gradio (no `optional=`).
        gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File (Optional)"),
        gr.Audio(sources=["microphone"], type="filepath", label="Record Using Microphone (Optional)"),
        gr.Dropdown(["English", "French", "Urdu"], label="Input Language", value="English"),
        gr.Dropdown(["English", "French", "Urdu"], label="Output Language", value="English"),
    ],
    outputs=["text", "text"],
    examples=[["Grammar-Correct-Sample.mp3"], ["Grammar-Wrong-Sample.mp3"]],
)

# Launch the Gradio interface. `.queue()` replaces the removed
# `enable_queue=True` so long-running model inference isn't cut off.
iface.queue().launch(show_error=True)
|