import gradio as gr
from transformers import pipeline

# Load Hugging Face pipelines for speech-to-text and grammar correction.
# The French and Urdu entries below reuse the English checkpoints as placeholders;
# swap in language-specific models to get real multilingual support.
s2t_en = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-librispeech-asr")
s2t_fr = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-librispeech-asr")  # Placeholder: replace with a French ASR model
s2t_ur = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-librispeech-asr")  # Placeholder: replace with an Urdu ASR model (if available)

grammar_en = pipeline("text2text-generation", model="prithivida/grammar_error_correcter_v1")
grammar_fr = pipeline("text2text-generation", model="prithivida/grammar_error_correcter_v1")  # Placeholder: replace with a French grammar-correction model
grammar_ur = pipeline("text2text-generation", model="prithivida/grammar_error_correcter_v1")  # Placeholder: replace with an Urdu grammar-correction model


def out(audio1, audio2, input_lang, output_lang):
    # Pick the speech-to-text and grammar models for the selected input language.
    if input_lang == "English":
        s2t_model, grammar_model = s2t_en, grammar_en
    elif input_lang == "French":
        s2t_model, grammar_model = s2t_fr, grammar_fr
    else:
        s2t_model, grammar_model = s2t_ur, grammar_ur

    # Require at least one audio input (uploaded file or microphone recording).
    if audio1 is None and audio2 is None:
        return "No audio uploaded", "No audio uploaded"

    # Prefer the uploaded file; otherwise fall back to the microphone recording.
    audio = audio1 if audio1 is not None else audio2
    transcript = s2t_model(audio)["text"]
    corrected = grammar_model(transcript)[0]["generated_text"]

    # Translation is not wired in yet: the corrected text is returned unchanged
    # regardless of the selected output language. See the sketch at the end of
    # this file for one way to plug in Hugging Face translation pipelines.
    translated = corrected

    return corrected, translated


# Define the Gradio interface.
iface = gr.Interface(
    fn=out,
    title="Speech-to-Text with Grammar Correction and Translation",
    description=(
        "Select input and output language. Upload an audio file or use the microphone "
        "to convert speech to text, correct the grammar, and optionally translate it."
    ),
    inputs=[
        gr.inputs.Audio(source="upload", type="filepath", label="Upload Audio File (Optional)", optional=True),
        gr.inputs.Audio(source="microphone", type="filepath", label="Record Using Microphone (Optional)", optional=True),
        gr.inputs.Dropdown(["English", "French", "Urdu"], label="Input Language", default="English"),
        gr.inputs.Dropdown(["English", "French", "Urdu"], label="Output Language", default="English"),
    ],
    outputs=["text", "text"],
    # Each example row must supply a value for every input component.
    examples=[
        ["Grammar-Correct-Sample.mp3", None, "English", "English"],
        ["Grammar-Wrong-Sample.mp3", None, "English", "English"],
    ],
)

# Launch the Gradio interface.
iface.launch(enable_queue=True, show_error=True)
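
# --- Sketch: adding real translation (referenced in out() above) ---
# This is a minimal sketch, not part of the original app. It assumes the
# Helsinki-NLP/opus-mt-en-fr and Helsinki-NLP/opus-mt-fr-en checkpoints; other
# pairs (e.g. involving Urdu) would need their own models and are passed through
# unchanged here. To use it, define this helper near the pipelines at the top of
# the file and call translate(corrected, input_lang, output_lang) inside out()
# instead of returning the corrected text directly.

_translators = {}  # cache so each translation model is loaded only once


def translate(text, input_lang, output_lang):
    """Translate between English and French; unsupported pairs are returned unchanged."""
    models = {
        ("English", "French"): "Helsinki-NLP/opus-mt-en-fr",
        ("French", "English"): "Helsinki-NLP/opus-mt-fr-en",
    }
    model_name = models.get((input_lang, output_lang))
    if model_name is None:
        return text
    if model_name not in _translators:
        _translators[model_name] = pipeline("translation", model=model_name)
    return _translators[model_name](text)[0]["translation_text"]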