File size: 3,239 Bytes
43da485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import gradio as gr
from transformers import pipeline

# Load Hugging Face models for speech-to-text and grammar correction.
#
# NOTE(review): the original code built three separate pipelines per task, but
# every one pointed at the *same* checkpoint — loading the identical model
# three times triples startup time and memory for no benefit. Load each model
# once and alias it per language. The per-language names are kept so existing
# callers keep working.
#
# TODO(owner): substitute genuinely language-specific checkpoints for French
# and Urdu (the LibriSpeech ASR model and the English GEC model will not
# handle those languages well).
_asr_model = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-librispeech-asr")
_grammar_model = pipeline("text2text-generation", model="prithivida/grammar_error_correcter_v1")

# Per-language aliases (all currently share the single loaded instance).
s2t_en = _asr_model
s2t_fr = _asr_model  # placeholder: replace with a French ASR model
s2t_ur = _asr_model  # placeholder: replace with an Urdu ASR model (if available)

grammar_en = _grammar_model
grammar_fr = _grammar_model  # placeholder: replace with a French GEC model
grammar_ur = _grammar_model  # placeholder: replace with an Urdu GEC model

def out(audio1, audio2, input_lang, output_lang):
    """Transcribe an audio clip, grammar-correct it, and (eventually) translate it.

    Args:
        audio1: Filepath of an uploaded audio file, or None.
        audio2: Filepath of a microphone recording, or None. Used only when
            ``audio1`` is None (upload takes precedence).
        input_lang: One of "English", "French", "Urdu" — selects which
            ASR/grammar pipelines to use.
        output_lang: Target language for the translated output. Currently a
            placeholder: no translation model is wired in, so the "translated"
            text is always identical to the corrected text.

    Returns:
        A ``(corrected, translated)`` tuple of strings; both slots contain the
        sentinel "No audio uploaded" when neither audio input was provided.
    """
    # Dispatch table replaces the original if/elif chain; unknown languages
    # fall back to the Urdu pipelines, matching the original ``else`` branch.
    models = {
        "English": (s2t_en, grammar_en),
        "French": (s2t_fr, grammar_fr),
    }
    s2t_model, grammar_model = models.get(input_lang, (s2t_ur, grammar_ur))

    # Upload (audio1) takes precedence over microphone (audio2), exactly as
    # the original branch order did. Guard clause handles the no-audio case.
    audio = audio1 if audio1 is not None else audio2
    if audio is None:
        return "No audio uploaded", "No audio uploaded"

    # The original duplicated these two lines in both audio branches;
    # collapsed into a single path.
    transcript = s2t_model(audio)["text"]
    corrected = grammar_model(transcript)[0]["generated_text"]

    # Translation is not yet implemented: every output_lang branch in the
    # original returned the corrected text unchanged.
    # TODO(owner): plug in a translation pipeline keyed on output_lang.
    translated = corrected

    return corrected, translated


# Define the Gradio interface.
#
# NOTE(review): this uses the legacy ``gr.inputs`` namespace and the
# ``source=`` / ``optional=`` / ``default=`` keyword style, which were removed
# in Gradio 3.x — kept as-is on the assumption the project pins an older
# Gradio; confirm before upgrading.
iface = gr.Interface(
    fn=out,
    title="Speech-to-Text with Grammar Correction and Translation",
    description="Select input and output language. Upload an audio file or use the microphone to convert speech to text, correct the grammar, and optionally translate it.",
    inputs=[
        gr.inputs.Audio(source="upload", type="filepath", label="Upload Audio File (Optional)", optional=True),
        gr.inputs.Audio(source="microphone", type="filepath", label="Record Using Microphone (Optional)", optional=True),
        gr.inputs.Dropdown(["English", "French", "Urdu"], label="Input Language", default="English"),
        gr.inputs.Dropdown(["English", "French", "Urdu"], label="Output Language", default="English"),
    ],
    outputs=["text", "text"],
    # BUG FIX: each example row must supply one value per input component
    # (4 here). The original rows had a single element each, which breaks
    # Gradio's example rendering/caching.
    examples=[
        ["Grammar-Correct-Sample.mp3", None, "English", "English"],
        ["Grammar-Wrong-Sample.mp3", None, "English", "English"],
    ],
)

# Launch the app; queueing serializes requests to the shared model pipelines.
iface.launch(enable_queue=True, show_error=True)