Spaces:

ixxan
/

uyghur-speech-models

Running

File size: 1,657 Bytes

20aa839
3a18b3b
bef8623
660776b
20aa839
448bf1b
 
 
 
 
 
 
 
 
 
 
c492cbb
448bf1b
 
 
 
 
 
 
 
 
 
 
 
 
 
9db718b
448bf1b
 
 
 
 
 
 
c492cbb
448bf1b
9db718b
448bf1b
 
 
 
 
 
 
 
 
 
 
20aa839
448bf1b
 
 
 
20aa839
 
448bf1b
 
20aa839

import gradio as gr
import asr
import tts
import util

# Speech-to-text tab: wires ``asr.transcribe`` to an audio input plus a model
# selector, and displays what it returns in two text boxes (Arabic-script and
# Latin-script transcription). Components are built in the order the UI
# lists them.
_asr_audio_input = gr.Audio(
    label="Record or Upload Uyghur Audio",
    sources=["microphone", "upload"],
    type="filepath",
)
_asr_model_picker = gr.Dropdown(
    # Iterating ``asr.models_info`` yields the selectable model names.
    choices=list(asr.models_info),
    label="Select a Model",
    # NOTE(review): assumes this name is present in asr.models_info — confirm.
    value="Ixxan-FineTuned-MMS",
    interactive=True,
)
_asr_result_boxes = [
    gr.Textbox(label="Uyghur Arabic Transcription"),
    gr.Textbox(label="Uyghur Latin Transcription"),
]

mms_transcribe = gr.Interface(
    fn=asr.transcribe,
    inputs=[_asr_audio_input, _asr_model_picker],
    outputs=_asr_result_boxes,
    examples=util.asr_examples,
    title="Speech-To-Text",
    description="Transcribe Uyghur speech audio from a microphone or input file.",
    allow_flagging="never",
)

# Text-to-speech tab: wires ``tts.synthesize`` to a text input plus a model
# selector, and plays back whatever it returns in a single audio component.
_tts_text_input = gr.Text(label="Input text")
_tts_model_picker = gr.Dropdown(
    # Iterating ``tts.models_info`` yields the selectable model names.
    choices=list(tts.models_info),
    label="Select a Model",
    # NOTE(review): assumes this name is present in tts.models_info — confirm.
    value="Ixxan-FineTuned-MMS",
    interactive=True,
)
_tts_audio_output = gr.Audio(label="Generated Audio")

mms_synthesize = gr.Interface(
    fn=tts.synthesize,
    inputs=[_tts_text_input, _tts_model_picker],
    outputs=[_tts_audio_output],
    examples=util.tts_examples,
    title="Text-To-Speech",
    description="Generate audio from input Uyghur text.",
    allow_flagging="never",
)

# Combine both interfaces into one widget; tab labels pair up with the
# interfaces by position.
tabbed_interface = gr.TabbedInterface(
    interface_list=[mms_transcribe, mms_synthesize],
    tab_names=["Speech-To-Text", "Text-To-Speech"],
)

# Top-level app: a short heading rendered above the two-tab interface.
with gr.Blocks() as demo:
    gr.Markdown("Comparison of STT and TTS models for Uyghur language.")
    # The tabbed interface was built outside this context, so it has to be
    # rendered explicitly to appear inside ``demo``.
    tabbed_interface.render()

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # Enable request queuing, then launch the server.
    demo.queue().launch()