import gradio as gr
from transformers import pipeline

# Build the speech-recognition pipeline once, at import time, so that every
# transcription request reuses the same loaded model.
pipe = pipeline(
    model="lyimo/whisper-small-sw-badili-v4",
    task="automatic-speech-recognition",
)

def transcribe(audio):
    """Transcribe a Swahili audio recording to text.

    Args:
        audio: Path to the audio file to transcribe, or ``None`` when no
            audio was supplied.

    Returns:
        The transcribed text, or an empty string when ``audio`` is ``None``.
    """
    if audio is None:
        return ""

    result = pipe(
        audio,
        # Whisper works on 30-second windows; chunking lets recordings longer
        # than that be transcribed in full instead of being cut off or
        # rejected (behaviour without chunking varies by transformers version).
        chunk_length_s=30,
        generate_kwargs={"language": "swahili"},
    )
    return result["text"]

# Build the web UI: a single audio input (microphone recording or uploaded
# file, requested as a file path) wired to `transcribe`, with the result shown
# in a text box. Flagging is turned off.
interface = gr.Interface(
    title="Swahili Speech Recognition",
    description="Record or upload Swahili audio to see the Whisper transcription",
    fn=transcribe,
    inputs=gr.Audio(
        type="filepath",
        sources=["microphone", "upload"],
    ),
    outputs=gr.Textbox(label="Transcription"),
    allow_flagging="never",
)

# Start serving the app.
interface.launch()