"""Realtime French speech-to-text demo: a Gradio UI over a wav2vec2 ASR pipeline."""

from typing import Optional, Tuple

from transformers import pipeline
import gradio as gr

# Chunking arguments (in seconds) forwarded to the pipeline on every call.
CHUNK_LENGTH_S = 5
STRIDE_LENGTH_S = 1

# Built once at import time so every streaming callback reuses the same model.
pipe = pipeline(model="bhuang/wav2vec2-xls-r-1b-cv9-fr")


def transcribe(audio: Optional[str], state: Optional[str] = "") -> Tuple[str, str]:
    """Transcribe one audio clip and append it to the running transcript.

    Args:
        audio: Path to the recorded audio file (the Audio input below uses
            ``type="filepath"``). May be ``None`` when the callback fires
            without a recording; the transcript is then returned unchanged.
        state: Transcript accumulated so far for this session. ``None`` is
            treated as an empty transcript.

    Returns:
        A ``(transcript, transcript)`` pair: the first element feeds the
        textbox output, the second is stored as the new session state.
    """
    # Normalise a missing state so the concatenation below cannot fail.
    transcript = state if state is not None else ""

    # Nothing to transcribe: keep the UI and the state as they are rather
    # than letting the pipeline raise on a None input.
    if audio is None:
        return transcript, transcript

    text = pipe(
        audio,
        chunk_length_s=CHUNK_LENGTH_S,
        stride_length_s=STRIDE_LENGTH_S,
    )["text"]

    # Skip blank results (e.g. silence) so the transcript is not padded with
    # stray separator spaces.
    if text.strip():
        transcript += text + " "

    return transcript, transcript


# streaming mode
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(
            source="microphone",
            type="filepath",
            streaming=True,
            label="Record something...",
        ),
        "state",
    ],
    outputs=["textbox", "state"],
    title="Realtime Speech-to-Text in French",
    description="Realtime demo for French automatic speech recognition.",
    allow_flagging="never",
    live=True,
)

iface.launch()