# Streamlit Space: real-time voice recognition (speech-to-text) demo.
import streamlit as st
from transformers import pipeline
# NOTE(review): ClientSettings is unused here and was removed in recent
# streamlit-webrtc releases — confirm the pinned version before relying on it.
from streamlit_webrtc import webrtc_streamer, WebRtcMode, ClientSettings
import av
import numpy as np

# ASR model: multilingual speech-to-text. Loaded once at module import;
# downloads the model weights on first run.
pipe = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-mustc-multilingual-st")
# Per-frame audio callback for the WebRTC stream.
def audio_callback(frame: av.AudioFrame) -> av.AudioFrame:
    """Accumulate incoming audio into a session-state byte buffer.

    The frame is returned unchanged so the stream keeps flowing.
    """
    # BUG FIX: av's to_ndarray() yields shape (channels, samples) for planar
    # audio, so the mono downmix must average over axis=0 (the channels).
    # The original axis=1 collapsed the samples and destroyed the signal.
    # (For packed/interleaved layouts the shape is (1, n) and axis=0 is a
    # no-op squeeze — TODO confirm the negotiated frame format.)
    audio_data = frame.to_ndarray().mean(axis=0).astype(np.int16)
    # NOTE(review): this callback runs on the WebRTC worker thread;
    # st.session_state may not be attached to the script-run context there —
    # verify the buffer is actually visible to the main script.
    if "audio_buffer" not in st.session_state:
        st.session_state.audio_buffer = b""
    st.session_state.audio_buffer += audio_data.tobytes()
    return frame
# Transcribe whatever audio has accumulated in the session buffer.
def transcribe_audio():
    """Run the ASR pipeline over the buffered PCM audio and display the text."""
    if "audio_buffer" not in st.session_state or not st.session_state.audio_buffer:
        # Nothing captured yet — tell the user instead of failing silently.
        st.warning("No audio captured yet.")
        return
    # BUG FIX: the pipeline treats raw ``bytes`` as encoded file content
    # (decoded via ffmpeg); headerless int16 PCM is not decodable that way.
    # Convert to a normalized float32 waveform and pass the sampling rate
    # explicitly, which is the documented array input form.
    pcm = np.frombuffer(st.session_state.audio_buffer, dtype=np.int16)
    waveform = pcm.astype(np.float32) / 32768.0
    # NOTE(review): WebRTC audio is typically 48 kHz — confirm against the
    # actual frame.sample_rate seen in audio_callback.
    result = pipe({"raw": waveform, "sampling_rate": 48000})
    st.write("Transcription:", result["text"])
# Streamlit UI
st.title("Voice Recognition App")


class _AudioProcessor:
    """Adapter: streamlit-webrtc expects a processor *object* with recv()."""

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        return audio_callback(frame)


webrtc_streamer(
    key="audio",
    mode=WebRtcMode.SENDRECV,
    # BUG FIX: audio_processor_factory must be a zero-arg callable returning a
    # processor with a recv() method. The original lambda returned the bare
    # callback function, which the framework cannot drive.
    audio_processor_factory=_AudioProcessor,
    media_stream_constraints={"audio": True, "video": False},
)

if st.button("Transcribe Audio"):
    transcribe_audio()