# check / app.py
# Gopikanth123's picture
# Update app.py
# 040d848 verified
# raw
# history blame
# 1.14 kB
import streamlit as st
from transformers import pipeline
from streamlit_webrtc import webrtc_streamer, WebRtcMode, ClientSettings
import av
import numpy as np
# ASR Model
@st.cache_resource
def _load_asr_pipeline():
    """Build the speech pipeline once and reuse it across script reruns.

    Streamlit re-executes this script from top to bottom on every widget
    interaction.  Without caching, the model would be instantiated again on
    each rerun, so the construction is wrapped in ``st.cache_resource``.

    Returns:
        The ``transformers`` automatic-speech-recognition pipeline backed by
        ``facebook/s2t-medium-mustc-multilingual-st``.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="facebook/s2t-medium-mustc-multilingual-st",
    )


pipe = _load_asr_pipeline()
# Audio captured for the current browser session.
# streamlit-webrtc runs the frame callback on a worker thread (see its
# "callback limitations" notes), where Streamlit's per-session state is not
# reliably reachable.  The container is therefore created/fetched here, on the
# script thread, and the callback only touches this plain Python object.
if "audio_capture" not in st.session_state:
    st.session_state.audio_capture = {"chunks": [], "sample_rate": None}
_audio_capture = st.session_state.audio_capture


# Function to process audio frames
def audio_callback(frame: av.AudioFrame) -> av.AudioFrame:
    """Record one incoming audio frame as mono 16-bit PCM and pass it through.

    ``AudioFrame.to_ndarray()`` returns an array of shape ``(planes, count)``:
    planar formats have one row per channel, packed formats have a single row
    with the channels interleaved.  Both layouts are reduced to one value per
    sample instant (the channel average).

    Args:
        frame: The audio frame delivered by the WebRTC stream.

    Returns:
        The same frame, unchanged, so the stream keeps flowing.
    """
    samples = frame.to_ndarray()
    if samples.size == 0 or frame.samples == 0:
        return frame

    if samples.shape[0] > 1:
        # Planar layout: each row is a channel, so average across rows.
        mono = samples.mean(axis=0)
    else:
        # Packed layout: de-interleave using the per-channel sample count.
        channel_count = max(1, samples.size // frame.samples)
        mono = samples.reshape(-1, channel_count).mean(axis=1)

    if np.issubdtype(samples.dtype, np.floating):
        # Float formats are expected in [-1, 1]; rescale before the int16 cast
        # so they do not truncate to silence.
        mono = np.clip(mono, -1.0, 1.0) * 32767.0
    pcm = mono.astype(np.int16)

    # Record the rate before the chunk so a reader that sees a chunk also sees
    # its rate.  ``list.append`` is atomic in CPython, and collecting chunks
    # avoids the quadratic cost of repeated ``bytes +=``.
    _audio_capture["sample_rate"] = frame.sample_rate
    _audio_capture["chunks"].append(pcm.tobytes())
    return frame


# Transcribe audio buffer
def transcribe_audio():
    """Run the ASR pipeline over everything captured so far and display it.

    The pipeline treats ``bytes`` input as an encoded audio file, which
    headerless PCM is not.  The captured samples are therefore handed over as
    a float32 waveform in [-1, 1] together with their sampling rate.
    NOTE(review): when that rate differs from the model's expected rate the
    pipeline resamples internally, which presumably needs ``torchaudio``.

    Shows a warning instead of calling the model when nothing was recorded.
    """
    chunks = list(_audio_capture["chunks"])  # snapshot; the callback may append
    sample_rate = _audio_capture["sample_rate"]
    if not chunks or not sample_rate:
        st.warning("No audio has been captured yet.")
        return

    pcm = np.frombuffer(b"".join(chunks), dtype=np.int16)
    waveform = pcm.astype(np.float32) / 32768.0
    result = pipe({"raw": waveform, "sampling_rate": sample_rate})
    st.write("Transcription:", result["text"])
# Streamlit UI
st.title("Voice Recognition App")
webrtc_streamer(
    key="audio",
    mode=WebRtcMode.SENDRECV,
    # ``audio_processor_factory`` must return a processor object exposing
    # ``recv()``; a plain per-frame function belongs in ``audio_frame_callback``.
    audio_frame_callback=audio_callback,
    media_stream_constraints={"audio": True, "video": False},
)
# Transcription is run on demand, over whatever audio has been captured so far.
if st.button("Transcribe Audio"):
    transcribe_audio()