Gopikanth123 committed
Commit 040d848 · verified · 1 Parent(s): 8010b1d

Update app.py

Files changed (1)
  1. app.py +31 -62
app.py CHANGED
@@ -1,67 +1,36 @@
  import streamlit as st
- import streamlit.components.v1 as components
-
- # HTML and JavaScript for Speech Recognition using webkitSpeechRecognition
- speech_recognition_html = """
- <html>
- <body>
- <button onclick="startRecognition()">Start Speech Recognition</button>
- <p id="output">Speak something...</p>
-
- <script>
- // Check if the browser supports speech recognition
- if (!('webkitSpeechRecognition' in window)) {
-     document.getElementById('output').textContent = "Speech recognition not supported in this browser.";
- }
-
- var recognition = new webkitSpeechRecognition();
- recognition.continuous = false; // Stops after speech input
- recognition.interimResults = true;
- recognition.lang = 'en-US'; // Set language for recognition (English)
-
- recognition.onresult = function(event) {
-     var transcript = event.results[event.resultIndex][0].transcript;
-     document.getElementById('output').textContent = transcript;
-     // Send transcript back to Streamlit using postMessage
-     window.parent.postMessage({func: 'update_output', transcript: transcript}, '*');
- };
-
- recognition.onerror = function(event) {
-     console.error("Speech recognition error", event.error);
-     document.getElementById('output').textContent = "Error in recognition";
- };
-
- function startRecognition() {
-     recognition.start();
- }
- </script>
- </body>
- </html>
- """

  # Streamlit UI
- st.title("Speech-to-Text Demo")
- st.write("Click the button below and start speaking. The recognized text will be shown here:")
-
- # Display the HTML with the embedded speech recognition
- components.html(speech_recognition_html, height=200)
-
- # Output area where the recognized speech will be displayed
- output = st.empty()

- # This is where the recognized text will be shown on the Streamlit side
- st.write("Recognized Text:")
- transcript = st.text_area("Transcript:", "", height=150)

- # Listen for postMessage events from the iframe to update the text area
- components.html("""
- <script>
- window.addEventListener('message', function(event) {
-     if (event.data.func === 'update_output') {
-         document.getElementById('output').textContent = event.data.transcript;
-         // Update the Streamlit text area with the transcript
-         window.parent.postMessage({func: 'update_text_area', text: event.data.transcript}, '*');
-     }
- });
- </script>
- """, height=0)
 
  import streamlit as st
+ from transformers import pipeline
+ from streamlit_webrtc import webrtc_streamer, WebRtcMode, ClientSettings
+ import av
+ import numpy as np
+
+ # ASR Model
+ pipe = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-mustc-multilingual-st")
+
+ # Function to process audio frames
+ def audio_callback(frame: av.AudioFrame) -> av.AudioFrame:
+     audio_data = frame.to_ndarray().mean(axis=1).astype(np.int16)  # Convert to mono
+     if "audio_buffer" not in st.session_state:
+         st.session_state.audio_buffer = b""
+     st.session_state.audio_buffer += audio_data.tobytes()
+     return frame
+
+ # Transcribe audio buffer
+ def transcribe_audio():
+     if "audio_buffer" in st.session_state:
+         audio_data = st.session_state.audio_buffer
+         result = pipe(audio_data)
+         st.write("Transcription:", result["text"])

  # Streamlit UI
+ st.title("Voice Recognition App")

+ webrtc_streamer(
+     key="audio",
+     mode=WebRtcMode.SENDRECV,
+     audio_processor_factory=lambda: audio_callback,
+     media_stream_constraints={"audio": True, "video": False},
+ )

+ if st.button("Transcribe Audio"):
+     transcribe_audio()
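A few caveats on the new version, going by streamlit_webrtc's and transformers' documented APIs: audio_processor_factory expects a factory that returns an object exposing recv(frame), so a lambda that returns a bare function is never invoked; the processor runs on a worker thread where st.session_state is not reliably visible; and the ASR pipeline does not accept headerless PCM bytes, though it does accept a float waveform passed as {"array": ..., "sampling_rate": ...}. A sketch of how the same design could be wired up, assuming packed 16-bit WebRTC frames and the usual 16 kHz model input; AudioBuffer, load_pipe, and the scipy resampling step are my additions, not from the commit:

import av
import numpy as np
import streamlit as st
from scipy.signal import resample_poly
from streamlit_webrtc import AudioProcessorBase, WebRtcMode, webrtc_streamer
from transformers import pipeline


@st.cache_resource  # load the model once per process instead of on every rerun
def load_pipe():
    return pipeline(
        "automatic-speech-recognition",
        model="facebook/s2t-medium-mustc-multilingual-st",
    )


class AudioBuffer(AudioProcessorBase):
    # streamlit_webrtc calls recv() once per incoming frame, on a worker thread,
    # so the buffer lives on the processor instance, not in st.session_state.
    def __init__(self):
        self.chunks = []
        self.sample_rate = None

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        pcm = frame.to_ndarray()                   # packed s16: (1, samples * channels)
        pcm = pcm.reshape(frame.samples, -1)       # -> (samples, channels)
        mono = pcm.astype(np.float32).mean(axis=1) / 32768.0  # average channels to mono floats
        self.chunks.append(mono)
        self.sample_rate = frame.sample_rate
        return frame


st.title("Voice Recognition App")

ctx = webrtc_streamer(
    key="audio",
    mode=WebRtcMode.SENDRECV,
    audio_processor_factory=AudioBuffer,  # a factory returning a recv() object
    media_stream_constraints={"audio": True, "video": False},
)

if st.button("Transcribe Audio") and ctx.audio_processor:
    proc = ctx.audio_processor
    if proc.chunks:
        waveform = np.concatenate(proc.chunks)
        if proc.sample_rate != 16_000:  # WebRTC audio is typically 48 kHz
            waveform = resample_poly(waveform, 16_000, proc.sample_rate)
        result = load_pipe()({"array": waveform, "sampling_rate": 16_000})
        st.write("Transcription:", result["text"])

Two smaller points: ClientSettings is imported in the commit but never used, and facebook/s2t-medium-mustc-multilingual-st is a speech *translation* checkpoint, so for plain speech-to-text a dedicated ASR checkpoint (e.g., openai/whisper-small) is likely a better fit.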