Update app.py
Gopikanth123 committed
app.py CHANGED
@@ -1,67 +1,36 @@
 import streamlit as st
-[old lines 2-23: the opening of the embedded speech-recognition HTML/JS was not preserved in this diff view]
-            document.getElementById('output').textContent = transcript;
-            // Send transcript back to Streamlit using postMessage
-            window.parent.postMessage({func: 'update_output', transcript: transcript}, '*');
-        };
-
-        recognition.onerror = function(event) {
-            console.error("Speech recognition error", event.error);
-            document.getElementById('output').textContent = "Error in recognition";
-        };
-
-        function startRecognition() {
-            recognition.start();
-        }
-    </script>
-</body>
-</html>
-"""
+from transformers import pipeline
+from streamlit_webrtc import webrtc_streamer, WebRtcMode
+import av
+import numpy as np
+import threading
+
+# ASR Model
+pipe = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-mustc-multilingual-st")
+
+# Frame callbacks run on a worker thread where st.session_state is not
+# available, so the audio is buffered in module-level state behind a lock.
+buffer_lock = threading.Lock()
+audio_buffer = bytearray()
+
+# Function to process audio frames: down-mix to mono int16 and buffer the bytes
+def audio_callback(frame: av.AudioFrame) -> av.AudioFrame:
+    pcm = frame.to_ndarray()  # int16 samples, interleaved across channels
+    mono = pcm.reshape(-1, len(frame.layout.channels)).mean(axis=1).astype(np.int16)
+    with buffer_lock:
+        audio_buffer.extend(mono.tobytes())
+    return frame
+
+# Transcribe audio buffer
+def transcribe_audio():
+    with buffer_lock:
+        pcm = np.frombuffer(bytes(audio_buffer), dtype=np.int16)
+    if pcm.size == 0:
+        st.write("No audio captured yet.")
+        return
+    audio = pcm.astype(np.float32) / 32768.0  # int16 -> float32 in [-1, 1]
+    result = pipe({"raw": audio, "sampling_rate": 48000})  # WebRTC audio is typically 48 kHz
+    st.write("Transcription:", result["text"])

 # Streamlit UI
-st.title("
-st.write("Click the button below and start speaking. The recognized text will be shown here:")
-
-# Display the HTML with the embedded speech recognition
-components.html(speech_recognition_html, height=200)
-
-# Output area where the recognized speech will be displayed
-output = st.empty()
+st.title("Voice Recognition App")

-    <script>
-        window.addEventListener('message', function(event) {
-            if (event.data.func === 'update_output') {
-                document.getElementById('output').textContent = event.data.transcript;
-                // Update the Streamlit text area with the transcript
-                window.parent.postMessage({func: 'update_text_area', text: event.data.transcript}, '*');
-            }
-        });
-    </script>
-    """, height=0)
+webrtc_streamer(
+    key="audio",
+    mode=WebRtcMode.SENDRECV,
+    audio_frame_callback=audio_callback,  # called once per incoming audio frame
+    media_stream_constraints={"audio": True, "video": False},
+)

+if st.button("Transcribe Audio"):
+    transcribe_audio()
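streamlit_webrtc also offers a class-based alternative to the plain audio_frame_callback: audio_processor_factory expects a class (not a bare function) whose recv() method is invoked once per frame, so per-stream state can live on the instance instead of at module level. A minimal sketch, assuming streamlit_webrtc's AudioProcessorBase and the context's audio_processor attribute; BufferingAudioProcessor is a hypothetical name:

import av
import streamlit as st
from streamlit_webrtc import AudioProcessorBase, WebRtcMode, webrtc_streamer

class BufferingAudioProcessor(AudioProcessorBase):
    def __init__(self) -> None:
        self.frames: list[av.AudioFrame] = []

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        self.frames.append(frame)  # keep the frame for later transcription
        return frame               # pass the audio through unchanged

ctx = webrtc_streamer(
    key="audio-class-based",
    mode=WebRtcMode.SENDRECV,
    audio_processor_factory=BufferingAudioProcessor,
    media_stream_constraints={"audio": True, "video": False},
)

# The live processor instance is reachable from the script thread via the context:
if ctx.audio_processor:
    st.caption(f"{len(ctx.audio_processor.frames)} audio frames buffered")

The context also exposes ctx.state.playing, which can gate the "Transcribe Audio" button so it is only active while the stream is running.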
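The transcription call in app.py hard-codes 48 kHz, the usual WebRTC capture rate, but the rate is not guaranteed. A more defensive sketch reads it from the frames themselves (frame.sample_rate is PyAV's per-frame rate) and passes it to the pipeline; in my understanding the transformers ASR pipeline resamples a raw/sampling_rate input to the model's expected rate (16 kHz for the S2T models), though that behavior and its torchaudio dependency are worth verifying. The lock from app.py is omitted here to keep the sketch short.

import av
import numpy as np
from transformers import pipeline

pipe = pipeline("automatic-speech-recognition",
                model="facebook/s2t-medium-mustc-multilingual-st")

state = {"rate": 48000, "pcm": bytearray()}  # fallback rate until a frame arrives

def audio_callback(frame: av.AudioFrame) -> av.AudioFrame:
    state["rate"] = frame.sample_rate  # actual capture rate reported by PyAV
    mono = (frame.to_ndarray()
            .reshape(-1, len(frame.layout.channels))
            .mean(axis=1)
            .astype(np.int16))
    state["pcm"].extend(mono.tobytes())
    return frame

def transcribe() -> str:
    audio = np.frombuffer(bytes(state["pcm"]), dtype=np.int16).astype(np.float32) / 32768.0
    # Passing the true rate lets the pipeline resample to the model's expected rate.
    return pipe({"raw": audio, "sampling_rate": state["rate"]})["text"]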