Gopikanth123 committed
Commit a999793 · verified · 1 Parent(s): a6f2237

Update app.py

Files changed (1)
  1. app.py +77 -38
app.py CHANGED
@@ -1,19 +1,16 @@
 import streamlit as st
-import torch
-import soundfile as sf
 import pyttsx3
 import threading
-from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import wave
+import io
+import speech_recognition as sr
 from gradio_client import Client
+import streamlit.components.v1 as components

 # Initialize session state
 if "messages" not in st.session_state:
     st.session_state["messages"] = []  # Store chat history

-# Load the Wav2Vec 2.0 model and processor from Hugging Face
-processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
-model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
-
 # Function to generate a response using Gradio client
 def generate_response(query):
     try:
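This hunk swaps the local Wav2Vec 2.0 pipeline (torch, soundfile, transformers) for the SpeechRecognition library plus a browser-side recorder embedded through streamlit.components.v1 (the wave import appears unused in the rest of the diff). For reference, a minimal sketch of the SpeechRecognition flow the new code relies on, runnable on its own; the file name sample.wav is an assumption, and sr.AudioFile can only parse WAV, AIFF, or FLAC input:

import speech_recognition as sr

recognizer = sr.Recognizer()
with sr.AudioFile("sample.wav") as source:  # hypothetical test file
    audio = recognizer.record(source)  # read the whole file into an AudioData object
# Sends the audio to Google's free Web Speech endpoint; raises
# sr.UnknownValueError or sr.RequestError on failure.
print(recognizer.recognize_google(audio))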
@@ -52,34 +49,69 @@ def update_chat_history():
         if "bot" in msg:
             st.markdown(f"<div class='chat-bubble bot-message'><strong>Bot:</strong> {msg['bot']}</div>", unsafe_allow_html=True)

-# Function to recognize speech using Hugging Face's Wav2Vec 2.0
-def recognize_speech_huggingface():
-    st.info("Listening... Speak into the microphone.")
-    fs = 16000  # Sample rate in Hz
-    duration = 5  # Duration in seconds
-
-    # Record the audio using sounddevice or use a pre-recorded file
-    # (Here we're using soundfile to record from microphone)
-    audio_data = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
-    sd.wait()
-    # Save the audio file to a temporary buffer
-    sf.write('audio.wav', audio_data, fs)
-
-    # Read the audio file using soundfile and process it
-    audio_input, _ = sf.read('audio.wav')
-
-    # Preprocess the audio and recognize the speech
-    inputs = processor(audio_input, return_tensors="pt", sampling_rate=fs)
-    with torch.no_grad():
-        logits = model(input_values=inputs.input_values).logits
-
-    # Decode the logits to text
-    predicted_ids = torch.argmax(logits, dim=-1)
-    recognized_text = processor.decode(predicted_ids[0])
-
-    st.session_state["user_input"] = recognized_text
-    st.success(f"Recognized Text: {recognized_text}")
-    handle_user_input(recognized_text)
+# Function to recognize speech from audio received as bytes
+def recognize_speech_from_audio(audio_bytes):
+    st.info("Processing audio...")
+
+    # Convert byte stream to audio file
+    audio_data = io.BytesIO(audio_bytes)
+    recognizer = sr.Recognizer()
+
+    # Recognize speech from the audio data
+    with sr.AudioFile(audio_data) as source:
+        audio = recognizer.record(source)
+
+    try:
+        recognized_text = recognizer.recognize_google(audio)
+        st.session_state["user_input"] = recognized_text
+        st.success(f"Recognized Text: {recognized_text}")
+        handle_user_input(recognized_text)
+    except sr.UnknownValueError:
+        st.error("Sorry, I couldn't understand the audio.")
+    except sr.RequestError:
+        st.error("Could not request results; please check your internet connection.")
+
+# JavaScript for audio recording and sending data to Streamlit
+audio_recorder_html = """
+<script>
+let audioChunks = [];
+let mediaRecorder;
+
+function startRecording() {
+    navigator.mediaDevices.getUserMedia({ audio: true })
+        .then(function(stream) {
+            mediaRecorder = new MediaRecorder(stream);
+            mediaRecorder.ondataavailable = function(event) {
+                audioChunks.push(event.data);
+            };
+            mediaRecorder.onstop = function() {
+                const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
+                const reader = new FileReader();
+                reader.onloadend = function() {
+                    const audioBase64 = reader.result.split(',')[1];
+                    window.parent.postMessage({ 'type': 'audio_data', 'audio': audioBase64 }, '*');
+                };
+                reader.readAsDataURL(audioBlob);
+            };
+            mediaRecorder.start();
+        });
+}
+
+function stopRecording() {
+    mediaRecorder.stop();
+}
+
+function handleStartStop() {
+    if (mediaRecorder && mediaRecorder.state === "recording") {
+        stopRecording();
+    } else {
+        startRecording();
+    }
+}
+</script>
+<button onclick="handleStartStop()">Start/Stop Recording</button>
+<p>Click the button to start/stop audio recording.</p>
+"""

 # Main Streamlit app
 st.set_page_config(page_title="Llama2 Chatbot", page_icon="🤖", layout="wide")
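A caveat on the recorder above: in most browsers MediaRecorder encodes WebM/Opus (or Ogg) regardless of the type label given to the Blob, and sr.AudioFile only parses WAV, AIFF, and FLAC, so the posted recording generally needs transcoding before recognize_speech_from_audio can read it. A hedged sketch of that conversion, assuming pydub and an ffmpeg binary are available (neither ships with this commit); browser_audio_to_wav_bytes is a hypothetical helper name:

import base64
import io

from pydub import AudioSegment  # pip install pydub; needs ffmpeg on PATH

def browser_audio_to_wav_bytes(audio_base64):
    # Decode the base64 payload posted by the recorder, then let
    # ffmpeg (via pydub) transcode WebM/Opus to WAV for sr.AudioFile.
    raw = base64.b64decode(audio_base64)
    segment = AudioSegment.from_file(io.BytesIO(raw), format="webm")
    wav_buffer = io.BytesIO()
    segment.export(wav_buffer, format="wav")
    return wav_buffer.getvalue()

The returned bytes can then be handed straight to recognize_speech_from_audio.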
@@ -138,7 +170,7 @@ st.markdown(
     """
     Welcome to the *Llama2 Chatbot*!
     - *Type* your message below, or
-    - *Use the microphone* to speak to the bot.
+    - *Speak* to the bot using your microphone.
     """
 )

@@ -154,10 +186,17 @@ with chat_history_container:
     if submit_button:
         handle_user_input(user_input)

-# Separate button for speech recognition outside of the form
-if st.button("Speak"):
-    recognize_speech_huggingface()
+# Display JavaScript for audio recording
+components.html(audio_recorder_html, height=300)

-st.markdown("### Chat History")
 # Update chat history on every interaction
-update_chat_history()
+update_chat_history()
+
+# Listening to the audio data sent by JavaScript
+def process_audio_data():
+    audio_data = st.experimental_get_query_params().get('audio', [None])[0]
+    if audio_data:
+        recognize_speech_from_audio(audio_data)
+
+# Call the function to process audio if available
+process_audio_data()
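A final caveat on the wiring: window.parent.postMessage from a components.html iframe does not populate st.experimental_get_query_params(), which only reflects the page URL, so process_audio_data as committed never receives the recording; it would also hand recognize_speech_from_audio a base64 string where raw WAV bytes are expected. A hedged sketch of the receiving side, under the assumption that the JavaScript were extended to write the payload into the URL as an ?audio= parameter, or that a bidirectional custom component delivered it:

import base64

# Hypothetical receiving side: assumes the base64 payload actually
# arrives as a query parameter, which the committed JavaScript does
# not arrange on its own.
audio_param = st.experimental_get_query_params().get("audio", [None])[0]
if audio_param:
    wav_bytes = browser_audio_to_wav_bytes(audio_param)  # helper sketched above
    recognize_speech_from_audio(wav_bytes)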