Gopikanth123 committed
Commit 7cde845 · verified · 1 Parent(s): 89e9f69

Update app.py

Files changed (1):
  app.py +27 -24
app.py CHANGED

@@ -1,16 +1,18 @@
 import streamlit as st
-import sounddevice as sd
-import numpy as np
-import speech_recognition as sr
+import torch
+import soundfile as sf
 import pyttsx3
 import threading
-import io
-from gradio_client import Client
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 
 # Initialize session state
 if "messages" not in st.session_state:
     st.session_state["messages"] = []  # Store chat history
 
+# Load the Wav2Vec 2.0 model and processor from Hugging Face
+processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
+model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
+
 # Function to generate a response using Gradio client
 def generate_response(query):
     try:
@@ -33,9 +35,6 @@ def handle_user_input(user_input):
     # Speak out bot response in a new thread to avoid blocking
     threading.Thread(target=speak_text, args=(response,), daemon=True).start()
 
-    # Update chat history after each interaction
-    # update_chat_history()
-
 # Function to speak text (Voice Output)
 def speak_text(text):
     engine = pyttsx3.init()
@@ -52,30 +51,34 @@ def update_chat_history():
         if "bot" in msg:
             st.markdown(f"<div class='chat-bubble bot-message'><strong>Bot:</strong> {msg['bot']}</div>", unsafe_allow_html=True)
 
-# Function to recognize speech using sounddevice
-def recognize_speech_sounddevice():
+# Function to recognize speech using Hugging Face's Wav2Vec 2.0
+def recognize_speech_huggingface():
     st.info("Listening... Speak into the microphone.")
     fs = 16000  # Sample rate in Hz
     duration = 5  # Duration in seconds
 
-    # Record the audio using sounddevice
+    # Record the audio using sounddevice or use a pre-recorded file
+    # (Here we're using soundfile to record from microphone)
     audio_data = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
     sd.wait()
+    # Save the audio file to a temporary buffer
+    sf.write('audio.wav', audio_data, fs)
 
-    # Convert the audio data to the format expected by speech_recognition
-    recognizer = sr.Recognizer()
-    audio = sr.AudioData(audio_data.tobytes(), fs, 2)
+    # Read the audio file using soundfile and process it
+    audio_input, _ = sf.read('audio.wav')
 
-    try:
-        recognized_text = recognizer.recognize_google(audio)
-        st.session_state["user_input"] = recognized_text
-        st.success(f"Recognized Text: {recognized_text}")
-        handle_user_input(recognized_text)
-    except sr.UnknownValueError:
-        st.error("Sorry, I couldn't understand the audio.")
-    except sr.RequestError:
-        st.error("Could not request results; please check your internet connection.")
+    # Preprocess the audio and recognize the speech
+    inputs = processor(audio_input, return_tensors="pt", sampling_rate=fs)
+    with torch.no_grad():
+        logits = model(input_values=inputs.input_values).logits
+
+    # Decode the logits to text
+    predicted_ids = torch.argmax(logits, dim=-1)
+    recognized_text = processor.decode(predicted_ids[0])
 
+    st.session_state["user_input"] = recognized_text
+    st.success(f"Recognized Text: {recognized_text}")
+    handle_user_input(recognized_text)
 
 # Main Streamlit app
 st.set_page_config(page_title="Llama2 Chatbot", page_icon="🤖", layout="wide")
@@ -152,7 +155,7 @@ with chat_history_container:
 
     # Separate button for speech recognition outside of the form
     if st.button("Speak"):
-        recognize_speech_sounddevice()
+        recognize_speech_huggingface()
 
     st.markdown("### Chat History")
     # Update chat history on every interaction
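
Review note: as committed, the new code is not self-contained. The diff removes
`import sounddevice as sd` while recognize_speech_huggingface still calls sd.rec
and sd.wait, so the first click on "Speak" would raise a NameError; it likewise
removes `from gradio_client import Client`, which generate_response (whose body
sits outside these hunks) presumably still needs. A minimal sketch of the
recognizer with the import restored, assuming the module-level processor/model
globals from this commit and an otherwise unchanged app.py, could look like:

    import numpy as np
    import sounddevice as sd
    import torch

    def recognize_speech_huggingface():
        st.info("Listening... Speak into the microphone.")
        fs = 16000    # Wav2Vec 2.0 checkpoints expect 16 kHz audio
        duration = 5  # Seconds to record

        # Record mono audio directly as float32; this skips the int16
        # audio.wav round-trip through soundfile used in the commit.
        audio = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='float32')
        sd.wait()
        audio = np.squeeze(audio)  # (frames, 1) -> (frames,)

        # Normalize and batch the raw waveform for the model
        inputs = processor(audio, sampling_rate=fs, return_tensors="pt")
        with torch.no_grad():
            logits = model(input_values=inputs.input_values).logits

        # Greedy CTC decoding: argmax per frame, then collapse repeats and blanks
        predicted_ids = torch.argmax(logits, dim=-1)
        recognized_text = processor.decode(predicted_ids[0])

        st.session_state["user_input"] = recognized_text
        st.success(f"Recognized Text: {recognized_text}")
        handle_user_input(recognized_text)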
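
A related design note, an assumption about the surrounding Streamlit app rather
than something this commit does: Streamlit re-executes app.py on every
interaction, so the module-level from_pretrained calls reload the large
checkpoint on each rerun. Wrapping the load in st.cache_resource keeps one copy
in memory across reruns:

    @st.cache_resource
    def load_asr():
        # Hypothetical helper, not in the commit: cache the ASR model across reruns
        processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
        model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
        return processor, model

    processor, model = load_asr()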