Gopikanth123 committed
Commit 7cde845 · verified · 1 Parent(s): 89e9f69

Update app.py

Files changed (1):
  app.py +27 -24
app.py CHANGED

@@ -1,16 +1,18 @@
 import streamlit as st
-import sounddevice as sd
-import numpy as np
-import speech_recognition as sr
+import torch
+import soundfile as sf
 import pyttsx3
 import threading
-import io
-from gradio_client import Client
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 
 # Initialize session state
 if "messages" not in st.session_state:
     st.session_state["messages"] = []  # Store chat history
 
+# Load the Wav2Vec 2.0 model and processor from Hugging Face
+processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
+model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
+
 # Function to generate a response using Gradio client
 def generate_response(query):
     try:
@@ -33,9 +35,6 @@ def handle_user_input(user_input):
     # Speak out bot response in a new thread to avoid blocking
     threading.Thread(target=speak_text, args=(response,), daemon=True).start()
 
-    # Update chat history after each interaction
-    # update_chat_history()
-
 # Function to speak text (Voice Output)
 def speak_text(text):
     engine = pyttsx3.init()
@@ -52,30 +51,34 @@ def update_chat_history():
         if "bot" in msg:
             st.markdown(f"<div class='chat-bubble bot-message'><strong>Bot:</strong> {msg['bot']}</div>", unsafe_allow_html=True)
 
-# Function to recognize speech using sounddevice
-def recognize_speech_sounddevice():
+# Function to recognize speech using Hugging Face's Wav2Vec 2.0
+def recognize_speech_huggingface():
     st.info("Listening... Speak into the microphone.")
     fs = 16000  # Sample rate in Hz
     duration = 5  # Duration in seconds
 
-    # Record the audio using sounddevice
+    # Record the audio using sounddevice or use a pre-recorded file
+    # (Here we're using soundfile to record from microphone)
     audio_data = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
     sd.wait()
+    # Save the audio file to a temporary buffer
+    sf.write('audio.wav', audio_data, fs)
 
-    # Convert the audio data to the format expected by speech_recognition
-    recognizer = sr.Recognizer()
-    audio = sr.AudioData(audio_data.tobytes(), fs, 2)
+    # Read the audio file using soundfile and process it
+    audio_input, _ = sf.read('audio.wav')
 
-    try:
-        recognized_text = recognizer.recognize_google(audio)
-        st.session_state["user_input"] = recognized_text
-        st.success(f"Recognized Text: {recognized_text}")
-        handle_user_input(recognized_text)
-    except sr.UnknownValueError:
-        st.error("Sorry, I couldn't understand the audio.")
-    except sr.RequestError:
-        st.error("Could not request results; please check your internet connection.")
+    # Preprocess the audio and recognize the speech
+    inputs = processor(audio_input, return_tensors="pt", sampling_rate=fs)
+    with torch.no_grad():
+        logits = model(input_values=inputs.input_values).logits
+
+    # Decode the logits to text
+    predicted_ids = torch.argmax(logits, dim=-1)
+    recognized_text = processor.decode(predicted_ids[0])
 
+    st.session_state["user_input"] = recognized_text
+    st.success(f"Recognized Text: {recognized_text}")
+    handle_user_input(recognized_text)
 
 # Main Streamlit app
 st.set_page_config(page_title="Llama2 Chatbot", page_icon="🤖", layout="wide")
@@ -152,7 +155,7 @@ with chat_history_container:
 
     # Separate button for speech recognition outside of the form
     if st.button("Speak"):
-        recognize_speech_sounddevice()
+        recognize_speech_huggingface()
 
     st.markdown("### Chat History")
     # Update chat history on every interaction
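
Review note: as committed, the new code is not self-contained. The diff removes
`import sounddevice as sd` while recognize_speech_huggingface still calls sd.rec
and sd.wait, so the first click on "Speak" would raise a NameError; it likewise
removes `from gradio_client import Client`, which generate_response (whose body
sits outside these hunks) presumably still needs. A minimal sketch of the
recognizer with the import restored, assuming the module-level processor/model
globals from this commit and an otherwise unchanged app.py, could look like:

    import numpy as np
    import sounddevice as sd
    import torch

    def recognize_speech_huggingface():
        st.info("Listening... Speak into the microphone.")
        fs = 16000    # Wav2Vec 2.0 checkpoints expect 16 kHz audio
        duration = 5  # Seconds to record

        # Record mono audio directly as float32; this skips the int16
        # audio.wav round-trip through soundfile used in the commit.
        audio = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='float32')
        sd.wait()
        audio = np.squeeze(audio)  # (frames, 1) -> (frames,)

        # Normalize and batch the raw waveform for the model
        inputs = processor(audio, sampling_rate=fs, return_tensors="pt")
        with torch.no_grad():
            logits = model(input_values=inputs.input_values).logits

        # Greedy CTC decoding: argmax per frame, then collapse repeats and blanks
        predicted_ids = torch.argmax(logits, dim=-1)
        recognized_text = processor.decode(predicted_ids[0])

        st.session_state["user_input"] = recognized_text
        st.success(f"Recognized Text: {recognized_text}")
        handle_user_input(recognized_text)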
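
A related design note, an assumption about the surrounding Streamlit app rather
than something this commit does: Streamlit re-executes app.py on every
interaction, so the module-level from_pretrained calls reload the large
checkpoint on each rerun. Wrapping the load in st.cache_resource keeps one copy
in memory across reruns:

    @st.cache_resource
    def load_asr():
        # Hypothetical helper, not in the commit: cache the ASR model across reruns
        processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
        model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
        return processor, model

    processor, model = load_asr()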