import streamlit as st
import torch
import soundfile as sf
import sounddevice as sd  # used below for microphone recording (sd.rec / sd.wait)
import pyttsx3
import threading
from gradio_client import Client  # Client is used in generate_response below
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

# Initialize session state
if "messages" not in st.session_state:
    st.session_state["messages"] = []  # Store chat history

# Load the Wav2Vec 2.0 model and processor from Hugging Face
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")

# Function to generate a response using the Gradio client
def generate_response(query):
    try:
        client = Client("Gopikanth123/llama2")
        result = client.predict(query=query, api_name="/predict")
        return result
    except Exception as e:
        return f"Error communicating with the Gradio backend: {e}"

# Function to handle user input and the bot response
def handle_user_input(user_input):
    if user_input:
        # Add the user message to session state
        st.session_state["messages"].append({"user": user_input})
        # Generate the bot response
        response = generate_response(user_input)
        st.session_state["messages"].append({"bot": response})
        # Speak the bot response in a new thread to avoid blocking the UI
        threading.Thread(target=speak_text, args=(response,), daemon=True).start()

# Function to speak text (voice output)
def speak_text(text):
    engine = pyttsx3.init()
    engine.stop()  # Ensure no previous speech loop is running
    engine.say(text)
    engine.runAndWait()

# Function to update the chat history dynamically
def update_chat_history():
    chat_history = st.session_state["messages"]
    for msg in chat_history:
        # The HTML wrappers below are reconstructions; the class names are
        # assumed placeholders for whatever markup the original used.
        if "user" in msg:
            st.markdown(f"<div class='user-message'>You: {msg['user']}</div>", unsafe_allow_html=True)
        if "bot" in msg:
            st.markdown(f"<div class='bot-message'>Bot: {msg['bot']}</div>", unsafe_allow_html=True)

# Function to recognize speech using Hugging Face's Wav2Vec 2.0
def recognize_speech_huggingface():
    st.info("Listening... Speak into the microphone.")
    fs = 16000  # Sample rate in Hz (Wav2Vec 2.0 expects 16 kHz audio)
    duration = 5  # Recording duration in seconds

    # Record from the microphone with sounddevice
    audio_data = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
    sd.wait()

    # Save the recording to a temporary WAV file
    sf.write('audio.wav', audio_data, fs)

    # Read the audio back with soundfile (returns float samples in [-1, 1])
    audio_input, _ = sf.read('audio.wav')

    # Preprocess the audio and run speech recognition
    inputs = processor(audio_input, return_tensors="pt", sampling_rate=fs)
    with torch.no_grad():
        logits = model(input_values=inputs.input_values).logits

    # Decode the logits to text
    predicted_ids = torch.argmax(logits, dim=-1)
    recognized_text = processor.decode(predicted_ids[0])

    st.session_state["user_input"] = recognized_text
    st.success(f"Recognized Text: {recognized_text}")
    handle_user_input(recognized_text)

# Main Streamlit app
st.set_page_config(page_title="Llama2 Chatbot", page_icon="🤖", layout="wide")

# Custom page CSS (the style block is empty in this snippet)
st.markdown(
    """
    """,
    unsafe_allow_html=True,
)

st.title("🤖 Chat with Llama2 Bot")
st.markdown(
    """
    Welcome to the *Llama2 Chatbot*!
    - *Type* your message below, or
    - *Use the microphone* to speak to the bot.
    """
)

# Display chat history
chat_history_container = st.container()
with chat_history_container:
    # Add the input field within a form
    with st.form(key='input_form', clear_on_submit=True):
        user_input = st.text_input("Type your message here...", placeholder="Hello, how are you?")
        submit_button = st.form_submit_button("Send")

    # Handle form submission
    if submit_button:
        handle_user_input(user_input)

    # Separate button for speech recognition, outside of the form
    if st.button("Speak"):
        recognize_speech_huggingface()

st.markdown("### Chat History")
# Update chat history on every interaction
update_chat_history()