jblast94 commited on
Commit
edd277f
·
verified ·
1 Parent(s): 15f0953

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -81
app.py CHANGED
@@ -1,107 +1,93 @@
1
  import gradio as gr
 
2
  import os
3
  import requests
4
- from llama_cpp import Llama # Import the Llama class from llama-cpp-python
5
 
6
- # --- Model Loading ---
7
- # The model you selected is in the GGUF format, which is not compatible with
8
- # the standard Hugging Face AutoModel class. We need to use a dedicated
9
- # GGUF inference engine, like llama-cpp-python.
10
 
11
- model_name = "mradermacher/gemma-3n-E2B-GGUF"
12
- model_path = gr.mount_model(model_name) # This function will download the GGUF file
13
-
14
- # Try to initialize the Llama model
15
  try:
16
- # Initialize the Llama model with the GGUF file path
17
- # We set `verbose=False` to keep the logs clean.
18
- llm = Llama(model_path=model_path, n_gpu_layers=1, verbose=False)
19
- print("Llama model initialized successfully!")
20
  except Exception as e:
21
- print(f"Error initializing Llama model: {e}")
22
- llm = None
23
- print("Please check if the model is compatible with llama-cpp-python.")
24
 
 
 
 
25
 
26
- # --- Constants & Configuration ---
27
- # To secure your Chatterbox endpoint URL, you should add it to your
28
- # Hugging Face Space secrets with a key like `CHATTERBOX_ENDPOINT`.
29
- # You can access it in the code like this:
30
- CHATTERBOX_ENDPOINT = os.environ.get("CHATTERBOX_ENDPOINT", "http://localhost:5000")
31
 
32
- # --- Core Logic ---
33
- def process_audio_and_generate(audio_file_path):
34
  """
35
- This function handles the full workflow:
36
- 1. Takes the path to a recorded audio file.
37
- 2. Sends the audio to your Chatterbox TTS endpoint for transcription.
38
- 3. Passes the transcribed text to the GGUF model.
39
- 4. Generates a text response.
40
-
41
- Args:
42
- audio_file_path (str): The file path of the recorded audio.
43
-
44
- Returns:
45
- tuple: A tuple containing the transcription and the Gemma response.
46
  """
47
- if audio_file_path is None:
48
- return "Please provide an audio recording.", "No audio input received."
 
 
49
 
50
- # --- Step 1: Speech-to-Text (using your Chatterbox endpoint) ---
51
- transcription = "Transcription failed." # Default value in case of error
52
- try:
53
- with open(audio_file_path, "rb") as audio_file:
54
- # Assumes the API expects a multipart form data request with the file.
55
- files = {'file': audio_file}
56
- response = requests.post(CHATTERBOX_ENDPOINT, files=files)
57
- response.raise_for_status() # Raise an exception for bad status codes
58
- transcription = response.json().get("transcription", "Transcription failed.")
59
- except requests.exceptions.RequestException as e:
60
- transcription = f"Error calling Chatterbox API: {e}"
61
- print(transcription)
62
- return transcription, "Transcription service is not available."
63
- except Exception as e:
64
- transcription = f"Error during transcription: {e}"
65
- print(transcription)
66
- return transcription, "Transcription failed."
67
 
68
- # --- Step 2: Generate Response with Gemma (GGUF model) ---
69
- response_text = "Gemma model is not available." # Default value
70
- if llm:
71
  try:
72
- # We'll use the model's `create_completion` method to generate text.
73
- # We wrap the transcription in a prompt template that the model expects.
74
- prompt = f"### User:\n{transcription}\n### Assistant:\n"
75
-
76
- # Generate the response from the model
77
- completion = llm.create_completion(
78
- prompt,
79
- max_tokens=150, # Limits the length of the response
80
- stop=["### User:"], # Stops generation when it sees the next user turn
81
- echo=False, # Don't repeat the input prompt in the output
82
- )
83
-
84
- response_text = completion['choices'][0]['text']
85
-
86
  except Exception as e:
87
- response_text = f"Error generating response from model: {e}"
88
- print(response_text)
 
 
 
 
89
 
90
- return transcription, response_text.strip()
 
 
91
 
92
- # --- Gradio Interface Setup ---
93
  iface = gr.Interface(
94
- fn=process_audio_and_generate,
95
- inputs=gr.Audio(sources=["microphone"], type="filepath"),
96
  outputs=[
97
  gr.Textbox(label="Transcription"),
98
- gr.Textbox(label="Gemma's Response")
99
  ],
100
- title="Gemma Voice Assistant",
101
- description="Speak into the microphone and get a live response from a Gemma-powered assistant."
102
  )
103
 
104
  # Launch the Gradio app
105
  if __name__ == "__main__":
106
- iface.launch()
107
-
 
1
  import gradio as gr
2
+ from supabase import create_client, Client
3
  import os
4
  import requests
5
+ import json
6
 
7
# ======================================================================
# --- SETUP AND CONFIGURATION ---
# ======================================================================

# Retrieve Supabase credentials from environment variables (Hugging Face
# Space secrets) so no keys are hard-coded in the repository.
try:
    supabase_url = os.environ.get("SUPABASE_URL")
    supabase_key = os.environ.get("SUPABASE_KEY")
    if not supabase_url or not supabase_key:
        # Fail with a clear message instead of letting create_client raise
        # an opaque error on None arguments.
        raise ValueError("SUPABASE_URL and/or SUPABASE_KEY is not set.")
    supabase: Client = create_client(supabase_url, supabase_key)
except Exception as e:
    # The app still runs without persistence; downstream code checks
    # `supabase` for None before using it.
    print(f"Error initializing Supabase client: {e}")
    supabase = None

# Endpoint for the voice-generation engine (e.g. a RunPod instance).
# NOTE(review): the constant name has a typo (three T's) but is used
# consistently in this file; the environment variable itself is spelled
# correctly, so do not rename one without the other.
CHATTTERBOX_ENDPOINT = os.environ.get("CHATTERBOX_ENDPOINT")
24
 
25
+ # ======================================================================
26
+ # --- CORE LOGIC ---
27
+ # ======================================================================
 
 
28
 
29
def process_voice_command(audio):
    """
    Handle one voice command end-to-end.

    Current pipeline (transcription and LLM response are still placeholders):
      1. Accept the recorded audio from the Gradio microphone input.
      2. (Placeholder) transcribe it to text.
      3. (Placeholder) generate a language-model response.
      4. Best-effort: persist the exchange to the Supabase ``chats`` table.
      5. Best-effort: POST the response text to the Chatterbox endpoint.

    Args:
        audio: Microphone capture from gr.Audio (currently unused until the
            real speech-to-text call is wired in).

    Returns:
        tuple: (transcribed_text, model_response) strings, feeding the two
        Gradio output textboxes.
    """

    # Placeholder for the actual transcription logic.
    # Replace this with a call to your speech-to-text model.
    transcribed_text = "Placeholder: Your transcribed text will appear here."

    # Placeholder for the response from the language model.
    # Replace this with a call to your Gemma model.
    model_response = "Placeholder: This is Gemma's generated response."

    # --- Save the chat to Supabase (best-effort: failures only log) ---
    if supabase:
        try:
            # Table name 'chats' must match your Supabase schema.
            supabase.table("chats").insert({
                "user_input": transcribed_text,
                "model_response": model_response
            }).execute()
            print("Chat successfully saved to Supabase! 💾")
        except Exception as e:
            print(f"Error saving chat to Supabase: {e}")
    else:
        print("Supabase client not initialized. Skipping database save.")

    # --- Forward the response text to Chatterbox (best-effort) ---
    if CHATTTERBOX_ENDPOINT:
        try:
            payload = {"text": model_response}
            headers = {"Content-Type": "application/json"}
            # timeout= prevents an unreachable endpoint from hanging the
            # Gradio worker indefinitely; raise_for_status surfaces HTTP
            # errors in the log (still swallowed by this best-effort try).
            resp = requests.post(
                CHATTTERBOX_ENDPOINT,
                data=json.dumps(payload),
                headers=headers,
                timeout=30,
            )
            resp.raise_for_status()
            print("Response sent to Chatterbox endpoint! 🎙️")
        except Exception as e:
            print(f"Error sending data to Chatterbox: {e}")
    else:
        print("Chatterbox endpoint not set. Skipping voice generation.")

    # Return the transcription and the response to the Gradio interface.
    return transcribed_text, model_response
74
 
75
+ # ======================================================================
76
+ # --- GRADIO INTERFACE ---
77
+ # ======================================================================
78
 
79
+ # Create the Gradio interface
80
# Build the Gradio UI: microphone in, two text boxes out.
_output_boxes = [
    gr.Textbox(label="Transcription"),
    gr.Textbox(label="AI's Response"),
]
iface = gr.Interface(
    inputs=gr.Audio(sources=["microphone"], label="Speak your command here..."),
    outputs=_output_boxes,
    fn=process_voice_command,
    title="My Personal Voice Assistant",
    description="Speak into the microphone and get a response from the AI, with chats saved to your Supabase database.",
)
90
 
91
# Launch the Gradio app
# Guard so the web server only starts when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    iface.launch()