jblast94 commited on
Commit
edd277f
·
verified ·
1 Parent(s): 15f0953

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -81
app.py CHANGED
@@ -1,107 +1,93 @@
1
  import gradio as gr
 
2
  import os
3
  import requests
4
- from llama_cpp import Llama # Import the Llama class from llama-cpp-python
5
 
6
- # --- Model Loading ---
7
- # The model you selected is in the GGUF format, which is not compatible with
8
- # the standard Hugging Face AutoModel class. We need to use a dedicated
9
- # GGUF inference engine, like llama-cpp-python.
10
 
11
- model_name = "mradermacher/gemma-3n-E2B-GGUF"
12
- model_path = gr.mount_model(model_name) # This function will download the GGUF file
13
-
14
- # Try to initialize the Llama model
15
  try:
16
- # Initialize the Llama model with the GGUF file path
17
- # We set `verbose=False` to keep the logs clean.
18
- llm = Llama(model_path=model_path, n_gpu_layers=1, verbose=False)
19
- print("Llama model initialized successfully!")
20
  except Exception as e:
21
- print(f"Error initializing Llama model: {e}")
22
- llm = None
23
- print("Please check if the model is compatible with llama-cpp-python.")
24
 
 
 
 
25
 
26
- # --- Constants & Configuration ---
27
- # To secure your Chatterbox endpoint URL, you should add it to your
28
- # Hugging Face Space secrets with a key like `CHATTERBOX_ENDPOINT`.
29
- # You can access it in the code like this:
30
- CHATTERBOX_ENDPOINT = os.environ.get("CHATTERBOX_ENDPOINT", "http://localhost:5000")
31
 
32
- # --- Core Logic ---
33
- def process_audio_and_generate(audio_file_path):
34
  """
35
- This function handles the full workflow:
36
- 1. Takes the path to a recorded audio file.
37
- 2. Sends the audio to your Chatterbox TTS endpoint for transcription.
38
- 3. Passes the transcribed text to the GGUF model.
39
- 4. Generates a text response.
40
-
41
- Args:
42
- audio_file_path (str): The file path of the recorded audio.
43
-
44
- Returns:
45
- tuple: A tuple containing the transcription and the Gemma response.
46
  """
47
- if audio_file_path is None:
48
- return "Please provide an audio recording.", "No audio input received."
 
 
49
 
50
- # --- Step 1: Speech-to-Text (using your Chatterbox endpoint) ---
51
- transcription = "Transcription failed." # Default value in case of error
52
- try:
53
- with open(audio_file_path, "rb") as audio_file:
54
- # Assumes the API expects a multipart form data request with the file.
55
- files = {'file': audio_file}
56
- response = requests.post(CHATTERBOX_ENDPOINT, files=files)
57
- response.raise_for_status() # Raise an exception for bad status codes
58
- transcription = response.json().get("transcription", "Transcription failed.")
59
- except requests.exceptions.RequestException as e:
60
- transcription = f"Error calling Chatterbox API: {e}"
61
- print(transcription)
62
- return transcription, "Transcription service is not available."
63
- except Exception as e:
64
- transcription = f"Error during transcription: {e}"
65
- print(transcription)
66
- return transcription, "Transcription failed."
67
 
68
- # --- Step 2: Generate Response with Gemma (GGUF model) ---
69
- response_text = "Gemma model is not available." # Default value
70
- if llm:
71
  try:
72
- # We'll use the model's `create_completion` method to generate text.
73
- # We wrap the transcription in a prompt template that the model expects.
74
- prompt = f"### User:\n{transcription}\n### Assistant:\n"
75
-
76
- # Generate the response from the model
77
- completion = llm.create_completion(
78
- prompt,
79
- max_tokens=150, # Limits the length of the response
80
- stop=["### User:"], # Stops generation when it sees the next user turn
81
- echo=False, # Don't repeat the input prompt in the output
82
- )
83
-
84
- response_text = completion['choices'][0]['text']
85
-
86
  except Exception as e:
87
- response_text = f"Error generating response from model: {e}"
88
- print(response_text)
 
 
 
 
89
 
90
- return transcription, response_text.strip()
 
 
91
 
92
- # --- Gradio Interface Setup ---
93
  iface = gr.Interface(
94
- fn=process_audio_and_generate,
95
- inputs=gr.Audio(sources=["microphone"], type="filepath"),
96
  outputs=[
97
  gr.Textbox(label="Transcription"),
98
- gr.Textbox(label="Gemma's Response")
99
  ],
100
- title="Gemma Voice Assistant",
101
- description="Speak into the microphone and get a live response from a Gemma-powered assistant."
102
  )
103
 
104
  # Launch the Gradio app
105
  if __name__ == "__main__":
106
- iface.launch()
107
-
 
1
  import gradio as gr
2
+ from supabase import create_client, Client
3
  import os
4
  import requests
5
+ import json
6
 
7
# ======================================================================
# --- SETUP AND CONFIGURATION ---
# ======================================================================

# Retrieve Supabase credentials from environment variables (Hugging Face
# Space secrets) so no keys are hard-coded in the repository.
try:
    supabase_url = os.environ.get("SUPABASE_URL")
    supabase_key = os.environ.get("SUPABASE_KEY")
    if not supabase_url or not supabase_key:
        # Fail with a clear message instead of letting create_client raise
        # an opaque error on None arguments.
        raise ValueError("SUPABASE_URL and/or SUPABASE_KEY is not set.")
    supabase: Client = create_client(supabase_url, supabase_key)
except Exception as e:
    # The app still runs without persistence; downstream code checks
    # `supabase` for None before using it.
    print(f"Error initializing Supabase client: {e}")
    supabase = None

# Endpoint for the voice-generation engine (e.g. a RunPod instance).
# NOTE(review): the constant name has a typo (three T's) but is used
# consistently in this file; the environment variable itself is spelled
# correctly, so do not rename one without the other.
CHATTTERBOX_ENDPOINT = os.environ.get("CHATTERBOX_ENDPOINT")
24
 
25
+ # ======================================================================
26
+ # --- CORE LOGIC ---
27
+ # ======================================================================
 
 
28
 
29
def process_voice_command(audio):
    """
    Handle one voice command end-to-end.

    Current pipeline (transcription and LLM response are still placeholders):
      1. Accept the recorded audio from the Gradio microphone input.
      2. (Placeholder) transcribe it to text.
      3. (Placeholder) generate a language-model response.
      4. Best-effort: persist the exchange to the Supabase ``chats`` table.
      5. Best-effort: POST the response text to the Chatterbox endpoint.

    Args:
        audio: Microphone capture from gr.Audio (currently unused until the
            real speech-to-text call is wired in).

    Returns:
        tuple: (transcribed_text, model_response) strings, feeding the two
        Gradio output textboxes.
    """

    # Placeholder for the actual transcription logic.
    # Replace this with a call to your speech-to-text model.
    transcribed_text = "Placeholder: Your transcribed text will appear here."

    # Placeholder for the response from the language model.
    # Replace this with a call to your Gemma model.
    model_response = "Placeholder: This is Gemma's generated response."

    # --- Save the chat to Supabase (best-effort: failures only log) ---
    if supabase:
        try:
            # Table name 'chats' must match your Supabase schema.
            supabase.table("chats").insert({
                "user_input": transcribed_text,
                "model_response": model_response
            }).execute()
            print("Chat successfully saved to Supabase! 💾")
        except Exception as e:
            print(f"Error saving chat to Supabase: {e}")
    else:
        print("Supabase client not initialized. Skipping database save.")

    # --- Forward the response text to Chatterbox (best-effort) ---
    if CHATTTERBOX_ENDPOINT:
        try:
            payload = {"text": model_response}
            headers = {"Content-Type": "application/json"}
            # timeout= prevents an unreachable endpoint from hanging the
            # Gradio worker indefinitely; raise_for_status surfaces HTTP
            # errors in the log (still swallowed by this best-effort try).
            resp = requests.post(
                CHATTTERBOX_ENDPOINT,
                data=json.dumps(payload),
                headers=headers,
                timeout=30,
            )
            resp.raise_for_status()
            print("Response sent to Chatterbox endpoint! 🎙️")
        except Exception as e:
            print(f"Error sending data to Chatterbox: {e}")
    else:
        print("Chatterbox endpoint not set. Skipping voice generation.")

    # Return the transcription and the response to the Gradio interface.
    return transcribed_text, model_response
74
 
75
+ # ======================================================================
76
+ # --- GRADIO INTERFACE ---
77
+ # ======================================================================
78
 
79
+ # Create the Gradio interface
80
# Build the Gradio UI: microphone in, two text boxes out.
_output_boxes = [
    gr.Textbox(label="Transcription"),
    gr.Textbox(label="AI's Response"),
]
iface = gr.Interface(
    inputs=gr.Audio(sources=["microphone"], label="Speak your command here..."),
    outputs=_output_boxes,
    fn=process_voice_command,
    title="My Personal Voice Assistant",
    description="Speak into the microphone and get a response from the AI, with chats saved to your Supabase database.",
)
90
 
91
# Launch the Gradio app
# Guard so the web server only starts when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    iface.launch()