Spaces:

Artificial-superintelligence
/

ATHAI

Sleeping

App Files Files Community

Ath committed on Jun 14

Commit

5467d43

•

1 Parent(s): 38880b6

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -93

app.py CHANGED Viewed

@@ -1,16 +1,11 @@
 import os
-import requests
 import google.generativeai as genai
 import gradio as gr
-from tempfile import NamedTemporaryFile
-from google.cloud import speech
 # Configure your Google Generative AI API key
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
-# Set up Google Cloud credentials for Speech-to-Text
-os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "path_to_your_google_cloud_credentials.json"
 # Create the model
 generation_config = {
     "temperature": 1,
@@ -23,112 +18,45 @@ generation_config = {
 model = genai.GenerativeModel(
     model_name="gemini-1.5-pro",
     generation_config=generation_config,
-    system_instruction="You are a human that lives on earth, and you also speak friendly and use informal language. Your name is Ath, and you speak like a 100% human.",
 )
 chat_session = model.start_chat(history=[])
-# Eleven Labs API configuration
-eleven_labs_api_key = os.getenv("ELEVEN_LABS_API_KEY")
-eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF"
-def chat_and_tts_text(user_input, history):
-    try:
-        # Send the user's text input to the chat session
-        response = chat_session.send_message(user_input)
-        response_text = response.text
-        # Update the chat history with text input and response
-        history.append((user_input, response_text))
-        return history, history
-    except Exception as e:
-        return history, f"Error: {str(e)}"
-def convert_audio_to_text(audio_file):
-    try:
-        client = speech.SpeechClient()
-        with open(audio_file.name, "rb") as audio:
-            content = audio.read()
-        audio = speech.RecognitionAudio(content=content)
-        config = speech.RecognitionConfig(
-            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
-            sample_rate_hertz=16000,
-            language_code="en-US",
-        )
-        response = client.recognize(config=config, audio=audio)
-        # Assuming the audio contains only one speech segment
-        transcript = response.results[0].alternatives[0].transcript
-        return transcript
-    except Exception as e:
-        return f"Error in audio to text conversion: {str(e)}"
-def chat_and_tts_audio(audio_file, history):
-    try:
-        # Convert uploaded audio file to text
-        user_input = convert_audio_to_text(audio_file)
-        # Send the user's audio input to the chat session
-        response = chat_session.send_message(user_input)
-        response_text = response.text
-        # Eleven Labs text-to-speech request payload
-        payload = {
-            "text": response_text,
-            "voice_settings": {
-                "stability": 0,
-                "similarity_boost": 0
-            }
-        }
-        headers = {
-            "xi-api-key": eleven_labs_api_key,
-            "Content-Type": "application/json"
-        }
-        # Make the request to Eleven Labs API
-        tts_response = requests.post(eleven_labs_url, json=payload, headers=headers)
-        # Check if the response is successful and save the audio content to a temporary file
-        if tts_response.status_code == 200:
-            with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
-                temp_audio.write(tts_response.content)
-                audio_path = temp_audio.name
-        else:
-            audio_path = None
-        # Update the chat history with audio input and response
-        history.append((user_input, response_text))
-        return history, history, audio_path
-    except Exception as e:
-        return history, f"Error: {str(e)}", None
 # Create the Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
-    gr.Markdown("Ask any question by typing or upload an audio file to receive a response from Ath in text and audio format.")
     with gr.Row():
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(label="Chat History")
-            user_input_text = gr.Textbox(placeholder="Type your question...", label="Text Input")
-            submit_btn_text = gr.Button("Send")
-        with gr.Column(scale=2):
-            user_input_audio = gr.File(label="Upload Audio", type="binary")
-            submit_btn_audio = gr.Button("Send")
         with gr.Column(scale=1):
             audio_output = gr.Audio(label="Response Audio", type="filepath")
     state = gr.State([])
-    submit_btn_text.click(chat_and_tts_text, inputs=[user_input_text, state], outputs=[chatbot, state])
-    submit_btn_audio.click(chat_and_tts_audio, inputs=[user_input_audio, state], outputs=[chatbot, state, audio_output])
 demo.launch()

 import os
 import google.generativeai as genai
 import gradio as gr
+from gtts import gTTS
 # Configure your Google Generative AI API key
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 # Create the model
 generation_config = {
     "temperature": 1,
 model = genai.GenerativeModel(
     model_name="gemini-1.5-pro",
     generation_config=generation_config,
+    system_instruction="You are a human that lives on earth, and you also speak friendly, and you also use bro words, and you never say you're an AI. Your name is Ath, and you speak 100% like a human",
 )
 chat_session = model.start_chat(history=[])
+def chat_and_tts(user_input, history):
+    # Send the user's input to the chat session
+    response = chat_session.send_message(user_input)
+    response_text = response.text
+    print("Response from chat session:", response_text)  # Debug response text
+    # Convert the response text to speech using gTTS
+    tts = gTTS(text=response_text, lang='en')
+    audio_path = 'response_audio.mp3'
+    tts.save(audio_path)
+    print(f'Audio content written to file "{audio_path}"')
+    # Update the chat history
+    history.append((user_input, response_text))
+    return history, history, audio_path
 # Create the Gradio UI: a chat column (history, question box, send button)
 # next to a narrower column holding the audio player for the spoken reply.
 with gr.Blocks() as demo:
     gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
+    gr.Markdown("Ask any question and get a friendly response from Ath. The response will also be converted to speech.")
     with gr.Row():
         # Chat column, given twice the relative width (scale=2 vs scale=1).
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(label="Chat History")
+            user_input = gr.Textbox(placeholder="Ask me anything...", label="Your Question")
+            submit_btn = gr.Button("Send")
         # Audio column; the component is configured with type="filepath".
         with gr.Column(scale=1):
             audio_output = gr.Audio(label="Response Audio", type="filepath")
     # Conversation history passed to and returned from the click handler;
     # starts as an empty list.
     state = gr.State([])
     # chat_and_tts receives (user_input, state) and its three return values
     # are routed to the chatbot, the state and the audio player, in that order.
+    submit_btn.click(chat_and_tts, inputs=[user_input, state], outputs=[chatbot, state, audio_output])
 demo.launch()