Ath committed on
Commit
5467d43
1 Parent(s): 38880b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -93
app.py CHANGED
@@ -1,16 +1,11 @@
1
  import os
2
- import requests
3
  import google.generativeai as genai
4
  import gradio as gr
5
- from tempfile import NamedTemporaryFile
6
- from google.cloud import speech
7
 
8
  # Configure your Google Generative AI API key
9
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
10
 
11
- # Set up Google Cloud credentials for Speech-to-Text
12
- os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "path_to_your_google_cloud_credentials.json"
13
-
14
  # Create the model
15
  generation_config = {
16
  "temperature": 1,
@@ -23,112 +18,45 @@ generation_config = {
23
  model = genai.GenerativeModel(
24
  model_name="gemini-1.5-pro",
25
  generation_config=generation_config,
26
- system_instruction="You are a human that lives on earth, and you also speak friendly and use informal language. Your name is Ath, and you speak like a 100% human.",
27
  )
28
 
29
  chat_session = model.start_chat(history=[])
30
 
31
- # Eleven Labs API configuration
32
- eleven_labs_api_key = os.getenv("ELEVEN_LABS_API_KEY")
33
- eleven_labs_url = "https://api.elevenlabs.io/v1/text-to-speech/aEO01A4wXwd1O8GPgGlF"
34
-
35
- def chat_and_tts_text(user_input, history):
36
- try:
37
- # Send the user's text input to the chat session
38
- response = chat_session.send_message(user_input)
39
- response_text = response.text
40
-
41
- # Update the chat history with text input and response
42
- history.append((user_input, response_text))
43
-
44
- return history, history
45
- except Exception as e:
46
- return history, f"Error: {str(e)}"
47
-
48
- def convert_audio_to_text(audio_file):
49
- try:
50
- client = speech.SpeechClient()
51
-
52
- with open(audio_file.name, "rb") as audio:
53
- content = audio.read()
54
-
55
- audio = speech.RecognitionAudio(content=content)
56
- config = speech.RecognitionConfig(
57
- encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
58
- sample_rate_hertz=16000,
59
- language_code="en-US",
60
- )
61
 
62
- response = client.recognize(config=config, audio=audio)
63
-
64
- # Assuming the audio contains only one speech segment
65
- transcript = response.results[0].alternatives[0].transcript
66
- return transcript
 
 
67
 
68
- except Exception as e:
69
- return f"Error in audio to text conversion: {str(e)}"
70
-
71
- def chat_and_tts_audio(audio_file, history):
72
- try:
73
- # Convert uploaded audio file to text
74
- user_input = convert_audio_to_text(audio_file)
75
-
76
- # Send the user's audio input to the chat session
77
- response = chat_session.send_message(user_input)
78
- response_text = response.text
79
-
80
- # Eleven Labs text-to-speech request payload
81
- payload = {
82
- "text": response_text,
83
- "voice_settings": {
84
- "stability": 0,
85
- "similarity_boost": 0
86
- }
87
- }
88
- headers = {
89
- "xi-api-key": eleven_labs_api_key,
90
- "Content-Type": "application/json"
91
- }
92
-
93
- # Make the request to Eleven Labs API
94
- tts_response = requests.post(eleven_labs_url, json=payload, headers=headers)
95
-
96
- # Check if the response is successful and save the audio content to a temporary file
97
- if tts_response.status_code == 200:
98
- with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
99
- temp_audio.write(tts_response.content)
100
- audio_path = temp_audio.name
101
- else:
102
- audio_path = None
103
-
104
- # Update the chat history with audio input and response
105
- history.append((user_input, response_text))
106
-
107
- return history, history, audio_path
108
- except Exception as e:
109
- return history, f"Error: {str(e)}", None
110
 
111
  # Create the Gradio UI
112
  with gr.Blocks() as demo:
113
  gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
114
- gr.Markdown("Ask any question by typing or upload an audio file to receive a response from Ath in text and audio format.")
115
 
116
  with gr.Row():
117
  with gr.Column(scale=2):
118
  chatbot = gr.Chatbot(label="Chat History")
119
- user_input_text = gr.Textbox(placeholder="Type your question...", label="Text Input")
120
- submit_btn_text = gr.Button("Send")
121
-
122
- with gr.Column(scale=2):
123
- user_input_audio = gr.File(label="Upload Audio", type="binary")
124
- submit_btn_audio = gr.Button("Send")
125
 
126
  with gr.Column(scale=1):
127
  audio_output = gr.Audio(label="Response Audio", type="filepath")
128
 
129
  state = gr.State([])
130
 
131
- submit_btn_text.click(chat_and_tts_text, inputs=[user_input_text, state], outputs=[chatbot, state])
132
- submit_btn_audio.click(chat_and_tts_audio, inputs=[user_input_audio, state], outputs=[chatbot, state, audio_output])
133
 
134
  demo.launch()
 
1
  import os
 
2
  import google.generativeai as genai
3
  import gradio as gr
4
+ from gtts import gTTS
 
5
 
6
  # Configure your Google Generative AI API key
7
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
8
 
 
 
 
9
  # Create the model
10
  generation_config = {
11
  "temperature": 1,
 
18
  model = genai.GenerativeModel(
19
  model_name="gemini-1.5-pro",
20
  generation_config=generation_config,
21
+ system_instruction="You are a human that lives on earth, and you also speak friendly, and you also use bro words, and you never say you're an AI. Your name is Ath, and you speak 100% like a human",
22
  )
23
 
24
  chat_session = model.start_chat(history=[])
25
 
26
+ def chat_and_tts(user_input, history):
27
+ # Send the user's input to the chat session
28
+ response = chat_session.send_message(user_input)
29
+ response_text = response.text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ print("Response from chat session:", response_text) # Debug response text
32
+
33
+ # Convert the response text to speech using gTTS
34
+ tts = gTTS(text=response_text, lang='en')
35
+ audio_path = 'response_audio.mp3'
36
+ tts.save(audio_path)
37
+ print(f'Audio content written to file "{audio_path}"')
38
 
39
+ # Update the chat history
40
+ history.append((user_input, response_text))
41
+
42
+ return history, history, audio_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  # Create the Gradio UI
45
  with gr.Blocks() as demo:
46
  gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
47
+ gr.Markdown("Ask any question and get a friendly response from Ath. The response will also be converted to speech.")
48
 
49
  with gr.Row():
50
  with gr.Column(scale=2):
51
  chatbot = gr.Chatbot(label="Chat History")
52
+ user_input = gr.Textbox(placeholder="Ask me anything...", label="Your Question")
53
+ submit_btn = gr.Button("Send")
 
 
 
 
54
 
55
  with gr.Column(scale=1):
56
  audio_output = gr.Audio(label="Response Audio", type="filepath")
57
 
58
  state = gr.State([])
59
 
60
+ submit_btn.click(chat_and_tts, inputs=[user_input, state], outputs=[chatbot, state, audio_output])
 
61
 
62
  demo.launch()