Spaces:
Sleeping
Sleeping
Ath
committed on
Commit
•
5467d43
1
Parent(s):
38880b6
Update app.py
Browse files
app.py
CHANGED
@@ -1,16 +1,11 @@
|
|
1 |
import os
|
2 |
-
import requests
|
3 |
import google.generativeai as genai
|
4 |
import gradio as gr
|
5 |
-
from
|
6 |
-
from google.cloud import speech
|
7 |
|
8 |
# Configure your Google Generative AI API key
|
9 |
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
10 |
|
11 |
-
# Set up Google Cloud credentials for Speech-to-Text
|
12 |
-
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "path_to_your_google_cloud_credentials.json"
|
13 |
-
|
14 |
# Create the model
|
15 |
generation_config = {
|
16 |
"temperature": 1,
|
@@ -23,112 +18,45 @@ generation_config = {
|
|
23 |
model = genai.GenerativeModel(
|
24 |
model_name="gemini-1.5-pro",
|
25 |
generation_config=generation_config,
|
26 |
-
system_instruction="You are a human that lives on earth, and you also speak friendly and use
|
27 |
)
|
28 |
|
29 |
chat_session = model.start_chat(history=[])
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
def chat_and_tts_text(user_input, history):
|
36 |
-
try:
|
37 |
-
# Send the user's text input to the chat session
|
38 |
-
response = chat_session.send_message(user_input)
|
39 |
-
response_text = response.text
|
40 |
-
|
41 |
-
# Update the chat history with text input and response
|
42 |
-
history.append((user_input, response_text))
|
43 |
-
|
44 |
-
return history, history
|
45 |
-
except Exception as e:
|
46 |
-
return history, f"Error: {str(e)}"
|
47 |
-
|
48 |
-
def convert_audio_to_text(audio_file):
|
49 |
-
try:
|
50 |
-
client = speech.SpeechClient()
|
51 |
-
|
52 |
-
with open(audio_file.name, "rb") as audio:
|
53 |
-
content = audio.read()
|
54 |
-
|
55 |
-
audio = speech.RecognitionAudio(content=content)
|
56 |
-
config = speech.RecognitionConfig(
|
57 |
-
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
58 |
-
sample_rate_hertz=16000,
|
59 |
-
language_code="en-US",
|
60 |
-
)
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
try:
|
73 |
-
# Convert uploaded audio file to text
|
74 |
-
user_input = convert_audio_to_text(audio_file)
|
75 |
-
|
76 |
-
# Send the user's audio input to the chat session
|
77 |
-
response = chat_session.send_message(user_input)
|
78 |
-
response_text = response.text
|
79 |
-
|
80 |
-
# Eleven Labs text-to-speech request payload
|
81 |
-
payload = {
|
82 |
-
"text": response_text,
|
83 |
-
"voice_settings": {
|
84 |
-
"stability": 0,
|
85 |
-
"similarity_boost": 0
|
86 |
-
}
|
87 |
-
}
|
88 |
-
headers = {
|
89 |
-
"xi-api-key": eleven_labs_api_key,
|
90 |
-
"Content-Type": "application/json"
|
91 |
-
}
|
92 |
-
|
93 |
-
# Make the request to Eleven Labs API
|
94 |
-
tts_response = requests.post(eleven_labs_url, json=payload, headers=headers)
|
95 |
-
|
96 |
-
# Check if the response is successful and save the audio content to a temporary file
|
97 |
-
if tts_response.status_code == 200:
|
98 |
-
with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
99 |
-
temp_audio.write(tts_response.content)
|
100 |
-
audio_path = temp_audio.name
|
101 |
-
else:
|
102 |
-
audio_path = None
|
103 |
-
|
104 |
-
# Update the chat history with audio input and response
|
105 |
-
history.append((user_input, response_text))
|
106 |
-
|
107 |
-
return history, history, audio_path
|
108 |
-
except Exception as e:
|
109 |
-
return history, f"Error: {str(e)}", None
|
110 |
|
111 |
# Create the Gradio UI
|
112 |
with gr.Blocks() as demo:
|
113 |
gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
|
114 |
-
gr.Markdown("Ask any question
|
115 |
|
116 |
with gr.Row():
|
117 |
with gr.Column(scale=2):
|
118 |
chatbot = gr.Chatbot(label="Chat History")
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
with gr.Column(scale=2):
|
123 |
-
user_input_audio = gr.File(label="Upload Audio", type="binary")
|
124 |
-
submit_btn_audio = gr.Button("Send")
|
125 |
|
126 |
with gr.Column(scale=1):
|
127 |
audio_output = gr.Audio(label="Response Audio", type="filepath")
|
128 |
|
129 |
state = gr.State([])
|
130 |
|
131 |
-
|
132 |
-
submit_btn_audio.click(chat_and_tts_audio, inputs=[user_input_audio, state], outputs=[chatbot, state, audio_output])
|
133 |
|
134 |
demo.launch()
|
|
|
1 |
import os
|
|
|
2 |
import google.generativeai as genai
|
3 |
import gradio as gr
|
4 |
+
from gtts import gTTS
|
|
|
5 |
|
6 |
# Configure your Google Generative AI API key
|
7 |
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
8 |
|
|
|
|
|
|
|
9 |
# Create the model
|
10 |
generation_config = {
|
11 |
"temperature": 1,
|
|
|
18 |
model = genai.GenerativeModel(
|
19 |
model_name="gemini-1.5-pro",
|
20 |
generation_config=generation_config,
|
21 |
+
system_instruction="You are a human that lives on earth, and you also speak friendly, and you also use bro words, and you never say you're an AI. Your name is Ath, and you speak 100% like a human",
|
22 |
)
|
23 |
|
24 |
chat_session = model.start_chat(history=[])
|
25 |
|
26 |
+
def chat_and_tts(user_input, history):
    """Send the user's message to the chat session and voice the reply.

    Args:
        user_input: Text entered by the user.
        history: List of ``(user_message, reply)`` tuples kept in the
            Gradio state; ``None`` is treated as an empty history.

    Returns:
        A 3-tuple ``(history, history, audio_path)`` feeding the chatbot,
        the state and the audio player. ``audio_path`` is the path of an
        MP3 file with the spoken reply, or ``None`` when nothing was sent
        or speech synthesis failed.
    """
    # Local import so this block does not depend on the file's import list.
    import tempfile

    # Work on a copy so the caller's list is never mutated in place.
    history = list(history or [])

    # Nothing to send: skip the model call and the TTS request entirely.
    if not user_input or not user_input.strip():
        return history, history, None

    # Send the user's input to the chat session
    response = chat_session.send_message(user_input)
    response_text = response.text

    print("Response from chat session:", response_text)  # Debug response text

    # Convert the response text to speech using gTTS. Each reply gets its
    # own temporary file: a single fixed file name would be overwritten by
    # concurrent requests. The file is kept (delete=False) because it must
    # still exist when the UI reads it after this function returns.
    audio_path = None
    if response_text and response_text.strip():
        try:
            tts = gTTS(text=response_text, lang='en')
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio_file:
                audio_path = audio_file.name
            tts.save(audio_path)
            print(f'Audio content written to file "{audio_path}"')
        except Exception as exc:
            # Speech is best-effort: keep the text reply even if TTS fails.
            print(f"Text-to-speech failed: {exc}")
            audio_path = None

    # Update the chat history
    history.append((user_input, response_text))

    return history, history, audio_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
# Create the Gradio UI
|
45 |
with gr.Blocks() as demo:
    # Page header and a short description of what the app does.
    gr.Markdown("<h1 style='text-align: center;'>Chat with Ath</h1>")
    gr.Markdown("Ask any question and get a friendly response from Ath. The response will also be converted to speech.")

    with gr.Row():
        # Left column: conversation view plus the message input controls.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat History")
            user_input = gr.Textbox(placeholder="Ask me anything...", label="Your Question")
            submit_btn = gr.Button("Send")

        # Right column: player for the spoken version of the latest reply.
        with gr.Column(scale=1):
            audio_output = gr.Audio(label="Response Audio", type="filepath")

    # Conversation history carried between button clicks.
    state = gr.State([])

    # Clicking "Send" runs the chat handler and refreshes all three outputs.
    submit_btn.click(
        fn=chat_and_tts,
        inputs=[user_input, state],
        outputs=[chatbot, state, audio_output],
    )

demo.launch()
|