Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -159,12 +159,12 @@ class ConversationBot:
|
|
| 159 |
def init_agent(self, openai_api_key):
|
| 160 |
self.llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
|
| 161 |
self.t2i = T2I(device="cuda:0")
|
| 162 |
-
|
| 163 |
self.t2a = T2A(device="cuda:0")
|
| 164 |
self.tts = TTS(device="cpu")
|
| 165 |
self.t2s = T2S(device="cpu")
|
| 166 |
self.i2a = I2A(device="cuda:0")
|
| 167 |
-
|
| 168 |
# self.asr = ASR(device="cuda:0")
|
| 169 |
self.inpaint = Inpaint(device="cuda:0")
|
| 170 |
# self.tts_ood = TTS_OOD(device="cpu")
|
|
@@ -172,9 +172,9 @@ class ConversationBot:
|
|
| 172 |
Tool(name="Generate Image From User Input Text", func=self.t2i.inference,
|
| 173 |
description="useful for when you want to generate an image from a user input text and it saved it to a file. like: generate an image of an object or something, or generate an image that includes some objects. "
|
| 174 |
"The input to this tool should be a string, representing the text used to generate image. "),
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
Tool(name="Generate Audio From User Input Text", func=self.t2a.inference,
|
| 179 |
description="useful for when you want to generate an audio from a user input text and it saved it to a file."
|
| 180 |
"The input to this tool should be a string, representing the text used to generate audio."),
|
|
@@ -195,9 +195,9 @@ class ConversationBot:
|
|
| 195 |
Tool(name="Generate Audio From The Image", func=self.i2a.inference,
|
| 196 |
description="useful for when you want to generate an audio based on an image."
|
| 197 |
"The input to this tool should be a string, representing the image_path. "),
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
Tool(name="Audio Inpainting", func=self.inpaint.show_mel_fn,
|
| 202 |
description="useful for when you want to inpaint a mel spectrum of an audio and predict this audio, this tool will generate a mel spectrum and you can inpaint it, receives audio_path as input, "
|
| 203 |
"The input to this tool should be a string, representing the audio_path.")]
|
|
@@ -219,7 +219,7 @@ class ConversationBot:
|
|
| 219 |
if __name__ == '__main__':
|
| 220 |
bot = ConversationBot()
|
| 221 |
|
| 222 |
-
with gr.Blocks(css="#chatbot {overflow:auto; height:
|
| 223 |
with gr.Row():
|
| 224 |
openai_api_key_textbox = gr.Textbox(
|
| 225 |
placeholder="Paste your OpenAI API key here to start AudioGPT(sk-...) and press Enter ↵️",
|
|
|
|
| 159 |
def init_agent(self, openai_api_key):
|
| 160 |
self.llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
|
| 161 |
self.t2i = T2I(device="cuda:0")
|
| 162 |
+
self.i2t = ImageCaptioning(device="cuda:0")
|
| 163 |
self.t2a = T2A(device="cuda:0")
|
| 164 |
self.tts = TTS(device="cpu")
|
| 165 |
self.t2s = T2S(device="cpu")
|
| 166 |
self.i2a = I2A(device="cuda:0")
|
| 167 |
+
self.a2t = A2T(device="cpu")
|
| 168 |
# self.asr = ASR(device="cuda:0")
|
| 169 |
self.inpaint = Inpaint(device="cuda:0")
|
| 170 |
# self.tts_ood = TTS_OOD(device="cpu")
|
|
|
|
| 172 |
Tool(name="Generate Image From User Input Text", func=self.t2i.inference,
|
| 173 |
description="useful for when you want to generate an image from a user input text and it saved it to a file. like: generate an image of an object or something, or generate an image that includes some objects. "
|
| 174 |
"The input to this tool should be a string, representing the text used to generate image. "),
|
| 175 |
+
Tool(name="Get Photo Description", func=self.i2t.inference,
|
| 176 |
+
description="useful for when you want to know what is inside the photo. receives image_path as input. "
|
| 177 |
+
"The input to this tool should be a string, representing the image_path. "),
|
| 178 |
Tool(name="Generate Audio From User Input Text", func=self.t2a.inference,
|
| 179 |
description="useful for when you want to generate an audio from a user input text and it saved it to a file."
|
| 180 |
"The input to this tool should be a string, representing the text used to generate audio."),
|
|
|
|
| 195 |
Tool(name="Generate Audio From The Image", func=self.i2a.inference,
|
| 196 |
description="useful for when you want to generate an audio based on an image."
|
| 197 |
"The input to this tool should be a string, representing the image_path. "),
|
| 198 |
+
Tool(name="Generate Text From The Audio", func=self.a2t.inference,
|
| 199 |
+
description="useful for when you want to describe an audio in text, receives audio_path as input."
|
| 200 |
+
"The input to this tool should be a string, representing the audio_path."),
|
| 201 |
Tool(name="Audio Inpainting", func=self.inpaint.show_mel_fn,
|
| 202 |
description="useful for when you want to inpaint a mel spectrum of an audio and predict this audio, this tool will generate a mel spectrum and you can inpaint it, receives audio_path as input, "
|
| 203 |
"The input to this tool should be a string, representing the audio_path.")]
|
|
|
|
| 219 |
if __name__ == '__main__':
|
| 220 |
bot = ConversationBot()
|
| 221 |
|
| 222 |
+
with gr.Blocks(css="#chatbot {overflow:auto; height:500px;}") as demo:
|
| 223 |
with gr.Row():
|
| 224 |
openai_api_key_textbox = gr.Textbox(
|
| 225 |
placeholder="Paste your OpenAI API key here to start AudioGPT(sk-...) and press Enter ↵️",
|