Spaces:

AIGC-Audio
/

AudioGPT

Build error

App Files Community

lmzjms committed on Apr 4, 2023

Commit

4948a80

1 Parent(s): a23ac82

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -9

app.py CHANGED Viewed

@@ -159,12 +159,12 @@ class ConversationBot:
     def init_agent(self, openai_api_key):
         self.llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
         self.t2i = T2I(device="cuda:0")
-        # self.i2t = ImageCaptioning(device="cuda:0")
         self.t2a = T2A(device="cuda:0")
         self.tts = TTS(device="cpu")
         self.t2s = T2S(device="cpu")
         self.i2a = I2A(device="cuda:0")
-        # self.a2t = A2T(device="cpu")
         # self.asr = ASR(device="cuda:0")
         self.inpaint = Inpaint(device="cuda:0")
         # self.tts_ood = TTS_OOD(device="cpu")
@@ -172,9 +172,9 @@ class ConversationBot:
             Tool(name="Generate Image From User Input Text", func=self.t2i.inference,
                  description="useful for when you want to generate an image from a user input text and it saved it to a file. like: generate an image of an object or something, or generate an image that includes some objects. "
                              "The input to this tool should be a string, representing the text used to generate image. "),
-            # Tool(name="Get Photo Description", func=self.i2t.inference,
-            #      description="useful for when you want to know what is inside the photo. receives image_path as input. "
-            #                  "The input to this tool should be a string, representing the image_path. "),
             Tool(name="Generate Audio From User Input Text", func=self.t2a.inference,
                  description="useful for when you want to generate an audio from a user input text and it saved it to a file."
                              "The input to this tool should be a string, representing the text used to generate audio."),
@@ -195,9 +195,9 @@ class ConversationBot:
             Tool(name="Generate Audio From The Image", func=self.i2a.inference,
                  description="useful for when you want to generate an audio based on an image."
                               "The input to this tool should be a string, representing the image_path. "),
-            # Tool(name="Generate Text From The Audio", func=self.a2t.inference,
-            #      description="useful for when you want to describe an audio in text, receives audio_path as input."
-            #                  "The input to this tool should be a string, representing the audio_path.")]
             Tool(name="Audio Inpainting", func=self.inpaint.show_mel_fn,
                  description="useful for when you want to inpaint a mel spectrum of an audio and predict this audio, this tool will generate a mel spectrum and you can inpaint it, receives audio_path as input, "
                              "The input to this tool should be a string, representing the audio_path.")]
@@ -219,7 +219,7 @@ class ConversationBot:
 if __name__ == '__main__':
     bot = ConversationBot()
-    with gr.Blocks(css="#chatbot {overflow:auto; height:600px;}") as demo:
         with gr.Row():
             openai_api_key_textbox = gr.Textbox(
                 placeholder="Paste your OpenAI API key here to start AudioGPT(sk-...) and press Enter ↵️",

     def init_agent(self, openai_api_key):
         self.llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
         self.t2i = T2I(device="cuda:0")
+        self.i2t = ImageCaptioning(device="cuda:0")
         self.t2a = T2A(device="cuda:0")
         self.tts = TTS(device="cpu")
         self.t2s = T2S(device="cpu")
         self.i2a = I2A(device="cuda:0")
+        self.a2t = A2T(device="cpu")
         # self.asr = ASR(device="cuda:0")
         self.inpaint = Inpaint(device="cuda:0")
         # self.tts_ood = TTS_OOD(device="cpu")
             Tool(name="Generate Image From User Input Text", func=self.t2i.inference,
                  description="useful for when you want to generate an image from a user input text and it saved it to a file. like: generate an image of an object or something, or generate an image that includes some objects. "
                              "The input to this tool should be a string, representing the text used to generate image. "),
+            Tool(name="Get Photo Description", func=self.i2t.inference,
+                 description="useful for when you want to know what is inside the photo. receives image_path as input. "
+                             "The input to this tool should be a string, representing the image_path. "),
             Tool(name="Generate Audio From User Input Text", func=self.t2a.inference,
                  description="useful for when you want to generate an audio from a user input text and it saved it to a file."
                              "The input to this tool should be a string, representing the text used to generate audio."),
             Tool(name="Generate Audio From The Image", func=self.i2a.inference,
                  description="useful for when you want to generate an audio based on an image."
                               "The input to this tool should be a string, representing the image_path. "),
+            Tool(name="Generate Text From The Audio", func=self.a2t.inference,
+                 description="useful for when you want to describe an audio in text, receives audio_path as input."
+                             "The input to this tool should be a string, representing the audio_path."),
             Tool(name="Audio Inpainting", func=self.inpaint.show_mel_fn,
                  description="useful for when you want to inpaint a mel spectrum of an audio and predict this audio, this tool will generate a mel spectrum and you can inpaint it, receives audio_path as input, "
                              "The input to this tool should be a string, representing the audio_path.")]
 if __name__ == '__main__':
     bot = ConversationBot()
+    with gr.Blocks(css="#chatbot {overflow:auto; height:500px;}") as demo:
         with gr.Row():
             openai_api_key_textbox = gr.Textbox(
                 placeholder="Paste your OpenAI API key here to start AudioGPT(sk-...) and press Enter ↵️",