Spaces:
Running
on
A10G
Person screaming
#263
by
GuchiVanPelt
- opened
- README.md +1 -1
- app.py +8 -9
- requirements.txt +1 -5
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: π
|
|
4 |
colorFrom: indigo
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: bigscience-openrail-m
|
|
|
4 |
colorFrom: indigo
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.27.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: bigscience-openrail-m
|
app.py
CHANGED
@@ -9,7 +9,7 @@ from transformers import AutoProcessor, ClapModel
|
|
9 |
# make Space compatible with CPU duplicates
|
10 |
if torch.cuda.is_available():
|
11 |
device = "cuda"
|
12 |
-
torch_dtype = torch.
|
13 |
else:
|
14 |
device = "cpu"
|
15 |
torch_dtype = torch.float32
|
@@ -34,7 +34,6 @@ def text2audio(text, negative_prompt, duration, guidance_scale, random_seed, n_c
|
|
34 |
text,
|
35 |
audio_length_in_s=duration,
|
36 |
guidance_scale=guidance_scale,
|
37 |
-
num_inference_steps=100,
|
38 |
negative_prompt=negative_prompt,
|
39 |
num_waveforms_per_prompt=n_candidates if n_candidates else 1,
|
40 |
generator=generator.manual_seed(int(random_seed)),
|
@@ -170,7 +169,7 @@ with iface:
|
|
170 |
)
|
171 |
|
172 |
with gr.Group():
|
173 |
-
with gr.
|
174 |
textbox = gr.Textbox(
|
175 |
value="A hammer is hitting a wooden surface",
|
176 |
max_lines=1,
|
@@ -185,7 +184,7 @@ with iface:
|
|
185 |
info="Enter a negative prompt not to guide the audio generation. Selecting appropriate negative prompts can improve the audio quality significantly.",
|
186 |
elem_id="prompt-in",
|
187 |
)
|
188 |
-
|
189 |
with gr.Accordion("Click to modify detailed configurations", open=False):
|
190 |
seed = gr.Number(
|
191 |
value=45,
|
@@ -195,8 +194,8 @@ with iface:
|
|
195 |
duration = gr.Slider(2.5, 10, value=5, step=2.5, label="Duration (seconds)")
|
196 |
guidance_scale = gr.Slider(
|
197 |
0,
|
198 |
-
|
199 |
-
value=
|
200 |
step=0.5,
|
201 |
label="Guidance scale",
|
202 |
info="Large => better quality and relevancy to text; Small => better diversity",
|
@@ -209,9 +208,9 @@ with iface:
|
|
209 |
label="Number waveforms to generate",
|
210 |
info="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation",
|
211 |
)
|
212 |
-
|
213 |
outputs = gr.Video(label="Output", elem_id="output-video")
|
214 |
-
btn = gr.Button("Submit"
|
215 |
|
216 |
with gr.Group(elem_id="share-btn-container", visible=False):
|
217 |
community_icon = gr.HTML(community_icon_html)
|
@@ -224,7 +223,7 @@ with iface:
|
|
224 |
outputs=[outputs],
|
225 |
)
|
226 |
|
227 |
-
share_button.click(None, [], [],
|
228 |
gr.HTML(
|
229 |
"""
|
230 |
<div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
|
|
|
9 |
# make Space compatible with CPU duplicates
|
10 |
if torch.cuda.is_available():
|
11 |
device = "cuda"
|
12 |
+
torch_dtype = torch.float16
|
13 |
else:
|
14 |
device = "cpu"
|
15 |
torch_dtype = torch.float32
|
|
|
34 |
text,
|
35 |
audio_length_in_s=duration,
|
36 |
guidance_scale=guidance_scale,
|
|
|
37 |
negative_prompt=negative_prompt,
|
38 |
num_waveforms_per_prompt=n_candidates if n_candidates else 1,
|
39 |
generator=generator.manual_seed(int(random_seed)),
|
|
|
169 |
)
|
170 |
|
171 |
with gr.Group():
|
172 |
+
with gr.Box():
|
173 |
textbox = gr.Textbox(
|
174 |
value="A hammer is hitting a wooden surface",
|
175 |
max_lines=1,
|
|
|
184 |
info="Enter a negative prompt not to guide the audio generation. Selecting appropriate negative prompts can improve the audio quality significantly.",
|
185 |
elem_id="prompt-in",
|
186 |
)
|
187 |
+
|
188 |
with gr.Accordion("Click to modify detailed configurations", open=False):
|
189 |
seed = gr.Number(
|
190 |
value=45,
|
|
|
194 |
duration = gr.Slider(2.5, 10, value=5, step=2.5, label="Duration (seconds)")
|
195 |
guidance_scale = gr.Slider(
|
196 |
0,
|
197 |
+
4,
|
198 |
+
value=2.5,
|
199 |
step=0.5,
|
200 |
label="Guidance scale",
|
201 |
info="Large => better quality and relevancy to text; Small => better diversity",
|
|
|
208 |
label="Number waveforms to generate",
|
209 |
info="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation",
|
210 |
)
|
211 |
+
|
212 |
outputs = gr.Video(label="Output", elem_id="output-video")
|
213 |
+
btn = gr.Button("Submit").style(full_width=True)
|
214 |
|
215 |
with gr.Group(elem_id="share-btn-container", visible=False):
|
216 |
community_icon = gr.HTML(community_icon_html)
|
|
|
223 |
outputs=[outputs],
|
224 |
)
|
225 |
|
226 |
+
share_button.click(None, [], [], _js=share_js)
|
227 |
gr.HTML(
|
228 |
"""
|
229 |
<div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
|
requirements.txt
CHANGED
@@ -1,8 +1,4 @@
|
|
1 |
git+https://github.com/huggingface/diffusers.git
|
2 |
git+https://github.com/huggingface/transformers.git
|
3 |
--extra-index-url https://download.pytorch.org/whl/cu113
|
4 |
-
torch
|
5 |
-
numpy==1.24.3
|
6 |
-
pydantic
|
7 |
-
fastapi
|
8 |
-
gradio
|
|
|
1 |
git+https://github.com/huggingface/diffusers.git
|
2 |
git+https://github.com/huggingface/transformers.git
|
3 |
--extra-index-url https://download.pytorch.org/whl/cu113
|
4 |
+
torch >= 2.0
|
|
|
|
|
|
|
|