Spaces:

haoheliu
/

audioldm2-text2audio-text2music

Running on A10G

App Files Community

sanchit-gandhi committed on Aug 21, 2023

Commit

871eacf

1 Parent(s): 05b81fb

up

Browse files

Files changed (3) hide show

app.py +6 -5
packages.txt +1 -0
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ else:
 # load the diffusers pipeline
 repo_id = "cvssp/audioldm2"
 pipe = AudioLDM2Pipeline.from_pretrained(repo_id, torch_dtype=torch_dtype).to(device)
-pipe.unet = torch.compile(pipe.unet)
 # set the generator for reproducibility
 generator = torch.Generator(device)
@@ -29,7 +29,7 @@ def text2audio(text, negative_prompt, duration, guidance_scale, random_seed, n_c
         text,
         audio_length_in_s=duration,
         guidance_scale=guidance_scale,
-        num_inference_steps=100,
         negative_prompt=negative_prompt,
         num_waveforms_per_prompt=n_candidates if n_candidates else 1,
         generator=generator.manual_seed(int(random_seed)),
@@ -141,7 +141,7 @@ with iface:
     gr.HTML(
         """
         <p>This is the demo for AudioLDM 2, powered by 🧨 Diffusers. Demo uses the checkpoint <a
-        href="https://huggingface.co/cvssp/audioldm2"> AudioLDM 2 base </a>. For faster inference without waiting in
         queue, you may duplicate the space and upgrade to a GPU in the settings. <br/> <a
         href="https://huggingface.co/spaces/haoheliu/audioldm2-text2audio-text2music?duplicate=true"> <img
         style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> <p/>
@@ -174,7 +174,7 @@ with iface:
                 duration = gr.Slider(5, 15, value=10, step=2.5, label="Duration (seconds)")
                 guidance_scale = gr.Slider(
                     0,
-                    6.5,
                     value=3.5,
                     step=0.5,
                     label="Guidance scale",
@@ -258,4 +258,5 @@ with iface:
                 """
             )
-iface.queue(max_size=10).launch(share=True)

 # load the diffusers pipeline
 repo_id = "cvssp/audioldm2"
 pipe = AudioLDM2Pipeline.from_pretrained(repo_id, torch_dtype=torch_dtype).to(device)
+# pipe.unet = torch.compile(pipe.unet)
 # set the generator for reproducibility
 generator = torch.Generator(device)
         text,
         audio_length_in_s=duration,
         guidance_scale=guidance_scale,
+        num_inference_steps=200,
         negative_prompt=negative_prompt,
         num_waveforms_per_prompt=n_candidates if n_candidates else 1,
         generator=generator.manual_seed(int(random_seed)),
     gr.HTML(
         """
         <p>This is the demo for AudioLDM 2, powered by 🧨 Diffusers. Demo uses the checkpoint <a
+        href="https://huggingface.co/cvssp/audioldm2"> AudioLDM 2 base</a>. For faster inference without waiting in
         queue, you may duplicate the space and upgrade to a GPU in the settings. <br/> <a
         href="https://huggingface.co/spaces/haoheliu/audioldm2-text2audio-text2music?duplicate=true"> <img
         style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> <p/>
                 duration = gr.Slider(5, 15, value=10, step=2.5, label="Duration (seconds)")
                 guidance_scale = gr.Slider(
                     0,
+                    7,
                     value=3.5,
                     step=0.5,
                     label="Guidance scale",
                 """
             )
+iface.queue(max_size=20).launch()

packages.txt ADDED Viewed

@@ -0,0 +1 @@
+ffmpeg

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
-torch
 librosa
 transformers
 git+https://github.com/huggingface/diffusers.git

+--extra-index-url https://download.pytorch.org/whl/cu113
+torch>=2.0
 librosa
 transformers
 git+https://github.com/huggingface/diffusers.git