Spaces:
Running
on
A10G
Person screaming
#263
by
GuchiVanPelt
- opened
- README.md +1 -1
- app.py +8 -9
- requirements.txt +1 -5
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: π
|
|
4 |
colorFrom: indigo
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: bigscience-openrail-m
|
|
|
4 |
colorFrom: indigo
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.27.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: bigscience-openrail-m
|
app.py
CHANGED
@@ -9,7 +9,7 @@ from transformers import AutoProcessor, ClapModel
|
|
9 |
# make Space compatible with CPU duplicates
|
10 |
if torch.cuda.is_available():
|
11 |
device = "cuda"
|
12 |
-
torch_dtype = torch.
|
13 |
else:
|
14 |
device = "cpu"
|
15 |
torch_dtype = torch.float32
|
@@ -34,7 +34,6 @@ def text2audio(text, negative_prompt, duration, guidance_scale, random_seed, n_c
|
|
34 |
text,
|
35 |
audio_length_in_s=duration,
|
36 |
guidance_scale=guidance_scale,
|
37 |
-
num_inference_steps=100,
|
38 |
negative_prompt=negative_prompt,
|
39 |
num_waveforms_per_prompt=n_candidates if n_candidates else 1,
|
40 |
generator=generator.manual_seed(int(random_seed)),
|
@@ -170,7 +169,7 @@ with iface:
|
|
170 |
)
|
171 |
|
172 |
with gr.Group():
|
173 |
-
with gr.
|
174 |
textbox = gr.Textbox(
|
175 |
value="A hammer is hitting a wooden surface",
|
176 |
max_lines=1,
|
@@ -185,7 +184,7 @@ with iface:
|
|
185 |
info="Enter a negative prompt not to guide the audio generation. Selecting appropriate negative prompts can improve the audio quality significantly.",
|
186 |
elem_id="prompt-in",
|
187 |
)
|
188 |
-
|
189 |
with gr.Accordion("Click to modify detailed configurations", open=False):
|
190 |
seed = gr.Number(
|
191 |
value=45,
|
@@ -195,8 +194,8 @@ with iface:
|
|
195 |
duration = gr.Slider(2.5, 10, value=5, step=2.5, label="Duration (seconds)")
|
196 |
guidance_scale = gr.Slider(
|
197 |
0,
|
198 |
-
|
199 |
-
value=
|
200 |
step=0.5,
|
201 |
label="Guidance scale",
|
202 |
info="Large => better quality and relevancy to text; Small => better diversity",
|
@@ -209,9 +208,9 @@ with iface:
|
|
209 |
label="Number waveforms to generate",
|
210 |
info="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation",
|
211 |
)
|
212 |
-
|
213 |
outputs = gr.Video(label="Output", elem_id="output-video")
|
214 |
-
btn = gr.Button("Submit"
|
215 |
|
216 |
with gr.Group(elem_id="share-btn-container", visible=False):
|
217 |
community_icon = gr.HTML(community_icon_html)
|
@@ -224,7 +223,7 @@ with iface:
|
|
224 |
outputs=[outputs],
|
225 |
)
|
226 |
|
227 |
-
share_button.click(None, [], [],
|
228 |
gr.HTML(
|
229 |
"""
|
230 |
<div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
|
|
|
9 |
# make Space compatible with CPU duplicates
|
10 |
if torch.cuda.is_available():
|
11 |
device = "cuda"
|
12 |
+
torch_dtype = torch.float16
|
13 |
else:
|
14 |
device = "cpu"
|
15 |
torch_dtype = torch.float32
|
|
|
34 |
text,
|
35 |
audio_length_in_s=duration,
|
36 |
guidance_scale=guidance_scale,
|
|
|
37 |
negative_prompt=negative_prompt,
|
38 |
num_waveforms_per_prompt=n_candidates if n_candidates else 1,
|
39 |
generator=generator.manual_seed(int(random_seed)),
|
|
|
169 |
)
|
170 |
|
171 |
with gr.Group():
|
172 |
+
with gr.Box():
|
173 |
textbox = gr.Textbox(
|
174 |
value="A hammer is hitting a wooden surface",
|
175 |
max_lines=1,
|
|
|
184 |
info="Enter a negative prompt not to guide the audio generation. Selecting appropriate negative prompts can improve the audio quality significantly.",
|
185 |
elem_id="prompt-in",
|
186 |
)
|
187 |
+
|
188 |
with gr.Accordion("Click to modify detailed configurations", open=False):
|
189 |
seed = gr.Number(
|
190 |
value=45,
|
|
|
194 |
duration = gr.Slider(2.5, 10, value=5, step=2.5, label="Duration (seconds)")
|
195 |
guidance_scale = gr.Slider(
|
196 |
0,
|
197 |
+
4,
|
198 |
+
value=2.5,
|
199 |
step=0.5,
|
200 |
label="Guidance scale",
|
201 |
info="Large => better quality and relevancy to text; Small => better diversity",
|
|
|
208 |
label="Number waveforms to generate",
|
209 |
info="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation",
|
210 |
)
|
211 |
+
|
212 |
outputs = gr.Video(label="Output", elem_id="output-video")
|
213 |
+
btn = gr.Button("Submit").style(full_width=True)
|
214 |
|
215 |
with gr.Group(elem_id="share-btn-container", visible=False):
|
216 |
community_icon = gr.HTML(community_icon_html)
|
|
|
223 |
outputs=[outputs],
|
224 |
)
|
225 |
|
226 |
+
share_button.click(None, [], [], _js=share_js)
|
227 |
gr.HTML(
|
228 |
"""
|
229 |
<div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
|
requirements.txt
CHANGED
@@ -1,8 +1,4 @@
|
|
1 |
git+https://github.com/huggingface/diffusers.git
|
2 |
git+https://github.com/huggingface/transformers.git
|
3 |
--extra-index-url https://download.pytorch.org/whl/cu113
|
4 |
-
torch
|
5 |
-
numpy==1.24.3
|
6 |
-
pydantic
|
7 |
-
fastapi
|
8 |
-
gradio
|
|
|
1 |
git+https://github.com/huggingface/diffusers.git
|
2 |
git+https://github.com/huggingface/transformers.git
|
3 |
--extra-index-url https://download.pytorch.org/whl/cu113
|
4 |
+
torch >= 2.0
|
|
|
|
|
|
|
|