add few examples and share to community button (#7)
Commit ebf12568b1845b59385e6653033b48efe332ea62
Co-authored-by: Radamés Ajna <[email protected]>
- app.py +62 -26
- share_btn.py +34 -20
app.py CHANGED

@@ -30,7 +30,7 @@ def text2audio(text, duration, guidance_scale, random_seed, n_candidates):
     # waveform = [(16000, np.random.randn(16000)), (16000, np.random.randn(16000))]
     if(len(waveform) == 1):
         waveform = waveform[0]
-    return waveform
+    return waveform
 
 # iface = gr.Interface(fn=text2audio, inputs=[
 # gr.Textbox(value="A man is speaking in a huge room", max_lines=1),
@@ -42,17 +42,22 @@ def text2audio(text, duration, guidance_scale, random_seed, n_candidates):
 # )
 # iface.launch(share=True)
 
+
 css = """
+a {
+    color: inherit;
+    text-decoration: underline;
+}
 .gradio-container {
     font-family: 'IBM Plex Sans', sans-serif;
 }
 .gr-button {
     color: white;
-    border-color:
-    background:
+    border-color: #000000;
+    background: #000000;
 }
 input[type='range'] {
-    accent-color:
+    accent-color: #000000;
 }
 .dark input[type='range'] {
     accent-color: #dfdfdf;
@@ -98,7 +103,6 @@ css = """
     border-radius: 14px !important;
 }
 #advanced-options {
-    display: none;
     margin-bottom: 20px;
 }
 .footer {
@@ -125,6 +129,12 @@ css = """
     font-weight: bold;
     font-size: 115%;
 }
+#container-advanced-btns{
+    display: flex;
+    flex-wrap: wrap;
+    justify-content: space-between;
+    align-items: center;
+}
 .animate-spin {
     animation: spin 1s linear infinite;
 }
@@ -154,16 +164,20 @@ css = """
 #share-btn-container .wrap {
     display: none !important;
 }
-
 .gr-form{
     flex: 1 1 50%; border-top-right-radius: 0; border-bottom-right-radius: 0;
 }
 #prompt-container{
     gap: 0;
 }
-#
-
-
+#generated_id{
+    min-height: 700px
+}
+#setting_id{
+    margin-bottom: 12px;
+    text-align: center;
+    font-weight: 900;
+}
 """
 iface = gr.Blocks(css=css)
 
@@ -188,17 +202,21 @@ with iface:
             </p>
         </div>
         """
-    )
+    )
     gr.HTML("""
-        <
-
-
-        <
-        <
+        <h1 style="font-weight: 900; margin-bottom: 7px;">
+            AudioLDM: Text-to-Audio Generation with Latent Diffusion Models
+        </h1>
+        <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
+        <br/>
+        <a href="https://huggingface.co/spaces/haoheliu/audioldm-text-to-audio-generation?duplicate=true">
+        <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
+        <p/>
+    """)
     with gr.Group():
        with gr.Box():
            ############# Input
-            textbox = gr.Textbox(value="A hammer is hitting a wooden surface", max_lines=1, label="Input your text here. Please ensure it is descriptive and of moderate length.")
+            textbox = gr.Textbox(value="A hammer is hitting a wooden surface", max_lines=1, label="Input your text here. Please ensure it is descriptive and of moderate length.", elem_id="prompt-in")
 
            with gr.Accordion("Click to modify detailed configurations", open=False):
                seed = gr.Number(value=42, label="Change this value (any integer number) will lead to a different generation result.")
@@ -207,7 +225,7 @@ with iface:
                n_candidates = gr.Slider(1, 5, value=3, step=1, label="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation")
            ############# Output
            # outputs=gr.Audio(label="Output", type="numpy")
-            outputs=gr.Video(label="Output")
+            outputs=gr.Video(label="Output", elem_id="output-video")
 
            # with gr.Group(elem_id="container-advanced-btns"):
            #     # advanced_button = gr.Button("Advanced options", elem_id="advanced-btn")
@@ -216,10 +234,17 @@ with iface:
            #     loading_icon = gr.HTML(loading_icon_html, visible=False)
            #     share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)
            # outputs=[gr.Audio(label="Output", type="numpy"), gr.Audio(label="Output", type="numpy")]
-
            btn = gr.Button("Submit").style(full_width=True)
-
-
+
+        with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
+            community_icon = gr.HTML(community_icon_html)
+            loading_icon = gr.HTML(loading_icon_html)
+            share_button = gr.Button("Share to community", elem_id="share-btn")
+
+        btn.click(text2audio, inputs=[
+            textbox, duration, guidance_scale, seed, n_candidates], outputs=[outputs, share_group])
+
+        share_button.click(None, [], [], _js=share_js)
    gr.HTML('''
        <div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
            <p>Follow the latest update of AudioLDM on our<a href="https://github.com/haoheliu/AudioLDM" style="text-decoration: underline;" target="_blank"> Github repo</a>
@@ -229,17 +254,28 @@ with iface:
            <br>
        </div>
    ''')
-
+    gr.Examples([
+        ["A hammer is hitting a wooden surface", 5, 2.5, 45, 3],
+        ["Peaceful and calming ambient music with singing bowl and other instruments.", 5, 2.5, 45, 3],
+        ["A man is speaking in a small room.", 5, 2.5, 45, 3],
+        ["A female is speaking followed by footstep sound", 5, 2.5, 45, 3],
+        ["Wooden table tapping sound followed by water pouring sound.", 5, 2.5, 45, 3],
+        ],
+        fn=text2audio,
+        inputs=[textbox, duration, guidance_scale, seed, n_candidates],
+        outputs=[outputs],
+        cache_examples=True,
+    )
    with gr.Accordion("Additional information", open=False):
        gr.HTML(
-
+            """
            <div class="acknowledgments">
                <p> We build the model with data from <a href="http://research.google.com/audioset/">AudioSet</a>, <a href="https://freesound.org/">Freesound</a> and <a href="https://sound-effects.bbcrewind.co.uk/">BBC Sound Effect library</a>. We share this demo based on the <a href="https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/375954/Research.pdf">UK copyright exception</a> of data for academic research. </p>
            </div>
            """
-
+        )
        # <p>This demo is strictly for research demo purpose only. For commercial use please <a href="[email protected]">contact us</a>.</p>
-
-iface.queue(concurrency_count
+
+iface.queue(concurrency_count=3)
 iface.launch(debug=True)
-# iface.launch(debug=True, share=True)
+# iface.launch(debug=True, share=True)
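With this wiring, btn.click(text2audio, inputs=[textbox, duration, guidance_scale, seed, n_candidates], outputs=[outputs, share_group]) expects one value per output component: the generated video for the gr.Video element and a visibility update for the hidden share-button group. The hunk above still shows "return waveform", so the exact return shape of the updated text2audio is not visible in this diff; the sketch below only illustrates the usual Gradio pattern with a stubbed model call (the video_path placeholder and the slider ranges are assumptions, not part of the commit):

    import gradio as gr

    def text2audio(text, duration, guidance_scale, random_seed, n_candidates):
        # Stub in place of the real AudioLDM pipeline; the Space renders the
        # generated audio into an .mp4 and hands its path to gr.Video.
        video_path = "output.mp4"  # assumed placeholder, not the real call
        # One value per entry in outputs=[outputs, share_group]: the video,
        # plus an update that reveals the hidden share-button group.
        return video_path, gr.update(visible=True)

    with gr.Blocks() as demo:
        textbox = gr.Textbox(value="A hammer is hitting a wooden surface", max_lines=1, elem_id="prompt-in")
        duration = gr.Slider(2.5, 10, value=5, step=2.5, label="Duration (s)")         # range assumed
        guidance_scale = gr.Slider(0, 5, value=2.5, step=0.5, label="Guidance scale")  # range assumed
        seed = gr.Number(value=42, label="Seed")
        n_candidates = gr.Slider(1, 5, value=3, step=1, label="Candidates")
        outputs = gr.Video(label="Output", elem_id="output-video")
        with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
            share_button = gr.Button("Share to community", elem_id="share-btn")
        btn = gr.Button("Submit")
        btn.click(text2audio,
                  inputs=[textbox, duration, guidance_scale, seed, n_candidates],
                  outputs=[outputs, share_group])

The added gr.Examples(..., cache_examples=True) block reuses the same function and input list: each example row supplies the five input values in order, and Gradio runs text2audio on them at startup so that clicking an example replays the cached result instead of recomputing it.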
share_btn.py CHANGED

@@ -22,34 +22,48 @@ share_js = """async () => {
         const url = await response.text();
         return url;
     }
-
-
-
+    async function getInputVideoFile(videoEl){
+        const res = await fetch(videoEl.src);
+        const blob = await res.blob();
+        const videoId = Date.now() % 200;
+        const fileName = `sd-perception-${{videoId}}.mp4`;
+        return new File([blob], fileName, { type: 'video/mp4' });
+    }
+
+    async function audioToBase64(audioFile) {
+        return new Promise((resolve, reject) => {
+            let reader = new FileReader();
+            reader.readAsDataURL(audioFile);
+            reader.onload = () => resolve(reader.result);
+            reader.onerror = error => reject(error);
+
+        });
+    }
+    const gradioEl = document.querySelector("gradio-app").shadowRoot || document.querySelector('body > gradio-app');
+    const inputPromptEl = gradioEl.querySelector('#prompt-in input').value;
+    const outputVideoEl = gradioEl.querySelector('#output-video video');
+
+    let titleTxt = `Text-to-Audio: ${inputPromptEl}`;
+
     const shareBtnEl = gradioEl.querySelector('#share-btn');
     const shareIconEl = gradioEl.querySelector('#share-btn-share-icon');
     const loadingIconEl = gradioEl.querySelector('#share-btn-loading-icon');
-    if(!
+    if(!outputVideoEl){
        return;
    };
    shareBtnEl.style.pointerEvents = 'none';
    shareIconEl.style.display = 'none';
    loadingIconEl.style.removeProperty('display');
-    const
-
-
-
-
-
-
-
-    );
-    const urls = await Promise.all(files.map((f) => uploadFile(f)));
-    const htmlImgs = urls.map(url => `<img src='${url}' width='400' height='400'>`);
-    const descriptionMd = `<div style='display: flex; flex-wrap: wrap; column-gap: 0.75rem;'>
-${htmlImgs.join(`\n`)}
-    </div>`;
+    const outputVideo = await getInputVideoFile(outputVideoEl);
+    const urlOutputVideo = await uploadFile(outputVideo);
+
+    const descriptionMd = `
+##### ${inputPromptEl}
+
+${urlOutputVideo}
+`;
    const params = new URLSearchParams({
-        title:
+        title: titleTxt,
        description: descriptionMd,
    });
    const paramsStr = params.toString();
@@ -57,4 +71,4 @@ ${htmlImgs.join(`\n`)}
    shareBtnEl.style.removeProperty('pointer-events');
    shareIconEl.style.removeProperty('display');
    loadingIconEl.style.display = 'none';
-}"""
+}"""
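share_btn.py stays a plain Python module whose only export is the share_js string; app.py registers it as a purely client-side handler via share_button.click(None, [], [], _js=share_js), so no Python code runs when the button is pressed. A minimal sketch of that pattern, using a trivial stand-in script rather than the real share_js:

    import gradio as gr

    # Stand-in for share_js: an async arrow function that runs in the browser.
    # The real script reads the prompt and the output video from the DOM,
    # uploads the video, and opens a pre-filled community discussion.
    demo_js = """async () => {
        const gradioEl = document.querySelector("gradio-app").shadowRoot
            || document.querySelector('body > gradio-app');
        console.log(gradioEl.querySelector('#prompt-in input').value);
    }"""

    with gr.Blocks() as demo:
        prompt = gr.Textbox(elem_id="prompt-in")
        share_button = gr.Button("Share to community", elem_id="share-btn")
        # fn=None with empty inputs/outputs: only the _js callback executes.
        share_button.click(None, [], [], _js=demo_js)

One detail worth a second look in the added getInputVideoFile: the file name is built with ${{videoId}} inside an ordinary JavaScript template literal, where the doubled braces evaluate an object literal rather than the number, so the name comes out as sd-perception-[object Object].mp4; a single ${videoId} is presumably what was intended.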