Update app.py

app.py CHANGED
@@ -153,6 +153,37 @@ def get_tango(prompt):
     print(result)
     return result
 
+def get_tango2(prompt):
+    try:
+        client = Client("declare-lab/tango2")
+    except:
+        raise gr.Error("Tango2 space API is not ready, please try again in few minutes ")
+
+    result = client.predict(
+        prompt,
+        100,
+        4,
+        api_name="/predict"
+    )
+    print(result)
+    return result
+
+def get_stable_audio_open(prompt):
+    try:
+        client = Client("fffiloni/Stable-Audio-Open-A10", hf_token=hf_token)
+    except:
+        raise gr.Error("Stable Audio Open space API is not ready, please try again in few minutes ")
+
+    result = client.predict(
+        prompt=prompt,
+        seconds_total=30,
+        steps=100,
+        cfg_scale=7,
+        api_name="/predict"
+    )
+    print(result)
+    return result
+
 def blend_vsfx(video_in, audio_result):
     audioClip = AudioFileClip(audio_result)
     print(f"AUD: {audioClip.duration}")
@@ -180,6 +211,10 @@ def infer(video_in, chosen_model):
         audio_result = get_audiogen(caption)
     elif chosen_model == "Tango" :
         audio_result = get_tango(caption)
+    elif chosen_model == "Tango 2" :
+        audio_result = get_tango2(caption)
+    elif chosen_model == "Stable Audio Open" :
+        audio_result = get_stable_audio_open(caption)
 
     final_res = blend_vsfx(video_in, audio_result)
     return gr.update(value=caption, interactive=True), gr.update(interactive=True), audio_result, final_res
@@ -195,6 +230,10 @@ def retry(edited_prompt, video_in, chosen_model):
         audio_result = get_audiogen(caption)
     elif chosen_model == "Tango" :
         audio_result = get_tango(caption)
+    elif chosen_model == "Tango 2" :
+        audio_result = get_tango2(caption)
+    elif chosen_model == "Stable Audio Open" :
+        audio_result = get_stable_audio_open(caption)
 
     final_res = blend_vsfx(video_in, audio_result)
     return audio_result, final_res
@@ -225,7 +264,7 @@ with gr.Blocks(css=css) as demo:
         with gr.Column():
             video_in = gr.Video(sources=["upload"], label="Video input")
             with gr.Row():
-                chosen_model = gr.Dropdown(label="Choose a model", choices=["MAGNet", "AudioLDM-2", "AudioGen", "Tango"], value="Tango")
+                chosen_model = gr.Dropdown(label="Choose a model", choices=["MAGNet", "AudioLDM-2", "AudioGen", "Tango", "Tango 2", "Stable Audio Open"], value="Tango")
                 submit_btn = gr.Button("Submit", scale=0)
         with gr.Column():
             caption_o = gr.Textbox(label="Scene caption", interactive=False)
@@ -267,14 +306,12 @@ with gr.Blocks(css=css) as demo:
         fn=infer,
         inputs=[video_in, chosen_model],
         outputs=[caption_o, retry_btn, audio_o, video_o],
-        concurrency_limit = 2
     )
 
     retry_btn.click(
         fn=retry,
        inputs=[caption_o, video_in, chosen_model],
        outputs=[audio_o, video_o],
-        concurrency_limit = 2
     )
 
 demo.queue(max_size=10).launch(show_api=False, debug=True, show_error=True)
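For context, the two helpers added in this commit follow the same gradio_client pattern as the existing get_tango: connect to a Space, call its /predict endpoint, and return the path of the generated audio file. Below is a minimal standalone sketch of the Tango 2 call; the prompt string is a made-up example, and the labels on the two numeric arguments are assumptions, since the commit passes them positionally:

    from gradio_client import Client

    # Connecting can fail while the Space is cold-starting, which is what
    # the try/except + gr.Error guard in the commit handles.
    client = Client("declare-lab/tango2")

    audio_path = client.predict(
        "rain drumming on a tin roof",  # hypothetical text prompt
        100,                            # assumed: number of diffusion steps
        4,                              # assumed: guidance scale
        api_name="/predict"
    )
    print(audio_path)  # local path to the generated audio returned by the Space

Note that get_stable_audio_open passes hf_token to Client, suggesting the fffiloni/Stable-Audio-Open-A10 Space is private or gated, so the token must be available in the app's environment. The commit also drops concurrency_limit = 2 from both .click() events; in Gradio 4.x those events then fall back to the queue's default concurrency, which can be set globally via demo.queue(default_concurrency_limit=...).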