Spaces:
Paused
Paused
unknown
committed on
Commit
·
be5b973
1
Parent(s):
12d8e68
app.py
Browse files
app.py
CHANGED
|
@@ -96,7 +96,6 @@ class FoleyController:
|
|
| 96 |
time_detector_ckpt = osp.join(osp.join(self.model_dir, 'timestamp_detector.pth.tar'))
|
| 97 |
time_detector = VideoOnsetNet(False)
|
| 98 |
self.time_detector, _ = torch_utils.load_model(time_detector_ckpt, time_detector, strict=True)
|
| 99 |
-
self.time_detector = self.time_detector
|
| 100 |
|
| 101 |
self.pipeline = build_foleycrafter()
|
| 102 |
ckpt = torch.load(temporal_ckpt_path)
|
|
@@ -204,81 +203,77 @@ class FoleyController:
|
|
| 204 |
save_sample_path = os.path.join(self.savedir_sample, f"{name}.mp4")
|
| 205 |
|
| 206 |
return save_sample_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
)
|
| 213 |
-
|
| 214 |
-
gr.Markdown(
|
| 215 |
-
"<div align='center'><font size='5'><a href='https://foleycrafter.github.io/'>Project Page</a>  " # noqa
|
| 216 |
-
"<a href='https://arxiv.org/abs/xxxx.xxxxx/'>Paper</a>  "
|
| 217 |
-
"<a href='https://github.com/open-mmlab/foleycrafter'>Code</a>  "
|
| 218 |
-
"<a href='https://huggingface.co/spaces/ymzhang319/FoleyCrafter'>Demo</a> </font></div>"
|
| 219 |
-
)
|
| 220 |
-
|
| 221 |
-
with gr.Column(variant="panel"):
|
| 222 |
-
with gr.Row(equal_height=False):
|
| 223 |
-
with gr.Column():
|
| 224 |
-
with gr.Row():
|
| 225 |
-
init_img = gr.Video(label="Input Video")
|
| 226 |
-
with gr.Row():
|
| 227 |
-
prompt_textbox = gr.Textbox(value='', label="Prompt", lines=1)
|
| 228 |
-
with gr.Row():
|
| 229 |
-
negative_prompt_textbox = gr.Textbox(value=N_PROMPT, label="Negative prompt", lines=1)
|
| 230 |
-
|
| 231 |
-
with gr.Row():
|
| 232 |
-
sampler_dropdown = gr.Dropdown(
|
| 233 |
-
label="Sampling method",
|
| 234 |
-
choices=list(scheduler_dict.keys()),
|
| 235 |
-
value=list(scheduler_dict.keys())[0],
|
| 236 |
-
)
|
| 237 |
-
sample_step_slider = gr.Slider(
|
| 238 |
-
label="Sampling steps", value=25, minimum=10, maximum=100, step=1
|
| 239 |
-
)
|
| 240 |
-
|
| 241 |
-
cfg_scale_slider = gr.Slider(label="CFG Scale", value=7.5, minimum=0, maximum=20)
|
| 242 |
-
ip_adapter_scale = gr.Slider(label="Visual Content Scale", value=1.0, minimum=0, maximum=1)
|
| 243 |
-
temporal_scale = gr.Slider(label="Temporal Align Scale", value=0., minimum=0., maximum=1.0)
|
| 244 |
-
|
| 245 |
-
with gr.Row():
|
| 246 |
-
seed_textbox = gr.Textbox(label="Seed", value=42)
|
| 247 |
-
seed_button = gr.Button(value="\U0001f3b2", elem_classes="toolbutton")
|
| 248 |
-
seed_button.click(fn=lambda x: random.randint(1, 1e8), outputs=[seed_textbox], queue=False)
|
| 249 |
-
|
| 250 |
-
generate_button = gr.Button(value="Generate", variant="primary")
|
| 251 |
-
|
| 252 |
-
result_video = gr.Video(label="Generated Audio", interactive=False)
|
| 253 |
-
|
| 254 |
-
generate_button.click(
|
| 255 |
-
fn=controller.foley,
|
| 256 |
-
inputs=[
|
| 257 |
-
init_img,
|
| 258 |
-
prompt_textbox,
|
| 259 |
-
negative_prompt_textbox,
|
| 260 |
-
ip_adapter_scale,
|
| 261 |
-
temporal_scale,
|
| 262 |
-
sampler_dropdown,
|
| 263 |
-
sample_step_slider,
|
| 264 |
-
cfg_scale_slider,
|
| 265 |
-
seed_textbox,
|
| 266 |
-
],
|
| 267 |
-
outputs=[result_video],
|
| 268 |
-
)
|
| 269 |
-
|
| 270 |
-
return demo
|
| 271 |
-
|
| 272 |
-
if __name__ == "__main__":
|
| 273 |
-
controller = FoleyController()
|
| 274 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 275 |
-
|
| 276 |
-
# move to gpu
|
| 277 |
-
controller.time_detector = controller.time_detector.to(device)
|
| 278 |
-
controller.pipeline = controller.pipeline.to(device)
|
| 279 |
-
controller.vocoder = controller.vocoder.to(device)
|
| 280 |
-
controller.image_encoder = controller.image_encoder.to(device)
|
| 281 |
-
|
| 282 |
-
demo = ui()
|
| 283 |
demo.queue(10)
|
| 284 |
demo.launch(server_name=args.server_name, server_port=args.port, share=args.share, allowed_paths=["./foleycrafter.png"])
|
|
|
|
| 96 |
time_detector_ckpt = osp.join(osp.join(self.model_dir, 'timestamp_detector.pth.tar'))
|
| 97 |
time_detector = VideoOnsetNet(False)
|
| 98 |
self.time_detector, _ = torch_utils.load_model(time_detector_ckpt, time_detector, strict=True)
|
|
|
|
| 99 |
|
| 100 |
self.pipeline = build_foleycrafter()
|
| 101 |
ckpt = torch.load(temporal_ckpt_path)
|
|
|
|
| 203 |
save_sample_path = os.path.join(self.savedir_sample, f"{name}.mp4")
|
| 204 |
|
| 205 |
return save_sample_path
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
controller = FoleyController()
|
| 209 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 210 |
+
|
| 211 |
+
# move to gpu
|
| 212 |
+
controller.time_detector = controller.time_detector.to(device)
|
| 213 |
+
controller.pipeline = controller.pipeline.to(device)
|
| 214 |
+
controller.vocoder = controller.vocoder.to(device)
|
| 215 |
+
controller.image_encoder = controller.image_encoder.to(device)
|
| 216 |
+
|
| 217 |
+
with gr.Blocks(css=css) as demo:
|
| 218 |
+
gr.HTML(
|
| 219 |
+
'<h1 style="height: 136px; display: flex; align-items: center; justify-content: space-around;"><span style="height: 100%; width:136px;"><img src="file/foleycrafter.png" alt="logo" style="height: 100%; width:auto; object-fit: contain; margin: 0px 0px; padding: 0px 0px;"></span><strong style="font-size: 40px;">FoleyCrafter: Bring Silent Videos to Life with Lifelike and Synchronized Sounds</strong></h1>'
|
| 220 |
+
)
|
| 221 |
+
with gr.Row():
|
| 222 |
+
gr.Markdown(
|
| 223 |
+
"<div align='center'><font size='5'><a href='https://foleycrafter.github.io/'>Project Page</a>  " # noqa
|
| 224 |
+
"<a href='https://arxiv.org/abs/xxxx.xxxxx/'>Paper</a>  "
|
| 225 |
+
"<a href='https://github.com/open-mmlab/foleycrafter'>Code</a>  "
|
| 226 |
+
"<a href='https://huggingface.co/spaces/ymzhang319/FoleyCrafter'>Demo</a> </font></div>"
|
| 227 |
+
)
|
| 228 |
|
| 229 |
+
with gr.Column(variant="panel"):
|
| 230 |
+
with gr.Row(equal_height=False):
|
| 231 |
+
with gr.Column():
|
| 232 |
+
with gr.Row():
|
| 233 |
+
init_img = gr.Video(label="Input Video")
|
| 234 |
+
with gr.Row():
|
| 235 |
+
prompt_textbox = gr.Textbox(value='', label="Prompt", lines=1)
|
| 236 |
+
with gr.Row():
|
| 237 |
+
negative_prompt_textbox = gr.Textbox(value=N_PROMPT, label="Negative prompt", lines=1)
|
| 238 |
+
|
| 239 |
+
with gr.Row():
|
| 240 |
+
sampler_dropdown = gr.Dropdown(
|
| 241 |
+
label="Sampling method",
|
| 242 |
+
choices=list(scheduler_dict.keys()),
|
| 243 |
+
value=list(scheduler_dict.keys())[0],
|
| 244 |
+
)
|
| 245 |
+
sample_step_slider = gr.Slider(
|
| 246 |
+
label="Sampling steps", value=25, minimum=10, maximum=100, step=1
|
| 247 |
+
)
|
| 248 |
+
|
| 249 |
+
cfg_scale_slider = gr.Slider(label="CFG Scale", value=7.5, minimum=0, maximum=20)
|
| 250 |
+
ip_adapter_scale = gr.Slider(label="Visual Content Scale", value=1.0, minimum=0, maximum=1)
|
| 251 |
+
temporal_scale = gr.Slider(label="Temporal Align Scale", value=0., minimum=0., maximum=1.0)
|
| 252 |
+
|
| 253 |
+
with gr.Row():
|
| 254 |
+
seed_textbox = gr.Textbox(label="Seed", value=42)
|
| 255 |
+
seed_button = gr.Button(value="\U0001f3b2", elem_classes="toolbutton")
|
| 256 |
+
seed_button.click(fn=lambda x: random.randint(1, 1e8), outputs=[seed_textbox], queue=False)
|
| 257 |
+
|
| 258 |
+
generate_button = gr.Button(value="Generate", variant="primary")
|
| 259 |
+
|
| 260 |
+
result_video = gr.Video(label="Generated Audio", interactive=False)
|
| 261 |
+
|
| 262 |
+
generate_button.click(
|
| 263 |
+
fn=controller.foley,
|
| 264 |
+
inputs=[
|
| 265 |
+
init_img,
|
| 266 |
+
prompt_textbox,
|
| 267 |
+
negative_prompt_textbox,
|
| 268 |
+
ip_adapter_scale,
|
| 269 |
+
temporal_scale,
|
| 270 |
+
sampler_dropdown,
|
| 271 |
+
sample_step_slider,
|
| 272 |
+
cfg_scale_slider,
|
| 273 |
+
seed_textbox,
|
| 274 |
+
],
|
| 275 |
+
outputs=[result_video],
|
| 276 |
)
|
| 277 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
demo.queue(10)
|
| 279 |
demo.launch(server_name=args.server_name, server_port=args.port, share=args.share, allowed_paths=["./foleycrafter.png"])
|