Spaces:
				
			
			
	
			
			
		Paused
		
	
	
	
			
			
	
	
	
	
		
		
		Paused
		
	
		unknown
		
	committed on
		
		
					Commit 
							
							·
						
						be5b973
	
1
								Parent(s):
							
							12d8e68
								
app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -96,7 +96,6 @@ class FoleyController: | |
| 96 | 
             
                    time_detector_ckpt = osp.join(osp.join(self.model_dir, 'timestamp_detector.pth.tar'))
         | 
| 97 | 
             
                    time_detector      = VideoOnsetNet(False)
         | 
| 98 | 
             
                    self.time_detector, _   = torch_utils.load_model(time_detector_ckpt, time_detector, strict=True)
         | 
| 99 | 
            -
                    self.time_detector = self.time_detector
         | 
| 100 |  | 
| 101 | 
             
                    self.pipeline = build_foleycrafter()
         | 
| 102 | 
             
                    ckpt = torch.load(temporal_ckpt_path)
         | 
| @@ -204,81 +203,77 @@ class FoleyController: | |
| 204 | 
             
                    save_sample_path = os.path.join(self.savedir_sample, f"{name}.mp4")
         | 
| 205 |  | 
| 206 | 
             
                    return save_sample_path 
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 207 |  | 
| 208 | 
            -
             | 
| 209 | 
            -
             | 
| 210 | 
            -
             | 
| 211 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 212 | 
             
                    )
         | 
| 213 | 
            -
             | 
| 214 | 
            -
                        gr.Markdown(
         | 
| 215 | 
            -
                            "<div align='center'><font size='5'><a href='https://foleycrafter.github.io/'>Project Page</a>  "  # noqa
         | 
| 216 | 
            -
                            "<a href='https://arxiv.org/abs/xxxx.xxxxx/'>Paper</a>  "
         | 
| 217 | 
            -
                            "<a href='https://github.com/open-mmlab/foleycrafter'>Code</a>  "
         | 
| 218 | 
            -
                            "<a href='https://huggingface.co/spaces/ymzhang319/FoleyCrafter'>Demo</a> </font></div>"
         | 
| 219 | 
            -
                        )
         | 
| 220 | 
            -
             | 
| 221 | 
            -
                    with gr.Column(variant="panel"):
         | 
| 222 | 
            -
                        with gr.Row(equal_height=False):
         | 
| 223 | 
            -
                            with gr.Column():
         | 
| 224 | 
            -
                                with gr.Row():
         | 
| 225 | 
            -
                                    init_img = gr.Video(label="Input Video")
         | 
| 226 | 
            -
                                with gr.Row():
         | 
| 227 | 
            -
                                    prompt_textbox = gr.Textbox(value='', label="Prompt", lines=1)
         | 
| 228 | 
            -
                                with gr.Row():
         | 
| 229 | 
            -
                                    negative_prompt_textbox = gr.Textbox(value=N_PROMPT, label="Negative prompt", lines=1)
         | 
| 230 | 
            -
             | 
| 231 | 
            -
                                with gr.Row():
         | 
| 232 | 
            -
                                    sampler_dropdown = gr.Dropdown(
         | 
| 233 | 
            -
                                        label="Sampling method",
         | 
| 234 | 
            -
                                        choices=list(scheduler_dict.keys()),
         | 
| 235 | 
            -
                                        value=list(scheduler_dict.keys())[0],
         | 
| 236 | 
            -
                                    )
         | 
| 237 | 
            -
                                    sample_step_slider = gr.Slider(
         | 
| 238 | 
            -
                                        label="Sampling steps", value=25, minimum=10, maximum=100, step=1
         | 
| 239 | 
            -
                                    )
         | 
| 240 | 
            -
             | 
| 241 | 
            -
                                cfg_scale_slider = gr.Slider(label="CFG Scale", value=7.5, minimum=0, maximum=20)
         | 
| 242 | 
            -
                                ip_adapter_scale = gr.Slider(label="Visual Content Scale", value=1.0, minimum=0, maximum=1)
         | 
| 243 | 
            -
                                temporal_scale = gr.Slider(label="Temporal Align Scale", value=0., minimum=0., maximum=1.0)
         | 
| 244 | 
            -
             | 
| 245 | 
            -
                                with gr.Row():
         | 
| 246 | 
            -
                                    seed_textbox = gr.Textbox(label="Seed", value=42)
         | 
| 247 | 
            -
                                    seed_button = gr.Button(value="\U0001f3b2", elem_classes="toolbutton")
         | 
| 248 | 
            -
                                seed_button.click(fn=lambda x: random.randint(1, 1e8), outputs=[seed_textbox], queue=False)
         | 
| 249 | 
            -
             | 
| 250 | 
            -
                                generate_button = gr.Button(value="Generate", variant="primary")
         | 
| 251 | 
            -
             | 
| 252 | 
            -
                            result_video = gr.Video(label="Generated Audio", interactive=False)
         | 
| 253 | 
            -
             | 
| 254 | 
            -
                        generate_button.click(
         | 
| 255 | 
            -
                            fn=controller.foley,
         | 
| 256 | 
            -
                            inputs=[
         | 
| 257 | 
            -
                                init_img,
         | 
| 258 | 
            -
                                prompt_textbox,
         | 
| 259 | 
            -
                                negative_prompt_textbox,
         | 
| 260 | 
            -
                                ip_adapter_scale,
         | 
| 261 | 
            -
                                temporal_scale,
         | 
| 262 | 
            -
                                sampler_dropdown,
         | 
| 263 | 
            -
                                sample_step_slider,
         | 
| 264 | 
            -
                                cfg_scale_slider,
         | 
| 265 | 
            -
                                seed_textbox,
         | 
| 266 | 
            -
                            ],
         | 
| 267 | 
            -
                            outputs=[result_video],
         | 
| 268 | 
            -
                        )
         | 
| 269 | 
            -
             | 
| 270 | 
            -
                return demo
         | 
| 271 | 
            -
             | 
| 272 | 
            -
            if __name__ == "__main__": 
         | 
| 273 | 
            -
                controller = FoleyController()
         | 
| 274 | 
            -
                device = "cuda" if torch.cuda.is_available() else "cpu" 
         | 
| 275 | 
            -
             | 
| 276 | 
            -
                # move to gpu
         | 
| 277 | 
            -
                controller.time_detector = controller.time_detector.to(device)
         | 
| 278 | 
            -
                controller.pipeline = controller.pipeline.to(device)
         | 
| 279 | 
            -
                controller.vocoder = controller.vocoder.to(device)
         | 
| 280 | 
            -
                controller.image_encoder = controller.image_encoder.to(device)
         | 
| 281 | 
            -
             | 
| 282 | 
            -
                demo = ui()
         | 
| 283 | 
             
                demo.queue(10)
         | 
| 284 | 
             
                demo.launch(server_name=args.server_name, server_port=args.port, share=args.share, allowed_paths=["./foleycrafter.png"])
         | 
|  | |
| 96 | 
             
                    time_detector_ckpt = osp.join(osp.join(self.model_dir, 'timestamp_detector.pth.tar'))
         | 
| 97 | 
             
                    time_detector      = VideoOnsetNet(False)
         | 
| 98 | 
             
                    self.time_detector, _   = torch_utils.load_model(time_detector_ckpt, time_detector, strict=True)
         | 
|  | |
| 99 |  | 
| 100 | 
             
                    self.pipeline = build_foleycrafter()
         | 
| 101 | 
             
                    ckpt = torch.load(temporal_ckpt_path)
         | 
|  | |
| 203 | 
             
                    save_sample_path = os.path.join(self.savedir_sample, f"{name}.mp4")
         | 
| 204 |  | 
| 205 | 
             
                    return save_sample_path 
         | 
| 206 | 
            +
                
         | 
| 207 | 
            +
             | 
| 208 | 
            +
            controller = FoleyController()
         | 
| 209 | 
            +
            device = "cuda" if torch.cuda.is_available() else "cpu" 
         | 
| 210 | 
            +
             | 
| 211 | 
            +
            # move to gpu
         | 
| 212 | 
            +
            controller.time_detector = controller.time_detector.to(device)
         | 
| 213 | 
            +
            controller.pipeline = controller.pipeline.to(device)
         | 
| 214 | 
            +
            controller.vocoder = controller.vocoder.to(device)
         | 
| 215 | 
            +
            controller.image_encoder = controller.image_encoder.to(device)
         | 
| 216 | 
            +
             | 
| 217 | 
            +
            with gr.Blocks(css=css) as demo:
         | 
| 218 | 
            +
                gr.HTML(
         | 
| 219 | 
            +
                    '<h1 style="height: 136px; display: flex; align-items: center; justify-content: space-around;"><span style="height: 100%; width:136px;"><img src="file/foleycrafter.png" alt="logo" style="height: 100%; width:auto; object-fit: contain; margin: 0px 0px; padding: 0px 0px;"></span><strong style="font-size: 40px;">FoleyCrafter: Bring Silent Videos to Life with Lifelike and Synchronized Sounds</strong></h1>'
         | 
| 220 | 
            +
                )
         | 
| 221 | 
            +
                with gr.Row():
         | 
| 222 | 
            +
                    gr.Markdown(
         | 
| 223 | 
            +
                        "<div align='center'><font size='5'><a href='https://foleycrafter.github.io/'>Project Page</a>  "  # noqa
         | 
| 224 | 
            +
                        "<a href='https://arxiv.org/abs/xxxx.xxxxx/'>Paper</a>  "
         | 
| 225 | 
            +
                        "<a href='https://github.com/open-mmlab/foleycrafter'>Code</a>  "
         | 
| 226 | 
            +
                        "<a href='https://huggingface.co/spaces/ymzhang319/FoleyCrafter'>Demo</a> </font></div>"
         | 
| 227 | 
            +
                    )
         | 
| 228 |  | 
| 229 | 
            +
                with gr.Column(variant="panel"):
         | 
| 230 | 
            +
                    with gr.Row(equal_height=False):
         | 
| 231 | 
            +
                        with gr.Column():
         | 
| 232 | 
            +
                            with gr.Row():
         | 
| 233 | 
            +
                                init_img = gr.Video(label="Input Video")
         | 
| 234 | 
            +
                            with gr.Row():
         | 
| 235 | 
            +
                                prompt_textbox = gr.Textbox(value='', label="Prompt", lines=1)
         | 
| 236 | 
            +
                            with gr.Row():
         | 
| 237 | 
            +
                                negative_prompt_textbox = gr.Textbox(value=N_PROMPT, label="Negative prompt", lines=1)
         | 
| 238 | 
            +
             | 
| 239 | 
            +
                            with gr.Row():
         | 
| 240 | 
            +
                                sampler_dropdown = gr.Dropdown(
         | 
| 241 | 
            +
                                    label="Sampling method",
         | 
| 242 | 
            +
                                    choices=list(scheduler_dict.keys()),
         | 
| 243 | 
            +
                                    value=list(scheduler_dict.keys())[0],
         | 
| 244 | 
            +
                                )
         | 
| 245 | 
            +
                                sample_step_slider = gr.Slider(
         | 
| 246 | 
            +
                                    label="Sampling steps", value=25, minimum=10, maximum=100, step=1
         | 
| 247 | 
            +
                                )
         | 
| 248 | 
            +
             | 
| 249 | 
            +
                            cfg_scale_slider = gr.Slider(label="CFG Scale", value=7.5, minimum=0, maximum=20)
         | 
| 250 | 
            +
                            ip_adapter_scale = gr.Slider(label="Visual Content Scale", value=1.0, minimum=0, maximum=1)
         | 
| 251 | 
            +
                            temporal_scale = gr.Slider(label="Temporal Align Scale", value=0., minimum=0., maximum=1.0)
         | 
| 252 | 
            +
             | 
| 253 | 
            +
                            with gr.Row():
         | 
| 254 | 
            +
                                seed_textbox = gr.Textbox(label="Seed", value=42)
         | 
| 255 | 
            +
                                seed_button = gr.Button(value="\U0001f3b2", elem_classes="toolbutton")
         | 
| 256 | 
            +
                            seed_button.click(fn=lambda x: random.randint(1, 1e8), outputs=[seed_textbox], queue=False)
         | 
| 257 | 
            +
             | 
| 258 | 
            +
                            generate_button = gr.Button(value="Generate", variant="primary")
         | 
| 259 | 
            +
             | 
| 260 | 
            +
                        result_video = gr.Video(label="Generated Audio", interactive=False)
         | 
| 261 | 
            +
             | 
| 262 | 
            +
                    generate_button.click(
         | 
| 263 | 
            +
                        fn=controller.foley,
         | 
| 264 | 
            +
                        inputs=[
         | 
| 265 | 
            +
                            init_img,
         | 
| 266 | 
            +
                            prompt_textbox,
         | 
| 267 | 
            +
                            negative_prompt_textbox,
         | 
| 268 | 
            +
                            ip_adapter_scale,
         | 
| 269 | 
            +
                            temporal_scale,
         | 
| 270 | 
            +
                            sampler_dropdown,
         | 
| 271 | 
            +
                            sample_step_slider,
         | 
| 272 | 
            +
                            cfg_scale_slider,
         | 
| 273 | 
            +
                            seed_textbox,
         | 
| 274 | 
            +
                        ],
         | 
| 275 | 
            +
                        outputs=[result_video],
         | 
| 276 | 
             
                    )
         | 
| 277 | 
            +
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 278 | 
             
                demo.queue(10)
         | 
| 279 | 
             
                demo.launch(server_name=args.server_name, server_port=args.port, share=args.share, allowed_paths=["./foleycrafter.png"])
         |