Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	add error description
Browse files- .gitignore +2 -1
- app.py +22 -2
- attn_ctrl.py +1 -1
- models/unet/motion_embeddings.py +3 -3
- requirements.txt +4 -1
    	
        .gitignore
    CHANGED
    
    | @@ -1,2 +1,3 @@ | |
| 1 | 
             
            results/*
         | 
| 2 | 
            -
            results_all/*
         | 
|  | 
|  | |
| 1 | 
             
            results/*
         | 
| 2 | 
            +
            results_all/*
         | 
| 3 | 
            +
            *.pt
         | 
    	
        app.py
    CHANGED
    
    | @@ -17,6 +17,12 @@ def inference_app( | |
| 17 | 
             
                    seed,
         | 
| 18 | 
             
                    inference_steps):
         | 
| 19 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 20 | 
             
                return inference_main(
         | 
| 21 | 
             
                    embedding_dir=embedding_dir,
         | 
| 22 | 
             
                    prompt=prompt, 
         | 
| @@ -202,7 +208,6 @@ if __name__ == "__main__": | |
| 202 | 
             
                        </a>
         | 
| 203 | 
             
                        <br>
         | 
| 204 | 
             
                        <strong>Please consider starring <span style="color: orange">★</span> the <a href="https://github.com/EnVision-Research/MotionInversion" target="_blank" rel="noopener noreferrer">GitHub Repo</a> if you find this useful!</strong>
         | 
| 205 | 
            -
                        </p>
         | 
| 206 | 
             
                    """
         | 
| 207 | 
             
                    )
         | 
| 208 | 
             
                    with gr.Tabs(elem_classes=["tabs"]):
         | 
| @@ -219,12 +224,27 @@ if __name__ == "__main__": | |
| 219 | 
             
                                output_video = gr.Video(label="Output Video")
         | 
| 220 | 
             
                                generated_prompt = gr.Textbox(label="Generated Prompt")
         | 
| 221 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 222 | 
             
                        with gr.Accordion("Advanced Settings", open=False):
         | 
| 223 | 
             
                            with gr.Row():
         | 
| 224 | 
             
                                inference_steps = gr.Number(label="Inference Steps", value=30)
         | 
| 225 | 
             
                                motion_type = gr.Dropdown(label="Motion Type", choices=["camera", "object"], value="object")
         | 
| 226 |  | 
| 227 | 
            -
             | 
| 228 |  | 
| 229 | 
             
                    checkpoint_dropdown.change(fn=update_preview_video, inputs=checkpoint_dropdown, outputs=preview_video)
         | 
| 230 | 
             
                    inference_button.click(inference_model, inputs=[text_input, checkpoint_dropdown,inference_steps,motion_type, seed], outputs=output_video)
         | 
|  | |
| 17 | 
             
                    seed,
         | 
| 18 | 
             
                    inference_steps):
         | 
| 19 |  | 
| 20 | 
            +
                print('inference info:')
         | 
| 21 | 
            +
                print('ref video:',embedding_dir)
         | 
| 22 | 
            +
                print('prompt:',prompt)
         | 
| 23 | 
            +
                print('motion type:',motion_type)
         | 
| 24 | 
            +
                print('infer steps:',inference_steps)
         | 
| 25 | 
            +
             | 
| 26 | 
             
                return inference_main(
         | 
| 27 | 
             
                    embedding_dir=embedding_dir,
         | 
| 28 | 
             
                    prompt=prompt, 
         | 
|  | |
| 208 | 
             
                        </a>
         | 
| 209 | 
             
                        <br>
         | 
| 210 | 
             
                        <strong>Please consider starring <span style="color: orange">★</span> the <a href="https://github.com/EnVision-Research/MotionInversion" target="_blank" rel="noopener noreferrer">GitHub Repo</a> if you find this useful!</strong>
         | 
|  | |
| 211 | 
             
                    """
         | 
| 212 | 
             
                    )
         | 
| 213 | 
             
                    with gr.Tabs(elem_classes=["tabs"]):
         | 
|  | |
| 224 | 
             
                                output_video = gr.Video(label="Output Video")
         | 
| 225 | 
             
                                generated_prompt = gr.Textbox(label="Generated Prompt")
         | 
| 226 |  | 
| 227 | 
            +
                                with gr.Accordion('Encounter Errors', open=False):
         | 
| 228 | 
            +
                                    gr.Markdown('''
         | 
| 229 | 
            +
                                                <strong>Inference for one video typically takes 45~50s on ZeroGPU</strong>.
         | 
| 230 | 
            +
             | 
| 231 | 
            +
                                                <br>
         | 
| 232 | 
            +
                                                <strong>You have exceeded your GPU quota</strong>: A limitation set by HF. Retry in an hour.           
         | 
| 233 | 
            +
                                                <br>
         | 
| 234 | 
            +
                                                <strong>GPU task aborted</strong>: Possibly caused by ZeroGPU being used by too many people, so that the inference time exceeds the time limit. You may try again later, or clone the repo and run it locally. 
         | 
| 235 | 
            +
                                                <br>
         | 
| 236 | 
            +
                                                
         | 
| 237 | 
            +
                                                If any other issues occur, please feel free to contact us through the community or by email ([email protected]). We will try our best to help you :)
         | 
| 238 | 
            +
             | 
| 239 | 
            +
                                                ''')
         | 
| 240 | 
            +
             | 
| 241 | 
            +
             | 
| 242 | 
             
                        with gr.Accordion("Advanced Settings", open=False):
         | 
| 243 | 
             
                            with gr.Row():
         | 
| 244 | 
             
                                inference_steps = gr.Number(label="Inference Steps", value=30)
         | 
| 245 | 
             
                                motion_type = gr.Dropdown(label="Motion Type", choices=["camera", "object"], value="object")
         | 
| 246 |  | 
| 247 | 
            +
                    gr.Examples(examples=examples_inference,inputs=[preview_video,text_input,motion_type,checkpoint_dropdown])
         | 
| 248 |  | 
| 249 | 
             
                    checkpoint_dropdown.change(fn=update_preview_video, inputs=checkpoint_dropdown, outputs=preview_video)
         | 
| 250 | 
             
                    inference_button.click(inference_model, inputs=[text_input, checkpoint_dropdown,inference_steps,motion_type, seed], outputs=output_video)
         | 
    	
        attn_ctrl.py
    CHANGED
    
    | @@ -245,7 +245,7 @@ def register_attention_control(unet, config=None): | |
| 245 | 
             
                            additional_info['removeMFromV'] = config.strategy.get('removeMFromV', False)
         | 
| 246 | 
             
                            additional_info['vSpatial_frameSubtraction'] = config.strategy.get('vSpatial_frameSubtraction', False)
         | 
| 247 | 
             
                            net_.forward = temp_attn_forward(net_, additional_info)
         | 
| 248 | 
            -
                            print('register Motion V embedding at ', block_name)
         | 
| 249 | 
             
                            return count + 1
         | 
| 250 | 
             
                        else:
         | 
| 251 | 
             
                            return count
         | 
|  | |
| 245 | 
             
                            additional_info['removeMFromV'] = config.strategy.get('removeMFromV', False)
         | 
| 246 | 
             
                            additional_info['vSpatial_frameSubtraction'] = config.strategy.get('vSpatial_frameSubtraction', False)
         | 
| 247 | 
             
                            net_.forward = temp_attn_forward(net_, additional_info)
         | 
| 248 | 
            +
                            # print('register Motion V embedding at ', block_name)
         | 
| 249 | 
             
                            return count + 1
         | 
| 250 | 
             
                        else:
         | 
| 251 | 
             
                            return count
         | 
    	
        models/unet/motion_embeddings.py
    CHANGED
    
    | @@ -8,7 +8,7 @@ class MotionEmbedding(nn.Module): | |
| 8 | 
             
                def __init__(self, embed_dim: int = None, max_seq_length: int = 32, wh: int = 1):
         | 
| 9 | 
             
                    super().__init__()
         | 
| 10 | 
             
                    self.embed = nn.Parameter(torch.zeros(wh, max_seq_length, embed_dim))
         | 
| 11 | 
            -
                    print('register spatial motion embedding with', wh)
         | 
| 12 |  | 
| 13 | 
             
                    self.scale = 1.0
         | 
| 14 | 
             
                    self.trained_length = -1
         | 
| @@ -216,8 +216,8 @@ def inject_motion_embeddings(model, combinations=None, config=None): | |
| 216 | 
             
                        setattr(parent_module, module_name, new_module)
         | 
| 217 |  | 
| 218 | 
             
                inject_layers = list(set(inject_layers))
         | 
| 219 | 
            -
                for name in inject_layers:
         | 
| 220 | 
            -
                    print(f"Injecting motion embedding at {name}")
         | 
| 221 |  | 
| 222 | 
             
                parameters_list = []
         | 
| 223 | 
             
                for name, para in model.named_parameters():
         | 
|  | |
| 8 | 
             
                def __init__(self, embed_dim: int = None, max_seq_length: int = 32, wh: int = 1):
         | 
| 9 | 
             
                    super().__init__()
         | 
| 10 | 
             
                    self.embed = nn.Parameter(torch.zeros(wh, max_seq_length, embed_dim))
         | 
| 11 | 
            +
                    # print('register spatial motion embedding with', wh)
         | 
| 12 |  | 
| 13 | 
             
                    self.scale = 1.0
         | 
| 14 | 
             
                    self.trained_length = -1
         | 
|  | |
| 216 | 
             
                        setattr(parent_module, module_name, new_module)
         | 
| 217 |  | 
| 218 | 
             
                inject_layers = list(set(inject_layers))
         | 
| 219 | 
            +
                # for name in inject_layers:
         | 
| 220 | 
            +
                    # print(f"Injecting motion embedding at {name}")
         | 
| 221 |  | 
| 222 | 
             
                parameters_list = []
         | 
| 223 | 
             
                for name, para in model.named_parameters():
         | 
    	
        requirements.txt
    CHANGED
    
    | @@ -50,4 +50,7 @@ transformers==4.45.2 | |
| 50 | 
             
            triton==3.0.0
         | 
| 51 | 
             
            typing_extensions==4.12.2
         | 
| 52 | 
             
            urllib3==2.2.3
         | 
| 53 | 
            -
            zipp==3.20.2
         | 
|  | |
|  | |
|  | 
|  | |
| 50 | 
             
            triton==3.0.0
         | 
| 51 | 
             
            typing_extensions==4.12.2
         | 
| 52 | 
             
            urllib3==2.2.3
         | 
| 53 | 
            +
            zipp==3.20.2
         | 
| 54 | 
            +
            gradio==4.44.0
         | 
| 55 | 
            +
            gradio-imageslider==0.0.20
         | 
| 56 | 
            +
            gradio-client==1.3.0
         |