add error description
Files changed:
- .gitignore +2 -1
- app.py +22 -2
- attn_ctrl.py +1 -1
- models/unet/motion_embeddings.py +3 -3
- requirements.txt +4 -1
.gitignore
CHANGED
```diff
@@ -1,2 +1,3 @@
 results/*
-results_all/*
+results_all/*
+*.pt
```
app.py
CHANGED
```diff
@@ -17,6 +17,12 @@ def inference_app(
         seed,
         inference_steps):
 
+    print('inference info:')
+    print('ref video:', embedding_dir)
+    print('prompt:', prompt)
+    print('motion type:', motion_type)
+    print('infer steps:', inference_steps)
+
     return inference_main(
         embedding_dir=embedding_dir,
         prompt=prompt,
@@ -202,7 +208,6 @@ if __name__ == "__main__":
             </a>
             <br>
            <strong>Please consider starring <span style="color: orange">★</span> the <a href="https://github.com/EnVision-Research/MotionInversion" target="_blank" rel="noopener noreferrer">GitHub Repo</a> if you find this useful!</strong>
-            </p>
            """
        )
        with gr.Tabs(elem_classes=["tabs"]):
@@ -219,12 +224,27 @@ if __name__ == "__main__":
                output_video = gr.Video(label="Output Video")
                generated_prompt = gr.Textbox(label="Generated Prompt")
 
+            with gr.Accordion('Encounter Errors', open=False):
+                gr.Markdown('''
+                <strong>Generally, inference for one video takes 45~50s on ZeroGPU</strong>.
+
+                <br>
+                <strong>You have exceeded your GPU quota</strong>: A limitation set by HF. Retry in an hour.
+                <br>
+                <strong>GPU task aborted</strong>: Possibly caused by too many people using ZeroGPU, so the inference time exceeds the time limit. You may try again later, or clone the repo and run it locally.
+                <br>
+
+                If any other issues occur, please feel free to contact us through the community or by email ([email protected]). We will try our best to help you :)
+
+                ''')
+
+
            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    inference_steps = gr.Number(label="Inference Steps", value=30)
                    motion_type = gr.Dropdown(label="Motion Type", choices=["camera", "object"], value="object")
 
-
+            gr.Examples(examples=examples_inference, inputs=[preview_video, text_input, motion_type, checkpoint_dropdown])
 
            checkpoint_dropdown.change(fn=update_preview_video, inputs=checkpoint_dropdown, outputs=preview_video)
            inference_button.click(inference_model, inputs=[text_input, checkpoint_dropdown, inference_steps, motion_type, seed], outputs=output_video)
```
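The new "Encounter Errors" help box is plain Gradio: a collapsed `gr.Accordion` holding a `gr.Markdown` block, placed alongside the existing "Advanced Settings" accordion, plus a `gr.Examples` row wired to the inference inputs. Below is a minimal, self-contained sketch of that pattern; component names, layout, and the placeholder `fake_inference` function are illustrative only, not the Space's actual app.py.

```python
import gradio as gr

def fake_inference(prompt: str, steps: float) -> str:
    # Stand-in for inference_main(); just echoes what would be run.
    return f"would run {int(steps)} steps for prompt: {prompt!r}"

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    output = gr.Textbox(label="Output")

    # Collapsed troubleshooting notes, analogous to the accordion added above.
    with gr.Accordion("Encounter Errors", open=False):
        gr.Markdown(
            "**You have exceeded your GPU quota**: a quota set by HF, retry in an hour.\n\n"
            "**GPU task aborted**: the task hit the time limit; try again later or run locally."
        )

    with gr.Accordion("Advanced Settings", open=False):
        steps = gr.Number(label="Inference Steps", value=30)

    run_button = gr.Button("Run")
    run_button.click(fake_inference, inputs=[prompt, steps], outputs=output)

if __name__ == "__main__":
    demo.launch()
```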
attn_ctrl.py
CHANGED
```diff
@@ -245,7 +245,7 @@ def register_attention_control(unet, config=None):
            additional_info['removeMFromV'] = config.strategy.get('removeMFromV', False)
            additional_info['vSpatial_frameSubtraction'] = config.strategy.get('vSpatial_frameSubtraction', False)
            net_.forward = temp_attn_forward(net_, additional_info)
-            print('register Motion V embedding at ', block_name)
+            # print('register Motion V embedding at ', block_name)
            return count + 1
        else:
            return count
```
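The only change here mutes a per-block debug print. As a design note, an alternative to commenting prints out (a sketch, not what this commit does) is to route such messages through the standard logging module, so the registration trace can be re-enabled with a log-level switch instead of an edit:

```python
import logging

logger = logging.getLogger(__name__)

def register_block(count: int, block_name: str) -> int:
    # Hypothetical stand-in for the registration branch above: the message
    # is still emitted, but only when DEBUG logging is switched on.
    logger.debug('register Motion V embedding at %s', block_name)
    return count + 1

# Enable the messages when diagnosing registration issues:
# logging.basicConfig(level=logging.DEBUG)
```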
models/unet/motion_embeddings.py
CHANGED
```diff
@@ -8,7 +8,7 @@ class MotionEmbedding(nn.Module):
     def __init__(self, embed_dim: int = None, max_seq_length: int = 32, wh: int = 1):
         super().__init__()
         self.embed = nn.Parameter(torch.zeros(wh, max_seq_length, embed_dim))
-        print('register spatial motion embedding with', wh)
+        # print('register spatial motion embedding with', wh)
 
         self.scale = 1.0
         self.trained_length = -1
@@ -216,8 +216,8 @@ def inject_motion_embeddings(model, combinations=None, config=None):
         setattr(parent_module, module_name, new_module)
 
     inject_layers = list(set(inject_layers))
-    for name in inject_layers:
-        print(f"Injecting motion embedding at {name}")
+    # for name in inject_layers:
+    #     print(f"Injecting motion embedding at {name}")
 
     parameters_list = []
     for name, para in model.named_parameters():
```
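For context on the class whose print was silenced: from the `__init__` shown above, the motion embedding is a single learnable tensor of shape `(wh, max_seq_length, embed_dim)` initialised to zeros. The sketch below restates just that constructor; the values 320 and 1 are illustrative, not the repo's actual configuration, and the real class has more methods.

```python
import torch
import torch.nn as nn

class MotionEmbedding(nn.Module):
    # Only the fields visible in the diff above.
    def __init__(self, embed_dim: int = None, max_seq_length: int = 32, wh: int = 1):
        super().__init__()
        self.embed = nn.Parameter(torch.zeros(wh, max_seq_length, embed_dim))
        self.scale = 1.0
        self.trained_length = -1

emb = MotionEmbedding(embed_dim=320, max_seq_length=32, wh=1)
print(emb.embed.shape)          # torch.Size([1, 32, 320])
print(emb.embed.requires_grad)  # True: the embedding is the trainable motion representation
```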
requirements.txt
CHANGED
```diff
@@ -50,4 +50,7 @@ transformers==4.45.2
 triton==3.0.0
 typing_extensions==4.12.2
 urllib3==2.2.3
-zipp==3.20.2
+zipp==3.20.2
+gradio==4.44.0
+gradio-imageslider==0.0.20
+gradio-client==1.3.0
```
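requirements.txt now pins the Gradio stack explicitly (gradio, gradio-client, gradio-imageslider). A quick sanity-check sketch for confirming that an environment resolved to those pins after `pip install -r requirements.txt`; the expected versions are taken from the diff above.

```python
from importlib.metadata import version

# Expected pins taken from requirements.txt.
pins = {
    "gradio": "4.44.0",
    "gradio-client": "1.3.0",
    "gradio-imageslider": "0.0.20",
}

for package, expected in pins.items():
    installed = version(package)
    status = "OK" if installed == expected else f"expected {expected}"
    print(f"{package}=={installed} ({status})")
```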