denk committed
Commit 8ef0ffb · 1 Parent(s): cd07504
Files changed (3)
  1. README.md +96 -0
  2. config.json +25 -0
  3. diffusion_pytorch_model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,96 @@
+ ---
+ license: apache-2.0
+ language:
+ - en
+ tags:
+ - video
+ - video-generation
+ - video-to-video
+ - controlnet
+ - diffusers
+ ---
+ # Dilated Controlnet for Wan2.1
+
+ This repo contains the dilated ControlNet module for the Wan2.1 model.
+ Dilated ControlNet has a `stride` parameter (set via `--controlnet_stride` in the examples below).
+
+
+ ### How to use
+ Clone the repo
+ ```bash
+ git clone https://github.com/TheDenk/wan2.1-dilated-controlnet.git
+ cd wan2.1-dilated-controlnet
+ ```
+
+ Create and activate a virtual environment
+ ```bash
+ python -m venv venv
+ source venv/bin/activate
+ ```
+
+ Install the requirements
+ ```bash
+ pip install -r requirements.txt
+ ```
+
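+ Optionally, pre-download the model weights so the first inference run does not have to fetch them. This is a minimal sketch using the `huggingface_hub` client (already a dependency of `diffusers`); the repo ids are the ones used in the examples below.
+ ```python
+ from huggingface_hub import snapshot_download
+
+ # Cache the base Wan2.1 weights and the dilated ControlNet weights locally.
+ # diffusers' from_pretrained() reuses the same Hub cache, so nothing is
+ # downloaded twice.
+ snapshot_download("Wan-AI/Wan2.1-T2V-1.3B-Diffusers")
+ snapshot_download("TheDenk/wan2.1-t2v-1.3b-controlnetd-hed-v1")
+ ```
+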
36
+ ### Inference examples
37
+ #### Inference with cli
38
+ ```bash
39
+ python -m inference.cli_demo \
40
+ --video_path "resources/physical-4.mp4" \
41
+ --prompt "A balloon filled with water was thrown to the ground, exploding and splashing water in all directions. There were graffiti on the wall, studio lighting, and commercial movie shooting." \
42
+ --controlnet_type "hed" \
43
+ --controlnet_stride 3 \
44
+ --base_model_path Wan-AI/Wan2.1-T2V-1.3B-Diffusers \
45
+ --controlnet_model_path TheDenk/wan2.1-t2v-1.3b-controlnetd-hed-v1
46
+ ```
47
+
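+ The `--controlnet_type "hed"` flag means the control signal is a HED (soft edge) map extracted from the input video. To preview what that signal looks like for a single frame, here is a small sketch; it assumes the `controlnet_aux` and `decord` packages are available, which may differ from the exact preprocessor the repo uses internally.
+ ```python
+ from PIL import Image
+ from decord import VideoReader
+ from controlnet_aux import HEDdetector
+
+ # Grab the first frame of the control video.
+ frame = VideoReader("resources/physical-4.mp4")[0].asnumpy()
+
+ # Run a HED soft-edge annotator on it and save the edge map for inspection.
+ hed = HEDdetector.from_pretrained("lllyasviel/Annotators")
+ edge_map = hed(Image.fromarray(frame))
+ edge_map.save("hed_preview.png")
+ ```
+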
+ #### Inference with Gradio
+ ```bash
+ python -m inference.gradio_web_demo \
+     --controlnet_type "hed" \
+     --controlnet_stride 3 \
+     --base_model_path Wan-AI/Wan2.1-T2V-1.3B-Diffusers \
+     --controlnet_model_path TheDenk/wan2.1-t2v-1.3b-controlnetd-hed-v1
+ ```
+
+ #### Detailed Inference
+ ```bash
+ python -m inference.cli_demo \
+     --video_path "resources/physical-4.mp4" \
+     --prompt "A balloon filled with water was thrown to the ground, exploding and splashing water in all directions. There were graffiti on the wall, studio lighting, and commercial movie shooting." \
+     --controlnet_type "hed" \
+     --base_model_path Wan-AI/Wan2.1-T2V-1.3B-Diffusers \
+     --controlnet_model_path TheDenk/wan2.1-t2v-1.3b-controlnetd-hed-v1 \
+     --controlnet_weight 0.8 \
+     --controlnet_guidance_start 0.0 \
+     --controlnet_guidance_end 0.8 \
+     --controlnet_stride 3 \
+     --num_inference_steps 50 \
+     --guidance_scale 5.0 \
+     --video_height 480 \
+     --video_width 832 \
+     --num_frames 81 \
+     --negative_prompt "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards" \
+     --seed 42 \
+     --out_fps 16 \
+     --output_path "result.mp4"
+ ```
+
+
+ ## Acknowledgements
+ Original code and models: [Wan2.1](https://github.com/Wan-Video/Wan2.1).
+
+
+ ## Citations
+ ```bibtex
+ @misc{TheDenk,
+     title={Dilated Controlnet},
+     author={Karachev Denis},
+     url={https://github.com/TheDenk/wan2.1-dilated-controlnet},
+     publisher={Github},
+     year={2025}
+ }
+ ```
+
+ ## Contacts
+ <p>Issues should be raised directly in the repository. For professional support and recommendations, please contact <a>[email protected]</a>.</p>
config.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "_class_name": "WanControlnet",
+   "_diffusers_version": "0.33.0.dev0",
+   "added_kv_proj_dim": null,
+   "attention_head_dim": 128,
+   "cross_attn_norm": true,
+   "downscale_coef": 8,
+   "eps": 1e-06,
+   "ffn_dim": 8960,
+   "freq_dim": 256,
+   "image_dim": null,
+   "in_channels": 3,
+   "num_attention_heads": 12,
+   "num_layers": 8,
+   "out_proj_dim": 1536,
+   "patch_size": [
+     1,
+     2,
+     2
+   ],
+   "qk_norm": "rms_norm_across_heads",
+   "rope_max_seq_len": 1024,
+   "text_dim": 4096,
+   "vae_channels": 16
+ }
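A quick sanity check on this config: the inner transformer dimension implied by these values is `num_attention_heads * attention_head_dim = 12 * 128 = 1536`, which matches `out_proj_dim` and the Wan2.1 1.3B hidden size, consistent with the ControlNet features being projected into the base model's blocks. A minimal sketch that reads a local copy of `config.json` and verifies this:

```python
import json

# Load a local copy of this repo's config.json.
with open("config.json") as f:
    cfg = json.load(f)

inner_dim = cfg["num_attention_heads"] * cfg["attention_head_dim"]  # 12 * 128
assert inner_dim == cfg["out_proj_dim"] == 1536
print(f"{cfg['num_layers']} ControlNet blocks, inner dim {inner_dim}")
```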
diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9afc9d20d56dd4b90758f12f57382ce29df9ea343b25a7ab856240bae8484640
+ size 834314664
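This is a Git LFS pointer, not the weights themselves: `oid` is the SHA-256 of the actual ~834 MB safetensors file, which Git LFS or the Hub client downloads in its place. A small sketch for verifying a downloaded copy against the pointer (adjust the path to wherever the file landed):

```python
import hashlib

EXPECTED = "9afc9d20d56dd4b90758f12f57382ce29df9ea343b25a7ab856240bae8484640"

# Hash the downloaded weights in chunks to keep memory use low.
h = hashlib.sha256()
with open("diffusion_pytorch_model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "checksum mismatch"
print("OK:", h.hexdigest())
```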