Mariam-Elz committed on
Commit
5dc17ed
·
verified ·
1 Parent(s): d910d9f

Upload imagedream/configs/sd_v2_base_ipmv_local.yaml with huggingface_hub

Browse files
imagedream/configs/sd_v2_base_ipmv_local.yaml CHANGED
@@ -1,62 +1,62 @@
1
- model:
2
- target: imagedream.ldm.interface.LatentDiffusionInterface
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- timesteps: 1000
7
- scale_factor: 0.18215
8
- parameterization: "eps"
9
-
10
- unet_config:
11
- target: imagedream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModel
12
- params:
13
- image_size: 32 # unused
14
- in_channels: 4
15
- out_channels: 4
16
- model_channels: 320
17
- attention_resolutions: [ 4, 2, 1 ]
18
- num_res_blocks: 2
19
- channel_mult: [ 1, 2, 4, 4 ]
20
- num_head_channels: 64 # need to fix for flash-attn
21
- use_spatial_transformer: True
22
- use_linear_in_transformer: True
23
- transformer_depth: 1
24
- context_dim: 1024
25
- use_checkpoint: False
26
- legacy: False
27
- camera_dim: 16
28
- with_ip: True
29
- ip_dim: 16 # ip token length
30
- ip_mode: "local_resample"
31
- ip_weight: 1.0 # adjust for similarity to image
32
-
33
- vae_config:
34
- target: imagedream.ldm.models.autoencoder.AutoencoderKL
35
- params:
36
- embed_dim: 4
37
- monitor: val/rec_loss
38
- ddconfig:
39
- #attn_type: "vanilla-xformers"
40
- double_z: true
41
- z_channels: 4
42
- resolution: 256
43
- in_channels: 3
44
- out_ch: 3
45
- ch: 128
46
- ch_mult:
47
- - 1
48
- - 2
49
- - 4
50
- - 4
51
- num_res_blocks: 2
52
- attn_resolutions: []
53
- dropout: 0.0
54
- lossconfig:
55
- target: torch.nn.Identity
56
-
57
- clip_config:
58
- target: imagedream.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
59
- params:
60
- freeze: True
61
- layer: "penultimate"
62
- ip_mode: "local_resample"
 
1
+ model:
2
+ target: imagedream.ldm.interface.LatentDiffusionInterface
3
+ params:
4
+ linear_start: 0.00085
5
+ linear_end: 0.0120
6
+ timesteps: 1000
7
+ scale_factor: 0.18215
8
+ parameterization: "eps"
9
+
10
+ unet_config:
11
+ target: imagedream.ldm.modules.diffusionmodules.openaimodel.MultiViewUNetModel
12
+ params:
13
+ image_size: 32 # unused
14
+ in_channels: 4
15
+ out_channels: 4
16
+ model_channels: 320
17
+ attention_resolutions: [ 4, 2, 1 ]
18
+ num_res_blocks: 2
19
+ channel_mult: [ 1, 2, 4, 4 ]
20
+ num_head_channels: 64 # need to fix for flash-attn
21
+ use_spatial_transformer: True
22
+ use_linear_in_transformer: True
23
+ transformer_depth: 1
24
+ context_dim: 1024
25
+ use_checkpoint: False
26
+ legacy: False
27
+ camera_dim: 16
28
+ with_ip: True
29
+ ip_dim: 16 # ip token length
30
+ ip_mode: "local_resample"
31
+ ip_weight: 1.0 # adjust for similarity to image
32
+
33
+ vae_config:
34
+ target: imagedream.ldm.models.autoencoder.AutoencoderKL
35
+ params:
36
+ embed_dim: 4
37
+ monitor: val/rec_loss
38
+ ddconfig:
39
+ #attn_type: "vanilla-xformers"
40
+ double_z: true
41
+ z_channels: 4
42
+ resolution: 256
43
+ in_channels: 3
44
+ out_ch: 3
45
+ ch: 128
46
+ ch_mult:
47
+ - 1
48
+ - 2
49
+ - 4
50
+ - 4
51
+ num_res_blocks: 2
52
+ attn_resolutions: []
53
+ dropout: 0.0
54
+ lossconfig:
55
+ target: torch.nn.Identity
56
+
57
+ clip_config:
58
+ target: imagedream.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
59
+ params:
60
+ freeze: True
61
+ layer: "penultimate"
62
+ ip_mode: "local_resample"