Luffuly nielsr HF Staff committed on
Commit
5c8e351
·
verified ·
1 Parent(s): bd5376c

Add library name, pipeline tag, link to Github (#1)

Browse files

- Add library name, pipeline tag, link to Github (f4c4a1b9ee19048023521449dd73a4b157d47275)


Co-authored-by: Niels Rogge <[email protected]>

Files changed (1) hide show
  1. README.md +229 -3
README.md CHANGED
@@ -1,3 +1,229 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ library_name: diffusers
4
+ pipeline_tag: image-to-3d
5
+ ---
6
+
7
+ # File information
8
+
9
+ The repository contains the following file information:
10
+
11
+ Filename: model_index.json
12
+ Content: {
13
+ "_class_name": "MVDiffusionImagePipeline",
14
+ "_diffusers_version": "0.30.3",
15
+ "feature_extractor": [
16
+ "transformers",
17
+ "CLIPImageProcessor"
18
+ ],
19
+ "image_encoder": [
20
+ "transformers",
21
+ "CLIPVisionModelWithProjection"
22
+ ],
23
+ "requires_safety_checker": true,
24
+ "safety_checker": [
25
+ null,
26
+ null
27
+ ],
28
+ "scheduler": [
29
+ "diffusers",
30
+ "DDIMScheduler"
31
+ ],
32
+ "unet": [
33
+ "mv_unet",
34
+ "UnifieldWrappedUNet"
35
+ ],
36
+ "vae": [
37
+ "diffusers",
38
+ "AutoencoderKL"
39
+ ]
40
+ }
41
+
42
+ Filename: vae/config.json
43
+ Content: {
44
+ "_class_name": "AutoencoderKL",
45
+ "_diffusers_version": "0.30.3",
46
+ "_name_or_path": "Luffuly/unique3d-mvimage-diffuser",
47
+ "act_fn": "silu",
48
+ "block_out_channels": [
49
+ 128,
50
+ 256,
51
+ 512,
52
+ 512
53
+ ],
54
+ "down_block_types": [
55
+ "DownEncoderBlock2D",
56
+ "DownEncoderBlock2D",
57
+ "DownEncoderBlock2D",
58
+ "DownEncoderBlock2D"
59
+ ],
60
+ "force_upcast": true,
61
+ "in_channels": 3,
62
+ "latent_channels": 4,
63
+ "latents_mean": null,
64
+ "latents_std": null,
65
+ "layers_per_block": 2,
66
+ "mid_block_add_attention": true,
67
+ "norm_num_groups": 32,
68
+ "out_channels": 3,
69
+ "sample_size": 256,
70
+ "scaling_factor": 0.18215,
71
+ "shift_factor": null,
72
+ "up_block_types": [
73
+ "UpDecoderBlock2D",
74
+ "UpDecoderBlock2D",
75
+ "UpDecoderBlock2D",
76
+ "UpDecoderBlock2D"
77
+ ],
78
+ "use_post_quant_conv": true,
79
+ "use_quant_conv": true
80
+ }
81
+
82
+ Filename: scheduler_config.json
83
+ Content: {
84
+ "_class_name": "DDIMScheduler",
85
+ "_diffusers_version": "0.30.3",
86
+ "beta_end": 0.012,
87
+ "beta_schedule": "scaled_linear",
88
+ "beta_start": 0.00085,
89
+ "clip_sample": false,
90
+ "clip_sample_range": 1.0,
91
+ "dynamic_thresholding_ratio": 0.995,
92
+ "num_train_timesteps": 1000,
93
+ "prediction_type": "epsilon",
94
+ "rescale_betas_zero_snr": false,
95
+ "sample_max_value": 1.0,
96
+ "set_alpha_to_one": false,
97
+ "skip_prk_steps": true,
98
+ "steps_offset": 1,
99
+ "thresholding": false,
100
+ "timestep_spacing": "leading",
101
+ "trained_betas": null
102
+ }
103
+
104
+ Filename: unet/config.json
105
+ Content: {
106
+ "_class_name": "UnifieldWrappedUNet",
107
+ "_diffusers_version": "0.30.3",
108
+ "_name_or_path": "outputs/vroid-mvimage-6view/checkpoint",
109
+ "act_fn": "silu",
110
+ "addition_embed_type": null,
111
+ "addition_embed_type_num_heads": 64,
112
+ "addition_time_embed_dim": null,
113
+ "attention_head_dim": 8,
114
+ "attention_type": "default",
115
+ "block_out_channels": [
116
+ 320,
117
+ 640,
118
+ 1280,
119
+ 1280
120
+ ],
121
+ "center_input_sample": false,
122
+ "class_embed_type": null,
123
+ "class_embeddings_concat": false,
124
+ "conv_in_kernel": 3,
125
+ "conv_out_kernel": 3,
126
+ "cross_attention_dim": 768,
127
+ "cross_attention_norm": null,
128
+ "down_block_types": [
129
+ "CrossAttnDownBlock2D",
130
+ "CrossAttnDownBlock2D",
131
+ "CrossAttnDownBlock2D",
132
+ "DownBlock2D"
133
+ ],
134
+ "downsample_padding": 1,
135
+ "dropout": 0.0,
136
+ "dual_cross_attention": false,
137
+ "encoder_hid_dim": null,
138
+ "encoder_hid_dim_type": null,
139
+ "flip_sin_to_cos": true,
140
+ "freq_shift": 0,
141
+ "in_channels": 8,
142
+ "layers_per_block": 2,
143
+ "mid_block_only_cross_attention": null,
144
+ "mid_block_scale_factor": 1,
145
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
146
+ "norm_eps": 1e-05,
147
+ "norm_num_groups": 32,
148
+ "num_attention_heads": null,
149
+ "num_class_embeds": 8,
150
+ "only_cross_attention": false,
151
+ "out_channels": 4,
152
+ "projection_class_embeddings_input_dim": null,
153
+ "resnet_out_scale_factor": 1.0,
154
+ "resnet_skip_time_act": false,
155
+ "resnet_time_scale_shift": "default",
156
+ "reverse_transformer_layers_per_block": null,
157
+ "sample_size": 64,
158
+ "time_cond_proj_dim": null,
159
+ "time_embedding_act_fn": null,
160
+ "time_embedding_dim": null,
161
+ "time_embedding_type": "positional",
162
+ "timestep_post_act": null,
163
+ "transformer_layers_per_block": 1,
164
+ "up_block_types": [
165
+ "UpBlock2D",
166
+ "CrossAttnUpBlock2D",
167
+ "CrossAttnUpBlock2D",
168
+ "CrossAttnUpBlock2D"
169
+ ],
170
+ "n_views": 6,
171
+ "upcast_attention": false,
172
+ "use_linear_projection": false
173
+ }
174
+
175
+ Filename: preprocessor_config.json
176
+ Content: {
177
+ "crop_size": {
178
+ "height": 224,
179
+ "width": 224
180
+ },
181
+ "do_center_crop": true,
182
+ "do_convert_rgb": true,
183
+ "do_normalize": true,
184
+ "do_rescale": true,
185
+ "do_resize": true,
186
+ "image_mean": [
187
+ 0.48145466,
188
+ 0.4578275,
189
+ 0.40821073
190
+ ],
191
+ "image_processor_type": "CLIPImageProcessor",
192
+ "image_std": [
193
+ 0.26862954,
194
+ 0.26130258,
195
+ 0.27577711
196
+ ],
197
+ "resample": 3,
198
+ "rescale_factor": 0.00392156862745098,
199
+ "size": {
200
+ "shortest_edge": 224
201
+ }
202
+ }
203
+
204
+ Filename: image_encoder/config.json
205
+ Content: {
206
+ "_name_or_path": "Luffuly/unique3d-mvimage-diffuser",
207
+ "architectures": [
208
+ "CLIPVisionModelWithProjection"
209
+ ],
210
+ "attention_dropout": 0.0,
211
+ "dropout": 0.0,
212
+ "hidden_act": "quick_gelu",
213
+ "hidden_size": 1024,
214
+ "image_size": 224,
215
+ "initializer_factor": 1.0,
216
+ "initializer_range": 0.02,
217
+ "intermediate_size": 4096,
218
+ "layer_norm_eps": 1e-05,
219
+ "model_type": "clip_vision_model",
220
+ "num_attention_heads": 16,
221
+ "num_channels": 3,
222
+ "num_hidden_layers": 24,
223
+ "patch_size": 14,
224
+ "projection_dim": 768,
225
+ "torch_dtype": "float16",
226
+ "transformers_version": "4.45.2"
227
+ }
228
+
229
+ # Code: https://github.com/TingtingLiao/soap