init
Browse files- Step-1X-3D-Texture/step1x-3d-ig2v.safetensors +3 -0
- Step1X-3D-Geometry-1300m/model_index.json +32 -0
- Step1X-3D-Geometry-1300m/scheduler/scheduler_config.json +16 -0
- Step1X-3D-Geometry-1300m/transformer/config.json +18 -0
- Step1X-3D-Geometry-1300m/transformer/diffusion_pytorch_model.safetensors +3 -0
- Step1X-3D-Geometry-1300m/vae/config.json +31 -0
- Step1X-3D-Geometry-1300m/vae/diffusion_pytorch_model.safetensors +3 -0
- Step1X-3D-Geometry-1300m/visual_eature_extractor/preprocessor_config.json +27 -0
- Step1X-3D-Geometry-1300m/visual_encoder/config.json +15 -0
- Step1X-3D-Geometry-1300m/visual_encoder/diffusion_pytorch_model.safetensors +3 -0
- Step1X-3D-Geometry-Label-1300m/label_encoder/config.json +6 -0
- Step1X-3D-Geometry-Label-1300m/label_encoder/diffusion_pytorch_model.safetensors +3 -0
- Step1X-3D-Geometry-Label-1300m/model_index.json +32 -0
- Step1X-3D-Geometry-Label-1300m/scheduler/scheduler_config.json +16 -0
- Step1X-3D-Geometry-Label-1300m/transformer/config.json +17 -0
- Step1X-3D-Geometry-Label-1300m/transformer/diffusion_pytorch_model.safetensors +3 -0
- Step1X-3D-Geometry-Label-1300m/vae/config.json +30 -0
- Step1X-3D-Geometry-Label-1300m/vae/diffusion_pytorch_model.safetensors +3 -0
- Step1X-3D-Geometry-Label-1300m/visual_eature_extractor/preprocessor_config.json +27 -0
- Step1X-3D-Geometry-Label-1300m/visual_encoder/config.json +17 -0
- Step1X-3D-Geometry-Label-1300m/visual_encoder/diffusion_pytorch_model.safetensors +3 -0
Step-1X-3D-Texture/step1x-3d-ig2v.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05cd28096e875712c1f2489bb325a9bc7324a41cd3f367d7a38c63f1bea77964
|
3 |
+
size 3602537816
|
Step1X-3D-Geometry-1300m/model_index.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "Step1X3DGeometryPipeline",
|
3 |
+
"_diffusers_version": "0.32.2",
|
4 |
+
"caption_encoder": [
|
5 |
+
null,
|
6 |
+
null
|
7 |
+
],
|
8 |
+
"label_encoder": [
|
9 |
+
null,
|
10 |
+
null
|
11 |
+
],
|
12 |
+
"scheduler": [
|
13 |
+
"diffusers",
|
14 |
+
"FlowMatchEulerDiscreteScheduler"
|
15 |
+
],
|
16 |
+
"transformer": [
|
17 |
+
"step1x3d_geometry.models.transformers.flux_transformer_1d",
|
18 |
+
"FluxDenoiser"
|
19 |
+
],
|
20 |
+
"vae": [
|
21 |
+
"step1x3d_geometry.models.autoencoders.michelangelo_autoencoder",
|
22 |
+
"MichelangeloAutoencoder"
|
23 |
+
],
|
24 |
+
"visual_eature_extractor": [
|
25 |
+
"transformers",
|
26 |
+
"BitImageProcessor"
|
27 |
+
],
|
28 |
+
"visual_encoder": [
|
29 |
+
"step1x3d_geometry.models.conditional_encoders.dinov2_encoder",
|
30 |
+
"Dinov2Encoder"
|
31 |
+
]
|
32 |
+
}
|
Step1X-3D-Geometry-1300m/scheduler/scheduler_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "FlowMatchEulerDiscreteScheduler",
|
3 |
+
"_diffusers_version": "0.32.2",
|
4 |
+
"base_image_seq_len": 256,
|
5 |
+
"base_shift": 0.5,
|
6 |
+
"invert_sigmas": false,
|
7 |
+
"max_image_seq_len": 4096,
|
8 |
+
"max_shift": 1.15,
|
9 |
+
"num_train_timesteps": 1000,
|
10 |
+
"shift": 1.0,
|
11 |
+
"shift_terminal": null,
|
12 |
+
"use_beta_sigmas": false,
|
13 |
+
"use_dynamic_shifting": false,
|
14 |
+
"use_exponential_sigmas": false,
|
15 |
+
"use_karras_sigmas": false
|
16 |
+
}
|
Step1X-3D-Geometry-1300m/transformer/config.json
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"weights": null,
|
3 |
+
"input_channels": 64,
|
4 |
+
"width": 1280,
|
5 |
+
"layers": 12,
|
6 |
+
"num_single_layers": 24,
|
7 |
+
"num_heads": 20,
|
8 |
+
"condition_dim": 1024,
|
9 |
+
"multi_condition_type": "in_context",
|
10 |
+
"use_visual_condition": true,
|
11 |
+
"visual_condition_dim": 1024,
|
12 |
+
"n_views": 1,
|
13 |
+
"use_caption_condition": false,
|
14 |
+
"caption_condition_dim": 1024,
|
15 |
+
"use_label_condition": false,
|
16 |
+
"label_condition_dim": 1024,
|
17 |
+
"identity_init": false
|
18 |
+
}
|
Step1X-3D-Geometry-1300m/transformer/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8cfc27716009acc60035f2489a9b61de81790625b70aa27a682efc99a5137a9
|
3 |
+
size 5265243816
|
Step1X-3D-Geometry-1300m/vae/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"weights": null,
|
3 |
+
"n_samples": 16384,
|
4 |
+
"use_downsample": true,
|
5 |
+
"downsample_ratio": 0.0625,
|
6 |
+
"num_latents": 2048,
|
7 |
+
"point_feats": 3,
|
8 |
+
"embed_point_feats": false,
|
9 |
+
"out_dim": 1,
|
10 |
+
"embed_dim": 64,
|
11 |
+
"embed_type": "fourier",
|
12 |
+
"num_freqs": 8,
|
13 |
+
"include_pi": false,
|
14 |
+
"width": 768,
|
15 |
+
"heads": 12,
|
16 |
+
"num_encoder_layers": 8,
|
17 |
+
"num_decoder_layers": 16,
|
18 |
+
"init_scale": 0.009021097956087902,
|
19 |
+
"qkv_bias": false,
|
20 |
+
"qk_norm": false,
|
21 |
+
"use_ln_post": true,
|
22 |
+
"use_flash": true,
|
23 |
+
"use_checkpoint": true,
|
24 |
+
"use_multi_reso": false,
|
25 |
+
"resolutions": null,
|
26 |
+
"sampling_prob": null,
|
27 |
+
"with_sharp_data": true,
|
28 |
+
"volume_decoder_type": "hierarchical",
|
29 |
+
"surface_extractor_type": "mc",
|
30 |
+
"z_scale_factor": 1.0
|
31 |
+
}
|
Step1X-3D-Geometry-1300m/vae/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa1b17360f6614a54ab3b971c966cfe64ff87a5445186b02c666e634b59c8302
|
3 |
+
size 766401652
|
Step1X-3D-Geometry-1300m/visual_eature_extractor/preprocessor_config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"crop_size": {
|
3 |
+
"height": 518,
|
4 |
+
"width": 518
|
5 |
+
},
|
6 |
+
"do_center_crop": true,
|
7 |
+
"do_convert_rgb": true,
|
8 |
+
"do_normalize": true,
|
9 |
+
"do_rescale": true,
|
10 |
+
"do_resize": true,
|
11 |
+
"image_mean": [
|
12 |
+
0.48145466,
|
13 |
+
0.4578275,
|
14 |
+
0.40821073
|
15 |
+
],
|
16 |
+
"image_processor_type": "BitImageProcessor",
|
17 |
+
"image_std": [
|
18 |
+
0.26862954,
|
19 |
+
0.26130258,
|
20 |
+
0.27577711
|
21 |
+
],
|
22 |
+
"resample": 3,
|
23 |
+
"rescale_factor": 0.00392156862745098,
|
24 |
+
"size": {
|
25 |
+
"shortest_edge": 518
|
26 |
+
}
|
27 |
+
}
|
Step1X-3D-Geometry-1300m/visual_encoder/config.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"weights": null,
|
3 |
+
"encode_camera": false,
|
4 |
+
"camera_embeds_type": "sincos",
|
5 |
+
"camera_embeds_dim": null,
|
6 |
+
"n_views": 1,
|
7 |
+
"empty_embeds_ratio": 0.1,
|
8 |
+
"normalize_embeds": false,
|
9 |
+
"zero_uncond_embeds": true,
|
10 |
+
"freeze_modulation_dino": false,
|
11 |
+
"enable_gradient_checkpointing": false,
|
12 |
+
"image_size": 518,
|
13 |
+
"dino_type": "facebook/dinov2-with-registers-large",
|
14 |
+
"kwargs": null
|
15 |
+
}
|
Step1X-3D-Geometry-1300m/visual_encoder/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c07f8abe3af5db214e3341c9f23fb2351cb15591afdae0da8e835dfabc42e850
|
3 |
+
size 1217544216
|
Step1X-3D-Geometry-Label-1300m/label_encoder/config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"hidden_size": 1024,
|
3 |
+
"empty_embeds_ratio": 0.1,
|
4 |
+
"normalize_embeds": false,
|
5 |
+
"zero_uncond_embeds": true
|
6 |
+
}
|
Step1X-3D-Geometry-Label-1300m/label_encoder/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d7d76cfe4bc95f264421fa34076826bada5a218e63febabe69461d8c2607e4b
|
3 |
+
size 33104
|
Step1X-3D-Geometry-Label-1300m/model_index.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "Step1X3DGeometryPipeline",
|
3 |
+
"_diffusers_version": "0.32.2",
|
4 |
+
"caption_encoder": [
|
5 |
+
null,
|
6 |
+
null
|
7 |
+
],
|
8 |
+
"label_encoder": [
|
9 |
+
"step1x3d_geometry.models.conditional_encoders.label_encoder",
|
10 |
+
"LabelEncoder"
|
11 |
+
],
|
12 |
+
"scheduler": [
|
13 |
+
"diffusers",
|
14 |
+
"FlowMatchEulerDiscreteScheduler"
|
15 |
+
],
|
16 |
+
"transformer": [
|
17 |
+
"step1x3d_geometry.models.transformers.flux_transformer_1d",
|
18 |
+
"FluxDenoiser"
|
19 |
+
],
|
20 |
+
"vae": [
|
21 |
+
"step1x3d_geometry.models.autoencoders.michelangelo_autoencoder",
|
22 |
+
"MichelangeloAutoencoder"
|
23 |
+
],
|
24 |
+
"visual_eature_extractor": [
|
25 |
+
"transformers",
|
26 |
+
"BitImageProcessor"
|
27 |
+
],
|
28 |
+
"visual_encoder": [
|
29 |
+
"step1x3d_geometry.models.conditional_encoders.dinov2_clip_encoder",
|
30 |
+
"Dinov2CLIPEncoder"
|
31 |
+
]
|
32 |
+
}
|
Step1X-3D-Geometry-Label-1300m/scheduler/scheduler_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "FlowMatchEulerDiscreteScheduler",
|
3 |
+
"_diffusers_version": "0.32.2",
|
4 |
+
"base_image_seq_len": 256,
|
5 |
+
"base_shift": 0.5,
|
6 |
+
"invert_sigmas": false,
|
7 |
+
"max_image_seq_len": 4096,
|
8 |
+
"max_shift": 1.15,
|
9 |
+
"num_train_timesteps": 1000,
|
10 |
+
"shift": 1.0,
|
11 |
+
"shift_terminal": null,
|
12 |
+
"use_beta_sigmas": false,
|
13 |
+
"use_dynamic_shifting": false,
|
14 |
+
"use_exponential_sigmas": false,
|
15 |
+
"use_karras_sigmas": false
|
16 |
+
}
|
Step1X-3D-Geometry-Label-1300m/transformer/config.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"input_channels": 64,
|
3 |
+
"width": 1536,
|
4 |
+
"layers": 8,
|
5 |
+
"num_single_layers": 16,
|
6 |
+
"num_heads": 16,
|
7 |
+
"condition_dim": 1024,
|
8 |
+
"multi_condition_type": "in_context",
|
9 |
+
"use_visual_condition": true,
|
10 |
+
"visual_condition_dim": 1024,
|
11 |
+
"n_views": 1,
|
12 |
+
"use_caption_condition": false,
|
13 |
+
"caption_condition_dim": 1024,
|
14 |
+
"use_label_condition": true,
|
15 |
+
"label_condition_dim": 1024,
|
16 |
+
"identity_init": false
|
17 |
+
}
|
Step1X-3D-Geometry-Label-1300m/transformer/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0df096288471320f3fd566e94db628c3a36a697ae6fa55e60152b236fd5918aa
|
3 |
+
size 5086980952
|
Step1X-3D-Geometry-Label-1300m/vae/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_samples": 16384,
|
3 |
+
"use_downsample": true,
|
4 |
+
"downsample_ratio": 0.0625,
|
5 |
+
"num_latents": 2048,
|
6 |
+
"point_feats": 3,
|
7 |
+
"embed_point_feats": false,
|
8 |
+
"out_dim": 1,
|
9 |
+
"embed_dim": 64,
|
10 |
+
"embed_type": "fourier",
|
11 |
+
"num_freqs": 8,
|
12 |
+
"include_pi": false,
|
13 |
+
"width": 768,
|
14 |
+
"heads": 12,
|
15 |
+
"num_encoder_layers": 8,
|
16 |
+
"num_decoder_layers": 16,
|
17 |
+
"init_scale": 0.009021097956087902,
|
18 |
+
"qkv_bias": false,
|
19 |
+
"qk_norm": false,
|
20 |
+
"use_ln_post": true,
|
21 |
+
"use_flash": true,
|
22 |
+
"use_checkpoint": true,
|
23 |
+
"use_multi_reso": false,
|
24 |
+
"resolutions": null,
|
25 |
+
"sampling_prob": null,
|
26 |
+
"with_sharp_data": true,
|
27 |
+
"volume_decoder_type": "hierarchical",
|
28 |
+
"surface_extractor_type": "mc",
|
29 |
+
"z_scale_factor": 1.0
|
30 |
+
}
|
Step1X-3D-Geometry-Label-1300m/vae/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9d764b4e3fdf377f1ba960215f2e91800fa6bf86ec5243340b96e22a0a7f781
|
3 |
+
size 766401652
|
Step1X-3D-Geometry-Label-1300m/visual_eature_extractor/preprocessor_config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"crop_size": {
|
3 |
+
"height": 518,
|
4 |
+
"width": 518
|
5 |
+
},
|
6 |
+
"do_center_crop": true,
|
7 |
+
"do_convert_rgb": true,
|
8 |
+
"do_normalize": true,
|
9 |
+
"do_rescale": true,
|
10 |
+
"do_resize": true,
|
11 |
+
"image_mean": [
|
12 |
+
0.48145466,
|
13 |
+
0.4578275,
|
14 |
+
0.40821073
|
15 |
+
],
|
16 |
+
"image_processor_type": "BitImageProcessor",
|
17 |
+
"image_std": [
|
18 |
+
0.26862954,
|
19 |
+
0.26130258,
|
20 |
+
0.27577711
|
21 |
+
],
|
22 |
+
"resample": 3,
|
23 |
+
"rescale_factor": 0.00392156862745098,
|
24 |
+
"size": {
|
25 |
+
"shortest_edge": 518
|
26 |
+
}
|
27 |
+
}
|
Step1X-3D-Geometry-Label-1300m/visual_encoder/config.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"encode_camera": false,
|
3 |
+
"camera_embeds_type": "sincos",
|
4 |
+
"camera_embeds_dim": null,
|
5 |
+
"n_views": 1,
|
6 |
+
"empty_embeds_ratio": 0.1,
|
7 |
+
"normalize_embeds": false,
|
8 |
+
"zero_uncond_embeds": true,
|
9 |
+
"freeze_modulation_clip": false,
|
10 |
+
"freeze_modulation_dino": false,
|
11 |
+
"enable_gradient_checkpointing": false,
|
12 |
+
"image_size": 518,
|
13 |
+
"fuse_type": "concat",
|
14 |
+
"dino_type": "facebook/dinov2-with-registers-large",
|
15 |
+
"clip_type": "openai/clip-vit-large-patch14",
|
16 |
+
"kwargs": null
|
17 |
+
}
|
Step1X-3D-Geometry-Label-1300m/visual_encoder/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9b22f50296f1f63841c92a724c732bffee8051b59e830e9eac9a99af8d0e65e
|
3 |
+
size 2928089212
|