jiuhai commited on
Commit
8d8898a
·
verified ·
1 Parent(s): 8650775

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -58,3 +58,5 @@ checkpoint-139000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
58
  checkpoint-140000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
59
  checkpoint-149000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
60
  checkpoint-150000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
 
 
 
58
  checkpoint-140000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
59
  checkpoint-149000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
60
  checkpoint-150000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
61
+ checkpoint-159000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
62
+ checkpoint-160000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
checkpoint-159000/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ar_steps": 1,
3
+ "architectures": [
4
+ "DiffVLMDiffusion"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "condition_layer": -1,
9
+ "eos_token_id": 151645,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1536,
12
+ "image_token_id": 151655,
13
+ "img_cross_attention_dim": 2048,
14
+ "img_diffuser_depth": 2,
15
+ "img_ffn_dim_multiplier": null,
16
+ "img_hidden_size": 1536,
17
+ "img_multiple_of": 256,
18
+ "img_norm_eps": 1e-05,
19
+ "img_num_attention_heads": 12,
20
+ "img_num_kv_heads": 12,
21
+ "img_qk_norm": true,
22
+ "in_channels": 32,
23
+ "initializer_range": 0.02,
24
+ "inject_img_diffuser": false,
25
+ "input_size": 32,
26
+ "intermediate_size": 8960,
27
+ "layer_group_size": 7,
28
+ "layerwise_start_idx": 0,
29
+ "lora_alpha": 128,
30
+ "lora_bias": "none",
31
+ "lora_dropout": 0.05,
32
+ "lora_enable": false,
33
+ "lora_r": 64,
34
+ "max_position_embeddings": 32768,
35
+ "max_window_layers": 28,
36
+ "model_type": "qwen2_vl",
37
+ "non_linearity": 1,
38
+ "norm_elementwise_affine": true,
39
+ "num_attention_heads": 12,
40
+ "num_hidden_layers": 28,
41
+ "num_key_value_heads": 2,
42
+ "patch_size": 1,
43
+ "repa_coeff": 0.1,
44
+ "repa_layers": "2",
45
+ "repa_shared": false,
46
+ "rms_norm_eps": 1e-06,
47
+ "rope_scaling": {
48
+ "mrope_section": [
49
+ 16,
50
+ 24,
51
+ 24
52
+ ],
53
+ "rope_type": "default",
54
+ "type": "default"
55
+ },
56
+ "rope_theta": 1000000.0,
57
+ "sample_size": 128,
58
+ "sampling_steps": 28,
59
+ "sliding_window": null,
60
+ "tie_word_embeddings": true,
61
+ "torch_dtype": "bfloat16",
62
+ "transformers_version": "4.47.0",
63
+ "use_cache": true,
64
+ "use_repa": false,
65
+ "use_residual_attn": true,
66
+ "use_sliding_window": false,
67
+ "vae_path": "mit-han-lab/dc-ae-f32c32-in-1.0-diffusers",
68
+ "video_token_id": 151656,
69
+ "vision_config": {
70
+ "hidden_size": 1536,
71
+ "in_chans": 3,
72
+ "model_type": "qwen2_vl",
73
+ "spatial_patch_size": 14
74
+ },
75
+ "vision_end_token_id": 151653,
76
+ "vision_start_token_id": 151652,
77
+ "vision_token_id": 151654,
78
+ "vocab_size": 151936
79
+ }
checkpoint-159000/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
+ "transformers_version": "4.47.0"
6
+ }
checkpoint-159000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:684a02212e942f09cc3d159b0a75b0ba4e62db1dad4d47839582c7ca8cb3821d
3
+ size 4411759432
checkpoint-159000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd65c22947cc01ffd056abf23bf5e85fef3113761845be2e1999e7a4c63c3064
3
+ size 6332050591
checkpoint-159000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97cb6ccd2e29fd4b72eb363ad9b4dbcaf74ef1968fd1915fb612f37b5a6b558c
3
+ size 16389
checkpoint-159000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d537c3a4a874bf712772d66898b9bfcd36740d32df158899f54de683efa29c18
3
+ size 16389
checkpoint-159000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15e8cd96f7de858e2d4c48f3b53ae72ef78d8d4f6512295e0179474978d1d402
3
+ size 16389
checkpoint-159000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7f5b0be13b84473d159d27446fe411f29b1eae0c16099564f686e01f1e2237a
3
+ size 16389
checkpoint-159000/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d2603f090be6ff3cb692e975f57c591c95f183c73413a857816a51fc35b2949
3
+ size 16389
checkpoint-159000/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7cf90b50f5b10eca8e75fbbf60261c879faba25ccb5abef0399187fc93c96a9
3
+ size 16389
checkpoint-159000/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db9a6e1d688f0674fac649af3dfbf68ba183661cb69f24e971d374d16467523b
3
+ size 16389
checkpoint-159000/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36101d40980eda3ee83c3b0ad79adb167494d7c1e596f92117e65c89b1e7eecd
3
+ size 16389
checkpoint-159000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f148c29aec2ddcdb176a47e1ecab4d5d57fae6d70f12455fb35d079b33209141
3
+ size 1465
checkpoint-159000/trainer_state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:579c0ac89716252dbe33a22b55cb5dbdc461bded1e16aa60bc50032e78aff3c2
3
+ size 24436530
checkpoint-159000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8719ef97bc06e49eb8f34491f9f19c7f80db01ff7a0d4c6515ad620cc8e837c6
3
+ size 6481
checkpoint-160000/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ar_steps": 1,
3
+ "architectures": [
4
+ "DiffVLMDiffusion"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "condition_layer": -1,
9
+ "eos_token_id": 151645,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1536,
12
+ "image_token_id": 151655,
13
+ "img_cross_attention_dim": 2048,
14
+ "img_diffuser_depth": 2,
15
+ "img_ffn_dim_multiplier": null,
16
+ "img_hidden_size": 1536,
17
+ "img_multiple_of": 256,
18
+ "img_norm_eps": 1e-05,
19
+ "img_num_attention_heads": 12,
20
+ "img_num_kv_heads": 12,
21
+ "img_qk_norm": true,
22
+ "in_channels": 32,
23
+ "initializer_range": 0.02,
24
+ "inject_img_diffuser": false,
25
+ "input_size": 32,
26
+ "intermediate_size": 8960,
27
+ "layer_group_size": 7,
28
+ "layerwise_start_idx": 0,
29
+ "lora_alpha": 128,
30
+ "lora_bias": "none",
31
+ "lora_dropout": 0.05,
32
+ "lora_enable": false,
33
+ "lora_r": 64,
34
+ "max_position_embeddings": 32768,
35
+ "max_window_layers": 28,
36
+ "model_type": "qwen2_vl",
37
+ "non_linearity": 1,
38
+ "norm_elementwise_affine": true,
39
+ "num_attention_heads": 12,
40
+ "num_hidden_layers": 28,
41
+ "num_key_value_heads": 2,
42
+ "patch_size": 1,
43
+ "repa_coeff": 0.1,
44
+ "repa_layers": "2",
45
+ "repa_shared": false,
46
+ "rms_norm_eps": 1e-06,
47
+ "rope_scaling": {
48
+ "mrope_section": [
49
+ 16,
50
+ 24,
51
+ 24
52
+ ],
53
+ "rope_type": "default",
54
+ "type": "default"
55
+ },
56
+ "rope_theta": 1000000.0,
57
+ "sample_size": 128,
58
+ "sampling_steps": 28,
59
+ "sliding_window": null,
60
+ "tie_word_embeddings": true,
61
+ "torch_dtype": "bfloat16",
62
+ "transformers_version": "4.47.0",
63
+ "use_cache": true,
64
+ "use_repa": false,
65
+ "use_residual_attn": true,
66
+ "use_sliding_window": false,
67
+ "vae_path": "mit-han-lab/dc-ae-f32c32-in-1.0-diffusers",
68
+ "video_token_id": 151656,
69
+ "vision_config": {
70
+ "hidden_size": 1536,
71
+ "in_chans": 3,
72
+ "model_type": "qwen2_vl",
73
+ "spatial_patch_size": 14
74
+ },
75
+ "vision_end_token_id": 151653,
76
+ "vision_start_token_id": 151652,
77
+ "vision_token_id": 151654,
78
+ "vocab_size": 151936
79
+ }
checkpoint-160000/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
+ "transformers_version": "4.47.0"
6
+ }
checkpoint-160000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e65e71c9631e89fdfff958ded4c64ebcf49d3190d8742d56f3aca3960d9e731
3
+ size 4411759432
checkpoint-160000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30ac216fce7a8ba9240e31d55d8e134401dafb90011f0faf08c4b60f98a108c2
3
+ size 6332050591
checkpoint-160000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68d208393d780ff3b41275343c9112d766850d2648dab0495548822f5d8b4496
3
+ size 16389
checkpoint-160000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dfec107ffdb499837d80b57f8e41fcc99a1db3b259e3e17d327bf13325afff2
3
+ size 16389
checkpoint-160000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb84dbf932c0f708a800042f074b63e9ee1a8c09ffd87c7dbc6454c9a523856e
3
+ size 16389
checkpoint-160000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dc1f2c0c40fa9f4ba60e2e617a4d5a5a8c034c618d9941730e8774038e33b6a
3
+ size 16389
checkpoint-160000/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ddcc1feba1167f21748f60037617e44f9945b91c15136f47519df026ee6b0fd
3
+ size 16389
checkpoint-160000/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d125e317428814927fc4308b5114f55a667927d0e069836da6a202c2e60ccf67
3
+ size 16389
checkpoint-160000/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e66cfcfac862eb6d873cc10fb5edaf57d6311c5aaafbdf2d7cef78dd84dce0b7
3
+ size 16389
checkpoint-160000/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd66a5b1ade5e469b812fdb1a1077b7036ad97e486ed59cd66b55fa6b1631695
3
+ size 16389
checkpoint-160000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f5214d34f38228e255b42310eb640f38268c592146b1fa30394548c89a82a0d
3
+ size 1465
checkpoint-160000/trainer_state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbcd1ae5b5d8cff2b1509c37f52848ce4d069dcfaff7417a5879874799bf8501
3
+ size 24591211
checkpoint-160000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8719ef97bc06e49eb8f34491f9f19c7f80db01ff7a0d4c6515ad620cc8e837c6
3
+ size 6481