winglian committed on
Commit
e450524
·
verified ·
1 Parent(s): 25e440b

Converted ./Llama-4-Maverick-17B-128E-Linearized to BNB with nf4 and bf16

Browse files
Files changed (45) hide show
  1. config.json +94 -61
  2. model-00001-of-00043.safetensors +3 -0
  3. model-00002-of-00043.safetensors +3 -0
  4. model-00003-of-00043.safetensors +3 -0
  5. model-00004-of-00043.safetensors +3 -0
  6. model-00005-of-00043.safetensors +3 -0
  7. model-00006-of-00043.safetensors +3 -0
  8. model-00007-of-00043.safetensors +3 -0
  9. model-00008-of-00043.safetensors +3 -0
  10. model-00009-of-00043.safetensors +3 -0
  11. model-00010-of-00043.safetensors +3 -0
  12. model-00011-of-00043.safetensors +3 -0
  13. model-00012-of-00043.safetensors +3 -0
  14. model-00013-of-00043.safetensors +3 -0
  15. model-00014-of-00043.safetensors +3 -0
  16. model-00015-of-00043.safetensors +3 -0
  17. model-00016-of-00043.safetensors +3 -0
  18. model-00017-of-00043.safetensors +3 -0
  19. model-00018-of-00043.safetensors +3 -0
  20. model-00019-of-00043.safetensors +3 -0
  21. model-00020-of-00043.safetensors +3 -0
  22. model-00021-of-00043.safetensors +3 -0
  23. model-00022-of-00043.safetensors +3 -0
  24. model-00023-of-00043.safetensors +3 -0
  25. model-00024-of-00043.safetensors +3 -0
  26. model-00025-of-00043.safetensors +3 -0
  27. model-00026-of-00043.safetensors +3 -0
  28. model-00027-of-00043.safetensors +3 -0
  29. model-00028-of-00043.safetensors +3 -0
  30. model-00029-of-00043.safetensors +3 -0
  31. model-00030-of-00043.safetensors +3 -0
  32. model-00031-of-00043.safetensors +3 -0
  33. model-00032-of-00043.safetensors +3 -0
  34. model-00033-of-00043.safetensors +3 -0
  35. model-00034-of-00043.safetensors +3 -0
  36. model-00035-of-00043.safetensors +3 -0
  37. model-00036-of-00043.safetensors +3 -0
  38. model-00037-of-00043.safetensors +3 -0
  39. model-00038-of-00043.safetensors +3 -0
  40. model-00039-of-00043.safetensors +3 -0
  41. model-00040-of-00043.safetensors +3 -0
  42. model-00041-of-00043.safetensors +3 -0
  43. model-00042-of-00043.safetensors +3 -0
  44. model-00043-of-00043.safetensors +3 -0
  45. model.safetensors.index.json +0 -0
config.json CHANGED
@@ -1,59 +1,11 @@
1
  {
2
  "architectures": [
3
- "Llama4ForCausalLM"
4
  ],
5
- "attention_bias": false,
6
- "attention_chunk_size": 8192,
7
- "attention_dropout": 0.0,
8
- "attn_scale": 0.1,
9
- "attn_temperature_tuning": 4,
10
- "bos_token_id": 200000,
11
- "eos_token_id": 200001,
12
- "floor_scale": 8192,
13
- "for_llm_compressor": false,
14
- "head_dim": 128,
15
- "hidden_act": "silu",
16
- "hidden_size": 5120,
17
- "initializer_range": 0.02,
18
- "interleave_moe_layer_step": 2,
19
- "intermediate_size": 8192,
20
- "intermediate_size_mlp": 16384,
21
- "max_position_embeddings": 262144,
22
- "model_type": "llama4_text",
23
- "moe_layers": [
24
- 1,
25
- 3,
26
- 5,
27
- 7,
28
- 9,
29
- 11,
30
- 13,
31
- 15,
32
- 17,
33
- 19,
34
- 21,
35
- 23,
36
- 25,
37
- 27,
38
- 29,
39
- 31,
40
- 33,
41
- 35,
42
- 37,
43
- 39,
44
- 41,
45
- 43,
46
- 45,
47
- 47
48
- ],
49
- "no_rope_layers": 4,
50
- "num_attention_heads": 40,
51
- "num_experts_per_tok": 1,
52
- "num_hidden_layers": 48,
53
- "num_key_value_heads": 8,
54
- "num_local_experts": 128,
55
- "output_router_logits": false,
56
- "pad_token_id": 200018,
57
  "quantization_config": {
58
  "_load_in_4bit": true,
59
  "_load_in_8bit": false,
@@ -69,15 +21,96 @@
69
  "load_in_8bit": false,
70
  "quant_method": "bitsandbytes"
71
  },
72
- "rms_norm_eps": 1e-05,
73
- "rope_scaling": null,
74
- "rope_theta": 500000.0,
75
- "router_aux_loss_coef": 0.001,
76
- "router_jitter_noise": 0.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  "tie_word_embeddings": false,
78
  "torch_dtype": "bfloat16",
79
  "transformers_version": "4.51.0",
80
- "use_cache": true,
81
- "use_qk_norm": false,
82
- "vocab_size": 202048
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  }
 
1
  {
2
  "architectures": [
3
+ "Llama4ForConditionalGeneration"
4
  ],
5
+ "boi_token_index": 200080,
6
+ "eoi_token_index": 200081,
7
+ "image_token_index": 200092,
8
+ "model_type": "llama4",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "quantization_config": {
10
  "_load_in_4bit": true,
11
  "_load_in_8bit": false,
 
21
  "load_in_8bit": false,
22
  "quant_method": "bitsandbytes"
23
  },
24
+ "text_config": {
25
+ "_attn_implementation_autoset": true,
26
+ "attention_bias": false,
27
+ "attention_chunk_size": 8192,
28
+ "attention_dropout": 0.0,
29
+ "attn_scale": 0.1,
30
+ "attn_temperature_tuning": 4,
31
+ "bos_token_id": 200000,
32
+ "eos_token_id": 200001,
33
+ "floor_scale": 8192,
34
+ "for_llm_compressor": false,
35
+ "head_dim": 128,
36
+ "hidden_act": "silu",
37
+ "hidden_size": 5120,
38
+ "initializer_range": 0.02,
39
+ "interleave_moe_layer_step": 2,
40
+ "intermediate_size": 8192,
41
+ "intermediate_size_mlp": 16384,
42
+ "max_position_embeddings": 262144,
43
+ "model_type": "llama4_text",
44
+ "moe_layers": [
45
+ 1,
46
+ 3,
47
+ 5,
48
+ 7,
49
+ 9,
50
+ 11,
51
+ 13,
52
+ 15,
53
+ 17,
54
+ 19,
55
+ 21,
56
+ 23,
57
+ 25,
58
+ 27,
59
+ 29,
60
+ 31,
61
+ 33,
62
+ 35,
63
+ 37,
64
+ 39,
65
+ 41,
66
+ 43,
67
+ 45,
68
+ 47
69
+ ],
70
+ "no_rope_layers": 4,
71
+ "num_attention_heads": 40,
72
+ "num_experts_per_tok": 1,
73
+ "num_hidden_layers": 48,
74
+ "num_key_value_heads": 8,
75
+ "num_local_experts": 128,
76
+ "output_router_logits": false,
77
+ "pad_token_id": 200018,
78
+ "rms_norm_eps": 1e-05,
79
+ "rope_scaling": null,
80
+ "rope_theta": 500000.0,
81
+ "router_aux_loss_coef": 0.001,
82
+ "router_jitter_noise": 0.0,
83
+ "torch_dtype": "bfloat16",
84
+ "use_cache": true,
85
+ "use_qk_norm": false,
86
+ "vocab_size": 202048
87
+ },
88
  "tie_word_embeddings": false,
89
  "torch_dtype": "bfloat16",
90
  "transformers_version": "4.51.0",
91
+ "vision_config": {
92
+ "_attn_implementation_autoset": true,
93
+ "attention_dropout": 0.0,
94
+ "hidden_act": "gelu",
95
+ "hidden_size": 1408,
96
+ "image_size": 336,
97
+ "initializer_range": 0.02,
98
+ "intermediate_size": 5632,
99
+ "model_type": "llama4_vision_model",
100
+ "multi_modal_projector_bias": false,
101
+ "norm_eps": 1e-05,
102
+ "num_attention_heads": 16,
103
+ "num_channels": 3,
104
+ "num_hidden_layers": 34,
105
+ "patch_size": 14,
106
+ "pixel_shuffle_ratio": 0.5,
107
+ "projector_dropout": 0.0,
108
+ "projector_input_dim": 4096,
109
+ "projector_output_dim": 4096,
110
+ "rope_theta": 10000,
111
+ "torch_dtype": "bfloat16",
112
+ "vision_feature_layer": -1,
113
+ "vision_feature_select_strategy": "default",
114
+ "vision_output_dim": 4096
115
+ }
116
  }
model-00001-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52e64309e83c4af80b9f321be61eb4d646195e84e5d3f53558715529a36d068f
3
+ size 4988287884
model-00002-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd0841b2aa28b7de0b95d15d3fd60f8cab5c7ec143cfcb087186bc4230b03503
3
+ size 4998677980
model-00003-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb2b9667111f0658a53a0420ca4a614325891e8eaa76af71a1d6251da2213f1
3
+ size 4999063964
model-00004-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dbcdbdb8db66822b9b684eb94f8bc69f6909383e877c02c3fb5875eb9db552c
3
+ size 4999063496
model-00005-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32182dcf6a824001a1277b77b0cd326016d2ac2f0961ffcc1b7dc1e2155d1189
3
+ size 4998678149
model-00006-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:583fe5c2b0689a718f13f7634044b100889a4c8ed2ebf9172564d9dc4f0138c3
3
+ size 4999064201
model-00007-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11cb68a4e190cca03a956f3e4fc004761a3b447cc0e71d77980e01e038b7d7da
3
+ size 4998678014
model-00008-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8728430ac5299f37f0f440ec5f04e91bef4a5c891b648f59bbbbbf2d0326fe5c
3
+ size 4999064372
model-00009-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8db8445a56464c94197d1fa5591d5b6b4f2c425c9a056e27c840d9efd0e28cd9
3
+ size 4998678082
model-00010-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce39d56050fa738257c39ce28e7d1e4cc8f54e571daed8c8f3d025b66e60ac57
3
+ size 4999064801
model-00011-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c68c073f740bae3087b2eea0e2db344f82e8bb2223217777a294f81210caf5b1
3
+ size 4999065282
model-00012-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df16a65faceddaecf6d6e1323094e9fafb1db24b6b40538c977695c511cfcae3
3
+ size 4998679384
model-00013-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc5c838b92b947ac6b4bbbae94d79f275ca33821dd8ac543c8ebfcdce6bc43c8
3
+ size 4999065827
model-00014-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b376cc819d8f1ef04fe1e248d8175b7ccae4f9ace86b26476aedbf154cae3777
3
+ size 4998679357
model-00015-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2632d2861d776cd14172efe647d8733df2d7c278c6e9bfebf5c707fedf56760
3
+ size 4999065382
model-00016-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6131deaf2b8206d74f6af1cd25207d997f54c12904d942842e79467e295d2adc
3
+ size 4999064903
model-00017-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba0db745f062e5fca300ad23bf9e0e6aa8aea841e012d4b817c4a2040dc53be6
3
+ size 4998679551
model-00018-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a73ae8a88b6f24f019b787a865a96408f49f735a588c686250bcd7d2ec0007d8
3
+ size 4999065639
model-00019-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99e0d11b2d57413b41a5be384043b4a727e3b427230ba86fc68cc3880277df72
3
+ size 4998679405
model-00020-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f7070f22bbb60933804c9dfa23215bfd2a75a4c0b2b1d47227af6714962576d
3
+ size 4999065778
model-00021-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c687ce4d4491c560942a7bee973f83f0828198fbe11e0a5f383a0b9daa315f40
3
+ size 4998679481
model-00022-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61341b46c7e5676a53cfdec41ddbfda4f08ebe2a6736d8d9384484ce9e7ea962
3
+ size 4999064899
model-00023-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63ab87cff41baf937dd7962fdbc831a8459b651c1b55068992621d5244f460ea
3
+ size 4999065295
model-00024-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fbb5d6becae09f2549525c4a5ab43a7b4ea58eef2e0ac82da8a5cd9155b99dc
3
+ size 4998679395
model-00025-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f84bbcbacf4279ade7d80a0f27ef05b9afcc3d24bcf7b3470b32b9a346e3969f
3
+ size 4999065811
model-00026-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dc8270e131100a1c6aa688bd5287ac96c1c53d6b18877625e2f571f8c49c46c
3
+ size 4998679365
model-00027-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b3ce371cd44b2f4e398de391a066babe5c0e534f9f6fd09e7254e5fc5869167
3
+ size 4999065376
model-00028-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96cb2ba03cfb694e3e2a68c047cde2cc6f04132f8c59d36b502fd0f63f2644a1
3
+ size 4999064895
model-00029-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8f1303f24962cdf72f7096385abd3ed18f16b435ba54ba39af3264eaa54e592
3
+ size 4998679545
model-00030-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56d8d34847c5f6250aa5cc7678f0694ba6f61f8ddcfaff9a405d338d018e6d78
3
+ size 4999065631
model-00031-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63392cc6894830e8744e75bf36e3520c8fd85975b08a26a74cb5971300e2efd5
3
+ size 4998679389
model-00032-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d44d81c8d386e61bb7a663b7372ddf7ba41b4c83e1b56d72fbb41bab3d2382d3
3
+ size 4999065797
model-00033-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa31494ff10acaab06f3f620e97e267e5e45f478ca04d13bd3e88945dc8b7ede
3
+ size 4998679471
model-00034-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6295bf9da356cc452ae8bd7bc2ddba09b07f4b0c54436a20848a1b656a9e35bd
3
+ size 4999064901
model-00035-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55cf46efbf8ffa8316f090205c2623f4808fac495be516fca251a7ae7a9e6bf7
3
+ size 4999065267
model-00036-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca74b532fdb543cd1808cf0eb3df1f06b9b5ae12919eb819547b52c767551aea
3
+ size 4998679394
model-00037-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe2dd5d96dd9030e64f57da0fd3d6c92eba736d481bd26bce80941032ca063d0
3
+ size 4999065838
model-00038-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dc14fe72f19de9a2984dbc595cfb51fe35df3b5a26b448763071ecc5dd35a24
3
+ size 4998679359
model-00039-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d3fc1be1ef564ff3552ebddcff598f1deaf0c862a6af887c562bb98e5a3eeaf
3
+ size 4999065381
model-00040-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eb8fa55f1fe41516b4316ca009a811b89e985a7e7b3e6a99ac38fe970c13331
3
+ size 4999064896
model-00041-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd3081ec0819c632b2a6ae07cfeb02f0f58b964320431ac98573266da941e5b7
3
+ size 4998679540
model-00042-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae4518a87d933b5ff81bcf08a2b350c599dad629d4d30e821caebbe75d9055d4
3
+ size 3246267624
model-00043-of-00043.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f89a7a1a18d043c045c6eb84bdabbb9a8263c5c9c18a85e6fd66d44f2ce61055
3
+ size 2068971664
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff