at676 committed on
Commit
5dd7c8d
·
verified ·
1 Parent(s): 2e9bddf

Add files using upload-large-folder tool

Browse files
Files changed (47) hide show
  1. config.json +151 -0
  2. generation_config.json +13 -0
  3. model-00001-of-00044.safetensors +3 -0
  4. model-00002-of-00044.safetensors +3 -0
  5. model-00003-of-00044.safetensors +3 -0
  6. model-00004-of-00044.safetensors +3 -0
  7. model-00005-of-00044.safetensors +3 -0
  8. model-00006-of-00044.safetensors +3 -0
  9. model-00007-of-00044.safetensors +3 -0
  10. model-00008-of-00044.safetensors +3 -0
  11. model-00009-of-00044.safetensors +3 -0
  12. model-00010-of-00044.safetensors +3 -0
  13. model-00011-of-00044.safetensors +3 -0
  14. model-00012-of-00044.safetensors +3 -0
  15. model-00013-of-00044.safetensors +3 -0
  16. model-00014-of-00044.safetensors +3 -0
  17. model-00015-of-00044.safetensors +3 -0
  18. model-00016-of-00044.safetensors +3 -0
  19. model-00017-of-00044.safetensors +3 -0
  20. model-00018-of-00044.safetensors +3 -0
  21. model-00019-of-00044.safetensors +3 -0
  22. model-00020-of-00044.safetensors +3 -0
  23. model-00021-of-00044.safetensors +3 -0
  24. model-00022-of-00044.safetensors +3 -0
  25. model-00023-of-00044.safetensors +3 -0
  26. model-00024-of-00044.safetensors +3 -0
  27. model-00025-of-00044.safetensors +3 -0
  28. model-00026-of-00044.safetensors +3 -0
  29. model-00027-of-00044.safetensors +3 -0
  30. model-00028-of-00044.safetensors +3 -0
  31. model-00029-of-00044.safetensors +3 -0
  32. model-00030-of-00044.safetensors +3 -0
  33. model-00031-of-00044.safetensors +3 -0
  34. model-00032-of-00044.safetensors +3 -0
  35. model-00033-of-00044.safetensors +3 -0
  36. model-00034-of-00044.safetensors +3 -0
  37. model-00035-of-00044.safetensors +3 -0
  38. model-00036-of-00044.safetensors +3 -0
  39. model-00037-of-00044.safetensors +3 -0
  40. model-00038-of-00044.safetensors +3 -0
  41. model-00039-of-00044.safetensors +3 -0
  42. model-00040-of-00044.safetensors +3 -0
  43. model-00041-of-00044.safetensors +3 -0
  44. model-00042-of-00044.safetensors +3 -0
  45. model-00043-of-00044.safetensors +3 -0
  46. model-00044-of-00044.safetensors +3 -0
  47. model.safetensors.index.json +0 -0
config.json ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Llama4ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_chunk_size": 8192,
7
+ "attention_dropout": 0.0,
8
+ "attn_scale": 0.1,
9
+ "attn_temperature_tuning": 4,
10
+ "bos_token_id": 200000,
11
+ "eos_token_id": [
12
+ 200001,
13
+ 200007,
14
+ 200008
15
+ ],
16
+ "floor_scale": 8192,
17
+ "for_llm_compressor": false,
18
+ "head_dim": 128,
19
+ "hidden_act": "silu",
20
+ "hidden_size": 5120,
21
+ "initializer_range": 0.02,
22
+ "interleave_moe_layer_step": 1,
23
+ "intermediate_size": 8192,
24
+ "intermediate_size_mlp": 16384,
25
+ "max_position_embeddings": 10485760,
26
+ "model_type": "llama4_text",
27
+ "moe_layers": [
28
+ 0,
29
+ 1,
30
+ 2,
31
+ 3,
32
+ 4,
33
+ 5,
34
+ 6,
35
+ 7,
36
+ 8,
37
+ 9,
38
+ 10,
39
+ 11,
40
+ 12,
41
+ 13,
42
+ 14,
43
+ 15,
44
+ 16,
45
+ 17,
46
+ 18,
47
+ 19,
48
+ 20,
49
+ 21,
50
+ 22,
51
+ 23,
52
+ 24,
53
+ 25,
54
+ 26,
55
+ 27,
56
+ 28,
57
+ 29,
58
+ 30,
59
+ 31,
60
+ 32,
61
+ 33,
62
+ 34,
63
+ 35,
64
+ 36,
65
+ 37,
66
+ 38,
67
+ 39,
68
+ 40,
69
+ 41,
70
+ 42,
71
+ 43,
72
+ 44,
73
+ 45,
74
+ 46,
75
+ 47
76
+ ],
77
+ "no_rope_layers": [
78
+ 1,
79
+ 1,
80
+ 1,
81
+ 0,
82
+ 1,
83
+ 1,
84
+ 1,
85
+ 0,
86
+ 1,
87
+ 1,
88
+ 1,
89
+ 0,
90
+ 1,
91
+ 1,
92
+ 1,
93
+ 0,
94
+ 1,
95
+ 1,
96
+ 1,
97
+ 0,
98
+ 1,
99
+ 1,
100
+ 1,
101
+ 0,
102
+ 1,
103
+ 1,
104
+ 1,
105
+ 0,
106
+ 1,
107
+ 1,
108
+ 1,
109
+ 0,
110
+ 1,
111
+ 1,
112
+ 1,
113
+ 0,
114
+ 1,
115
+ 1,
116
+ 1,
117
+ 0,
118
+ 1,
119
+ 1,
120
+ 1,
121
+ 0,
122
+ 1,
123
+ 1,
124
+ 1,
125
+ 0
126
+ ],
127
+ "num_attention_heads": 40,
128
+ "num_experts_per_tok": 1,
129
+ "num_hidden_layers": 48,
130
+ "num_key_value_heads": 8,
131
+ "num_local_experts": 16,
132
+ "output_router_logits": false,
133
+ "pad_token_id": 200018,
134
+ "rms_norm_eps": 1e-05,
135
+ "rope_scaling": {
136
+ "factor": 8.0,
137
+ "high_freq_factor": 4.0,
138
+ "low_freq_factor": 1.0,
139
+ "original_max_position_embeddings": 8192,
140
+ "rope_type": "llama3"
141
+ },
142
+ "rope_theta": 500000.0,
143
+ "router_aux_loss_coef": 0.001,
144
+ "router_jitter_noise": 0.0,
145
+ "tie_word_embeddings": false,
146
+ "torch_dtype": "bfloat16",
147
+ "transformers_version": "4.51.0",
148
+ "use_cache": true,
149
+ "use_qk_norm": true,
150
+ "vocab_size": 202048
151
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 200000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 200001,
6
+ 200007,
7
+ 200008
8
+ ],
9
+ "pad_token_id": 200018,
10
+ "temperature": 0.6,
11
+ "top_p": 0.9,
12
+ "transformers_version": "4.51.0"
13
+ }
model-00001-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd50358f7b04d54c6631caaa43e0db767c1984ca01e5cd25da0e76f3f1db3043
3
+ size 4963046256
model-00002-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f9dcf0764a30196f7c1129ce00928c23fcf2416d6737c4f494cce5dd0335206
3
+ size 4991414616
model-00003-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e82b39599068d1abcaaeb71cada366ffd7a70b78a7ba4e09310ff460330cf36
3
+ size 4991414616
model-00004-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a2c33c64d3183a4c70f2767db6bf3722960c0f4a1140b0e888f45ae81f2b14a
3
+ size 4949656448
model-00005-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed00404b0b076965c511f80ac3efc6a44decef2cca61b6235ac9b2a45d532af7
3
+ size 4991414600
model-00006-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b800312fb91a4268f317661472129771676f1d00495f071163ad99e4cf0f2735
3
+ size 4991414608
model-00007-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af64b39cd2ed4b5dca92e239600611616c514a53ef85b748c606250e747fd192
3
+ size 4991414616
model-00008-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33adf65df32ac2b980c63add82f485234e1ea4788a679b9de334c807b3a40684
3
+ size 4991414624
model-00009-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10c04cfbf75b8f6ea86ac28d6eb68466518843296ef00f9d20183058881fca72
3
+ size 4991414624
model-00010-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec967d79a778c495ff528b2746658f1af175a4f2d1ab98baeaa54c9ea48c3de3
3
+ size 4991414664
model-00011-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70703dda8bee92469ad5e961f7b44686941a8f680fca0fe43cf6c122fdc92fdb
3
+ size 4991578648
model-00012-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e87769d0928e05bda10bd7f3d581be9b87e7236d647feaaa458625f597b54538
3
+ size 4949492544
model-00013-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d4b311c45078ec5c3dff7ba97f3578f4d0b9bdbed8e2a68d0fc9710c91a1c31
3
+ size 4991414672
model-00014-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97b9b3d64707047ef17037681ab1a5114b29f55555964cbadf8c89b33414a286
3
+ size 4991414680
model-00015-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d08f43b9d564dccc5903799d11dc6dd8d2ac11d77e7483d67eed60517c8c1d87
3
+ size 4991414680
model-00016-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9179f7d33f775056ce230ab6212a44fe32213a6c3f3744f92b7679bf35f9a7bc
3
+ size 4991414688
model-00017-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:328712aceb5fd1fc943fd0fb8f4f79df7fcdd6ea0175d6bbb0beb32da8c83d48
3
+ size 4991414680
model-00018-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbed8adc3090cf9d3ccaeb29fffbf6469317fd1f4a831f09b76bcb1bc3800e75
3
+ size 4991414688
model-00019-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:553e44e8f47115ed7e16b45cdd082b30ae9ef51bf5d268963f9258069fc44f04
3
+ size 4949656528
model-00020-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4891b8b645928498b01ed4c467c77aa74c596f69301d8243184fbc970de95d9d
3
+ size 4991414672
model-00021-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:673da3dd1f8bfd9a2e8bd5d7712075dc386c2c9628715a67d92eeb387e8b523b
3
+ size 4991414672
model-00022-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:282a86c437b019db998265a8b92ceeb39cc512df3bb16025eb63c2aee23746f2
3
+ size 4991414680
model-00023-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74d92e29f7f06fb1d7160ba820332b99229a01b4ed142ab4c4b0c908aefe4756
3
+ size 4991414688
model-00024-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:703aa46e69cf7de7a5237b3c80b27a0b233bf45cadec99e871744ca00aab7013
3
+ size 4991414688
model-00025-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4ec76f6449d6aab7f02eda88078fede564d4d1f6175bb3633fc1a11de694bad
3
+ size 4991414680
model-00026-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0af1f92f4b68083d40250ecce7ec4286f111d9426077ce05438e4da5cd13459e
3
+ size 4991578648
model-00027-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7e1145dffb7355f312e0cdfc8cc9bc3a95bd0aef309bb9a1e8495b76407f459
3
+ size 4949492552
model-00028-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bd0d47e7c57f5d8c82b42e5d9c19bb34a1fa71bf1faed1c9894a6d04ab067cf
3
+ size 4991414672
model-00029-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34795b75833650f796df084308c2e51cd58763b9123b219e17363f4f314b9da4
3
+ size 4991414680
model-00030-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f94b4c7006396b2c1cf84a39602d155548309272e51ca5e0c37178b10a45b372
3
+ size 4991414680
model-00031-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70f903d57649045be12883adcc0ea46cf38ebb07c70ed40ce33ba5b43e12205f
3
+ size 4991414688
model-00032-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b00f43d8c41266e7301f11b767f718cdb484c35895e73b7b884629238902b2
3
+ size 4991414680
model-00033-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14b9e2f935376c1e3130d106625a8c24a8106e38499c74b0d234c0efba7029b4
3
+ size 4991414680
model-00034-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e89764b06d5211a05ac4264d7eb39bf0721301d47cbb57eb019e762e92b30b5
3
+ size 4981113824
model-00035-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63e252317b7e8f92fe8cbde0a8f25c96a1db3d474b2e7736f14921b599c0c2b8
3
+ size 4959957376
model-00036-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b0d04fe43e963c5613d45580858d5ea76d30f667dcae5cc82cbb382f83df876
3
+ size 4991414672
model-00037-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c176292a3ba61d8dcff8c86ca0a86a6954fe0fefa686ca47b261305309aa090
3
+ size 4991414680
model-00038-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e640208e3053fbb71d02b6bbab46f80a8565b7da35b0df43870166fefe28e23
3
+ size 4991414680
model-00039-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89f082f4cfe01f5ac7d3bfd60645caaafbc3ce84e80640ea1da489e56c742a19
3
+ size 4991414688
model-00040-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bcd8a9fcad73ed0e71325940036a699a1bdac97f4394f8ecfae5c90904c8565
3
+ size 4991414680
model-00041-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13a436dc86cb6f961a9e4de792200d26b024c7a312896a0133f19e3869fc7825
3
+ size 4991578640
model-00042-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65e93459f349a110a571f32ddb8a7d45e7ad77e7a365be37f4a7c0788808df9a
3
+ size 4949492560
model-00043-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b0c7280dd6e94f8dce86daa47b5b5f47685538f8f81210f5c0ea925fc7d36a
3
+ size 4110619496
model-00044-of-00044.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db44ae776dad0ffe96d1c71f2dad1c88f643deb1d07efc90d38d54e04668f57b
3
+ size 2068971648
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff