danielhanchen committed on
Commit
0c50027
·
verified ·
1 Parent(s): d49960f

Add files using upload-large-folder tool

Browse files
Files changed (29) hide show
  1. .gitattributes +27 -0
  2. Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00001-of-00010.gguf +3 -0
  3. Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00002-of-00010.gguf +3 -0
  4. Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00003-of-00010.gguf +3 -0
  5. Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00004-of-00010.gguf +3 -0
  6. Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00005-of-00010.gguf +3 -0
  7. Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00006-of-00010.gguf +3 -0
  8. Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00007-of-00010.gguf +3 -0
  9. Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00008-of-00010.gguf +3 -0
  10. Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00009-of-00010.gguf +3 -0
  11. Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00010-of-00010.gguf +3 -0
  12. Q6_K/DeepSeek-V3-0324-Q6_K-00001-of-00012.gguf +3 -0
  13. Q6_K/DeepSeek-V3-0324-Q6_K-00002-of-00012.gguf +3 -0
  14. Q6_K/DeepSeek-V3-0324-Q6_K-00003-of-00012.gguf +3 -0
  15. Q6_K/DeepSeek-V3-0324-Q6_K-00004-of-00012.gguf +3 -0
  16. Q6_K/DeepSeek-V3-0324-Q6_K-00005-of-00012.gguf +3 -0
  17. Q6_K/DeepSeek-V3-0324-Q6_K-00006-of-00012.gguf +3 -0
  18. Q6_K/DeepSeek-V3-0324-Q6_K-00007-of-00012.gguf +3 -0
  19. Q6_K/DeepSeek-V3-0324-Q6_K-00008-of-00012.gguf +3 -0
  20. Q6_K/DeepSeek-V3-0324-Q6_K-00009-of-00012.gguf +3 -0
  21. Q6_K/DeepSeek-V3-0324-Q6_K-00010-of-00012.gguf +3 -0
  22. Q6_K/DeepSeek-V3-0324-Q6_K-00011-of-00012.gguf +3 -0
  23. Q6_K/DeepSeek-V3-0324-Q6_K-00012-of-00012.gguf +3 -0
  24. UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00001-of-00005.gguf +3 -0
  25. UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00002-of-00005.gguf +3 -0
  26. UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00003-of-00005.gguf +3 -0
  27. UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00004-of-00005.gguf +3 -0
  28. UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00005-of-00005.gguf +3 -0
  29. config.json +70 -0
.gitattributes CHANGED
@@ -69,3 +69,30 @@ Q2_K/DeepSeek-V3-0324-Q2_K-00001-of-00005.gguf filter=lfs diff=lfs merge=lfs -te
69
  Q2_K/DeepSeek-V3-0324-Q2_K-00002-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
70
  Q3_K_M/DeepSeek-V3-0324-Q3_K_M-00001-of-00007.gguf filter=lfs diff=lfs merge=lfs -text
71
  Q4_K_M/DeepSeek-V3-0324-Q4_K_M-00003-of-00009.gguf filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  Q2_K/DeepSeek-V3-0324-Q2_K-00002-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
70
  Q3_K_M/DeepSeek-V3-0324-Q3_K_M-00001-of-00007.gguf filter=lfs diff=lfs merge=lfs -text
71
  Q4_K_M/DeepSeek-V3-0324-Q4_K_M-00003-of-00009.gguf filter=lfs diff=lfs merge=lfs -text
72
+ Q6_K/DeepSeek-V3-0324-Q6_K-00012-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
73
+ UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00005-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
74
+ Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00010-of-00010.gguf filter=lfs diff=lfs merge=lfs -text
75
+ Q6_K/DeepSeek-V3-0324-Q6_K-00011-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
76
+ Q6_K/DeepSeek-V3-0324-Q6_K-00010-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
77
+ Q6_K/DeepSeek-V3-0324-Q6_K-00004-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
78
+ Q6_K/DeepSeek-V3-0324-Q6_K-00009-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
79
+ Q6_K/DeepSeek-V3-0324-Q6_K-00001-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
80
+ Q6_K/DeepSeek-V3-0324-Q6_K-00008-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
81
+ Q6_K/DeepSeek-V3-0324-Q6_K-00005-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
82
+ Q6_K/DeepSeek-V3-0324-Q6_K-00006-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
83
+ Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00004-of-00010.gguf filter=lfs diff=lfs merge=lfs -text
84
+ Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00005-of-00010.gguf filter=lfs diff=lfs merge=lfs -text
85
+ Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00003-of-00010.gguf filter=lfs diff=lfs merge=lfs -text
86
+ Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00007-of-00010.gguf filter=lfs diff=lfs merge=lfs -text
87
+ Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00008-of-00010.gguf filter=lfs diff=lfs merge=lfs -text
88
+ Q6_K/DeepSeek-V3-0324-Q6_K-00007-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
89
+ Q6_K/DeepSeek-V3-0324-Q6_K-00003-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
90
+ Q6_K/DeepSeek-V3-0324-Q6_K-00002-of-00012.gguf filter=lfs diff=lfs merge=lfs -text
91
+ Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00009-of-00010.gguf filter=lfs diff=lfs merge=lfs -text
92
+ Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00001-of-00010.gguf filter=lfs diff=lfs merge=lfs -text
93
+ Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00006-of-00010.gguf filter=lfs diff=lfs merge=lfs -text
94
+ Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00002-of-00010.gguf filter=lfs diff=lfs merge=lfs -text
95
+ UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00001-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
96
+ UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00002-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
97
+ UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00004-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
98
+ UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00003-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00001-of-00010.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1fd13f61dc17ab3e5a1b6f8d73390acc5cae36c6a8741e32040519ce68984b
3
+ size 49573632960
Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00002-of-00010.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84e8614f0696443d04e8b31739922f960ca9837f4aa199d4b75b9af48de265f3
3
+ size 48506507776
Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00003-of-00010.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db657e111a648e15328fb47dc0e50ade96bc162679670f9deb3fb12b44ddd64a
3
+ size 48506507808
Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00004-of-00010.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8ddf3018c75e7955617e794d2cd2fdec3d92410b559960144442c73cc2a32cc
3
+ size 48506507808
Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00005-of-00010.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a855e529690fbf8504bb47a88587243f58fc05a9a9944b70866828f7c2608cc
3
+ size 48506507808
Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00006-of-00010.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13899d58e2c13a6005928b9dc4ccfc48f543db955396882a6ebcbd2749e7b06b
3
+ size 48506507808
Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00007-of-00010.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b76fc595be23703f6092cb051136df7c4c6cf317b30433d5540cfc49b944b1a
3
+ size 48506507808
Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00008-of-00010.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:379b3dc2c215918442649d00a85430c4fbaa551de858c965c57a566815c911dc
3
+ size 48506507808
Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00009-of-00010.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e026951941323f399668efe99ab3647f0d8baaa39b1d4881b8bef8c8afabfe65
3
+ size 49999602080
Q5_K_M/DeepSeek-V3-0324-Q5_K_M-00010-of-00010.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aca14897a0164d540f3cab79940c5fd9664363c307c43949c3911d6377f89160
3
+ size 36277768864
Q6_K/DeepSeek-V3-0324-Q6_K-00001-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:912f894b79597237d809a5e6034f09ffabfca78e321438e74bc2bf0b37f5bdb2
3
+ size 47094115200
Q6_K/DeepSeek-V3-0324-Q6_K-00002-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c47d9a1933722505157935c299b89485e6de8a2f86ff61146777deae9d13213
3
+ size 47227297696
Q6_K/DeepSeek-V3-0324-Q6_K-00003-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe8b8b5b37f4f6597563246b3192537f4a51bd0b0f25120fefb7fa1cea261dc0
3
+ size 47227297760
Q6_K/DeepSeek-V3-0324-Q6_K-00004-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fce68bb41db1530b02c6bdfe2b19cef392922ecc2e04df1e2b31314b878c5ef4
3
+ size 47227297760
Q6_K/DeepSeek-V3-0324-Q6_K-00005-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a2c07235c80a9039d59f9f365c70a830c5144e89cb8d3ed9dc0fab39a0683ff
3
+ size 47227297760
Q6_K/DeepSeek-V3-0324-Q6_K-00006-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:580b1c3dcd1ba43bda874b5e0373930ebaf737e83a2752092967075418e77d6f
3
+ size 47227297760
Q6_K/DeepSeek-V3-0324-Q6_K-00007-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d03b6f8897a19f08bbbb2fcf16760c6d3616ba089c078fa2cb25f7956d481f29
3
+ size 47227297760
Q6_K/DeepSeek-V3-0324-Q6_K-00008-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:617d9e94e91b67fafc3417692e68ca69049f0313143c6e17616ca0da8ae2aefe
3
+ size 47227297760
Q6_K/DeepSeek-V3-0324-Q6_K-00009-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42927844820ae7f91701617bf39024109eda9338bea4f40a99d47c4f62c0cb3e
3
+ size 47227297760
Q6_K/DeepSeek-V3-0324-Q6_K-00010-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fef82962924c0a8b6e3ea07df6b3def1912a747a4c0172a792c603b817ed3af
3
+ size 47227297760
Q6_K/DeepSeek-V3-0324-Q6_K-00011-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c40dd87ff4f3e097041f32d187ba4377c1544a1e2df35f9d5bfe70ea589bd844
3
+ size 47227297760
Q6_K/DeepSeek-V3-0324-Q6_K-00012-of-00012.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66631b3864ca7601d31fe0346161b62b91da5ce98dff1b42e051ab904e55df8c
3
+ size 31431234528
UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00001-of-00005.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94d972b3ec13ee8f3b37e4d087da1d49c1f0494125ef9f3b5c206eb9e6d223a1
3
+ size 49797297120
UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00002-of-00005.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:449a92150fe18101352351d80cd954717f470611fa5b1206babc23d76f295361
3
+ size 49997709824
UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00003-of-00005.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deec2e79c58ca7bee52533c2fe1a91045244afe31572bf3ce4a4f342173f3726
3
+ size 49997709824
UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00004-of-00005.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dca617e27db5a0ceba9cb961cc64d5d61b28049fa11a6aea22e473275e1cec1
3
+ size 49997709824
UD-Q2_K_XL/DeepSeek-V3-0324-UD-Q2_K_XL-00005-of-00005.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ad3af5a9111947e15c8f633cfad4a161a71be0c26b078a4692a89d8db5e52c9
3
+ size 26811168832
config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_deepseek.DeepseekV3Config",
9
+ "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
+ },
12
+ "aux_loss_alpha": 0.001,
13
+ "bos_token_id": 0,
14
+ "eos_token_id": 1,
15
+ "ep_size": 1,
16
+ "first_k_dense_replace": 3,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 7168,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 18432,
21
+ "kv_lora_rank": 512,
22
+ "max_position_embeddings": 163840,
23
+ "model_type": "deepseek_v3",
24
+ "moe_intermediate_size": 2048,
25
+ "moe_layer_freq": 1,
26
+ "n_group": 8,
27
+ "n_routed_experts": 256,
28
+ "n_shared_experts": 1,
29
+ "norm_topk_prob": true,
30
+ "num_attention_heads": 128,
31
+ "num_experts_per_tok": 8,
32
+ "num_hidden_layers": 61,
33
+ "num_key_value_heads": 128,
34
+ "num_nextn_predict_layers": 1,
35
+ "pretraining_tp": 1,
36
+ "q_lora_rank": 1536,
37
+ "qk_nope_head_dim": 128,
38
+ "qk_rope_head_dim": 64,
39
+ "quantization_config": {
40
+ "activation_scheme": "dynamic",
41
+ "fmt": "e4m3",
42
+ "quant_method": "fp8",
43
+ "weight_block_size": [
44
+ 128,
45
+ 128
46
+ ]
47
+ },
48
+ "rms_norm_eps": 1e-06,
49
+ "rope_scaling": {
50
+ "beta_fast": 32,
51
+ "beta_slow": 1,
52
+ "factor": 40,
53
+ "mscale": 1.0,
54
+ "mscale_all_dim": 1.0,
55
+ "original_max_position_embeddings": 4096,
56
+ "type": "yarn"
57
+ },
58
+ "rope_theta": 10000,
59
+ "routed_scaling_factor": 2.5,
60
+ "scoring_func": "sigmoid",
61
+ "seq_aux": true,
62
+ "tie_word_embeddings": false,
63
+ "topk_group": 4,
64
+ "topk_method": "noaux_tc",
65
+ "torch_dtype": "bfloat16",
66
+ "transformers_version": "4.46.3",
67
+ "use_cache": true,
68
+ "v_head_dim": 128,
69
+ "vocab_size": 129280
70
+ }