Update models
Browse files
- .gitattributes +15 -0
- DeepSeek-R1-Distill-Qwen-32B-Q2_K.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-Q3_K_L.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-Q3_K_M.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-Q3_K_S.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-Q4_0.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-Q4_K_S.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-Q5_0.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-Q5_K_M.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-Q5_K_S.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-Q6_K.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-Q8_0.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-f16-00001-of-00003.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-f16-00002-of-00003.gguf +3 -0
- DeepSeek-R1-Distill-Qwen-32B-f16-00003-of-00003.gguf +3 -0
- config.json +27 -0
.gitattributes
CHANGED
@@ -33,3 +33,18 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
DeepSeek-R1-Distill-Qwen-32B-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
|
37 |
+
DeepSeek-R1-Distill-Qwen-32B-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
|
38 |
+
DeepSeek-R1-Distill-Qwen-32B-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
39 |
+
DeepSeek-R1-Distill-Qwen-32B-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
40 |
+
DeepSeek-R1-Distill-Qwen-32B-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
|
41 |
+
DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
42 |
+
DeepSeek-R1-Distill-Qwen-32B-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
43 |
+
DeepSeek-R1-Distill-Qwen-32B-Q5_0.gguf filter=lfs diff=lfs merge=lfs -text
|
44 |
+
DeepSeek-R1-Distill-Qwen-32B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
45 |
+
DeepSeek-R1-Distill-Qwen-32B-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
46 |
+
DeepSeek-R1-Distill-Qwen-32B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
|
47 |
+
DeepSeek-R1-Distill-Qwen-32B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
|
48 |
+
DeepSeek-R1-Distill-Qwen-32B-f16-00001-of-00003.gguf filter=lfs diff=lfs merge=lfs -text
|
49 |
+
DeepSeek-R1-Distill-Qwen-32B-f16-00002-of-00003.gguf filter=lfs diff=lfs merge=lfs -text
|
50 |
+
DeepSeek-R1-Distill-Qwen-32B-f16-00003-of-00003.gguf filter=lfs diff=lfs merge=lfs -text
|
DeepSeek-R1-Distill-Qwen-32B-Q2_K.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e6f2a80eebc3b489b2c533009dc12d2ad366ba3cbf9c077982962d718334354
|
3 |
+
size 12313098112
|
DeepSeek-R1-Distill-Qwen-32B-Q3_K_L.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9ccd47bda91404c92f145d4a96efc57187fa024afc70e702fce0cb63b4239ab
|
3 |
+
size 17247078272
|
DeepSeek-R1-Distill-Qwen-32B-Q3_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:301c3e1290ea27cabae9c6b0e3ad84dcfe9b6d42ac03e125f990a82600b54cfa
|
3 |
+
size 15935047552
|
DeepSeek-R1-Distill-Qwen-32B-Q3_K_S.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67d0e0871c3a5e5dd64ba43c9cb6abb021f4246318a3e9d4fcdcdce7803821c9
|
3 |
+
size 14392330112
|
DeepSeek-R1-Distill-Qwen-32B-Q4_0.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:475b4fceea16cbacb81b4115fe74477e5c43385707072fc1a49220111e38de22
|
3 |
+
size 18640230272
|
DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74421d8d141ce525039084818e6570b9a3d6d13f38743ff4c1be8de91c3cca70
|
3 |
+
size 19851335552
|
DeepSeek-R1-Distill-Qwen-32B-Q4_K_S.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73ec58abb9c6715922e23a540d59a9ff3b1ca417400a26f4d98a72c95c48be4e
|
3 |
+
size 18784409472
|
DeepSeek-R1-Distill-Qwen-32B-Q5_0.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a34e2f72c09a8389763b519fdfde58cf03188a60e022cdd057729addc19779e
|
3 |
+
size 22638253952
|
DeepSeek-R1-Distill-Qwen-32B-Q5_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5e9da049d6fd38b1859bbcca8bf2f2577749547d47ab71f1427e34a4da2cce4
|
3 |
+
size 23262156672
|
DeepSeek-R1-Distill-Qwen-32B-Q5_K_S.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4d374e526523ab05d9c80d012bb85aa84ad08133d2cb8b0e4d758f1f74c2096
|
3 |
+
size 22638253952
|
DeepSeek-R1-Distill-Qwen-32B-Q6_K.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27dc5e445dc2b6f3dbe65558fb56354dd7ef3e02d4caa48a12dabd2a0dae8836
|
3 |
+
size 26886154112
|
DeepSeek-R1-Distill-Qwen-32B-Q8_0.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed58ab6a70d641610833ca33d2e16745cb986a4de505b060f796307f887b7ae6
|
3 |
+
size 34820884352
|
DeepSeek-R1-Distill-Qwen-32B-f16-00001-of-00003.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3aa1140f4f710f36b54f503a8e309658f243f881285e93fe4952c06494c275f7
|
3 |
+
size 29845223808
|
DeepSeek-R1-Distill-Qwen-32B-f16-00002-of-00003.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ccf5b737f3af27d477820b4b68490b8c9188547b73cb1789e3fba10a35662eb
|
3 |
+
size 29823611296
|
DeepSeek-R1-Distill-Qwen-32B-f16-00003-of-00003.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea3e4d9c31f917dfecb8d7ef93757a95f7360ece676137eefe650323ee0096e3
|
3 |
+
size 5867134368
|
config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"Qwen2ForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_dropout": 0.0,
|
6 |
+
"bos_token_id": 151643,
|
7 |
+
"eos_token_id": 151643,
|
8 |
+
"hidden_act": "silu",
|
9 |
+
"hidden_size": 5120,
|
10 |
+
"initializer_range": 0.02,
|
11 |
+
"intermediate_size": 27648,
|
12 |
+
"max_position_embeddings": 131072,
|
13 |
+
"max_window_layers": 64,
|
14 |
+
"model_type": "qwen2",
|
15 |
+
"num_attention_heads": 40,
|
16 |
+
"num_hidden_layers": 64,
|
17 |
+
"num_key_value_heads": 8,
|
18 |
+
"rms_norm_eps": 1e-05,
|
19 |
+
"rope_theta": 1000000.0,
|
20 |
+
"sliding_window": 131072,
|
21 |
+
"tie_word_embeddings": false,
|
22 |
+
"torch_dtype": "bfloat16",
|
23 |
+
"transformers_version": "4.43.1",
|
24 |
+
"use_cache": true,
|
25 |
+
"use_sliding_window": false,
|
26 |
+
"vocab_size": 152064
|
27 |
+
}
|