Holy-fox committed on
Commit
afd3e27
·
verified ·
1 Parent(s): 2e20621

Upload 73 files

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. README.md +43 -3
  3. config.json +29 -0
  4. mergekit_config.yml +10 -0
  5. model-00001-of-00066.safetensors +3 -0
  6. model-00002-of-00066.safetensors +3 -0
  7. model-00003-of-00066.safetensors +3 -0
  8. model-00004-of-00066.safetensors +3 -0
  9. model-00005-of-00066.safetensors +3 -0
  10. model-00006-of-00066.safetensors +3 -0
  11. model-00007-of-00066.safetensors +3 -0
  12. model-00008-of-00066.safetensors +3 -0
  13. model-00009-of-00066.safetensors +3 -0
  14. model-00010-of-00066.safetensors +3 -0
  15. model-00011-of-00066.safetensors +3 -0
  16. model-00012-of-00066.safetensors +3 -0
  17. model-00013-of-00066.safetensors +3 -0
  18. model-00014-of-00066.safetensors +3 -0
  19. model-00015-of-00066.safetensors +3 -0
  20. model-00016-of-00066.safetensors +3 -0
  21. model-00017-of-00066.safetensors +3 -0
  22. model-00018-of-00066.safetensors +3 -0
  23. model-00019-of-00066.safetensors +3 -0
  24. model-00020-of-00066.safetensors +3 -0
  25. model-00021-of-00066.safetensors +3 -0
  26. model-00022-of-00066.safetensors +3 -0
  27. model-00023-of-00066.safetensors +3 -0
  28. model-00024-of-00066.safetensors +3 -0
  29. model-00025-of-00066.safetensors +3 -0
  30. model-00026-of-00066.safetensors +3 -0
  31. model-00027-of-00066.safetensors +3 -0
  32. model-00028-of-00066.safetensors +3 -0
  33. model-00029-of-00066.safetensors +3 -0
  34. model-00030-of-00066.safetensors +3 -0
  35. model-00031-of-00066.safetensors +3 -0
  36. model-00032-of-00066.safetensors +3 -0
  37. model-00033-of-00066.safetensors +3 -0
  38. model-00034-of-00066.safetensors +3 -0
  39. model-00035-of-00066.safetensors +3 -0
  40. model-00036-of-00066.safetensors +3 -0
  41. model-00037-of-00066.safetensors +3 -0
  42. model-00038-of-00066.safetensors +3 -0
  43. model-00039-of-00066.safetensors +3 -0
  44. model-00040-of-00066.safetensors +3 -0
  45. model-00041-of-00066.safetensors +3 -0
  46. model-00042-of-00066.safetensors +3 -0
  47. model-00043-of-00066.safetensors +3 -0
  48. model-00044-of-00066.safetensors +3 -0
  49. model-00045-of-00066.safetensors +3 -0
  50. model-00046-of-00066.safetensors +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,43 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model:
3
+ - cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese
4
+ library_name: transformers
5
+ tags:
6
+ - mergekit
7
+ - merge
8
+
9
+ ---
10
+ # SKYCAVE-R1-32B-v0.1
11
+
12
+ This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
13
+
14
+ ## Merge Details
15
+ ### Merge Method
16
+
17
+ This model was merged using the [Model Stock](https://arxiv.org/abs/2403.19522) merge method, with [cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese](https://huggingface.co/cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese) as the base model.
18
+
19
+ ### Models Merged
20
+
21
+ The following models were included in the merge:
22
+ * SKYCAVE_element_QR_jp
23
+ * SKYCAVE_element_Sky_jp
24
+ * SKYCAVE_element_R1_jp_02
25
+ * SKYCAVE_element_R1_jp_03
26
+ * SKYCAVE_element_R1_jp_01
27
+
28
+ ### Configuration
29
+
30
+ The following YAML configuration was used to produce this model:
31
+
32
+ ```yaml
33
+ merge_method: model_stock
34
+ base_model: cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese
35
+ models:
36
+ - model: SKYCAVE_element_QR_jp
37
+ - model: SKYCAVE_element_R1_jp_01
38
+ - model: SKYCAVE_element_R1_jp_02
39
+ - model: SKYCAVE_element_R1_jp_03
40
+ - model: SKYCAVE_element_Sky_jp
41
+ dtype: bfloat16
42
+ name: SKYCAVE-R1-32B-v0.1
43
+ ```
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151643,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 5120,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 27648,
13
+ "max_position_embeddings": 131072,
14
+ "max_window_layers": 64,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 40,
17
+ "num_hidden_layers": 64,
18
+ "num_key_value_heads": 8,
19
+ "rms_norm_eps": 1e-05,
20
+ "rope_scaling": null,
21
+ "rope_theta": 1000000.0,
22
+ "sliding_window": null,
23
+ "tie_word_embeddings": false,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.48.1",
26
+ "use_cache": true,
27
+ "use_sliding_window": false,
28
+ "vocab_size": 152064
29
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ merge_method: model_stock
2
+ base_model: cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese
3
+ models:
4
+ - model: SKYCAVE_element_QR_jp
5
+ - model: SKYCAVE_element_R1_jp_01
6
+ - model: SKYCAVE_element_R1_jp_02
7
+ - model: SKYCAVE_element_R1_jp_03
8
+ - model: SKYCAVE_element_Sky_jp
9
+ dtype: bfloat16
10
+ name: SKYCAVE-R1-32B-v0.1
model-00001-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60cb17732bf5453c578aec8a85234a3e9792e9560c25ff58cca7eeccb17025bb
3
+ size 1557135488
model-00002-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c42d8e131511a390d6ed5393d50160d1970c49d94b185fd67c466c54513cb25
3
+ size 1557135504
model-00003-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d607166f155ca8059fe9bb266721a1dbf7bd11d591c37996bd96d8c3d5a5b874
3
+ size 975222224
model-00004-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02a4f225052f0c0f373686522ec03bf4f929fc56ba27f6eca1fe5e712edb61ee
3
+ size 975211880
model-00005-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd000cd3642d8477715cdbf7835624fe8df9df77895719b3615bc42b4ff2e9f5
3
+ size 975211888
model-00006-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64068918fa83a39c6d1657ccf07f5f5d6958b4d5fedca9f689db218b28b0ef8b
3
+ size 975211888
model-00007-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:371342f86d49699744d64ba97a3e6f972de5a39f70cdc450b2f4a433df38fcfa
3
+ size 975211888
model-00008-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f576b5e1c24d47925262e7f9b215733af7ea19a81d528ae9d815be25e8543ce
3
+ size 975211888
model-00009-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a74bfa466ca46938a0b02145cc043384c0f4fd5de0dad30c699011a414b25ec
3
+ size 975211888
model-00010-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb508223b0aa30dbc38e1333a470804d952a949424523987e32d626532d8d1dd
3
+ size 975211888
model-00011-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deb618daa0bede903575fc90e4d612338b90d69335e2035edf0df176cd7da952
3
+ size 975211888
model-00012-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae793d23bab5ab64f1c73df0e3bcaa4840f94454396972cd9807fded4d1d387c
3
+ size 975211888
model-00013-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5116eaa67924fae32ce172d1035027bee904134ecfda09403fa143223372a4be
3
+ size 975211888
model-00014-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55aff65ae7d2b5f1004f1be321f0bb70c59dae34caa8f2236d994791698daea9
3
+ size 975211888
model-00015-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ad640788ad498fefb7bfb03b636466508a7af714b1397362751a159f2cbb7db
3
+ size 975211880
model-00016-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed1f4223b7a160aef96a12393d1303c7afb431bee223941be5d26ff14470b410
3
+ size 975211888
model-00017-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f9db5fe8f8eeb013592a08aea334866170ae322237b144cb4dbe5462497a5f4
3
+ size 975211888
model-00018-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f905ac2cf3b4423e89f1655462499444d9e85e3e65e8553140c0f429c734fb93
3
+ size 975211888
model-00019-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeacb399243e4e9650b48e357948296be56e824d31e2dcd464511b8a40e33790
3
+ size 975211888
model-00020-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6196d9c600073d5ceaa5d4ea56ae156cb0ccb2d590db3d4667a5cfa53131abff
3
+ size 975211888
model-00021-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3792456741e6a9f631669ed0d2057d737aec5fe4238493f01e5a18d4957ff43d
3
+ size 975211888
model-00022-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cd68039d60c5f585c197bebacea127383b2b8e60cd0c09ee09631eddeb1ec60
3
+ size 975211888
model-00023-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4b50b7b23450e9e500c3b6d37908f1ebffda2e37f660787427afe8da635665b
3
+ size 975211888
model-00024-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdfca6aca27311a8b7eab972fe6a3eed522435790092a16e43c08da6488478fa
3
+ size 975211888
model-00025-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a7e9601436ebbe376b461be61f239186955796051a3d6afe7d499ea1f93dead
3
+ size 975211888
model-00026-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4d25f9fbe3fb9e2a10672cbcb13fa28002ec17a9c29e8a21454bc7490b75ce8
3
+ size 975211880
model-00027-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f96db731cd85b4d78257456812b5fe4f61eccb103069c1c7730e35f1df9874
3
+ size 975211888
model-00028-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42c9225f541002f0b071b2c0648bc5a74fc097997dbab3ae3c9c5f60f95d4497
3
+ size 975211888
model-00029-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dffa18aa0277c9896d29e90843d7a8e7c9cf5e1f026cc0ab55ff288d623cb50a
3
+ size 975211888
model-00030-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ac451677ae976f316c196c70316b7bc9f4d08c5ab47615870757b31b7d7e7dc
3
+ size 975211888
model-00031-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfaa3b5efe16028ab460eb00676fdc533a35ea39fb549a1279d5703a69d272fc
3
+ size 975211888
model-00032-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26e00ea8f24e4a2a295b579dcbf02ee3d36f11f92b02375a164b7a4de6c7c42a
3
+ size 975211888
model-00033-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbe4f5e350ef3a18d6692a92fa628e833bf98794d4002ee68a2f174ae277e289
3
+ size 975211888
model-00034-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f12d1080ce57aa00c64d5be536f2fc10a6346c05c3ec12cfac15ed12a062a18
3
+ size 975211888
model-00035-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48a9f4da6f3c27dfea1cf504ad3fb1b80855733960cf33dafa405e78746c96e3
3
+ size 975211888
model-00036-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b169a0c1e9048080b5b3828741221c25c88faab5eb85c5344621bfbd87799fff
3
+ size 975211888
model-00037-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad0b17f190c90498801094e1e1784fc689d50058c3eb66ec4726781baee51e93
3
+ size 975211880
model-00038-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84ab4f4d27fd05ac7dd0b1f5a85c87fd9becb915ccdba870606ea0b50763a978
3
+ size 975211888
model-00039-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee047b0a5e6a41446dcd8cbe06db31f78d23bb9d89987a38b823fd8ff050e812
3
+ size 975211888
model-00040-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48fbccc9aa8141639efaf8ec91f66bfd678342822fc5de7a5492ca7eece98518
3
+ size 975211888
model-00041-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a411d375432de6b1afdf410cac57c9d202effab5ee02c25b3514f60536e24405
3
+ size 975211888
model-00042-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:672ca5893815ce121c04354320ce5ffe7c9bf82bf71dabd94979b360f3b87b06
3
+ size 975211888
model-00043-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe1f4ba6706b903028332aa1328fdde01577a80f133dc0304578957837f3db7
3
+ size 975211888
model-00044-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22642172a7ecf2be6078cd59188ebed27a0764d93e71533282c81676f3b51207
3
+ size 975211888
model-00045-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3415776d7c5d6985e5833f3b55be51f3dfb1cd7d4e3a152f6ad8b5990f57ce46
3
+ size 975211888
model-00046-of-00066.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3138f3a2b100bf7d3575fce4f1db9255569709b2f85771d9bbe04beaf308217
3
+ size 975211888