Arihant Tripathi
committed on
qwen_new_mage_per_domain_balanced_moe
Browse files
- README.md +11 -6
- config.json +1 -1
- evaluation_results.json +8 -0
- model-00001-of-00012.safetensors +3 -0
- model-00002-of-00012.safetensors +3 -0
- model-00003-of-00012.safetensors +3 -0
- model-00004-of-00012.safetensors +3 -0
- model-00005-of-00012.safetensors +3 -0
- model-00006-of-00012.safetensors +3 -0
- model-00007-of-00012.safetensors +3 -0
- model-00008-of-00012.safetensors +3 -0
- model-00009-of-00012.safetensors +3 -0
- model-00010-of-00012.safetensors +3 -0
- model-00011-of-00012.safetensors +3 -0
- model-00012-of-00012.safetensors +3 -0
- model.safetensors.index.json +0 -0
README.md
CHANGED
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
|
|
18 |
|
19 |
This model is a fine-tuned version of [Qwen/Qwen1.5-MoE-A2.7B](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B) on the None dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
-
- Loss:
|
22 |
-
- Accuracy: 0.
|
23 |
|
24 |
## Model description
|
25 |
|
@@ -50,10 +50,15 @@ The following hyperparameters were used during training:
|
|
50 |
|
51 |
| Training Loss | Epoch | Step | Validation Loss | Accuracy |
|
52 |
|:-------------:|:------:|:----:|:---------------:|:--------:|
|
53 |
-
|
|
54 |
-
|
|
55 |
-
|
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
|
59 |
### Framework versions
|
|
|
18 |
|
19 |
This model is a fine-tuned version of [Qwen/Qwen1.5-MoE-A2.7B](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B) on the None dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 2.8781
|
22 |
+
- Accuracy: 0.5357
|
23 |
|
24 |
## Model description
|
25 |
|
|
|
50 |
|
51 |
| Training Loss | Epoch | Step | Validation Loss | Accuracy |
|
52 |
|:-------------:|:------:|:----:|:---------------:|:--------:|
|
53 |
+
| 5.4785 | 0.0006 | 100 | 6.6629 | 0.5689 |
|
54 |
+
| 4.8644 | 0.0013 | 200 | 10.6619 | 0.5316 |
|
55 |
+
| 4.5014 | 0.0019 | 300 | 3.0574 | 0.5299 |
|
56 |
+
| 3.3262 | 0.0025 | 400 | 3.2657 | 0.4643 |
|
57 |
+
| 2.7274 | 0.0032 | 500 | 2.0543 | 0.5314 |
|
58 |
+
| 2.3305 | 0.0038 | 600 | 1.9673 | 0.4682 |
|
59 |
+
| 2.4483 | 0.0044 | 700 | 2.7203 | 0.5357 |
|
60 |
+
| 3.201 | 0.0051 | 800 | 3.5143 | 0.5357 |
|
61 |
+
| 2.8675 | 0.0057 | 900 | 2.8781 | 0.5357 |
|
62 |
|
63 |
|
64 |
### Framework versions
|
config.json
CHANGED
@@ -32,7 +32,7 @@
|
|
32 |
"shared_expert_intermediate_size": 5632,
|
33 |
"sliding_window": null,
|
34 |
"tie_word_embeddings": false,
|
35 |
-
"torch_dtype": "
|
36 |
"transformers_version": "4.49.0",
|
37 |
"use_cache": true,
|
38 |
"use_sliding_window": false,
|
|
|
32 |
"shared_expert_intermediate_size": 5632,
|
33 |
"sliding_window": null,
|
34 |
"tie_word_embeddings": false,
|
35 |
+
"torch_dtype": "float32",
|
36 |
"transformers_version": "4.49.0",
|
37 |
"use_cache": true,
|
38 |
"use_sliding_window": false,
|
evaluation_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"eval_loss": NaN,
|
3 |
+
"eval_accuracy": 0.5342839036755387,
|
4 |
+
"eval_runtime": 5653.9393,
|
5 |
+
"eval_samples_per_second": 2.791,
|
6 |
+
"eval_steps_per_second": 2.791,
|
7 |
+
"epoch": 0.0025348703096977803
|
8 |
+
}
|
model-00001-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35b3dc035b6760d5bcbed1067923f118d7b3577ac004a92dd2ab8c1b6d1db743
|
3 |
+
size 4990221104
|
model-00002-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32aaaec990f4903d580f5a8bbbe8b324962b638f8f2b228f0686a882f0811833
|
3 |
+
size 4991306528
|
model-00003-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cfdce745903a9227dc466a6b06077f56a093ada6641e2ded20fd2c339059ac7
|
3 |
+
size 4990298240
|
model-00004-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d38121cb1d24e44e400cc5334268364e80cdbbc6c2b8eef83d43af81f4dd825
|
3 |
+
size 4990757696
|
model-00005-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3e346c8291370da473f259bb58ab14d6bbf998c802c3244f788180c749be13d
|
3 |
+
size 4991306600
|
model-00006-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c00370a395737b486018825d1b9d2f25c2a7c73490d2d8796ee79ef107e2705
|
3 |
+
size 4991306936
|
model-00007-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1c92cbd233968293e977f8bc8c5559d6c5759f3dc3d67bd76aea9e0bd5b643d
|
3 |
+
size 4991306952
|
model-00008-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e639e4e349a22ee82aa3f38713b86845d3a2d091d6d36db284aac6a4bd54932d
|
3 |
+
size 4968238032
|
model-00009-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84ae8046cd8b640ccc4574301daa4094b28c5f3dc4e9c2d6661d9be0f05087db
|
3 |
+
size 4989749864
|
model-00010-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5a0e70ebd04f8100fb2059cbb53b8997d6c1400aa1d6631f14119518d400e10
|
3 |
+
size 4991306928
|
model-00011-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f675aba552735b1fa296cfd57f9d3b7f3d6c3c485ab9b8b71d9c9a6fe229d89
|
3 |
+
size 4991306936
|
model-00012-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74e9fbda0980b30b87af9b46ea7a1524baa5590d86a2e740ed446e75cdeb4860
|
3 |
+
size 1141959976
|
model.safetensors.index.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|