enkhtogtokh
committed on
Commit
•
e026985
1
Parent(s):
6fea021
enkhtogtokh/mistral-dpo
Browse files
README.md
CHANGED
@@ -17,15 +17,15 @@ should probably proofread and complete it, then remove this comment. -->
|
|
17 |
|
18 |
This model is a fine-tuned version of [TheBloke/OpenHermes-2-Mistral-7B-GPTQ](https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GPTQ) on the None dataset.
|
19 |
It achieves the following results on the evaluation set:
|
20 |
-
- Loss: 0.
|
21 |
-
- Rewards/chosen:
|
22 |
-
- Rewards/rejected: -6.
|
23 |
- Rewards/accuracies: 1.0
|
24 |
-
- Rewards/margins:
|
25 |
-
- Logps/rejected: -
|
26 |
-
- Logps/chosen: -
|
27 |
-
- Logits/rejected: -
|
28 |
-
- Logits/chosen: -
|
29 |
|
30 |
## Model description
|
31 |
|
@@ -58,11 +58,11 @@ The following hyperparameters were used during training:
|
|
58 |
|
59 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
60 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
61 |
-
| 0.
|
62 |
-
| 0.
|
63 |
-
| 0.
|
64 |
-
| 0.
|
65 |
-
| 0.
|
66 |
|
67 |
|
68 |
### Framework versions
|
|
|
17 |
|
18 |
This model is a fine-tuned version of [TheBloke/OpenHermes-2-Mistral-7B-GPTQ](https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GPTQ) on the None dataset.
|
19 |
It achieves the following results on the evaluation set:
|
20 |
+
- Loss: 0.0001
|
21 |
+
- Rewards/chosen: 2.5279
|
22 |
+
- Rewards/rejected: -6.8729
|
23 |
- Rewards/accuracies: 1.0
|
24 |
+
- Rewards/margins: 9.4009
|
25 |
+
- Logps/rejected: -86.5415
|
26 |
+
- Logps/chosen: -10.6380
|
27 |
+
- Logits/rejected: -2.2909
|
28 |
+
- Logits/chosen: -2.3250
|
29 |
|
30 |
## Model description
|
31 |
|
|
|
58 |
|
59 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
60 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
61 |
+
| 0.5916 | 0.09 | 10 | 0.3053 | 0.6309 | -0.4316 | 1.0 | 1.0625 | -22.1280 | -29.6084 | -2.4417 | -2.4765 |
|
62 |
+
| 0.127 | 0.19 | 20 | 0.0029 | 1.9393 | -4.1219 | 1.0 | 6.0612 | -59.0316 | -16.5245 | -2.3738 | -2.4158 |
|
63 |
+
| 0.0013 | 0.28 | 30 | 0.0003 | 2.3840 | -5.7860 | 1.0 | 8.1700 | -75.6720 | -12.0770 | -2.3067 | -2.3466 |
|
64 |
+
| 0.0002 | 0.37 | 40 | 0.0001 | 2.4704 | -6.5625 | 1.0 | 9.0328 | -83.4367 | -11.2135 | -2.2895 | -2.3248 |
|
65 |
+
| 0.0002 | 0.46 | 50 | 0.0001 | 2.5279 | -6.8729 | 1.0 | 9.4009 | -86.5415 | -10.6380 | -2.2909 | -2.3250 |
|
66 |
|
67 |
|
68 |
### Framework versions
|
adapter_config.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"alpha_pattern": {},
|
3 |
"auto_mapping": null,
|
4 |
-
"base_model_name_or_path":
|
5 |
"bias": "none",
|
6 |
"fan_in_fan_out": false,
|
7 |
"inference_mode": true,
|
@@ -19,8 +19,8 @@
|
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
-
"
|
23 |
-
"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
1 |
{
|
2 |
"alpha_pattern": {},
|
3 |
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": null,
|
5 |
"bias": "none",
|
6 |
"fan_in_fan_out": false,
|
7 |
"inference_mode": true,
|
|
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
+
"v_proj",
|
23 |
+
"q_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32102ac3c9c262725e1b6938c5139c8eb864b44d30ad34a3c1970ea7e8cb8243
|
3 |
+
size 20508696
|
runs/Jan18_13-55-33_eebeae6ed607/events.out.tfevents.1705586247.eebeae6ed607.388.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:738f74c1edd09a72072823bc2a6c5dd88bcc55083327a8ac921ab8112da64bd1
|
3 |
+
size 12559
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4091
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f7385158d7cf2ab875d03c3adab143e2f245ebac1124bb3113f2649ff54bcd2
|
3 |
size 4091
|