enkhtogtokh/mistral-dpo

Browse files

Files changed (5) hide show

README.md +13 -13
adapter_config.json +3 -3
adapter_model.safetensors +2 -2
runs/Jan18_13-55-33_eebeae6ed607/events.out.tfevents.1705586247.eebeae6ed607.388.0 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -17,15 +17,15 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [TheBloke/OpenHermes-2-Mistral-7B-GPTQ](https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GPTQ) on an unspecified dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0012
-- Rewards/chosen: 23.3318
-- Rewards/rejected: -6.7489
 - Rewards/accuracies: 1.0
-- Rewards/margins: 30.0806
-- Logps/rejected: -87.3513
-- Logps/chosen: -337.4500
-- Logits/rejected: -1.2781
-- Logits/chosen: -1.6769
 ## Model description
@@ -58,11 +58,11 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.5937        | 0.01  | 10   | 0.3601          | 0.7833         | -0.3607          | 0.9904             | 1.1441          | -23.4698       | -562.9344    | -1.1371         | -1.4401       |
-| 0.0908        | 0.02  | 20   | 1.1245          | 8.8420         | -2.7352          | 0.9615             | 11.5772         | -47.2141       | -482.3473    | -1.1942         | -1.5504       |
-| 0.0683        | 0.03  | 30   | 0.2541          | 17.6490        | -4.7403          | 0.9904             | 22.3893         | -67.2654       | -394.2778    | -1.2341         | -1.6426       |
-| 0.0009        | 0.04  | 40   | 0.0015          | 22.5664        | -5.9863          | 1.0                | 28.5527         | -79.7251       | -345.1035    | -1.2763         | -1.6781       |
-| 0.0003        | 0.05  | 50   | 0.0012          | 23.3318        | -6.7489          | 1.0                | 30.0806         | -87.3513       | -337.4500    | -1.2781         | -1.6769       |
 ### Framework versions

 This model is a fine-tuned version of [TheBloke/OpenHermes-2-Mistral-7B-GPTQ](https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GPTQ) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.0001
+- Rewards/chosen: 2.5279
+- Rewards/rejected: -6.8729
 - Rewards/accuracies: 1.0
+- Rewards/margins: 9.4009
+- Logps/rejected: -86.5415
+- Logps/chosen: -10.6380
+- Logits/rejected: -2.2909
+- Logits/chosen: -2.3250
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.5916        | 0.09  | 10   | 0.3053          | 0.6309         | -0.4316          | 1.0                | 1.0625          | -22.1280       | -29.6084     | -2.4417         | -2.4765       |
+| 0.127         | 0.19  | 20   | 0.0029          | 1.9393         | -4.1219          | 1.0                | 6.0612          | -59.0316       | -16.5245     | -2.3738         | -2.4158       |
+| 0.0013        | 0.28  | 30   | 0.0003          | 2.3840         | -5.7860          | 1.0                | 8.1700          | -75.6720       | -12.0770     | -2.3067         | -2.3466       |
+| 0.0002        | 0.37  | 40   | 0.0001          | 2.4704         | -6.5625          | 1.0                | 9.0328          | -83.4367       | -11.2135     | -2.2895         | -2.3248       |
+| 0.0002        | 0.46  | 50   | 0.0001          | 2.5279         | -6.8729          | 1.0                | 9.4009          | -86.5415       | -10.6380     | -2.2909         | -2.3250       |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "TheBloke/OpenHermes-2-Mistral-7B-GPTQ",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -19,8 +19,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": null,
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f80f99035c681c845263561663e67f34c2d083918f74e36fab1f698934cf052c
-size 6832600

 version https://git-lfs.github.com/spec/v1
+oid sha256:32102ac3c9c262725e1b6938c5139c8eb864b44d30ad34a3c1970ea7e8cb8243
+size 20508696

runs/Jan18_13-55-33_eebeae6ed607/events.out.tfevents.1705586247.eebeae6ed607.388.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:738f74c1edd09a72072823bc2a6c5dd88bcc55083327a8ac921ab8112da64bd1
+size 12559

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63b7be98043f6e4899e95c30b2a63ccbdfb2a8332e1daa417547649d18aa0c8d
 size 4091

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f7385158d7cf2ab875d03c3adab143e2f245ebac1124bb3113f2649ff54bcd2
 size 4091