yakazimir
/

qwen_ce_entropy

@@ -3,15 +3,9 @@ library_name: transformers
 license: other
 base_model: trl-lib/qwen1.5-0.5b-sft
 tags:
-- alignment-handbook
 - trl
 - simpo
 - generated_from_trainer
-- trl
-- simpo
-- generated_from_trainer
-datasets:
-- yakazimir/ultrafeedback_binarized
 model-index:
 - name: qwen_ce_entropy
   results: []
@@ -22,18 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
 # qwen_ce_entropy
-This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on the yakazimir/ultrafeedback_binarized dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.6698
-- Rewards/chosen: -1.2671
-- Rewards/rejected: -1.4042
 - Rewards/accuracies: 0.5475
-- Rewards/margins: 0.1371
-- Logps/rejected: -1.4042
-- Logps/chosen: -1.2671
-- Logits/rejected: 0.0682
-- Logits/chosen: 0.0048
-- Semantic Entropy: 0.8052
 ## Model description
@@ -66,22 +59,22 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Semantic Entropy |
-|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:----------------:|
-| 1.6978        | 0.2141 | 400  | 1.7217          | -1.3251        | -1.4461          | 0.5542             | 0.1210          | -1.4461        | -1.3251      | 0.3247          | 0.2440        | 0.7926           |
-| 1.6742        | 0.4282 | 800  | 1.6949          | -1.2943        | -1.4240          | 0.5512             | 0.1298          | -1.4240        | -1.2943      | 0.3049          | 0.2297        | 0.8009           |
-| 1.6257        | 0.6422 | 1200 | 1.6872          | -1.2855        | -1.4133          | 0.5497             | 0.1277          | -1.4133        | -1.2855      | 0.3184          | 0.2421        | 0.8030           |
-| 1.6968        | 0.8563 | 1600 | 1.6820          | -1.2796        | -1.4093          | 0.5497             | 0.1297          | -1.4093        | -1.2796      | 0.2761          | 0.2028        | 0.8045           |
-| 1.6822        | 1.0704 | 2000 | 1.6787          | -1.2761        | -1.4082          | 0.5475             | 0.1321          | -1.4082        | -1.2761      | 0.2216          | 0.1509        | 0.8049           |
-| 1.6094        | 1.2845 | 2400 | 1.6751          | -1.2721        | -1.4072          | 0.5512             | 0.1351          | -1.4072        | -1.2721      | 0.1377          | 0.0723        | 0.8058           |
-| 1.6739        | 1.4986 | 2800 | 1.6730          | -1.2698        | -1.4034          | 0.5497             | 0.1336          | -1.4034        | -1.2698      | 0.2715          | 0.1954        | 0.8061           |
-| 1.6298        | 1.7127 | 3200 | 1.6720          | -1.2684        | -1.4016          | 0.5482             | 0.1333          | -1.4016        | -1.2684      | 0.2307          | 0.1571        | 0.8069           |
-| 1.5782        | 1.9267 | 3600 | 1.6708          | -1.2674        | -1.4010          | 0.5475             | 0.1337          | -1.4010        | -1.2674      | 0.1038          | 0.0385        | 0.8067           |
-| 1.5741        | 2.1408 | 4000 | 1.6708          | -1.2680        | -1.4049          | 0.5497             | 0.1369          | -1.4049        | -1.2680      | 0.1744          | 0.1034        | 0.8055           |
-| 1.5681        | 2.3549 | 4400 | 1.6702          | -1.2675        | -1.4043          | 0.5475             | 0.1369          | -1.4043        | -1.2675      | 0.1310          | 0.0627        | 0.8052           |
-| 1.5524        | 2.5690 | 4800 | 1.6700          | -1.2673        | -1.4043          | 0.5482             | 0.1370          | -1.4043        | -1.2673      | 0.0834          | 0.0188        | 0.8053           |
-| 1.537         | 2.7831 | 5200 | 1.6700          | -1.2672        | -1.4043          | 0.5482             | 0.1371          | -1.4043        | -1.2672      | 0.0930          | 0.0277        | 0.8053           |
-| 1.5699        | 2.9972 | 5600 | 1.6698          | -1.2671        | -1.4042          | 0.5475             | 0.1371          | -1.4042        | -1.2671      | 0.0682          | 0.0048        | 0.8052           |
 ### Framework versions

 license: other
 base_model: trl-lib/qwen1.5-0.5b-sft
 tags:
 - trl
 - simpo
 - generated_from_trainer
 model-index:
 - name: qwen_ce_entropy
   results: []
 # qwen_ce_entropy
+This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on the yakazimir/ultrafeedback_binarized dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.2625
+- Rewards/chosen: -1.2622
+- Rewards/rejected: -1.3864
 - Rewards/accuracies: 0.5475
+- Rewards/margins: 0.1242
+- Logps/rejected: -1.3864
+- Logps/chosen: -1.2622
+- Logits/rejected: 0.1431
+- Logits/chosen: 0.0760
 ## Model description
 ### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 1.2903        | 0.2141 | 400  | 1.3234          | -1.3231        | -1.4418          | 0.5556             | 0.1187          | -1.4418        | -1.3231      | 0.3478          | 0.2657        |
+| 1.2586        | 0.4282 | 800  | 1.2926          | -1.2924        | -1.4167          | 0.5482             | 0.1243          | -1.4167        | -1.2924      | 0.3140          | 0.2391        |
+| 1.217         | 0.6422 | 1200 | 1.2836          | -1.2833        | -1.4047          | 0.5475             | 0.1213          | -1.4047        | -1.2833      | 0.2906          | 0.2178        |
+| 1.299         | 0.8563 | 1600 | 1.2774          | -1.2772        | -1.3985          | 0.5467             | 0.1213          | -1.3985        | -1.2772      | 0.2371          | 0.1683        |
+| 1.2617        | 1.0704 | 2000 | 1.2726          | -1.2724        | -1.3958          | 0.5482             | 0.1234          | -1.3958        | -1.2724      | 0.1842          | 0.1180        |
+| 1.1894        | 1.2845 | 2400 | 1.2689          | -1.2687        | -1.3924          | 0.5460             | 0.1238          | -1.3924        | -1.2687      | 0.1212          | 0.0586        |
+| 1.2779        | 1.4986 | 2800 | 1.2662          | -1.2659        | -1.3880          | 0.5453             | 0.1221          | -1.3880        | -1.2659      | 0.1199          | 0.0573        |
+| 1.225         | 1.7127 | 3200 | 1.2650          | -1.2647        | -1.3872          | 0.5490             | 0.1225          | -1.3872        | -1.2647      | 0.1854          | 0.1171        |
+| 1.1621        | 1.9267 | 3600 | 1.2636          | -1.2634        | -1.3853          | 0.5475             | 0.1219          | -1.3853        | -1.2634      | 0.1551          | 0.0880        |
+| 1.1565        | 2.1408 | 4000 | 1.2633          | -1.2631        | -1.3880          | 0.5482             | 0.1250          | -1.3880        | -1.2631      | 0.0952          | 0.0325        |
+| 1.1515        | 2.3549 | 4400 | 1.2629          | -1.2626        | -1.3868          | 0.5467             | 0.1242          | -1.3868        | -1.2626      | 0.0880          | 0.0251        |
+| 1.1364        | 2.5690 | 4800 | 1.2625          | -1.2623        | -1.3865          | 0.5467             | 0.1242          | -1.3865        | -1.2623      | 0.1292          | 0.0630        |
+| 1.1256        | 2.7831 | 5200 | 1.2626          | -1.2623        | -1.3864          | 0.5475             | 0.1241          | -1.3864        | -1.2623      | 0.1208          | 0.0553        |
+| 1.1655        | 2.9972 | 5600 | 1.2625          | -1.2622        | -1.3864          | 0.5475             | 0.1242          | -1.3864        | -1.2622      | 0.1431          | 0.0760        |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,23 +1,9 @@
 {
     "epoch": 2.999297541394882,
-    "eval_logits/chosen": 0.004835990257561207,
-    "eval_logits/rejected": 0.06824379414319992,
-    "eval_logps/chosen": -1.2670738697052002,
-    "eval_logps/rejected": -1.4042190313339233,
-    "eval_loss": 1.6698354482650757,
-    "eval_rewards/accuracies": 0.5474777221679688,
-    "eval_rewards/chosen": -1.2670738697052002,
-    "eval_rewards/margins": 0.13714538514614105,
-    "eval_rewards/rejected": -1.4042190313339233,
-    "eval_runtime": 33.6516,
-    "eval_samples": 1345,
-    "eval_samples_per_second": 39.968,
-    "eval_semantic_entropy": 0.8052493929862976,
-    "eval_steps_per_second": 10.014,
     "total_flos": 0.0,
-    "train_loss": 1.640899122824931,
-    "train_runtime": 28500.0022,
     "train_samples": 59790,
-    "train_samples_per_second": 6.294,
-    "train_steps_per_second": 0.197
 }

 {
     "epoch": 2.999297541394882,
     "total_flos": 0.0,
+    "train_loss": 1.2315016932184573,
+    "train_runtime": 30313.5413,
     "train_samples": 59790,
+    "train_samples_per_second": 5.917,
+    "train_steps_per_second": 0.185
 }

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 2.999297541394882,
     "total_flos": 0.0,
-    "train_loss": 1.640899122824931,
-    "train_runtime": 28500.0022,
     "train_samples": 59790,
-    "train_samples_per_second": 6.294,
-    "train_steps_per_second": 0.197
 }

 {
     "epoch": 2.999297541394882,
     "total_flos": 0.0,
+    "train_loss": 1.2315016932184573,
+    "train_runtime": 30313.5413,
     "train_samples": 59790,
+    "train_samples_per_second": 5.917,
+    "train_steps_per_second": 0.185
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff