yakazimir
/

qwen_unl_entropy

@@ -3,15 +3,9 @@ library_name: transformers
 license: other
 base_model: trl-lib/qwen1.5-0.5b-sft
 tags:
-- alignment-handbook
 - trl
 - simpo
 - generated_from_trainer
-- trl
-- simpo
-- generated_from_trainer
-datasets:
-- yakazimir/ultrafeedback_binarized
 model-index:
 - name: qwen_unl_entropy
   results: []
@@ -22,18 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
 # qwen_unl_entropy
-This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on the yakazimir/ultrafeedback_binarized dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.0457
-- Rewards/chosen: -1.3358
-- Rewards/rejected: -1.6120
-- Rewards/accuracies: 0.5927
-- Rewards/margins: 0.2762
-- Logps/rejected: -1.6120
-- Logps/chosen: -1.3358
-- Logits/rejected: 0.2351
-- Logits/chosen: 0.1504
-- Semantic Entropy: 0.7755
 ## Model description
@@ -66,22 +59,22 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Semantic Entropy |
-|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:----------------:|
-| 2.0607        | 0.2141 | 400  | 2.0925          | -1.3454        | -1.4757          | 0.5564             | 0.1303          | -1.4757        | -1.3454      | 0.3297          | 0.2452        | 0.7910           |
-| 2.0818        | 0.4282 | 800  | 2.0714          | -1.3264        | -1.4818          | 0.5586             | 0.1554          | -1.4818        | -1.3264      | 0.3658          | 0.2814        | 0.7938           |
-| 2.0333        | 0.6422 | 1200 | 2.0611          | -1.3249        | -1.5070          | 0.5734             | 0.1821          | -1.5070        | -1.3249      | 0.4174          | 0.3272        | 0.7900           |
-| 2.0467        | 0.8563 | 1600 | 2.0564          | -1.3256        | -1.5170          | 0.5764             | 0.1913          | -1.5170        | -1.3256      | 0.3362          | 0.2519        | 0.7920           |
-| 2.1246        | 1.0704 | 2000 | 2.0505          | -1.3186        | -1.5340          | 0.5772             | 0.2154          | -1.5340        | -1.3186      | 0.4597          | 0.3633        | 0.7884           |
-| 2.0207        | 1.2845 | 2400 | 2.0472          | -1.3301        | -1.5773          | 0.5875             | 0.2473          | -1.5773        | -1.3301      | 0.3860          | 0.2934        | 0.7809           |
-| 2.0557        | 1.4986 | 2800 | 2.0434          | -1.3280        | -1.5872          | 0.5846             | 0.2592          | -1.5872        | -1.3280      | 0.4563          | 0.3580        | 0.7788           |
-| 2.0057        | 1.7127 | 3200 | 2.0460          | -1.3270        | -1.5754          | 0.5883             | 0.2484          | -1.5754        | -1.3270      | 0.3804          | 0.2876        | 0.7828           |
-| 1.971         | 1.9267 | 3600 | 2.0442          | -1.3219        | -1.5700          | 0.5861             | 0.2480          | -1.5700        | -1.3219      | 0.3035          | 0.2161        | 0.7832           |
-| 1.9276        | 2.1408 | 4000 | 2.0445          | -1.3312        | -1.6024          | 0.5905             | 0.2711          | -1.6024        | -1.3312      | 0.3771          | 0.2823        | 0.7784           |
-| 1.9438        | 2.3549 | 4400 | 2.0449          | -1.3381        | -1.6152          | 0.5942             | 0.2771          | -1.6152        | -1.3381      | 0.2493          | 0.1633        | 0.7754           |
-| 1.9434        | 2.5690 | 4800 | 2.0459          | -1.3386        | -1.6198          | 0.5942             | 0.2812          | -1.6198        | -1.3386      | 0.2611          | 0.1743        | 0.7744           |
-| 1.9666        | 2.7831 | 5200 | 2.0454          | -1.3354        | -1.6132          | 0.5942             | 0.2777          | -1.6132        | -1.3354      | 0.2918          | 0.2027        | 0.7755           |
-| 1.9498        | 2.9972 | 5600 | 2.0457          | -1.3358        | -1.6120          | 0.5927             | 0.2762          | -1.6120        | -1.3358      | 0.2351          | 0.1504        | 0.7755           |
 ### Framework versions

 license: other
 base_model: trl-lib/qwen1.5-0.5b-sft
 tags:
 - trl
 - simpo
 - generated_from_trainer
 model-index:
 - name: qwen_unl_entropy
   results: []
 # qwen_unl_entropy
+This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.6475
+- Rewards/chosen: -1.3030
+- Rewards/rejected: -1.4992
+- Rewards/accuracies: 0.5712
+- Rewards/margins: 0.1962
+- Logps/rejected: -1.4992
+- Logps/chosen: -1.3030
+- Logits/rejected: 0.0833
+- Logits/chosen: 0.0165
 ## Model description
 ### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 1.6549        | 0.2141 | 400  | 1.6939          | -1.3375        | -1.4631          | 0.5564             | 0.1256          | -1.4631        | -1.3375      | 0.3664          | 0.2799        |
+| 1.6692        | 0.4282 | 800  | 1.6718          | -1.3151        | -1.4532          | 0.5579             | 0.1381          | -1.4532        | -1.3151      | 0.3708          | 0.2889        |
+| 1.6206        | 0.6422 | 1200 | 1.6640          | -1.3083        | -1.4522          | 0.5564             | 0.1438          | -1.4522        | -1.3083      | 0.3523          | 0.2714        |
+| 1.6566        | 0.8563 | 1600 | 1.6600          | -1.3096        | -1.4585          | 0.5593             | 0.1488          | -1.4585        | -1.3096      | 0.3578          | 0.2764        |
+| 1.7104        | 1.0704 | 2000 | 1.6553          | -1.3006        | -1.4569          | 0.5660             | 0.1563          | -1.4569        | -1.3006      | 0.2528          | 0.1781        |
+| 1.6123        | 1.2845 | 2400 | 1.6521          | -1.3029        | -1.4743          | 0.5668             | 0.1713          | -1.4743        | -1.3029      | 0.1650          | 0.0956        |
+| 1.6688        | 1.4986 | 2800 | 1.6486          | -1.3000        | -1.4729          | 0.5690             | 0.1729          | -1.4729        | -1.3000      | 0.1751          | 0.1050        |
+| 1.6012        | 1.7127 | 3200 | 1.6495          | -1.3009        | -1.4722          | 0.5668             | 0.1713          | -1.4722        | -1.3009      | 0.2139          | 0.1401        |
+| 1.5646        | 1.9267 | 3600 | 1.6478          | -1.2987        | -1.4778          | 0.5705             | 0.1791          | -1.4778        | -1.2987      | 0.1771          | 0.1052        |
+| 1.5351        | 2.1408 | 4000 | 1.6470          | -1.3020        | -1.4952          | 0.5712             | 0.1932          | -1.4952        | -1.3020      | 0.1238          | 0.0547        |
+| 1.5307        | 2.3549 | 4400 | 1.6469          | -1.3054        | -1.5043          | 0.5712             | 0.1988          | -1.5043        | -1.3054      | 0.0587          | -0.0064       |
+| 1.5433        | 2.5690 | 4800 | 1.6472          | -1.3037        | -1.5017          | 0.5727             | 0.1980          | -1.5017        | -1.3037      | 0.1609          | 0.0880        |
+| 1.5671        | 2.7831 | 5200 | 1.6473          | -1.3030        | -1.4994          | 0.5720             | 0.1964          | -1.4994        | -1.3030      | 0.0927          | 0.0252        |
+| 1.5482        | 2.9972 | 5600 | 1.6475          | -1.3030        | -1.4992          | 0.5712             | 0.1962          | -1.4992        | -1.3030      | 0.0833          | 0.0165        |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,23 +1,9 @@
 {
     "epoch": 2.999297541394882,
-    "eval_logits/chosen": 0.15043768286705017,
-    "eval_logits/rejected": 0.235087051987648,
-    "eval_logps/chosen": -1.3358262777328491,
-    "eval_logps/rejected": -1.6119952201843262,
-    "eval_loss": 2.045694351196289,
-    "eval_rewards/accuracies": 0.5927299857139587,
-    "eval_rewards/chosen": -1.3358262777328491,
-    "eval_rewards/margins": 0.2761690616607666,
-    "eval_rewards/rejected": -1.6119952201843262,
-    "eval_runtime": 34.0463,
-    "eval_samples": 1345,
-    "eval_samples_per_second": 39.505,
-    "eval_semantic_entropy": 0.7755221128463745,
-    "eval_steps_per_second": 9.898,
     "total_flos": 0.0,
-    "train_loss": 2.028473087245443,
-    "train_runtime": 28359.3461,
     "train_samples": 59790,
-    "train_samples_per_second": 6.325,
-    "train_steps_per_second": 0.198
 }

 {
     "epoch": 2.999297541394882,
     "total_flos": 0.0,
+    "train_loss": 1.6264250374623148,
+    "train_runtime": 30148.9193,
     "train_samples": 59790,
+    "train_samples_per_second": 5.949,
+    "train_steps_per_second": 0.186
 }

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 2.999297541394882,
     "total_flos": 0.0,
-    "train_loss": 2.028473087245443,
-    "train_runtime": 28359.3461,
     "train_samples": 59790,
-    "train_samples_per_second": 6.325,
-    "train_steps_per_second": 0.198
 }

 {
     "epoch": 2.999297541394882,
     "total_flos": 0.0,
+    "train_loss": 1.6264250374623148,
+    "train_runtime": 30148.9193,
     "train_samples": 59790,
+    "train_samples_per_second": 5.949,
+    "train_steps_per_second": 0.186
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff