End of training

Files changed (9) hide show

README.md CHANGED Viewed

@@ -1,9 +1,11 @@
 ---
 base_model: Qwen/Qwen2.5-Math-7B
 library_name: transformers
 model_name: Qwen-2.5-7B-Simple-RL
 tags:
 - generated_from_trainer
 - trl
 - grpo
 licence: license
@@ -11,7 +13,7 @@ licence: license
 # Model Card for Qwen-2.5-7B-Simple-RL
-This model is a fine-tuned version of [Qwen/Qwen2.5-Math-7B](https://huggingface.co/Qwen/Qwen2.5-Math-7B).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start

 ---
 base_model: Qwen/Qwen2.5-Math-7B
+datasets: DigitalLearningGmbH/MATH-lighteval
 library_name: transformers
 model_name: Qwen-2.5-7B-Simple-RL
 tags:
 - generated_from_trainer
+- open-r1
 - trl
 - grpo
 licence: license
 # Model Card for Qwen-2.5-7B-Simple-RL
+This model is a fine-tuned version of [Qwen/Qwen2.5-Math-7B](https://huggingface.co/Qwen/Qwen2.5-Math-7B) on the [DigitalLearningGmbH/MATH-lighteval](https://huggingface.co/datasets/DigitalLearningGmbH/MATH-lighteval) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start

all_results.json CHANGED Viewed

@@ -1,4 +1,9 @@
 {
     "total_flos": 0.0,
     "train_loss": 4841.422249500714,
     "train_runtime": 180396.3107,

 {
+    "eval_loss": 0.03460121154785156,
+    "eval_runtime": 32312.0729,
+    "eval_samples": 5000,
+    "eval_samples_per_second": 0.155,
+    "eval_steps_per_second": 0.011,
     "total_flos": 0.0,
     "train_loss": 4841.422249500714,
     "train_runtime": 180396.3107,

config.json CHANGED Viewed

@@ -23,7 +23,7 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.49.0.dev0",
-  "use_cache": false,
   "use_mrope": false,
   "use_sliding_window": false,
   "vocab_size": 152064

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.49.0.dev0",
+  "use_cache": true,
   "use_mrope": false,
   "use_sliding_window": false,
   "vocab_size": 152064

eval_results.json ADDED Viewed

+{
+    "eval_loss": 0.03460121154785156,
+    "eval_runtime": 32312.0729,
+    "eval_samples": 5000,
+    "eval_samples_per_second": 0.155,
+    "eval_steps_per_second": 0.011
+}

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a8bde7239257eed27091ec9545e68b2d0395950f88e565034f0b9b5d6c6c2d2
 size 4877660776

 version https://git-lfs.github.com/spec/v1
+oid sha256:c29d9eecbf30eef037859879dbc6af4acc10a3ff8fb79cb4732cbca41c35fbe3
 size 4877660776

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e65051a3f9aff3decbcebe2fe1fd733b8b2a3f49cdaddb9ee05e376d641ee68f
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab9b09035bebd0c729f8bbd320c3152f62583df612cf7faaac3eac466ae6557c
 size 4932751008

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2748b0db14a42cb85aae37da787c2c1ed7945d123bb011c1c1cd663efb9eb98e
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2c78dec2dd3327d10869c4da125411e4e739412f00b94cdc15580e698d9ac77
 size 4330865200

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6a67067036ff02755fe45939dafa807e50071fd7bd0bab11c3531e1855e9c50
 size 1089994880

 version https://git-lfs.github.com/spec/v1
+oid sha256:9d39c18161aa93e39851678ad03057059b91c440ffec216018c112758dcec9a8
 size 1089994880

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13ba1276091f016faca687c17af3ac5e6e54a286f23c98addb7de0e7096eaf0f
 size 7480

 version https://git-lfs.github.com/spec/v1
+oid sha256:1373fdcb93653901ac277fd852565f578e438f5f5b258915dc888ae42be7ad33
 size 7480