Model save

Browse files

Files changed (9) hide show

README.md +1 -1
all_results.json +9 -4
eval_results.json +7 -0
model-00001-of-00003.safetensors +1 -1
model-00002-of-00003.safetensors +1 -1
model-00003-of-00003.safetensors +1 -1
train_results.json +4 -4
trainer_state.json +25 -25
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/gongc1995-city-university-of-hong-kong/huggingface/runs/gpjjd7iy)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/gongc1995-city-university-of-hong-kong/huggingface/runs/ujs4gdxc)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

all_results.json CHANGED Viewed

@@ -1,8 +1,13 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.7375955581665039,
-    "train_runtime": 1563.9615,
     "train_samples": 100,
-    "train_samples_per_second": 0.128,
-    "train_steps_per_second": 0.004
 }

 {
+    "eval_loss": 0.013707819394767284,
+    "eval_runtime": 477.0113,
+    "eval_samples": 99,
+    "eval_samples_per_second": 0.208,
+    "eval_steps_per_second": 0.015,
     "total_flos": 0.0,
+    "train_loss": 0.00647640476624171,
+    "train_runtime": 734.399,
     "train_samples": 100,
+    "train_samples_per_second": 0.272,
+    "train_steps_per_second": 0.008
 }

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "eval_loss": 0.013707819394767284,
+    "eval_runtime": 477.0113,
+    "eval_samples": 99,
+    "eval_samples_per_second": 0.208,
+    "eval_steps_per_second": 0.015
+}

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b83cddb211b5eaac1e8c74417e558fef9caa1f85e9a9c513a3ed0d1741f9b14
 size 4997744872

 version https://git-lfs.github.com/spec/v1
+oid sha256:6cc36673e60fb752c7dd7fcbfdaa037e9842b55638e4628cf36b2d070cc14e9e
 size 4997744872

model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62ee171fb6902f4393b24c3547df1df7c395f025f03fa179c5fcb18922fbe6d1
 size 4997235176

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c297b437f03539e116fed4bd7cff1af1ef3f8b363a6a3b50a0d84103af7a050
 size 4997235176

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c62a57ce1a0e0f64bfd4d189425662c8bb0476e7241c8a21d3ea0cbc23dfdc63
 size 3843741912

 version https://git-lfs.github.com/spec/v1
+oid sha256:15fd97537260b1a02f25f183494d8babb32dae0c785807436fc98ecd6045aaef
 size 3843741912

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.7375955581665039,
-    "train_runtime": 1563.9615,
     "train_samples": 100,
-    "train_samples_per_second": 0.128,
-    "train_steps_per_second": 0.004
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.00647640476624171,
+    "train_runtime": 734.399,
     "train_samples": 100,
+    "train_samples_per_second": 0.272,
+    "train_steps_per_second": 0.008
 }

trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.96,
   "eval_steps": 100,
   "global_step": 6,
   "is_hyper_param_search": false,
@@ -10,35 +10,35 @@
   "log_history": [
     {
       "clip_ratio": 0.0,
-      "completion_length": 555.18984375,
-      "epoch": 1.6400000000000001,
-      "grad_norm": 0.9047526121139526,
-      "kl": 22.304883193969726,
       "learning_rate": 1.9098300562505264e-07,
-      "loss": 0.9364,
-      "reward": 0.47890625,
-      "reward_std": 0.3222931930795312,
-      "rewards/accuracy_reward": 0.07734375,
-      "rewards/format_reward": 0.009375,
-      "rewards/tag_count_reward": 0.3921875,
       "step": 5
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 546.359375,
-      "epoch": 1.96,
-      "kl": 0.04564642906188965,
-      "reward": 0.5244140625,
-      "reward_std": 0.2628240427002311,
-      "rewards/accuracy_reward": 0.01953125,
-      "rewards/format_reward": 0.01171875,
-      "rewards/tag_count_reward": 0.4931640625,
       "step": 6,
       "total_flos": 0.0,
-      "train_loss": 0.7375955581665039,
-      "train_runtime": 1563.9615,
-      "train_samples_per_second": 0.128,
-      "train_steps_per_second": 0.004
     }
   ],
   "logging_steps": 5,
@@ -52,8 +52,8 @@
         "should_epoch_stop": false,
         "should_evaluate": false,
         "should_log": false,
-        "should_save": false,
-        "should_training_stop": false
       },
       "attributes": {}
     }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.9230769230769231,
   "eval_steps": 100,
   "global_step": 6,
   "is_hyper_param_search": false,
   "log_history": [
     {
       "clip_ratio": 0.0,
+      "completion_length": 390.85,
+      "epoch": 1.6153846153846154,
+      "grad_norm": 1.6735994815826416,
+      "kl": 0.005230331420898437,
       "learning_rate": 1.9098300562505264e-07,
+      "loss": -0.0077,
+      "reward": 0.3171875,
+      "reward_std": 0.17235727477818727,
+      "rewards/accuracy_reward": 0.04375,
+      "rewards/format_reward": 0.0,
+      "rewards/tag_count_reward": 0.2734375,
       "step": 5
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 402.8125,
+      "epoch": 1.9230769230769231,
+      "kl": 0.0454254150390625,
+      "reward": 0.48046875,
+      "reward_std": 0.24859222024679184,
+      "rewards/accuracy_reward": 0.03125,
+      "rewards/format_reward": 0.03125,
+      "rewards/tag_count_reward": 0.41796875,
       "step": 6,
       "total_flos": 0.0,
+      "train_loss": 0.00647640476624171,
+      "train_runtime": 734.399,
+      "train_samples_per_second": 0.272,
+      "train_steps_per_second": 0.008
     }
   ],
   "logging_steps": 5,
         "should_epoch_stop": false,
         "should_evaluate": false,
         "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8b6d401ff70d5d7d7b2e297a6161262bd64b3f1f486100eb7299dfa2daeba23
 size 8056

 version https://git-lfs.github.com/spec/v1
+oid sha256:df875688a799b0d1a443e4f63a78775021d767f1fadd11731aafda5d1d04281a
 size 8056