Model save
- README.md +1 -1
- all_results.json +3 -3
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +2 -2
- model-00003-of-00004.safetensors +2 -2
- model-00004-of-00004.safetensors +3 -0
- train_results.json +3 -3
- trainer_state.json +13 -34
- training_args.bin +2 -2
README.md
CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/sjhxbug218-nvidia/huggingface/runs/
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/sjhxbug218-nvidia/huggingface/runs/jika9i2p)
 
 
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
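For context on the training procedure the README points at, a GRPO run with TRL's `GRPOTrainer` typically looks like the sketch below, which mirrors the metric names logged in `trainer_state.json` (`rewards/accuracy_reward`, `rewards/format_reward`, learning rate 3e-06, one epoch). The base model, dataset, and reward functions here are placeholders for illustration, not the ones used for this checkpoint.

```python
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

# Placeholder dataset from the TRL docs; any dataset with a "prompt" column works.
dataset = load_dataset("trl-lib/tldr", split="train")

def accuracy_reward(completions, **kwargs):
    # Placeholder: 1.0 if a reference answer column is present in the completion, else 0.0.
    answers = kwargs.get("answer", [""] * len(completions))
    return [1.0 if a and a in c else 0.0 for c, a in zip(completions, answers)]

def format_reward(completions, **kwargs):
    # Placeholder: reward completions that produce a boxed final answer.
    return [1.0 if "\\boxed{" in c else 0.0 for c in completions]

args = GRPOConfig(
    output_dir="grpo-model",
    learning_rate=3e-6,
    num_train_epochs=1,
    logging_steps=5,
    save_steps=500,
)

trainer = GRPOTrainer(
    model="Qwen/Qwen2.5-7B-Instruct",  # placeholder base model
    reward_funcs=[accuracy_reward, format_reward],
    args=args,
    train_dataset=dataset,
)
trainer.train()
```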
all_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.
-    "train_runtime":
+    "train_loss": 0.008086015470325947,
+    "train_runtime": 126.4484,
     "train_samples": 7500,
-    "train_samples_per_second": 1.
+    "train_samples_per_second": 1.012,
     "train_steps_per_second": 0.008
 }
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e7d24237ce54e25513e9336f74d6992ac6033b8bbe91e0875c0c036443733398
+size 4877660776
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:77bc12eda65c80a9a5dd5a966b2dfeb1c63539e6e687c3fec779beea4045a656
+size 4932751008
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:412401c4cb2c5cfef003d40f2c9ca6bfc309c6b1bbde6a70b19b2c558590f025
+size 4330865200
model-00004-of-00004.safetensors
CHANGED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06dabedb79b5e0d1fcefcdc2a7aee009d76b6fe85157ad46e6f6ba4ba0122700
+size 1089994880
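The `.safetensors` entries above are Git LFS pointer files: each records only a version line, the blob's `oid sha256:<hex>` digest, and its `size` in bytes. As a quick integrity check after downloading the shards, a sketch like the following (file paths are hypothetical, and depend on how the repo was cloned) recomputes the digest and compares it against the pointer.

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    """Compare a downloaded blob against the oid/size recorded in its Git LFS pointer."""
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if " " in line
    )
    expected_oid = fields["oid"].split(":", 1)[1]  # "sha256:<hex>" -> "<hex>"
    expected_size = int(fields["size"])

    blob = Path(blob_path)
    if blob.stat().st_size != expected_size:
        return False

    digest = hashlib.sha256()
    with blob.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Hypothetical usage; in a clone without LFS smudging, the pointer text is what
# sits at model-0000X-of-00004.safetensors until the real blob is fetched.
# verify_lfs_pointer("pointer.txt", "model-00004-of-00004.safetensors")
```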
train_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.
-    "train_runtime":
+    "train_loss": 0.008086015470325947,
+    "train_runtime": 126.4484,
     "train_samples": 7500,
-    "train_samples_per_second": 1.
+    "train_samples_per_second": 1.012,
     "train_steps_per_second": 0.008
 }
trainer_state.json
CHANGED
@@ -3,58 +3,37 @@
   "best_model_checkpoint": null,
   "epoch": 1.0,
   "eval_steps": 100,
-  "global_step":
+  "global_step": 1,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "clip_ratio": 0.0,
-      "completion_length":
-      "epoch": 0
-      "grad_norm":
+      "completion_length": 516.796875,
+      "epoch": 1.0,
+      "grad_norm": 0.5813263058662415,
       "kl": 0.0,
       "learning_rate": 3e-06,
-      "loss": 0.
-      "reward": 0.
-      "reward_std": 0.
-      "rewards/accuracy_reward": 0.
+      "loss": 0.0081,
+      "reward": 0.7098214626312256,
+      "reward_std": 0.3545062728226185,
+      "rewards/accuracy_reward": 0.7098214626312256,
       "rewards/format_reward": 0.0,
       "step": 1
     },
     {
-      "clip_ratio": 0.0,
-      "completion_length": 555.9333419799805,
-      "epoch": 0.625,
-      "grad_norm": 3.0009214878082275,
-      "kl": 0.002394258975982666,
-      "learning_rate": 1.1662185990655286e-06,
-      "loss": 0.0451,
-      "reward": 0.7642299458384514,
-      "reward_std": 0.3406056701205671,
-      "rewards/accuracy_reward": 0.7642299458384514,
-      "rewards/format_reward": 0.0,
-      "step": 5
-    },
-    {
-      "clip_ratio": 0.0,
-      "completion_length": 559.236998240153,
       "epoch": 1.0,
-      "
-      "reward": 0.8098958681027094,
-      "reward_std": 0.27716156281530857,
-      "rewards/accuracy_reward": 0.8098958681027094,
-      "rewards/format_reward": 0.0,
-      "step": 8,
+      "step": 1,
       "total_flos": 0.0,
-      "train_loss": 0.
-      "train_runtime":
-      "train_samples_per_second": 1.
+      "train_loss": 0.008086015470325947,
+      "train_runtime": 126.4484,
+      "train_samples_per_second": 1.012,
       "train_steps_per_second": 0.008
     }
   ],
   "logging_steps": 5,
-  "max_steps":
+  "max_steps": 1,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
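Since `trainer_state.json` now carries the full `log_history` for this single-step run, the logged GRPO metrics can be inspected directly; a minimal sketch, assuming the file is read from the repo checkout:

```python
import json

# Print the metrics the Trainer logged into trainer_state.json.
with open("trainer_state.json") as f:
    state = json.load(f)

print("global_step:", state["global_step"], "max_steps:", state["max_steps"])
for entry in state["log_history"]:
    if "reward" in entry:  # per-step entries; the final summary entry has no "reward"
        print(f'step {entry.get("step")}: loss={entry.get("loss")} '
              f'reward={entry["reward"]:.4f} kl={entry.get("kl")}')
```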
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:cf04e394615126d67569c09d6f52f0442c2fbadf27759f7ca1f142ab47ec0a69
+size 7992
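`training_args.bin` is the pickled training-arguments object (presumably TRL's `GRPOConfig`, which subclasses `transformers.TrainingArguments`), which is why it is only about 8 kB. One way to peek at it, assuming PyTorch and the training libraries that produced it are installed:

```python
import torch

# Unpickling only works if the class that produced the file (e.g. trl's GRPOConfig)
# is importable; weights_only=False is required on recent PyTorch and carries the
# usual "only load pickles you trust" caveat.
args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)
print(args.learning_rate, args.num_train_epochs, args.logging_steps, args.save_steps)
```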