xiwenc1 committed on
Commit
95784b9
·
verified ·
1 Parent(s): 1bf8304

Model save

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. all_results.json +4 -4
  3. train_results.json +4 -4
  4. trainer_state.json +21 -8
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/myopen-rs/huggingface/runs/9mjk517m)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/myopen-rs/huggingface/runs/sh56i1gy)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.005753870498389006,
4
- "train_runtime": 9057.1764,
5
  "train_samples": 7000,
6
- "train_samples_per_second": 2.65,
7
- "train_steps_per_second": 0.055
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 2.328130582016623e-05,
4
+ "train_runtime": 113.9487,
5
  "train_samples": 7000,
6
+ "train_samples_per_second": 210.621,
7
+ "train_steps_per_second": 4.388
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.005753870498389006,
4
- "train_runtime": 9057.1764,
5
  "train_samples": 7000,
6
- "train_samples_per_second": 2.65,
7
- "train_steps_per_second": 0.055
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 2.328130582016623e-05,
4
+ "train_runtime": 113.9487,
5
  "train_samples": 7000,
6
+ "train_samples_per_second": 210.621,
7
+ "train_steps_per_second": 4.388
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5714285714285714,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6509,13 +6509,26 @@
6509
  "step": 500
6510
  },
6511
  {
6512
- "epoch": 0.5714285714285714,
6513
- "step": 500,
 
 
 
 
 
 
 
 
 
 
 
 
 
6514
  "total_flos": 0.0,
6515
- "train_loss": 0.005753870498389006,
6516
- "train_runtime": 9057.1764,
6517
- "train_samples_per_second": 2.65,
6518
- "train_steps_per_second": 0.055
6519
  }
6520
  ],
6521
  "logging_steps": 1,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5725714285714286,
5
  "eval_steps": 500,
6
+ "global_step": 501,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6509
  "step": 500
6510
  },
6511
  {
6512
+ "completion_length": 1138.708366394043,
6513
+ "epoch": 0.5725714285714286,
6514
+ "grad_norm": 1.5487465858459473,
6515
+ "kl": 0.29150390625,
6516
+ "learning_rate": 1.0001096618257236e-07,
6517
+ "loss": 0.0117,
6518
+ "reward": 0.21772483922541142,
6519
+ "reward_std": 0.14172286912798882,
6520
+ "rewards/cosine_scaled_reward": -0.009133230894804,
6521
+ "rewards/format_reward": 0.8333333432674408,
6522
+ "step": 501
6523
+ },
6524
+ {
6525
+ "epoch": 0.5725714285714286,
6526
+ "step": 501,
6527
  "total_flos": 0.0,
6528
+ "train_loss": 2.328130582016623e-05,
6529
+ "train_runtime": 113.9487,
6530
+ "train_samples_per_second": 210.621,
6531
+ "train_steps_per_second": 4.388
6532
  }
6533
  ],
6534
  "logging_steps": 1,