phd2023 committed on
Commit
c776cd3
·
verified ·
1 Parent(s): 4f1ae5a

Model save

Browse files
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/sjhxbug218-nvidia/huggingface/runs/f0mu5ifb)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/sjhxbug218-nvidia/huggingface/runs/jika9i2p)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.05527676269412041,
4
- "train_runtime": 960.6746,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 1.066,
7
  "train_steps_per_second": 0.008
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.008086015470325947,
4
+ "train_runtime": 126.4484,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 1.012,
7
  "train_steps_per_second": 0.008
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b248d4c16c86a58d89a7fa1bd7202c679100b417f2147a8aa6826d332cf03b3
3
- size 2624790528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7d24237ce54e25513e9336f74d6992ac6033b8bbe91e0875c0c036443733398
3
+ size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63ae15675479e5997187ac4e9c25e645d9187ac442197893b3a5ee18b868c4f1
3
- size 24576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77bc12eda65c80a9a5dd5a966b2dfeb1c63539e6e687c3fec779beea4045a656
3
+ size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e9b081d4695d71e0f71f2f0d21be0fd603083275831f1786a2935b4e51312e4
3
- size 1579814912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:412401c4cb2c5cfef003d40f2c9ca6bfc309c6b1bbde6a70b19b2c558590f025
3
+ size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06dabedb79b5e0d1fcefcdc2a7aee009d76b6fe85157ad46e6f6ba4ba0122700
3
+ size 1089994880
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.05527676269412041,
4
- "train_runtime": 960.6746,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 1.066,
7
  "train_steps_per_second": 0.008
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.008086015470325947,
4
+ "train_runtime": 126.4484,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 1.012,
7
  "train_steps_per_second": 0.008
8
  }
trainer_state.json CHANGED
@@ -3,58 +3,37 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 100,
6
- "global_step": 8,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "clip_ratio": 0.0,
13
- "completion_length": 535.0468978881836,
14
- "epoch": 0.125,
15
- "grad_norm": 1.6275352239608765,
16
  "kl": 0.0,
17
  "learning_rate": 3e-06,
18
- "loss": 0.0685,
19
- "reward": 0.770089328289032,
20
- "reward_std": 0.3429133668541908,
21
- "rewards/accuracy_reward": 0.770089328289032,
22
  "rewards/format_reward": 0.0,
23
  "step": 1
24
  },
25
  {
26
- "clip_ratio": 0.0,
27
- "completion_length": 555.9333419799805,
28
- "epoch": 0.625,
29
- "grad_norm": 3.0009214878082275,
30
- "kl": 0.002394258975982666,
31
- "learning_rate": 1.1662185990655286e-06,
32
- "loss": 0.0451,
33
- "reward": 0.7642299458384514,
34
- "reward_std": 0.3406056701205671,
35
- "rewards/accuracy_reward": 0.7642299458384514,
36
- "rewards/format_reward": 0.0,
37
- "step": 5
38
- },
39
- {
40
- "clip_ratio": 0.0,
41
- "completion_length": 559.236998240153,
42
  "epoch": 1.0,
43
- "kl": 0.010375022888183594,
44
- "reward": 0.8098958681027094,
45
- "reward_std": 0.27716156281530857,
46
- "rewards/accuracy_reward": 0.8098958681027094,
47
- "rewards/format_reward": 0.0,
48
- "step": 8,
49
  "total_flos": 0.0,
50
- "train_loss": 0.05527676269412041,
51
- "train_runtime": 960.6746,
52
- "train_samples_per_second": 1.066,
53
  "train_steps_per_second": 0.008
54
  }
55
  ],
56
  "logging_steps": 5,
57
- "max_steps": 8,
58
  "num_input_tokens_seen": 0,
59
  "num_train_epochs": 1,
60
  "save_steps": 500,
 
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 100,
6
+ "global_step": 1,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "clip_ratio": 0.0,
13
+ "completion_length": 516.796875,
14
+ "epoch": 1.0,
15
+ "grad_norm": 0.5813263058662415,
16
  "kl": 0.0,
17
  "learning_rate": 3e-06,
18
+ "loss": 0.0081,
19
+ "reward": 0.7098214626312256,
20
+ "reward_std": 0.3545062728226185,
21
+ "rewards/accuracy_reward": 0.7098214626312256,
22
  "rewards/format_reward": 0.0,
23
  "step": 1
24
  },
25
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "epoch": 1.0,
27
+ "step": 1,
 
 
 
 
 
28
  "total_flos": 0.0,
29
+ "train_loss": 0.008086015470325947,
30
+ "train_runtime": 126.4484,
31
+ "train_samples_per_second": 1.012,
32
  "train_steps_per_second": 0.008
33
  }
34
  ],
35
  "logging_steps": 5,
36
+ "max_steps": 1,
37
  "num_input_tokens_seen": 0,
38
  "num_train_epochs": 1,
39
  "save_steps": 500,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c6f6b35b6beb62a7df7622d585484daf12e3aa05d722d95db2ec6ae5a6737cf
3
- size 8120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf04e394615126d67569c09d6f52f0442c2fbadf27759f7ca1f142ab47ec0a69
3
+ size 7992