RyanYr committed on
Commit
66b2aa9
·
verified ·
1 Parent(s): 798c136

Save model at global step 40

Browse files
config.yaml CHANGED
@@ -28,6 +28,7 @@ actor_rollout_ref:
28
  use_remove_padding: true
29
  use_liger: false
30
  save_hf_repo_id: RyanYr/ppo-dapo-llama3.2-3B-it-lr-mbs64_actor
 
31
  actor:
32
  strategy: fsdp
33
  ppo_mini_batch_size: 64
@@ -60,7 +61,7 @@ actor_rollout_ref:
60
  lr_warmup_steps_ratio: 0.0
61
  min_lr_ratio: null
62
  warmup_style: constant
63
- total_training_steps: 1620
64
  weight_decay: 0.01
65
  fsdp_config:
66
  wrap_policy:
@@ -94,7 +95,7 @@ actor_rollout_ref:
94
  free_cache_engine: false
95
  load_format: dummy_dtensor
96
  tensor_model_parallel_size: 4
97
- max_num_batched_tokens: 4864
98
  max_model_len: null
99
  max_num_seqs: 1024
100
  log_prob_micro_batch_size: null
@@ -121,7 +122,7 @@ critic:
121
  lr_warmup_steps_ratio: 0.0
122
  min_lr_ratio: null
123
  warmup_style: constant
124
- total_training_steps: 1620
125
  weight_decay: 0.01
126
  model:
127
  path: meta-llama/Llama-3.2-3B-Instruct
@@ -191,8 +192,8 @@ algorithm:
191
  target_kl: 0.1
192
  trainer:
193
  balance_batch: true
194
- total_epochs: 12
195
- total_training_steps: null
196
  project_name: value-LLM
197
  experiment_name: ppo-dapo-llama3.2-3B-it_lr-mbs64
198
  logger:
 
28
  use_remove_padding: true
29
  use_liger: false
30
  save_hf_repo_id: RyanYr/ppo-dapo-llama3.2-3B-it-lr-mbs64_actor
31
+ tokenizer_chat_template: null
32
  actor:
33
  strategy: fsdp
34
  ppo_mini_batch_size: 64
 
61
  lr_warmup_steps_ratio: 0.0
62
  min_lr_ratio: null
63
  warmup_style: constant
64
+ total_training_steps: 800
65
  weight_decay: 0.01
66
  fsdp_config:
67
  wrap_policy:
 
95
  free_cache_engine: false
96
  load_format: dummy_dtensor
97
  tensor_model_parallel_size: 4
98
+ max_num_batched_tokens: 5864
99
  max_model_len: null
100
  max_num_seqs: 1024
101
  log_prob_micro_batch_size: null
 
122
  lr_warmup_steps_ratio: 0.0
123
  min_lr_ratio: null
124
  warmup_style: constant
125
+ total_training_steps: 800
126
  weight_decay: 0.01
127
  model:
128
  path: meta-llama/Llama-3.2-3B-Instruct
 
192
  target_kl: 0.1
193
  trainer:
194
  balance_batch: true
195
+ total_epochs: 100
196
+ total_training_steps: 800
197
  project_name: value-LLM
198
  experiment_name: ppo-dapo-llama3.2-3B-it_lr-mbs64
199
  logger:
extra_state_world_size_4_rank_0.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ce985955afb375595a798d4408ef3bd0b25994c2d8e843dc1dd6aff61fa8250
3
  size 14632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8bab196b2c5d02d4582802762f8a925afc5068b4745c8f7c70b3a48eadf4199
3
  size 14632
extra_state_world_size_4_rank_1.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77a20414f86e7920d1019c1ec9238de19472260848f3fcba6597f1c94a3dd5b7
3
  size 14632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:630bc6c4950574d0f46d2a887ff124f6b7442bc520533f77cb6d7d4672d13e02
3
  size 14632
extra_state_world_size_4_rank_2.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28c0b40aeeba98b90a130ac6ebcb75615ce47bd91a4ab4ec722519d12a8650f3
3
  size 14632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0080c2297d874cbc32ba8e72176db3ac211b0a4f46c8b23a748c9dedd14c22cd
3
  size 14632
extra_state_world_size_4_rank_3.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f8fbbba43d24a55f7a92c8eb56e483536361e71c8cf8262837dd5953a4d212f
3
  size 14632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0537117720ae4b428d42b66da5a91c3c469d92881a161b08933d3ff43c447c19
3
  size 14632
model_world_size_4_rank_0.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6a76271a1c5fb577fb3f6f2f632f4be0e02fde635b524b31f2c3177d71c240c
3
  size 3212915290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f6af562edc8047476c9200dbc3d81206ca66e9f62de0e202572bb87f7743eca
3
  size 3212915290
model_world_size_4_rank_1.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac4cffaa0cb6867e15a2598c8625f5226c68d7c192f4d6fefe9c7e6190ef0841
3
  size 3212903002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10b7d629ae91e828b044bb7a03f851dbc6037b07c8ae7247dd9707c597281126
3
  size 3212903002
model_world_size_4_rank_2.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc691dfbe4c2167b6f460009309e4c12fc8795974f34e2c193fcf34bb75c588d
3
  size 3212903002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4fa2c959c0f7ea7efc5961eecfaad1770357b971e4be104167a020e1115c4ff
3
  size 3212903002
model_world_size_4_rank_3.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24b99a707d6b2eeec059b029130eafd506447bb7c0b8b36c3fc03f9c1622800e
3
  size 3212903002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bf4c26389309fa8e78bd79dbcdcf7e442eaab779e828f178a8c0e1a05de1ea2
3
  size 3212903002
optim_world_size_4_rank_0.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcdbe8c36f412db6f8e0a144250e7f8fe0499a22009d172ebae4ff053a5e9b9c
3
  size 6425532375
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d09621da6ff4f909770333e223c8a8a9362b6ef2d5f93cdeddd9c02659d99d
3
  size 6425532375
optim_world_size_4_rank_1.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18e7c9b419789ab26db52a4c4827419669200f0d2edeb36913e7170969ab7ef8
3
  size 6425532375
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b877f6efc223a50cba7c915a3fd2a7c0d8624560ed30c5df7f0a4ce40eb4099
3
  size 6425532375
optim_world_size_4_rank_2.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:014eb67b2c0450c08a3695f3e5f70f97e44ac3a9fc8451e6d4a74a3c1482f3f2
3
  size 6425532375
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ff9ba4d66224b1a6c6eb68cdb837f039e2955eae4d9469f9ef7859fe715114e
3
  size 6425532375
optim_world_size_4_rank_3.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e03650170c0b87ca38734773e9be23308546150b990228597698a35cbb84402a
3
  size 6425532375
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1078c66783e247ae70c5290b6fc33c69f4c3e195226295b9262f26d332df3ada
3
  size 6425532375