RyanYr committed on
Commit
dc64b42
·
verified ·
1 Parent(s): 31bca4a

Save model at global step 40

Browse files
config.yaml CHANGED
@@ -28,6 +28,7 @@ actor_rollout_ref:
28
  use_remove_padding: true
29
  use_liger: false
30
  save_hf_repo_id: RyanYr/ppo-dapo-llama3.2-3B-it-lr-mbs64_actor
 
31
  actor:
32
  strategy: fsdp
33
  ppo_mini_batch_size: 64
@@ -60,7 +61,7 @@ actor_rollout_ref:
60
  lr_warmup_steps_ratio: 0.0
61
  min_lr_ratio: null
62
  warmup_style: constant
63
- total_training_steps: 1620
64
  weight_decay: 0.01
65
  fsdp_config:
66
  wrap_policy:
@@ -94,7 +95,7 @@ actor_rollout_ref:
94
  free_cache_engine: false
95
  load_format: dummy_dtensor
96
  tensor_model_parallel_size: 4
97
- max_num_batched_tokens: 4864
98
  max_model_len: null
99
  max_num_seqs: 1024
100
  log_prob_micro_batch_size: null
@@ -121,7 +122,7 @@ critic:
121
  lr_warmup_steps_ratio: 0.0
122
  min_lr_ratio: null
123
  warmup_style: constant
124
- total_training_steps: 1620
125
  weight_decay: 0.01
126
  model:
127
  path: meta-llama/Llama-3.2-3B-Instruct
@@ -191,8 +192,8 @@ algorithm:
191
  target_kl: 0.1
192
  trainer:
193
  balance_batch: true
194
- total_epochs: 12
195
- total_training_steps: null
196
  project_name: value-LLM
197
  experiment_name: ppo-dapo-llama3.2-3B-it_lr-mbs64
198
  logger:
 
28
  use_remove_padding: true
29
  use_liger: false
30
  save_hf_repo_id: RyanYr/ppo-dapo-llama3.2-3B-it-lr-mbs64_actor
31
+ tokenizer_chat_template: null
32
  actor:
33
  strategy: fsdp
34
  ppo_mini_batch_size: 64
 
61
  lr_warmup_steps_ratio: 0.0
62
  min_lr_ratio: null
63
  warmup_style: constant
64
+ total_training_steps: 800
65
  weight_decay: 0.01
66
  fsdp_config:
67
  wrap_policy:
 
95
  free_cache_engine: false
96
  load_format: dummy_dtensor
97
  tensor_model_parallel_size: 4
98
+ max_num_batched_tokens: 5864
99
  max_model_len: null
100
  max_num_seqs: 1024
101
  log_prob_micro_batch_size: null
 
122
  lr_warmup_steps_ratio: 0.0
123
  min_lr_ratio: null
124
  warmup_style: constant
125
+ total_training_steps: 800
126
  weight_decay: 0.01
127
  model:
128
  path: meta-llama/Llama-3.2-3B-Instruct
 
192
  target_kl: 0.1
193
  trainer:
194
  balance_batch: true
195
+ total_epochs: 100
196
+ total_training_steps: 800
197
  project_name: value-LLM
198
  experiment_name: ppo-dapo-llama3.2-3B-it_lr-mbs64
199
  logger:
data.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e509dfbb0976aa42f2a17aaf7c763aaf3876257c3d1f9cec03ccf7406ee26c4a
3
  size 1492
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb23deb8ea5dfeecb96152a859181931227d3f194fd6c72f7e0b9f8e093a0687
3
  size 1492
extra_state_world_size_4_rank_0.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1268e9aa07b5bf0b6e7f8a82bbef08667d9bf19c816443bc74909e8775f2aa8a
3
  size 14632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ab08b34f7000fc99b714608686487f4b0ef5f419a1c23610e1c44c7aaad8137
3
  size 14632
extra_state_world_size_4_rank_1.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edb0d33891e39c7b6c62dd9b5af87516001ba3bf756d5fa8ae086b601ef00754
3
  size 14632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af9fc86f6e469cb0834476b71b525358133f74ff5ee77ed4bce6eac57aa550ca
3
  size 14632
extra_state_world_size_4_rank_2.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f375391e56ced691d83d7960525e831c29dcae3ca70ef20db607d44e9dfdf95
3
  size 14632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d321ba942354b0228acd80bb829660d58c1006ff7e77e69386c788b91137c6b8
3
  size 14632
extra_state_world_size_4_rank_3.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d50d61eaecff9490b24d2f7572a78f1896ed930081d60b40600a9b9bc4eb497
3
  size 14632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:720ecba3014916b6c126dc326620e82c0dc2b8c061000f556b103b296edc5656
3
  size 14632
model_world_size_4_rank_0.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1478f584aa6845728b69347241813b0370f639dd3868d7cbb58a40f216dccf86
3
  size 3606904378
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c53ea6f2a6cb64fd77dc67a8f74271d9e9f816057fed6e6a844e2c975027cd8
3
  size 3606904378
model_world_size_4_rank_1.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d20aecde1052438556eee0cfaeb0a4303889b5321d9ad2e0ec5449983b793ebb
3
  size 3606904378
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4dd613af1b3003db1f41abbfd4dcfbc692c4cc20d98628c2403df90def5ef36
3
  size 3606904378
model_world_size_4_rank_2.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:734e06393aa3f82150b0d05e182a348a202e38222901c08dcb49425bee7d7967
3
  size 3606904378
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c763d759786ea16dd576800f784cbec1b09cc611ad742756627de39431b2dc73
3
  size 3606904378
model_world_size_4_rank_3.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98a9a9f3e8ef9c950679bade1f8e0a2e24aa79aa3a9605934b893e6df6d85f81
3
  size 3606904378
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81896bcb1fdc86b7d0e0184a3100a7bc3b16e463d168e58e9b621d36a44fff62
3
  size 3606904378
optim_world_size_4_rank_0.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:381ad5d2e5e0c3e2bc83bdf483629ad1901e72404c5eaa129083e6f357fd4ea6
3
  size 6425526231
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a582c7139f24d83b5b8af25c4192358693bc05a2ffb4c6d243f839f8df1dcd
3
  size 6425526231
optim_world_size_4_rank_1.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e146328cbc57d191f62dbd32b6c10394a894ff57563777e5665373cbaf9c057a
3
  size 6425526231
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60311c90329a3d77edd048ec0a32a50a4a809b858dd6f124018d9557d5729ce2
3
  size 6425526231
optim_world_size_4_rank_2.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6b547ec061309bb282cb3365e9a4518f0148359a2950f7ccd18c6024887fb53
3
  size 6425526231
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39c64018a79156ccfeca9ccb81b0bc1bba4f6634a787e49711869675ec173af2
3
  size 6425526231
optim_world_size_4_rank_3.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4815e24f515b191ce0be91e18cca4794ad0450582fa28bbde26f3ed146ba27e4
3
  size 6425526231
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16d1e65f2f85f46bf19546e2ed0dc34b3f2df603cee9071452a60d58ed17c6b0
3
  size 6425526231