RyanYr
/

ppo-dapo-llama3.2-3B-it-lr-mbs64_actor

RyanYr committed on May 29

Commit

dc64b42

verified ·

1 Parent(s): 31bca4a

Save model at global step 40

Files changed (14) hide show

config.yaml CHANGED Viewed

@@ -28,6 +28,7 @@ actor_rollout_ref:
     use_remove_padding: true
     use_liger: false
     save_hf_repo_id: RyanYr/ppo-dapo-llama3.2-3B-it-lr-mbs64_actor
   actor:
     strategy: fsdp
     ppo_mini_batch_size: 64
@@ -60,7 +61,7 @@ actor_rollout_ref:
       lr_warmup_steps_ratio: 0.0
       min_lr_ratio: null
       warmup_style: constant
-      total_training_steps: 1620
       weight_decay: 0.01
     fsdp_config:
       wrap_policy:
@@ -94,7 +95,7 @@ actor_rollout_ref:
     free_cache_engine: false
     load_format: dummy_dtensor
     tensor_model_parallel_size: 4
-    max_num_batched_tokens: 4864
     max_model_len: null
     max_num_seqs: 1024
     log_prob_micro_batch_size: null
@@ -121,7 +122,7 @@ critic:
     lr_warmup_steps_ratio: 0.0
     min_lr_ratio: null
     warmup_style: constant
-    total_training_steps: 1620
     weight_decay: 0.01
   model:
     path: meta-llama/Llama-3.2-3B-Instruct
@@ -191,8 +192,8 @@ algorithm:
     target_kl: 0.1
 trainer:
   balance_batch: true
-  total_epochs: 12
-  total_training_steps: null
   project_name: value-LLM
   experiment_name: ppo-dapo-llama3.2-3B-it_lr-mbs64
   logger:

     use_remove_padding: true
     use_liger: false
     save_hf_repo_id: RyanYr/ppo-dapo-llama3.2-3B-it-lr-mbs64_actor
+    tokenizer_chat_template: null
   actor:
     strategy: fsdp
     ppo_mini_batch_size: 64
       lr_warmup_steps_ratio: 0.0
       min_lr_ratio: null
       warmup_style: constant
+      total_training_steps: 800
       weight_decay: 0.01
     fsdp_config:
       wrap_policy:
     free_cache_engine: false
     load_format: dummy_dtensor
     tensor_model_parallel_size: 4
+    max_num_batched_tokens: 5864
     max_model_len: null
     max_num_seqs: 1024
     log_prob_micro_batch_size: null
     lr_warmup_steps_ratio: 0.0
     min_lr_ratio: null
     warmup_style: constant
+    total_training_steps: 800
     weight_decay: 0.01
   model:
     path: meta-llama/Llama-3.2-3B-Instruct
     target_kl: 0.1
 trainer:
   balance_batch: true
+  total_epochs: 100
+  total_training_steps: 800
   project_name: value-LLM
   experiment_name: ppo-dapo-llama3.2-3B-it_lr-mbs64
   logger:

data.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e509dfbb0976aa42f2a17aaf7c763aaf3876257c3d1f9cec03ccf7406ee26c4a
 size 1492

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb23deb8ea5dfeecb96152a859181931227d3f194fd6c72f7e0b9f8e093a0687
 size 1492

extra_state_world_size_4_rank_0.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1268e9aa07b5bf0b6e7f8a82bbef08667d9bf19c816443bc74909e8775f2aa8a
 size 14632

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ab08b34f7000fc99b714608686487f4b0ef5f419a1c23610e1c44c7aaad8137
 size 14632

extra_state_world_size_4_rank_1.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:edb0d33891e39c7b6c62dd9b5af87516001ba3bf756d5fa8ae086b601ef00754
 size 14632

 version https://git-lfs.github.com/spec/v1
+oid sha256:af9fc86f6e469cb0834476b71b525358133f74ff5ee77ed4bce6eac57aa550ca
 size 14632

extra_state_world_size_4_rank_2.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f375391e56ced691d83d7960525e831c29dcae3ca70ef20db607d44e9dfdf95
 size 14632

 version https://git-lfs.github.com/spec/v1
+oid sha256:d321ba942354b0228acd80bb829660d58c1006ff7e77e69386c788b91137c6b8
 size 14632

extra_state_world_size_4_rank_3.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d50d61eaecff9490b24d2f7572a78f1896ed930081d60b40600a9b9bc4eb497
 size 14632

 version https://git-lfs.github.com/spec/v1
+oid sha256:720ecba3014916b6c126dc326620e82c0dc2b8c061000f556b103b296edc5656
 size 14632

model_world_size_4_rank_0.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1478f584aa6845728b69347241813b0370f639dd3868d7cbb58a40f216dccf86
 size 3606904378

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c53ea6f2a6cb64fd77dc67a8f74271d9e9f816057fed6e6a844e2c975027cd8
 size 3606904378

model_world_size_4_rank_1.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d20aecde1052438556eee0cfaeb0a4303889b5321d9ad2e0ec5449983b793ebb
 size 3606904378

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4dd613af1b3003db1f41abbfd4dcfbc692c4cc20d98628c2403df90def5ef36
 size 3606904378

model_world_size_4_rank_2.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:734e06393aa3f82150b0d05e182a348a202e38222901c08dcb49425bee7d7967
 size 3606904378

 version https://git-lfs.github.com/spec/v1
+oid sha256:c763d759786ea16dd576800f784cbec1b09cc611ad742756627de39431b2dc73
 size 3606904378

model_world_size_4_rank_3.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98a9a9f3e8ef9c950679bade1f8e0a2e24aa79aa3a9605934b893e6df6d85f81
 size 3606904378

 version https://git-lfs.github.com/spec/v1
+oid sha256:81896bcb1fdc86b7d0e0184a3100a7bc3b16e463d168e58e9b621d36a44fff62
 size 3606904378

optim_world_size_4_rank_0.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:381ad5d2e5e0c3e2bc83bdf483629ad1901e72404c5eaa129083e6f357fd4ea6
 size 6425526231

 version https://git-lfs.github.com/spec/v1
+oid sha256:20a582c7139f24d83b5b8af25c4192358693bc05a2ffb4c6d243f839f8df1dcd
 size 6425526231

optim_world_size_4_rank_1.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e146328cbc57d191f62dbd32b6c10394a894ff57563777e5665373cbaf9c057a
 size 6425526231

 version https://git-lfs.github.com/spec/v1
+oid sha256:60311c90329a3d77edd048ec0a32a50a4a809b858dd6f124018d9557d5729ce2
 size 6425526231

optim_world_size_4_rank_2.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6b547ec061309bb282cb3365e9a4518f0148359a2950f7ccd18c6024887fb53
 size 6425526231

 version https://git-lfs.github.com/spec/v1
+oid sha256:39c64018a79156ccfeca9ccb81b0bc1bba4f6634a787e49711869675ec173af2
 size 6425526231

optim_world_size_4_rank_3.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4815e24f515b191ce0be91e18cca4794ad0450582fa28bbde26f3ed146ba27e4
 size 6425526231

 version https://git-lfs.github.com/spec/v1
+oid sha256:16d1e65f2f85f46bf19546e2ed0dc34b3f2df603cee9071452a60d58ed17c6b0
 size 6425526231