{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9826771653543307,
  "eval_steps": 100,
  "global_step": 158,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "completion_length": 652.5913221359253,
      "epoch": 0.12598425196850394,
      "grad_norm": 0.5412344932556152,
      "kl": 0.00025856494903564453,
      "learning_rate": 1.875e-06,
      "loss": 0.0,
      "reward": 0.5777902046218515,
      "reward_std": 0.32899713758379223,
      "rewards/accuracy_reward": 0.5776785971596837,
      "rewards/format_reward": 0.00011160714784637093,
      "step": 10
    },
    {
      "completion_length": 694.438868522644,
      "epoch": 0.25196850393700787,
      "grad_norm": 0.24628566205501556,
      "kl": 0.0019156813621520996,
      "learning_rate": 2.994130233112417e-06,
      "loss": 0.0001,
      "reward": 0.6052455639466643,
      "reward_std": 0.26475782548077403,
      "rewards/accuracy_reward": 0.6052455639466643,
      "rewards/format_reward": 0.0,
      "step": 20
    },
    {
      "completion_length": 651.5066148757935,
      "epoch": 0.3779527559055118,
      "grad_norm": 0.14223581552505493,
      "kl": 0.0024756908416748045,
      "learning_rate": 2.9286218000371976e-06,
      "loss": 0.0001,
      "reward": 0.6724330654367805,
      "reward_std": 0.23531078966334462,
      "rewards/accuracy_reward": 0.6724330654367805,
      "rewards/format_reward": 0.0,
      "step": 30
    },
    {
      "completion_length": 642.1838449478149,
      "epoch": 0.5039370078740157,
      "grad_norm": 0.1239105761051178,
      "kl": 0.0031515121459960937,
      "learning_rate": 2.7934718587800417e-06,
      "loss": 0.0001,
      "reward": 0.7046875322237611,
      "reward_std": 0.19434297760017216,
      "rewards/accuracy_reward": 0.7046875322237611,
      "rewards/format_reward": 0.0,
      "step": 40
    },
    {
      "completion_length": 627.14924659729,
      "epoch": 0.6299212598425197,
      "grad_norm": 0.13240313529968262,
      "kl": 0.003639984130859375,
      "learning_rate": 2.595268609058752e-06,
      "loss": 0.0001,
      "reward": 0.7179687809199095,
      "reward_std": 0.19313886840827763,
      "rewards/accuracy_reward": 0.7179687809199095,
      "rewards/format_reward": 0.0,
      "step": 50
    },
    {
      "completion_length": 626.9296024322509,
      "epoch": 0.7559055118110236,
      "grad_norm": 0.15062075853347778,
      "kl": 0.004168796539306641,
      "learning_rate": 2.343673931461171e-06,
      "loss": 0.0002,
      "reward": 0.6809152102097868,
      "reward_std": 0.1983337783254683,
      "rewards/accuracy_reward": 0.6809152102097868,
      "rewards/format_reward": 0.0,
      "step": 60
    },
    {
      "completion_length": 610.840876197815,
      "epoch": 0.8818897637795275,
      "grad_norm": 0.11126791685819626,
      "kl": 0.004203128814697266,
      "learning_rate": 2.0509523964971355e-06,
      "loss": 0.0002,
      "reward": 0.7165178887546062,
      "reward_std": 0.1934912689961493,
      "rewards/accuracy_reward": 0.7165178887546062,
      "rewards/format_reward": 0.0,
      "step": 70
    },
    {
      "completion_length": 592.2336000569661,
      "epoch": 1.0,
      "grad_norm": 0.17808477580547333,
      "kl": 0.0042411295572916665,
      "learning_rate": 1.7313733994479534e-06,
      "loss": 0.0002,
      "reward": 0.7291666994492213,
      "reward_std": 0.1846819964547952,
      "rewards/accuracy_reward": 0.7291666994492213,
      "rewards/format_reward": 0.0,
      "step": 80
    },
    {
      "completion_length": 609.6807176589966,
      "epoch": 1.125984251968504,
      "grad_norm": 0.08229045569896698,
      "kl": 0.004135942459106446,
      "learning_rate": 1.4005155653473445e-06,
      "loss": 0.0002,
      "reward": 0.7156250355765224,
      "reward_std": 0.20761510250158607,
      "rewards/accuracy_reward": 0.7156250355765224,
      "rewards/format_reward": 0.0,
      "step": 90
    },
    {
      "completion_length": 611.2092897415162,
      "epoch": 1.2519685039370079,
      "grad_norm": 0.16556662321090698,
      "kl": 0.0037270545959472655,
      "learning_rate": 1.0745073324985549e-06,
      "loss": 0.0001,
      "reward": 0.7110491398721933,
      "reward_std": 0.18295098417438566,
      "rewards/accuracy_reward": 0.7110491398721933,
      "rewards/format_reward": 0.0,
      "step": 100
    },
    {
      "completion_length": 606.3881959915161,
      "epoch": 1.3779527559055118,
      "grad_norm": 0.09432197362184525,
      "kl": 0.0037145614624023438,
      "learning_rate": 7.692407340588056e-07,
      "loss": 0.0001,
      "reward": 0.7323661027476192,
      "reward_std": 0.1929833421483636,
      "rewards/accuracy_reward": 0.7323661027476192,
      "rewards/format_reward": 0.0,
      "step": 110
    },
    {
      "completion_length": 604.5668788909912,
      "epoch": 1.5039370078740157,
      "grad_norm": 0.38694441318511963,
      "kl": 0.004090404510498047,
      "learning_rate": 4.995967037450238e-07,
      "loss": 0.0002,
      "reward": 0.7164062798023224,
      "reward_std": 0.18084403886459768,
      "rewards/accuracy_reward": 0.7164062798023224,
      "rewards/format_reward": 0.0,
      "step": 120
    },
    {
      "completion_length": 606.2777070999146,
      "epoch": 1.6299212598425197,
      "grad_norm": 0.15648125112056732,
      "kl": 0.0037802696228027345,
      "learning_rate": 2.787196699446389e-07,
      "loss": 0.0002,
      "reward": 0.7242187837138772,
      "reward_std": 0.19052648572251202,
      "rewards/accuracy_reward": 0.7242187837138772,
      "rewards/format_reward": 0.0,
      "step": 130
    },
    {
      "completion_length": 605.7184408187866,
      "epoch": 1.7559055118110236,
      "grad_norm": 0.4628942608833313,
      "kl": 0.003756284713745117,
      "learning_rate": 1.1737679983668259e-07,
      "loss": 0.0002,
      "reward": 0.7152902094647289,
      "reward_std": 0.20197481904178857,
      "rewards/accuracy_reward": 0.7152902094647289,
      "rewards/format_reward": 0.0,
      "step": 140
    },
    {
      "completion_length": 605.061745262146,
      "epoch": 1.8818897637795275,
      "grad_norm": 0.1207461878657341,
      "kl": 0.007715559005737305,
      "learning_rate": 2.343312866591163e-08,
      "loss": 0.0003,
      "reward": 0.7013393187895417,
      "reward_std": 0.1918664438650012,
      "rewards/accuracy_reward": 0.7013393187895417,
      "rewards/format_reward": 0.0,
      "step": 150
    },
    {
      "completion_length": 607.0647583007812,
      "epoch": 1.9826771653543307,
      "kl": 0.0038232803344726562,
      "reward": 0.7250279379077256,
      "reward_std": 0.17406430409755558,
      "rewards/accuracy_reward": 0.7250279379077256,
      "rewards/format_reward": 0.0,
      "step": 158,
      "total_flos": 0.0,
      "train_loss": 0.00014615306474896194,
      "train_runtime": 28501.5732,
      "train_samples_per_second": 0.624,
      "train_steps_per_second": 0.006
    }
  ],
  "logging_steps": 10,
  "max_steps": 158,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}