Dongwei's picture
Model save
7c36af4 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9826771653543307,
"eval_steps": 100,
"global_step": 158,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 652.5913221359253,
"epoch": 0.12598425196850394,
"grad_norm": 0.5412344932556152,
"kl": 0.00025856494903564453,
"learning_rate": 1.875e-06,
"loss": 0.0,
"reward": 0.5777902046218515,
"reward_std": 0.32899713758379223,
"rewards/accuracy_reward": 0.5776785971596837,
"rewards/format_reward": 0.00011160714784637093,
"step": 10
},
{
"completion_length": 694.438868522644,
"epoch": 0.25196850393700787,
"grad_norm": 0.24628566205501556,
"kl": 0.0019156813621520996,
"learning_rate": 2.994130233112417e-06,
"loss": 0.0001,
"reward": 0.6052455639466643,
"reward_std": 0.26475782548077403,
"rewards/accuracy_reward": 0.6052455639466643,
"rewards/format_reward": 0.0,
"step": 20
},
{
"completion_length": 651.5066148757935,
"epoch": 0.3779527559055118,
"grad_norm": 0.14223581552505493,
"kl": 0.0024756908416748045,
"learning_rate": 2.9286218000371976e-06,
"loss": 0.0001,
"reward": 0.6724330654367805,
"reward_std": 0.23531078966334462,
"rewards/accuracy_reward": 0.6724330654367805,
"rewards/format_reward": 0.0,
"step": 30
},
{
"completion_length": 642.1838449478149,
"epoch": 0.5039370078740157,
"grad_norm": 0.1239105761051178,
"kl": 0.0031515121459960937,
"learning_rate": 2.7934718587800417e-06,
"loss": 0.0001,
"reward": 0.7046875322237611,
"reward_std": 0.19434297760017216,
"rewards/accuracy_reward": 0.7046875322237611,
"rewards/format_reward": 0.0,
"step": 40
},
{
"completion_length": 627.14924659729,
"epoch": 0.6299212598425197,
"grad_norm": 0.13240313529968262,
"kl": 0.003639984130859375,
"learning_rate": 2.595268609058752e-06,
"loss": 0.0001,
"reward": 0.7179687809199095,
"reward_std": 0.19313886840827763,
"rewards/accuracy_reward": 0.7179687809199095,
"rewards/format_reward": 0.0,
"step": 50
},
{
"completion_length": 626.9296024322509,
"epoch": 0.7559055118110236,
"grad_norm": 0.15062075853347778,
"kl": 0.004168796539306641,
"learning_rate": 2.343673931461171e-06,
"loss": 0.0002,
"reward": 0.6809152102097868,
"reward_std": 0.1983337783254683,
"rewards/accuracy_reward": 0.6809152102097868,
"rewards/format_reward": 0.0,
"step": 60
},
{
"completion_length": 610.840876197815,
"epoch": 0.8818897637795275,
"grad_norm": 0.11126791685819626,
"kl": 0.004203128814697266,
"learning_rate": 2.0509523964971355e-06,
"loss": 0.0002,
"reward": 0.7165178887546062,
"reward_std": 0.1934912689961493,
"rewards/accuracy_reward": 0.7165178887546062,
"rewards/format_reward": 0.0,
"step": 70
},
{
"completion_length": 592.2336000569661,
"epoch": 1.0,
"grad_norm": 0.17808477580547333,
"kl": 0.0042411295572916665,
"learning_rate": 1.7313733994479534e-06,
"loss": 0.0002,
"reward": 0.7291666994492213,
"reward_std": 0.1846819964547952,
"rewards/accuracy_reward": 0.7291666994492213,
"rewards/format_reward": 0.0,
"step": 80
},
{
"completion_length": 609.6807176589966,
"epoch": 1.125984251968504,
"grad_norm": 0.08229045569896698,
"kl": 0.004135942459106446,
"learning_rate": 1.4005155653473445e-06,
"loss": 0.0002,
"reward": 0.7156250355765224,
"reward_std": 0.20761510250158607,
"rewards/accuracy_reward": 0.7156250355765224,
"rewards/format_reward": 0.0,
"step": 90
},
{
"completion_length": 611.2092897415162,
"epoch": 1.2519685039370079,
"grad_norm": 0.16556662321090698,
"kl": 0.0037270545959472655,
"learning_rate": 1.0745073324985549e-06,
"loss": 0.0001,
"reward": 0.7110491398721933,
"reward_std": 0.18295098417438566,
"rewards/accuracy_reward": 0.7110491398721933,
"rewards/format_reward": 0.0,
"step": 100
},
{
"completion_length": 606.3881959915161,
"epoch": 1.3779527559055118,
"grad_norm": 0.09432197362184525,
"kl": 0.0037145614624023438,
"learning_rate": 7.692407340588056e-07,
"loss": 0.0001,
"reward": 0.7323661027476192,
"reward_std": 0.1929833421483636,
"rewards/accuracy_reward": 0.7323661027476192,
"rewards/format_reward": 0.0,
"step": 110
},
{
"completion_length": 604.5668788909912,
"epoch": 1.5039370078740157,
"grad_norm": 0.38694441318511963,
"kl": 0.004090404510498047,
"learning_rate": 4.995967037450238e-07,
"loss": 0.0002,
"reward": 0.7164062798023224,
"reward_std": 0.18084403886459768,
"rewards/accuracy_reward": 0.7164062798023224,
"rewards/format_reward": 0.0,
"step": 120
},
{
"completion_length": 606.2777070999146,
"epoch": 1.6299212598425197,
"grad_norm": 0.15648125112056732,
"kl": 0.0037802696228027345,
"learning_rate": 2.787196699446389e-07,
"loss": 0.0002,
"reward": 0.7242187837138772,
"reward_std": 0.19052648572251202,
"rewards/accuracy_reward": 0.7242187837138772,
"rewards/format_reward": 0.0,
"step": 130
},
{
"completion_length": 605.7184408187866,
"epoch": 1.7559055118110236,
"grad_norm": 0.4628942608833313,
"kl": 0.003756284713745117,
"learning_rate": 1.1737679983668259e-07,
"loss": 0.0002,
"reward": 0.7152902094647289,
"reward_std": 0.20197481904178857,
"rewards/accuracy_reward": 0.7152902094647289,
"rewards/format_reward": 0.0,
"step": 140
},
{
"completion_length": 605.061745262146,
"epoch": 1.8818897637795275,
"grad_norm": 0.1207461878657341,
"kl": 0.007715559005737305,
"learning_rate": 2.343312866591163e-08,
"loss": 0.0003,
"reward": 0.7013393187895417,
"reward_std": 0.1918664438650012,
"rewards/accuracy_reward": 0.7013393187895417,
"rewards/format_reward": 0.0,
"step": 150
},
{
"completion_length": 607.0647583007812,
"epoch": 1.9826771653543307,
"kl": 0.0038232803344726562,
"reward": 0.7250279379077256,
"reward_std": 0.17406430409755558,
"rewards/accuracy_reward": 0.7250279379077256,
"rewards/format_reward": 0.0,
"step": 158,
"total_flos": 0.0,
"train_loss": 0.00014615306474896194,
"train_runtime": 28501.5732,
"train_samples_per_second": 0.624,
"train_steps_per_second": 0.006
}
],
"logging_steps": 10,
"max_steps": 158,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}