|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9893390191897654, |
|
"eval_steps": 100, |
|
"global_step": 58, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 593.9107284545898, |
|
"epoch": 0.017057569296375266, |
|
"grad_norm": 0.4269763231277466, |
|
"kl": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0167, |
|
"reward": 0.6294643059372902, |
|
"reward_std": 0.35917505994439125, |
|
"rewards/accuracy_reward": 0.6283482387661934, |
|
"rewards/format_reward": 0.0011160714784637094, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 605.0845737457275, |
|
"epoch": 0.08528784648187633, |
|
"grad_norm": 1.2334303855895996, |
|
"kl": 0.0003300309181213379, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0188, |
|
"reward": 0.6121652042493224, |
|
"reward_std": 0.3490289170295, |
|
"rewards/accuracy_reward": 0.6116071688011289, |
|
"rewards/format_reward": 0.0005580357392318547, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 612.8330627441406, |
|
"epoch": 0.17057569296375266, |
|
"grad_norm": 4.521842956542969, |
|
"kl": 0.0027939796447753905, |
|
"learning_rate": 2.956412726139078e-06, |
|
"loss": 0.0577, |
|
"reward": 0.6854911021888256, |
|
"reward_std": 0.2840955166146159, |
|
"rewards/accuracy_reward": 0.6854911021888256, |
|
"rewards/format_reward": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 604.9821670532226, |
|
"epoch": 0.255863539445629, |
|
"grad_norm": 1.5347368717193604, |
|
"kl": 0.015767669677734374, |
|
"learning_rate": 2.7836719084521715e-06, |
|
"loss": 0.07, |
|
"reward": 0.7609375312924385, |
|
"reward_std": 0.21701927129179238, |
|
"rewards/accuracy_reward": 0.7609375312924385, |
|
"rewards/format_reward": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 591.6801628112793, |
|
"epoch": 0.3411513859275053, |
|
"grad_norm": 0.6672903895378113, |
|
"kl": 0.005676651000976562, |
|
"learning_rate": 2.4946839873611927e-06, |
|
"loss": 0.0579, |
|
"reward": 0.7703125327825546, |
|
"reward_std": 0.20545081831514836, |
|
"rewards/accuracy_reward": 0.7703125327825546, |
|
"rewards/format_reward": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 599.0326194763184, |
|
"epoch": 0.42643923240938164, |
|
"grad_norm": 2323443.0, |
|
"kl": 85.62775764465331, |
|
"learning_rate": 2.1156192081791355e-06, |
|
"loss": 3.4627, |
|
"reward": 0.761160746216774, |
|
"reward_std": 0.1914736282080412, |
|
"rewards/accuracy_reward": 0.761160746216774, |
|
"rewards/format_reward": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 599.8656478881836, |
|
"epoch": 0.511727078891258, |
|
"grad_norm": 31896284.0, |
|
"kl": 4184.925457000732, |
|
"learning_rate": 1.6808050203829845e-06, |
|
"loss": 166.556, |
|
"reward": 0.7537946760654449, |
|
"reward_std": 0.1866186775267124, |
|
"rewards/accuracy_reward": 0.7537946760654449, |
|
"rewards/format_reward": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 591.7852882385254, |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 0.18381251394748688, |
|
"kl": 175321907.9516266, |
|
"learning_rate": 1.2296174432791415e-06, |
|
"loss": 6963359.2, |
|
"reward": 0.7421875357627868, |
|
"reward_std": 0.18768558986485004, |
|
"rewards/accuracy_reward": 0.7421875357627868, |
|
"rewards/format_reward": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 581.8495811462402, |
|
"epoch": 0.6823027718550106, |
|
"grad_norm": 187.1607208251953, |
|
"kl": 179490.89021034242, |
|
"learning_rate": 8.029152419343472e-07, |
|
"loss": 7215.3883, |
|
"reward": 0.7627232506871223, |
|
"reward_std": 0.18175358334556221, |
|
"rewards/accuracy_reward": 0.7627232506871223, |
|
"rewards/format_reward": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 598.5533767700196, |
|
"epoch": 0.767590618336887, |
|
"grad_norm": 170799.046875, |
|
"kl": 1.5941051483154296, |
|
"learning_rate": 4.3933982822017883e-07, |
|
"loss": 0.0995, |
|
"reward": 0.7484375357627868, |
|
"reward_std": 0.18508986476808786, |
|
"rewards/accuracy_reward": 0.7484375357627868, |
|
"rewards/format_reward": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 602.7493576049804, |
|
"epoch": 0.8528784648187633, |
|
"grad_norm": 23.201330184936523, |
|
"kl": 0.7593372344970704, |
|
"learning_rate": 1.718159615201853e-07, |
|
"loss": 0.071, |
|
"reward": 0.7455357477068901, |
|
"reward_std": 0.1892501600086689, |
|
"rewards/accuracy_reward": 0.7455357477068901, |
|
"rewards/format_reward": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 590.3078407287597, |
|
"epoch": 0.9381663113006397, |
|
"grad_norm": 1513.896484375, |
|
"kl": 0.4785778045654297, |
|
"learning_rate": 2.4570139579284723e-08, |
|
"loss": 0.059, |
|
"reward": 0.7736607506871224, |
|
"reward_std": 0.185056865029037, |
|
"rewards/accuracy_reward": 0.7736607506871224, |
|
"rewards/format_reward": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 586.8786061604818, |
|
"epoch": 0.9893390191897654, |
|
"kl": 2.2987874348958335, |
|
"reward": 0.77194944024086, |
|
"reward_std": 0.1801375082383553, |
|
"rewards/accuracy_reward": 0.77194944024086, |
|
"rewards/format_reward": 0.0, |
|
"step": 58, |
|
"total_flos": 0.0, |
|
"train_loss": 600926.2988391328, |
|
"train_runtime": 7660.7166, |
|
"train_samples_per_second": 0.979, |
|
"train_steps_per_second": 0.008 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 58, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|