{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 100,
  "global_step": 156,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio": 0.0,
      "completion_length": 653.8359603881836,
      "epoch": 0.0064,
      "grad_norm": 0.36436545848846436,
      "kl": 0.0,
      "learning_rate": 1.875e-07,
      "loss": 0.0432,
      "reward": 0.4322916753590107,
      "reward_std": 0.38348349183797836,
      "rewards/accuracy_reward": 0.4322916753590107,
      "rewards/format_reward": 0.0,
      "step": 1
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 613.5866012573242,
      "epoch": 0.032,
      "grad_norm": 0.5317748188972473,
      "kl": 0.00015977025032043457,
      "learning_rate": 9.375e-07,
      "loss": 0.0488,
      "reward": 0.4667968899011612,
      "reward_std": 0.37143108155578375,
      "rewards/accuracy_reward": 0.4667968899011612,
      "rewards/format_reward": 0.0,
      "step": 5
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 631.8177268981933,
      "epoch": 0.064,
      "grad_norm": 0.40818989276885986,
      "kl": 0.00026123523712158204,
      "learning_rate": 1.875e-06,
      "loss": 0.0416,
      "reward": 0.4348958468064666,
      "reward_std": 0.39116307385265825,
      "rewards/accuracy_reward": 0.4348958468064666,
      "rewards/format_reward": 0.0,
      "step": 10
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 599.0187690734863,
      "epoch": 0.096,
      "grad_norm": 0.432265967130661,
      "kl": 0.024783754348754884,
      "learning_rate": 2.8125e-06,
      "loss": 0.0572,
      "reward": 0.42291667982935904,
      "reward_std": 0.3527421932667494,
      "rewards/accuracy_reward": 0.42291667982935904,
      "rewards/format_reward": 0.0,
      "step": 15
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 622.3146034240723,
      "epoch": 0.128,
      "grad_norm": 4.753727912902832,
      "kl": 2060.8252431869505,
      "learning_rate": 2.993961440992859e-06,
      "loss": 95.1839,
      "reward": 0.4718750104308128,
      "reward_std": 0.37911620922386646,
      "rewards/accuracy_reward": 0.4718750104308128,
      "rewards/format_reward": 0.0,
      "step": 20
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 607.3390830993652,
      "epoch": 0.16,
      "grad_norm": 770.6433715820312,
      "kl": 46.15056457519531,
      "learning_rate": 2.9695130976348534e-06,
      "loss": 2.3634,
      "reward": 0.5104166828095913,
      "reward_std": 0.35664580315351485,
      "rewards/accuracy_reward": 0.5104166828095913,
      "rewards/format_reward": 0.0,
      "step": 25
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 603.0812713623047,
      "epoch": 0.192,
      "grad_norm": 204.195556640625,
      "kl": 8.570521545410156,
      "learning_rate": 2.9265847744427307e-06,
      "loss": 0.4851,
      "reward": 0.5395833477377892,
      "reward_std": 0.36477144751697776,
      "rewards/accuracy_reward": 0.5395833477377892,
      "rewards/format_reward": 0.0,
      "step": 30
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 574.7453285217285,
      "epoch": 0.224,
      "grad_norm": 32.18656921386719,
      "kl": 3.325732421875,
      "learning_rate": 2.865716319988224e-06,
      "loss": 0.2502,
      "reward": 0.5796875208616257,
      "reward_std": 0.3091961059719324,
      "rewards/accuracy_reward": 0.5796875208616257,
      "rewards/format_reward": 0.0,
      "step": 35
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 599.6526222229004,
      "epoch": 0.256,
      "grad_norm": 50.4512825012207,
      "kl": 38.948974609375,
      "learning_rate": 2.7876731904027993e-06,
      "loss": 1.7796,
      "reward": 0.5656250193715096,
      "reward_std": 0.31268223002552986,
      "rewards/accuracy_reward": 0.5656250193715096,
      "rewards/format_reward": 0.0,
      "step": 40
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 599.9609573364257,
      "epoch": 0.288,
      "grad_norm": 16.936426162719727,
      "kl": 3.242694091796875,
      "learning_rate": 2.6934368233226715e-06,
      "loss": 0.2207,
      "reward": 0.5697916842997074,
      "reward_std": 0.3523787297308445,
      "rewards/accuracy_reward": 0.5697916842997074,
      "rewards/format_reward": 0.0,
      "step": 45
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 605.2927291870117,
      "epoch": 0.32,
      "grad_norm": 66.08646392822266,
      "kl": 0.9429168701171875,
      "learning_rate": 2.584192295741087e-06,
      "loss": 0.051,
      "reward": 0.5895833544433117,
      "reward_std": 0.31378277521580455,
      "rewards/accuracy_reward": 0.5895833544433117,
      "rewards/format_reward": 0.0,
      "step": 50
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 581.4265869140625,
      "epoch": 0.352,
      "grad_norm": 15.162301063537598,
      "kl": 1.656298828125,
      "learning_rate": 2.461313420977536e-06,
      "loss": 0.1251,
      "reward": 0.5880208499729633,
      "reward_std": 0.3129058893769979,
      "rewards/accuracy_reward": 0.5880208499729633,
      "rewards/format_reward": 0.0,
      "step": 55
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 592.2036628723145,
      "epoch": 0.384,
      "grad_norm": 453.90277099609375,
      "kl": 3.770269775390625,
      "learning_rate": 2.3263454721781537e-06,
      "loss": 0.2258,
      "reward": 0.5901041820645332,
      "reward_std": 0.34070247821509836,
      "rewards/accuracy_reward": 0.5901041820645332,
      "rewards/format_reward": 0.0,
      "step": 60
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 588.8729347229004,
      "epoch": 0.416,
      "grad_norm": 11.494514465332031,
      "kl": 18979226.522857666,
      "learning_rate": 2.18098574960932e-06,
      "loss": 1317335.5,
      "reward": 0.5802083477377892,
      "reward_std": 0.32252306006848813,
      "rewards/accuracy_reward": 0.5802083477377892,
      "rewards/format_reward": 0.0,
      "step": 65
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 614.9432525634766,
      "epoch": 0.448,
      "grad_norm": 33.81232833862305,
      "kl": 11880.595022583007,
      "learning_rate": 2.027062236122014e-06,
      "loss": 793.7966,
      "reward": 0.5052083469927311,
      "reward_std": 0.34817213341593745,
      "rewards/accuracy_reward": 0.5052083469927311,
      "rewards/format_reward": 0.0,
      "step": 70
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 607.9666870117187,
      "epoch": 0.48,
      "grad_norm": 2.919191598892212,
      "kl": 1.0650543212890624,
      "learning_rate": 1.866510609206841e-06,
      "loss": 0.0985,
      "reward": 0.5817708484828472,
      "reward_std": 0.32593765016645193,
      "rewards/accuracy_reward": 0.5817708484828472,
      "rewards/format_reward": 0.0,
      "step": 75
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 629.8442901611328,
      "epoch": 0.512,
      "grad_norm": 14.79494571685791,
      "kl": 74.44385070800782,
      "learning_rate": 1.7013498987264833e-06,
      "loss": 1.66,
      "reward": 0.5494791820645333,
      "reward_std": 0.30361743047833445,
      "rewards/accuracy_reward": 0.5494791820645333,
      "rewards/format_reward": 0.0,
      "step": 80
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 600.0541847229003,
      "epoch": 0.544,
      "grad_norm": 724178.6875,
      "kl": 1415.3565673828125,
      "learning_rate": 1.5336570964437077e-06,
      "loss": 81.5247,
      "reward": 0.5484375156462192,
      "reward_std": 0.3187117656692863,
      "rewards/accuracy_reward": 0.5484375156462192,
      "rewards/format_reward": 0.0,
      "step": 85
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 580.4109558105469,
      "epoch": 0.576,
      "grad_norm": 32.454586029052734,
      "kl": 0.6003936767578125,
      "learning_rate": 1.3655410366448499e-06,
      "loss": 0.0909,
      "reward": 0.5557291798293591,
      "reward_std": 0.33659778758883474,
      "rewards/accuracy_reward": 0.5557291798293591,
      "rewards/format_reward": 0.0,
      "step": 90
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 612.1968910217286,
      "epoch": 0.608,
      "grad_norm": 227.21060180664062,
      "kl": 4.417057800292969,
      "learning_rate": 1.199115876325091e-06,
      "loss": 0.1735,
      "reward": 0.49843751490116117,
      "reward_std": 0.32474707160145044,
      "rewards/accuracy_reward": 0.49843751490116117,
      "rewards/format_reward": 0.0,
      "step": 95
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.434352159500122,
      "learning_rate": 1.036474508437579e-06,
      "loss": 0.5235,
      "step": 100
    },
    {
      "epoch": 0.64,
      "eval_clip_ratio": 0.0,
      "eval_completion_length": 598.3372240684015,
      "eval_kl": 479.5919916898512,
      "eval_loss": 14.18670654296875,
      "eval_reward": 0.46924961448537644,
      "eval_reward_std": 0.32887630529112094,
      "eval_rewards/accuracy_reward": 0.46922463449321206,
      "eval_rewards/format_reward": 2.498001673167272e-05,
      "eval_runtime": 11244.4676,
      "eval_samples_per_second": 0.445,
      "eval_steps_per_second": 0.009,
      "step": 100
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 594.5505378723144,
      "epoch": 0.672,
      "grad_norm": 6.814489841461182,
      "kl": 5.030734252929688,
      "learning_rate": 8.796622425502193e-07,
      "loss": 0.0663,
      "reward": 0.5638021010905504,
      "reward_std": 0.3266499313525856,
      "rewards/accuracy_reward": 0.5638021010905504,
      "rewards/format_reward": 0.0,
      "step": 105
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 624.2099197387695,
      "epoch": 0.704,
      "grad_norm": 1.0256500244140625,
      "kl": 0.5462493896484375,
      "learning_rate": 7.30651083891141e-07,
      "loss": 0.0706,
      "reward": 0.5015625156462192,
      "reward_std": 0.3283187661319971,
      "rewards/accuracy_reward": 0.5015625156462192,
      "rewards/format_reward": 0.0,
      "step": 110
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 638.3364791870117,
      "epoch": 0.736,
      "grad_norm": 25.669376373291016,
      "kl": 0.4528076171875,
      "learning_rate": 5.913149342387704e-07,
      "loss": 0.0513,
      "reward": 0.5244791820645333,
      "reward_std": 0.32383032198995354,
      "rewards/accuracy_reward": 0.5244791820645333,
      "rewards/format_reward": 0.0,
      "step": 115
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 589.191682434082,
      "epoch": 0.768,
      "grad_norm": 71.88641357421875,
      "kl": 2.2827545166015626,
      "learning_rate": 4.63406026519703e-07,
      "loss": 0.1746,
      "reward": 0.5442708507180214,
      "reward_std": 0.3280566889792681,
      "rewards/accuracy_reward": 0.5442708507180214,
      "rewards/format_reward": 0.0,
      "step": 120
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 622.8927276611328,
      "epoch": 0.8,
      "grad_norm": 1.517931580543518,
      "kl": 0.1043121337890625,
      "learning_rate": 3.4853288946298335e-07,
      "loss": 0.0476,
      "reward": 0.5057291813194752,
      "reward_std": 0.3326381642371416,
      "rewards/accuracy_reward": 0.5057291813194752,
      "rewards/format_reward": 0.0,
      "step": 125
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 598.4495010375977,
      "epoch": 0.832,
      "grad_norm": 0.7930722236633301,
      "kl": 0.6085357666015625,
      "learning_rate": 2.48140119418046e-07,
      "loss": 0.0675,
      "reward": 0.5453125152736902,
      "reward_std": 0.3504544053226709,
      "rewards/accuracy_reward": 0.5453125152736902,
      "rewards/format_reward": 0.0,
      "step": 130
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 614.9172065734863,
      "epoch": 0.864,
      "grad_norm": 1384.82177734375,
      "kl": 1.9542694091796875,
      "learning_rate": 1.634902137174483e-07,
      "loss": 0.1328,
      "reward": 0.5380208536982536,
      "reward_std": 0.33980549313127995,
      "rewards/accuracy_reward": 0.5380208536982536,
      "rewards/format_reward": 0.0,
      "step": 135
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 611.6901245117188,
      "epoch": 0.896,
      "grad_norm": 3.5404388904571533,
      "kl": 6.846340942382812,
      "learning_rate": 9.564769404039419e-08,
      "loss": 0.4182,
      "reward": 0.5447916828095913,
      "reward_std": 0.3115722266957164,
      "rewards/accuracy_reward": 0.5447916828095913,
      "rewards/format_reward": 0.0,
      "step": 140
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 603.5270988464356,
      "epoch": 0.928,
      "grad_norm": 11.985507011413574,
      "kl": 20.116134643554688,
      "learning_rate": 4.546571943496969e-08,
      "loss": 1.5089,
      "reward": 0.5437500163912773,
      "reward_std": 0.3408747211098671,
      "rewards/accuracy_reward": 0.5437500163912773,
      "rewards/format_reward": 0.0,
      "step": 145
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 601.6755386352539,
      "epoch": 0.96,
      "grad_norm": 1.2476218938827515,
      "kl": 0.5209381103515625,
      "learning_rate": 1.357535734809795e-08,
      "loss": 0.0685,
      "reward": 0.5718750171363354,
      "reward_std": 0.32924772184342144,
      "rewards/accuracy_reward": 0.5718750171363354,
      "rewards/format_reward": 0.0,
      "step": 150
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 608.6760620117187,
      "epoch": 0.992,
      "grad_norm": 40.877498626708984,
      "kl": 0.2295989990234375,
      "learning_rate": 3.77647586240204e-10,
      "loss": 0.0664,
      "reward": 0.5557291798293591,
      "reward_std": 0.3136133692227304,
      "rewards/accuracy_reward": 0.5557291798293591,
      "rewards/format_reward": 0.0,
      "step": 155
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 594.5208511352539,
      "epoch": 0.9984,
      "kl": 3.6969146728515625,
      "reward": 0.5312500149011612,
      "reward_std": 0.2874857783317566,
      "rewards/accuracy_reward": 0.5312500149011612,
      "rewards/format_reward": 0.0,
      "step": 156,
      "total_flos": 0.0,
      "train_loss": 42253.74840821018,
      "train_runtime": 36420.7375,
      "train_samples_per_second": 0.206,
      "train_steps_per_second": 0.004
    }
  ],
  "logging_steps": 5,
  "max_steps": 156,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}