{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.028914269191846177,
  "eval_steps": 13,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005782853838369235,
      "grad_norm": 0.3306373357772827,
      "learning_rate": 5e-06,
      "loss": 0.843,
      "step": 1
    },
    {
      "epoch": 0.0005782853838369235,
      "eval_loss": 0.9051669239997864,
      "eval_runtime": 26.4054,
      "eval_samples_per_second": 27.608,
      "eval_steps_per_second": 13.823,
      "step": 1
    },
    {
      "epoch": 0.001156570767673847,
      "grad_norm": 0.47444525361061096,
      "learning_rate": 1e-05,
      "loss": 0.9431,
      "step": 2
    },
    {
      "epoch": 0.0017348561515107706,
      "grad_norm": 0.3817633390426636,
      "learning_rate": 1.5e-05,
      "loss": 0.8436,
      "step": 3
    },
    {
      "epoch": 0.002313141535347694,
      "grad_norm": 0.4596897065639496,
      "learning_rate": 2e-05,
      "loss": 0.9052,
      "step": 4
    },
    {
      "epoch": 0.0028914269191846176,
      "grad_norm": 0.4641701281070709,
      "learning_rate": 2.5e-05,
      "loss": 0.9076,
      "step": 5
    },
    {
      "epoch": 0.0034697123030215412,
      "grad_norm": 0.4002288579940796,
      "learning_rate": 3e-05,
      "loss": 0.7742,
      "step": 6
    },
    {
      "epoch": 0.004047997686858464,
      "grad_norm": 0.40169280767440796,
      "learning_rate": 3.5e-05,
      "loss": 0.8444,
      "step": 7
    },
    {
      "epoch": 0.004626283070695388,
      "grad_norm": 0.5613084435462952,
      "learning_rate": 4e-05,
      "loss": 1.0238,
      "step": 8
    },
    {
      "epoch": 0.005204568454532312,
      "grad_norm": 0.6021634340286255,
      "learning_rate": 4.5e-05,
      "loss": 0.9818,
      "step": 9
    },
    {
      "epoch": 0.005782853838369235,
      "grad_norm": 0.4571888744831085,
      "learning_rate": 5e-05,
      "loss": 0.8416,
      "step": 10
    },
    {
      "epoch": 0.006361139222206159,
      "grad_norm": 0.5815986394882202,
      "learning_rate": 4.99229333433282e-05,
      "loss": 0.9123,
      "step": 11
    },
    {
      "epoch": 0.0069394246060430825,
      "grad_norm": 0.6056288480758667,
      "learning_rate": 4.9692208514878444e-05,
      "loss": 0.8216,
      "step": 12
    },
    {
      "epoch": 0.007517709989880006,
      "grad_norm": 0.6877990365028381,
      "learning_rate": 4.9309248009941914e-05,
      "loss": 0.8851,
      "step": 13
    },
    {
      "epoch": 0.007517709989880006,
      "eval_loss": 0.8058395385742188,
      "eval_runtime": 24.6769,
      "eval_samples_per_second": 29.542,
      "eval_steps_per_second": 14.791,
      "step": 13
    },
    {
      "epoch": 0.008095995373716929,
      "grad_norm": 0.56659334897995,
      "learning_rate": 4.877641290737884e-05,
      "loss": 0.8059,
      "step": 14
    },
    {
      "epoch": 0.008674280757553852,
      "grad_norm": 0.484801709651947,
      "learning_rate": 4.8096988312782174e-05,
      "loss": 0.7943,
      "step": 15
    },
    {
      "epoch": 0.009252566141390776,
      "grad_norm": 0.668626070022583,
      "learning_rate": 4.72751631047092e-05,
      "loss": 0.8542,
      "step": 16
    },
    {
      "epoch": 0.0098308515252277,
      "grad_norm": 0.5725303292274475,
      "learning_rate": 4.6316004108852305e-05,
      "loss": 0.7044,
      "step": 17
    },
    {
      "epoch": 0.010409136909064623,
      "grad_norm": 0.6510726809501648,
      "learning_rate": 4.522542485937369e-05,
      "loss": 0.7037,
      "step": 18
    },
    {
      "epoch": 0.010987422292901547,
      "grad_norm": 0.6378070116043091,
      "learning_rate": 4.401014914000078e-05,
      "loss": 0.7157,
      "step": 19
    },
    {
      "epoch": 0.01156570767673847,
      "grad_norm": 0.7792880535125732,
      "learning_rate": 4.267766952966369e-05,
      "loss": 0.7768,
      "step": 20
    },
    {
      "epoch": 0.012143993060575394,
      "grad_norm": 0.46598246693611145,
      "learning_rate": 4.123620120825459e-05,
      "loss": 0.5836,
      "step": 21
    },
    {
      "epoch": 0.012722278444412318,
      "grad_norm": 0.8034200668334961,
      "learning_rate": 3.969463130731183e-05,
      "loss": 0.7133,
      "step": 22
    },
    {
      "epoch": 0.013300563828249241,
      "grad_norm": 0.7533989548683167,
      "learning_rate": 3.8062464117898724e-05,
      "loss": 0.6965,
      "step": 23
    },
    {
      "epoch": 0.013878849212086165,
      "grad_norm": 0.5304470062255859,
      "learning_rate": 3.634976249348867e-05,
      "loss": 0.6367,
      "step": 24
    },
    {
      "epoch": 0.014457134595923089,
      "grad_norm": 0.7445480823516846,
      "learning_rate": 3.456708580912725e-05,
      "loss": 0.6986,
      "step": 25
    },
    {
      "epoch": 0.015035419979760012,
      "grad_norm": 0.8515669703483582,
      "learning_rate": 3.272542485937369e-05,
      "loss": 0.6763,
      "step": 26
    },
    {
      "epoch": 0.015035419979760012,
      "eval_loss": 0.6640514135360718,
      "eval_runtime": 24.6725,
      "eval_samples_per_second": 29.547,
      "eval_steps_per_second": 14.794,
      "step": 26
    },
    {
      "epoch": 0.015613705363596936,
      "grad_norm": 0.5554443597793579,
      "learning_rate": 3.083613409639764e-05,
      "loss": 0.6288,
      "step": 27
    },
    {
      "epoch": 0.016191990747433858,
      "grad_norm": 0.6648669838905334,
      "learning_rate": 2.8910861626005776e-05,
      "loss": 0.6552,
      "step": 28
    },
    {
      "epoch": 0.016770276131270783,
      "grad_norm": 0.728183925151825,
      "learning_rate": 2.6961477393196126e-05,
      "loss": 0.6793,
      "step": 29
    },
    {
      "epoch": 0.017348561515107705,
      "grad_norm": 0.5944912433624268,
      "learning_rate": 2.5e-05,
      "loss": 0.6537,
      "step": 30
    },
    {
      "epoch": 0.01792684689894463,
      "grad_norm": 0.6181674599647522,
      "learning_rate": 2.303852260680388e-05,
      "loss": 0.6846,
      "step": 31
    },
    {
      "epoch": 0.018505132282781552,
      "grad_norm": 0.8404297232627869,
      "learning_rate": 2.1089138373994223e-05,
      "loss": 0.7196,
      "step": 32
    },
    {
      "epoch": 0.019083417666618478,
      "grad_norm": 0.706020176410675,
      "learning_rate": 1.9163865903602374e-05,
      "loss": 0.6079,
      "step": 33
    },
    {
      "epoch": 0.0196617030504554,
      "grad_norm": 0.7011454701423645,
      "learning_rate": 1.7274575140626318e-05,
      "loss": 0.7039,
      "step": 34
    },
    {
      "epoch": 0.020239988434292325,
      "grad_norm": 0.6933149695396423,
      "learning_rate": 1.5432914190872757e-05,
      "loss": 0.6284,
      "step": 35
    },
    {
      "epoch": 0.020818273818129247,
      "grad_norm": 0.7218239903450012,
      "learning_rate": 1.3650237506511331e-05,
      "loss": 0.6569,
      "step": 36
    },
    {
      "epoch": 0.021396559201966172,
      "grad_norm": 0.7234783172607422,
      "learning_rate": 1.1937535882101281e-05,
      "loss": 0.6429,
      "step": 37
    },
    {
      "epoch": 0.021974844585803094,
      "grad_norm": 0.5774780511856079,
      "learning_rate": 1.0305368692688174e-05,
      "loss": 0.5611,
      "step": 38
    },
    {
      "epoch": 0.022553129969640016,
      "grad_norm": 0.7207251191139221,
      "learning_rate": 8.763798791745411e-06,
      "loss": 0.6489,
      "step": 39
    },
    {
      "epoch": 0.022553129969640016,
      "eval_loss": 0.6252636909484863,
      "eval_runtime": 24.6955,
      "eval_samples_per_second": 29.52,
      "eval_steps_per_second": 14.78,
      "step": 39
    },
    {
      "epoch": 0.02313141535347694,
      "grad_norm": 0.6595151424407959,
      "learning_rate": 7.3223304703363135e-06,
      "loss": 0.6198,
      "step": 40
    },
    {
      "epoch": 0.023709700737313863,
      "grad_norm": 0.7210184931755066,
      "learning_rate": 5.989850859999227e-06,
      "loss": 0.607,
      "step": 41
    },
    {
      "epoch": 0.02428798612115079,
      "grad_norm": 0.7432602643966675,
      "learning_rate": 4.7745751406263165e-06,
      "loss": 0.6241,
      "step": 42
    },
    {
      "epoch": 0.02486627150498771,
      "grad_norm": 0.7371325492858887,
      "learning_rate": 3.6839958911476957e-06,
      "loss": 0.639,
      "step": 43
    },
    {
      "epoch": 0.025444556888824636,
      "grad_norm": 0.690451979637146,
      "learning_rate": 2.7248368952908053e-06,
      "loss": 0.6563,
      "step": 44
    },
    {
      "epoch": 0.026022842272661557,
      "grad_norm": 0.6509324312210083,
      "learning_rate": 1.9030116872178316e-06,
      "loss": 0.5702,
      "step": 45
    },
    {
      "epoch": 0.026601127656498483,
      "grad_norm": 0.6391157507896423,
      "learning_rate": 1.2235870926211619e-06,
      "loss": 0.6043,
      "step": 46
    },
    {
      "epoch": 0.027179413040335405,
      "grad_norm": 0.7080129384994507,
      "learning_rate": 6.907519900580861e-07,
      "loss": 0.6471,
      "step": 47
    },
    {
      "epoch": 0.02775769842417233,
      "grad_norm": 0.6260955333709717,
      "learning_rate": 3.077914851215585e-07,
      "loss": 0.6057,
      "step": 48
    },
    {
      "epoch": 0.028335983808009252,
      "grad_norm": 0.7455611824989319,
      "learning_rate": 7.706665667180091e-08,
      "loss": 0.649,
      "step": 49
    },
    {
      "epoch": 0.028914269191846177,
      "grad_norm": 0.7520791292190552,
      "learning_rate": 0.0,
      "loss": 0.6281,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9617734090358784.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}