|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 25.0, |
|
"eval_steps": 500, |
|
"global_step": 1725, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.780346820809248e-07, |
|
"loss": 0.005, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5.202312138728324e-06, |
|
"loss": 0.0008, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.0404624277456647e-05, |
|
"loss": 0.0019, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.5606936416184973e-05, |
|
"loss": 0.0019, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.0809248554913295e-05, |
|
"loss": 0.0016, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.6011560693641617e-05, |
|
"loss": 0.0027, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.1213872832369946e-05, |
|
"loss": 0.0032, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.6416184971098265e-05, |
|
"loss": 0.0013, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.002811400219798088, |
|
"eval_max_distance": 2, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9415, |
|
"eval_samples_per_second": 42.235, |
|
"eval_steps_per_second": 1.545, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.161849710982659e-05, |
|
"loss": 0.0033, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.6820809248554915e-05, |
|
"loss": 0.0034, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.2023121387283234e-05, |
|
"loss": 0.0015, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.722543352601156e-05, |
|
"loss": 0.0042, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 6.242774566473989e-05, |
|
"loss": 0.0025, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 6.763005780346822e-05, |
|
"loss": 0.0012, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.283236994219653e-05, |
|
"loss": 0.002, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 7.803468208092485e-05, |
|
"loss": 0.0006, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.0026117784436792135, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9167, |
|
"eval_samples_per_second": 42.781, |
|
"eval_steps_per_second": 1.565, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 8.323699421965318e-05, |
|
"loss": 0.001, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.84393063583815e-05, |
|
"loss": 0.0016, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 9.364161849710983e-05, |
|
"loss": 0.0011, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.884393063583816e-05, |
|
"loss": 0.001, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.954896907216495e-05, |
|
"loss": 0.0026, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.896907216494846e-05, |
|
"loss": 0.0008, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 9.838917525773196e-05, |
|
"loss": 0.0022, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.780927835051546e-05, |
|
"loss": 0.0025, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.003930480219423771, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9341, |
|
"eval_samples_per_second": 42.396, |
|
"eval_steps_per_second": 1.551, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 9.722938144329897e-05, |
|
"loss": 0.0019, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 9.664948453608248e-05, |
|
"loss": 0.0024, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 9.606958762886598e-05, |
|
"loss": 0.0007, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 9.54896907216495e-05, |
|
"loss": 0.0026, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 9.490979381443299e-05, |
|
"loss": 0.0009, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 9.43298969072165e-05, |
|
"loss": 0.0012, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 9.375e-05, |
|
"loss": 0.0004, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.0036886015441268682, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9271, |
|
"eval_samples_per_second": 42.551, |
|
"eval_steps_per_second": 1.557, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 9.317010309278351e-05, |
|
"loss": 0.0018, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 9.259020618556701e-05, |
|
"loss": 0.0017, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 9.201030927835051e-05, |
|
"loss": 0.0014, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 9.143041237113402e-05, |
|
"loss": 0.0005, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 9.085051546391753e-05, |
|
"loss": 0.001, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 9.027061855670103e-05, |
|
"loss": 0.002, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 8.969072164948454e-05, |
|
"loss": 0.0016, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 8.911082474226806e-05, |
|
"loss": 0.0005, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.009117466397583485, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9694, |
|
"eval_samples_per_second": 41.638, |
|
"eval_steps_per_second": 1.523, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 8.853092783505154e-05, |
|
"loss": 0.0005, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 8.795103092783505e-05, |
|
"loss": 0.0051, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 8.737113402061856e-05, |
|
"loss": 0.0004, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 8.679123711340206e-05, |
|
"loss": 0.0012, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 8.621134020618558e-05, |
|
"loss": 0.001, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 8.563144329896907e-05, |
|
"loss": 0.0015, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 8.505154639175259e-05, |
|
"loss": 0.0016, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 8.447164948453608e-05, |
|
"loss": 0.0009, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.0005720060435123742, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9399, |
|
"eval_samples_per_second": 42.271, |
|
"eval_steps_per_second": 1.546, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 8.38917525773196e-05, |
|
"loss": 0.0005, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 8.331185567010311e-05, |
|
"loss": 0.0006, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 8.273195876288659e-05, |
|
"loss": 0.0014, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 8.21520618556701e-05, |
|
"loss": 0.0019, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 8.157216494845362e-05, |
|
"loss": 0.001, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 8.099226804123711e-05, |
|
"loss": 0.0008, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 8.041237113402063e-05, |
|
"loss": 0.0016, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.00027213190332986414, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9188, |
|
"eval_samples_per_second": 42.735, |
|
"eval_steps_per_second": 1.563, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 7.983247422680414e-05, |
|
"loss": 0.0006, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 7.925257731958762e-05, |
|
"loss": 0.007, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 7.867268041237113e-05, |
|
"loss": 0.0004, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 7.809278350515465e-05, |
|
"loss": 0.0016, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 7.751288659793814e-05, |
|
"loss": 0.0006, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 7.693298969072166e-05, |
|
"loss": 0.0011, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 7.635309278350515e-05, |
|
"loss": 0.0014, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 7.577319587628867e-05, |
|
"loss": 0.0012, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.011139851063489914, |
|
"eval_max_distance": 5, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9435, |
|
"eval_samples_per_second": 42.193, |
|
"eval_steps_per_second": 1.544, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 7.519329896907217e-05, |
|
"loss": 0.0008, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 7.461340206185568e-05, |
|
"loss": 0.0011, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 7.403350515463919e-05, |
|
"loss": 0.0025, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 7.345360824742269e-05, |
|
"loss": 0.003, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 7.287371134020619e-05, |
|
"loss": 0.004, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 7.22938144329897e-05, |
|
"loss": 0.002, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 7.17139175257732e-05, |
|
"loss": 0.0007, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 7.113402061855671e-05, |
|
"loss": 0.0008, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.0003953798732254654, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.995, |
|
"eval_samples_per_second": 41.102, |
|
"eval_steps_per_second": 1.504, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 7.055412371134022e-05, |
|
"loss": 0.0005, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 6.99742268041237e-05, |
|
"loss": 0.0004, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 6.939432989690722e-05, |
|
"loss": 0.0013, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 6.881443298969073e-05, |
|
"loss": 0.0002, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 6.823453608247423e-05, |
|
"loss": 0.0011, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 6.765463917525774e-05, |
|
"loss": 0.0018, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 6.707474226804124e-05, |
|
"loss": 0.0018, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.00027754431357607245, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9222, |
|
"eval_samples_per_second": 42.659, |
|
"eval_steps_per_second": 1.561, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 6.649484536082475e-05, |
|
"loss": 0.0011, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 6.591494845360825e-05, |
|
"loss": 0.0006, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 6.533505154639176e-05, |
|
"loss": 0.0011, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 6.475515463917527e-05, |
|
"loss": 0.0013, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 6.417525773195877e-05, |
|
"loss": 0.0006, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 6.359536082474227e-05, |
|
"loss": 0.0018, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 6.301546391752578e-05, |
|
"loss": 0.0016, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 6.243556701030928e-05, |
|
"loss": 0.0028, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.00033258015173487365, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9385, |
|
"eval_samples_per_second": 42.301, |
|
"eval_steps_per_second": 1.548, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 6.185567010309279e-05, |
|
"loss": 0.0009, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 6.12757731958763e-05, |
|
"loss": 0.0005, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 6.069587628865979e-05, |
|
"loss": 0.0011, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 6.01159793814433e-05, |
|
"loss": 0.0007, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 5.953608247422681e-05, |
|
"loss": 0.0012, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 5.8956185567010315e-05, |
|
"loss": 0.0021, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"learning_rate": 5.837628865979382e-05, |
|
"loss": 0.0006, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 5.779639175257732e-05, |
|
"loss": 0.0008, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.0002690624096430838, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9448, |
|
"eval_samples_per_second": 42.163, |
|
"eval_steps_per_second": 1.543, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 5.721649484536082e-05, |
|
"loss": 0.001, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 5.663659793814433e-05, |
|
"loss": 0.0012, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 5.605670103092784e-05, |
|
"loss": 0.0005, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 5.5476804123711345e-05, |
|
"loss": 0.0011, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 5.489690721649485e-05, |
|
"loss": 0.0026, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 5.431701030927835e-05, |
|
"loss": 0.0009, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 5.3737113402061854e-05, |
|
"loss": 0.001, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.0004277784610167146, |
|
"eval_max_distance": 2, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9315, |
|
"eval_samples_per_second": 42.454, |
|
"eval_steps_per_second": 1.553, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 5.3157216494845366e-05, |
|
"loss": 0.0007, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 13.17, |
|
"learning_rate": 5.257731958762887e-05, |
|
"loss": 0.0008, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 5.1997422680412376e-05, |
|
"loss": 0.0005, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 5.1417525773195874e-05, |
|
"loss": 0.0004, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 5.083762886597938e-05, |
|
"loss": 0.0006, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 5.025773195876289e-05, |
|
"loss": 0.0025, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 4.9677835051546396e-05, |
|
"loss": 0.0009, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"learning_rate": 4.9097938144329895e-05, |
|
"loss": 0.0026, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.0005385838449001312, |
|
"eval_max_distance": 2, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.993, |
|
"eval_samples_per_second": 41.144, |
|
"eval_steps_per_second": 1.505, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 4.8518041237113407e-05, |
|
"loss": 0.0016, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 4.793814432989691e-05, |
|
"loss": 0.0014, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 4.735824742268041e-05, |
|
"loss": 0.0007, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 4.677835051546392e-05, |
|
"loss": 0.0031, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 4.619845360824743e-05, |
|
"loss": 0.0008, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 4.561855670103093e-05, |
|
"loss": 0.0028, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"learning_rate": 4.503865979381444e-05, |
|
"loss": 0.0004, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 4.4458762886597936e-05, |
|
"loss": 0.0015, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.0007138837827369571, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9688, |
|
"eval_samples_per_second": 41.651, |
|
"eval_steps_per_second": 1.524, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"learning_rate": 4.387886597938145e-05, |
|
"loss": 0.0005, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"learning_rate": 4.329896907216495e-05, |
|
"loss": 0.0011, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 4.271907216494845e-05, |
|
"loss": 0.0001, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 4.213917525773196e-05, |
|
"loss": 0.0009, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 4.155927835051547e-05, |
|
"loss": 0.0017, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 15.78, |
|
"learning_rate": 4.097938144329897e-05, |
|
"loss": 0.0013, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 4.039948453608248e-05, |
|
"loss": 0.0009, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.0006717974320054054, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9244, |
|
"eval_samples_per_second": 42.612, |
|
"eval_steps_per_second": 1.559, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 3.9819587628865976e-05, |
|
"loss": 0.0015, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 3.923969072164949e-05, |
|
"loss": 0.0003, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 3.865979381443299e-05, |
|
"loss": 0.0006, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"learning_rate": 3.807989690721649e-05, |
|
"loss": 0.0013, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0009, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"learning_rate": 3.692010309278351e-05, |
|
"loss": 0.0004, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"learning_rate": 3.6340206185567014e-05, |
|
"loss": 0.0008, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 3.576030927835052e-05, |
|
"loss": 0.0014, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.00033988503855653107, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9136, |
|
"eval_samples_per_second": 42.851, |
|
"eval_steps_per_second": 1.568, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 3.5180412371134024e-05, |
|
"loss": 0.0016, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 3.460051546391753e-05, |
|
"loss": 0.0004, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 3.4020618556701034e-05, |
|
"loss": 0.002, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 3.344072164948453e-05, |
|
"loss": 0.0012, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 3.2860824742268044e-05, |
|
"loss": 0.0002, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 17.74, |
|
"learning_rate": 3.228092783505155e-05, |
|
"loss": 0.0006, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"learning_rate": 3.1701030927835054e-05, |
|
"loss": 0.0044, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.112113402061856e-05, |
|
"loss": 0.001, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.00037691937177442014, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9496, |
|
"eval_samples_per_second": 42.059, |
|
"eval_steps_per_second": 1.539, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 3.0541237113402065e-05, |
|
"loss": 0.0005, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 2.9961340206185566e-05, |
|
"loss": 0.0009, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 18.39, |
|
"learning_rate": 2.9381443298969075e-05, |
|
"loss": 0.0008, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"learning_rate": 2.8801546391752577e-05, |
|
"loss": 0.0007, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 18.65, |
|
"learning_rate": 2.8221649484536085e-05, |
|
"loss": 0.0016, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 18.78, |
|
"learning_rate": 2.764175257731959e-05, |
|
"loss": 0.0012, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 2.7061855670103092e-05, |
|
"loss": 0.0007, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.001327142701484263, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9144, |
|
"eval_samples_per_second": 42.834, |
|
"eval_steps_per_second": 1.567, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 2.64819587628866e-05, |
|
"loss": 0.0005, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 19.17, |
|
"learning_rate": 2.5902061855670106e-05, |
|
"loss": 0.0011, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 19.3, |
|
"learning_rate": 2.5322164948453607e-05, |
|
"loss": 0.0006, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 19.43, |
|
"learning_rate": 2.4742268041237116e-05, |
|
"loss": 0.0004, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 2.416237113402062e-05, |
|
"loss": 0.002, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"learning_rate": 2.3582474226804126e-05, |
|
"loss": 0.0003, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 19.83, |
|
"learning_rate": 2.3002577319587628e-05, |
|
"loss": 0.0011, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 19.96, |
|
"learning_rate": 2.2422680412371136e-05, |
|
"loss": 0.0013, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.0012958323350176215, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9255, |
|
"eval_samples_per_second": 42.587, |
|
"eval_steps_per_second": 1.558, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 20.09, |
|
"learning_rate": 2.184278350515464e-05, |
|
"loss": 0.0017, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 20.22, |
|
"learning_rate": 2.1262886597938146e-05, |
|
"loss": 0.0011, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 20.35, |
|
"learning_rate": 2.0682989690721648e-05, |
|
"loss": 0.0014, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 20.48, |
|
"learning_rate": 2.0103092783505157e-05, |
|
"loss": 0.0006, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 20.61, |
|
"learning_rate": 1.952319587628866e-05, |
|
"loss": 0.0003, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 20.74, |
|
"learning_rate": 1.8943298969072167e-05, |
|
"loss": 0.0002, |
|
"step": 1431 |
|
}, |
|
{ |
|
"epoch": 20.87, |
|
"learning_rate": 1.8363402061855672e-05, |
|
"loss": 0.0024, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 1.7783505154639177e-05, |
|
"loss": 0.0007, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.0002567500632721931, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9342, |
|
"eval_samples_per_second": 42.395, |
|
"eval_steps_per_second": 1.551, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 21.13, |
|
"learning_rate": 1.7203608247422682e-05, |
|
"loss": 0.0006, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 21.26, |
|
"learning_rate": 1.6623711340206187e-05, |
|
"loss": 0.002, |
|
"step": 1467 |
|
}, |
|
{ |
|
"epoch": 21.39, |
|
"learning_rate": 1.6043814432989692e-05, |
|
"loss": 0.0007, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 21.52, |
|
"learning_rate": 1.5463917525773197e-05, |
|
"loss": 0.0007, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 21.65, |
|
"learning_rate": 1.4884020618556702e-05, |
|
"loss": 0.0016, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 21.78, |
|
"learning_rate": 1.4304123711340206e-05, |
|
"loss": 0.0008, |
|
"step": 1503 |
|
}, |
|
{ |
|
"epoch": 21.91, |
|
"learning_rate": 1.3724226804123713e-05, |
|
"loss": 0.0016, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.0002821955131366849, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9113, |
|
"eval_samples_per_second": 42.902, |
|
"eval_steps_per_second": 1.57, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 22.04, |
|
"learning_rate": 1.3144329896907218e-05, |
|
"loss": 0.0018, |
|
"step": 1521 |
|
}, |
|
{ |
|
"epoch": 22.17, |
|
"learning_rate": 1.2564432989690723e-05, |
|
"loss": 0.0027, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 22.3, |
|
"learning_rate": 1.1984536082474228e-05, |
|
"loss": 0.0002, |
|
"step": 1539 |
|
}, |
|
{ |
|
"epoch": 22.43, |
|
"learning_rate": 1.1404639175257733e-05, |
|
"loss": 0.001, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 22.57, |
|
"learning_rate": 1.0824742268041238e-05, |
|
"loss": 0.0007, |
|
"step": 1557 |
|
}, |
|
{ |
|
"epoch": 22.7, |
|
"learning_rate": 1.0244845360824743e-05, |
|
"loss": 0.0009, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 22.83, |
|
"learning_rate": 9.664948453608248e-06, |
|
"loss": 0.0012, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 22.96, |
|
"learning_rate": 9.085051546391753e-06, |
|
"loss": 0.0013, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 0.00030223012436181307, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9675, |
|
"eval_samples_per_second": 41.677, |
|
"eval_steps_per_second": 1.525, |
|
"step": 1587 |
|
}, |
|
{ |
|
"epoch": 23.09, |
|
"learning_rate": 8.505154639175259e-06, |
|
"loss": 0.0025, |
|
"step": 1593 |
|
}, |
|
{ |
|
"epoch": 23.22, |
|
"learning_rate": 7.925257731958764e-06, |
|
"loss": 0.001, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 23.35, |
|
"learning_rate": 7.345360824742269e-06, |
|
"loss": 0.0004, |
|
"step": 1611 |
|
}, |
|
{ |
|
"epoch": 23.48, |
|
"learning_rate": 6.765463917525773e-06, |
|
"loss": 0.0006, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 23.61, |
|
"learning_rate": 6.185567010309279e-06, |
|
"loss": 0.001, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 23.74, |
|
"learning_rate": 5.605670103092784e-06, |
|
"loss": 0.0012, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 23.87, |
|
"learning_rate": 5.025773195876289e-06, |
|
"loss": 0.0013, |
|
"step": 1647 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 4.445876288659794e-06, |
|
"loss": 0.0004, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.0002631743554957211, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9278, |
|
"eval_samples_per_second": 42.536, |
|
"eval_steps_per_second": 1.556, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 24.13, |
|
"learning_rate": 3.865979381443299e-06, |
|
"loss": 0.0018, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"learning_rate": 3.2860824742268044e-06, |
|
"loss": 0.0006, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 24.39, |
|
"learning_rate": 2.7061855670103095e-06, |
|
"loss": 0.0012, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"learning_rate": 2.1262886597938146e-06, |
|
"loss": 0.0009, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 24.65, |
|
"learning_rate": 1.5463917525773197e-06, |
|
"loss": 0.0007, |
|
"step": 1701 |
|
}, |
|
{ |
|
"epoch": 24.78, |
|
"learning_rate": 9.664948453608248e-07, |
|
"loss": 0.0009, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 24.91, |
|
"learning_rate": 3.8659793814432993e-07, |
|
"loss": 0.001, |
|
"step": 1719 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.0002593309909570962, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.0449, |
|
"eval_samples_per_second": 40.1, |
|
"eval_steps_per_second": 1.467, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"step": 1725, |
|
"total_flos": 459342194208768.0, |
|
"train_loss": 0.0013090899151048043, |
|
"train_runtime": 199.817, |
|
"train_samples_per_second": 256.61, |
|
"train_steps_per_second": 8.633 |
|
} |
|
], |
|
"logging_steps": 9, |
|
"max_steps": 1725, |
|
"num_train_epochs": 25, |
|
"save_steps": 18, |
|
"total_flos": 459342194208768.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|