|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 45885, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.1791239921551537e-08, |
|
"loss": 0.0002, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5.011985181956853e-06, |
|
"loss": 0.0017, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.0023970363913707e-05, |
|
"loss": 0.0012, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.503595554587056e-05, |
|
"loss": 0.0008, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.0047940727827413e-05, |
|
"loss": 0.001, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.5059925909784264e-05, |
|
"loss": 0.0008, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.007191109174112e-05, |
|
"loss": 0.0007, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.5083896273697975e-05, |
|
"loss": 0.0007, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.0095881455654826e-05, |
|
"loss": 0.0006, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.5107866637611684e-05, |
|
"loss": 0.0006, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.011985181956853e-05, |
|
"loss": 0.0005, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.5131837001525385e-05, |
|
"loss": 0.0004, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.014382218348224e-05, |
|
"loss": 0.0003, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.51558073654391e-05, |
|
"loss": 0.0004, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.002156034577637911, |
|
"eval_max_distance": 12, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 31.367, |
|
"eval_samples_per_second": 18.778, |
|
"eval_steps_per_second": 0.128, |
|
"step": 3059 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 7.016779254739595e-05, |
|
"loss": 0.0004, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 7.517977772935281e-05, |
|
"loss": 0.0003, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.019176291130965e-05, |
|
"loss": 0.0004, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.520374809326651e-05, |
|
"loss": 0.0003, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.021573327522337e-05, |
|
"loss": 0.0004, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.522771845718021e-05, |
|
"loss": 0.0003, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.997336303758234e-05, |
|
"loss": 0.0003, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.941640836884929e-05, |
|
"loss": 0.0003, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.885945370011624e-05, |
|
"loss": 0.0004, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.830249903138319e-05, |
|
"loss": 0.0003, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.774554436265014e-05, |
|
"loss": 0.0004, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.718858969391709e-05, |
|
"loss": 0.0003, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 9.663163502518404e-05, |
|
"loss": 0.0003, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.0011327904649078846, |
|
"eval_max_distance": 5, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.6053, |
|
"eval_samples_per_second": 19.245, |
|
"eval_steps_per_second": 0.131, |
|
"step": 6118 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.6074680356451e-05, |
|
"loss": 0.0003, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.551772568771794e-05, |
|
"loss": 0.0003, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 9.496077101898489e-05, |
|
"loss": 0.0003, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 9.440381635025185e-05, |
|
"loss": 0.0003, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 9.384686168151879e-05, |
|
"loss": 0.0004, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.328990701278575e-05, |
|
"loss": 0.0003, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.273295234405269e-05, |
|
"loss": 0.0003, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 9.217599767531964e-05, |
|
"loss": 0.0003, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 9.16190430065866e-05, |
|
"loss": 0.0003, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.106208833785354e-05, |
|
"loss": 0.0003, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 9.05051336691205e-05, |
|
"loss": 0.0003, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 8.994817900038746e-05, |
|
"loss": 0.0003, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.939122433165439e-05, |
|
"loss": 0.0003, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.0010147562716156244, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.7374, |
|
"eval_samples_per_second": 19.162, |
|
"eval_steps_per_second": 0.13, |
|
"step": 9177 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 8.883426966292136e-05, |
|
"loss": 0.0003, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 8.82773149941883e-05, |
|
"loss": 0.0002, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 8.772036032545526e-05, |
|
"loss": 0.0003, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 8.716340565672221e-05, |
|
"loss": 0.0002, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 8.660645098798916e-05, |
|
"loss": 0.0003, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 8.604949631925611e-05, |
|
"loss": 0.0003, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 8.549254165052306e-05, |
|
"loss": 0.0003, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 8.493558698179001e-05, |
|
"loss": 0.0003, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 8.437863231305696e-05, |
|
"loss": 0.0003, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 8.382167764432391e-05, |
|
"loss": 0.0003, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 8.326472297559086e-05, |
|
"loss": 0.0003, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 8.270776830685781e-05, |
|
"loss": 0.0003, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 8.215081363812476e-05, |
|
"loss": 0.0003, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 8.159385896939171e-05, |
|
"loss": 0.0003, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.0011835404438897967, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.6791, |
|
"eval_samples_per_second": 19.199, |
|
"eval_steps_per_second": 0.13, |
|
"step": 12236 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 8.103690430065866e-05, |
|
"loss": 0.0003, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 8.047994963192561e-05, |
|
"loss": 0.0003, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 7.992299496319256e-05, |
|
"loss": 0.0003, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 7.936604029445953e-05, |
|
"loss": 0.0002, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 7.880908562572646e-05, |
|
"loss": 0.0003, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 7.825213095699341e-05, |
|
"loss": 0.0003, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 7.769517628826036e-05, |
|
"loss": 0.0003, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 7.713822161952731e-05, |
|
"loss": 0.0003, |
|
"step": 14030 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 7.658126695079428e-05, |
|
"loss": 0.0002, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 7.602431228206122e-05, |
|
"loss": 0.0003, |
|
"step": 14490 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 7.546735761332817e-05, |
|
"loss": 0.0003, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 7.491040294459513e-05, |
|
"loss": 0.0002, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 7.435344827586207e-05, |
|
"loss": 0.0003, |
|
"step": 15180 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.000761075527407229, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.6946, |
|
"eval_samples_per_second": 19.189, |
|
"eval_steps_per_second": 0.13, |
|
"step": 15295 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 7.379649360712903e-05, |
|
"loss": 0.0002, |
|
"step": 15410 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 7.323953893839598e-05, |
|
"loss": 0.0002, |
|
"step": 15640 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 7.268258426966292e-05, |
|
"loss": 0.0003, |
|
"step": 15870 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 7.212562960092988e-05, |
|
"loss": 0.0002, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 7.156867493219682e-05, |
|
"loss": 0.0003, |
|
"step": 16330 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 7.101172026346378e-05, |
|
"loss": 0.0002, |
|
"step": 16560 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 7.045476559473073e-05, |
|
"loss": 0.0002, |
|
"step": 16790 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 6.989781092599767e-05, |
|
"loss": 0.0003, |
|
"step": 17020 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 6.934085625726463e-05, |
|
"loss": 0.0003, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 6.878390158853158e-05, |
|
"loss": 0.0002, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 6.822694691979853e-05, |
|
"loss": 0.0003, |
|
"step": 17710 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 6.766999225106548e-05, |
|
"loss": 0.0003, |
|
"step": 17940 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 6.711303758233242e-05, |
|
"loss": 0.0002, |
|
"step": 18170 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.0009165782830677927, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.7001, |
|
"eval_samples_per_second": 19.186, |
|
"eval_steps_per_second": 0.13, |
|
"step": 18354 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 6.655608291359938e-05, |
|
"loss": 0.0003, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 6.599912824486634e-05, |
|
"loss": 0.0002, |
|
"step": 18630 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 6.544217357613329e-05, |
|
"loss": 0.0002, |
|
"step": 18860 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 6.488521890740024e-05, |
|
"loss": 0.0003, |
|
"step": 19090 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 6.432826423866719e-05, |
|
"loss": 0.0002, |
|
"step": 19320 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 6.377130956993414e-05, |
|
"loss": 0.0002, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 6.321435490120109e-05, |
|
"loss": 0.0003, |
|
"step": 19780 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 6.265740023246804e-05, |
|
"loss": 0.0002, |
|
"step": 20010 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 6.210044556373499e-05, |
|
"loss": 0.0002, |
|
"step": 20240 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 6.154349089500194e-05, |
|
"loss": 0.0003, |
|
"step": 20470 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 6.098653622626889e-05, |
|
"loss": 0.0002, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 6.042958155753584e-05, |
|
"loss": 0.0002, |
|
"step": 20930 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 5.9872626888802796e-05, |
|
"loss": 0.0003, |
|
"step": 21160 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 5.931567222006974e-05, |
|
"loss": 0.0002, |
|
"step": 21390 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.0007532148738391697, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 31.1332, |
|
"eval_samples_per_second": 18.919, |
|
"eval_steps_per_second": 0.128, |
|
"step": 21413 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 5.87587175513367e-05, |
|
"loss": 0.0003, |
|
"step": 21620 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 5.820176288260365e-05, |
|
"loss": 0.0002, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 5.764480821387059e-05, |
|
"loss": 0.0002, |
|
"step": 22080 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 5.708785354513755e-05, |
|
"loss": 0.0002, |
|
"step": 22310 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 5.653089887640449e-05, |
|
"loss": 0.0003, |
|
"step": 22540 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 5.597394420767145e-05, |
|
"loss": 0.0003, |
|
"step": 22770 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 5.54169895389384e-05, |
|
"loss": 0.0002, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 5.486003487020534e-05, |
|
"loss": 0.0002, |
|
"step": 23230 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 5.43030802014723e-05, |
|
"loss": 0.0003, |
|
"step": 23460 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 5.374612553273926e-05, |
|
"loss": 0.0002, |
|
"step": 23690 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 5.31891708640062e-05, |
|
"loss": 0.0002, |
|
"step": 23920 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 5.263221619527316e-05, |
|
"loss": 0.0002, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 5.20752615265401e-05, |
|
"loss": 0.0002, |
|
"step": 24380 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.0007926285616122186, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.7407, |
|
"eval_samples_per_second": 19.16, |
|
"eval_steps_per_second": 0.13, |
|
"step": 24472 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 5.151830685780705e-05, |
|
"loss": 0.0002, |
|
"step": 24610 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 5.096135218907401e-05, |
|
"loss": 0.0002, |
|
"step": 24840 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 5.040439752034095e-05, |
|
"loss": 0.0002, |
|
"step": 25070 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 4.984744285160791e-05, |
|
"loss": 0.0002, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 4.929048818287485e-05, |
|
"loss": 0.0002, |
|
"step": 25530 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 4.873353351414181e-05, |
|
"loss": 0.0002, |
|
"step": 25760 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 4.817657884540876e-05, |
|
"loss": 0.0002, |
|
"step": 25990 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 4.761962417667571e-05, |
|
"loss": 0.0002, |
|
"step": 26220 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 4.706266950794266e-05, |
|
"loss": 0.0002, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 4.650571483920961e-05, |
|
"loss": 0.0002, |
|
"step": 26680 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 4.594876017047656e-05, |
|
"loss": 0.0002, |
|
"step": 26910 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 4.539180550174351e-05, |
|
"loss": 0.0003, |
|
"step": 27140 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 4.483485083301046e-05, |
|
"loss": 0.0002, |
|
"step": 27370 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.0007057118928059936, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.795, |
|
"eval_samples_per_second": 19.127, |
|
"eval_steps_per_second": 0.13, |
|
"step": 27531 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.427789616427741e-05, |
|
"loss": 0.0002, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 4.372094149554436e-05, |
|
"loss": 0.0002, |
|
"step": 27830 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 4.3163986826811313e-05, |
|
"loss": 0.0002, |
|
"step": 28060 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 4.2607032158078264e-05, |
|
"loss": 0.0002, |
|
"step": 28290 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 4.205007748934522e-05, |
|
"loss": 0.0002, |
|
"step": 28520 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 4.149312282061217e-05, |
|
"loss": 0.0002, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 4.0936168151879115e-05, |
|
"loss": 0.0002, |
|
"step": 28980 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 4.0379213483146065e-05, |
|
"loss": 0.0002, |
|
"step": 29210 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 3.982225881441302e-05, |
|
"loss": 0.0002, |
|
"step": 29440 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 3.926530414567997e-05, |
|
"loss": 0.0003, |
|
"step": 29670 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 3.870834947694692e-05, |
|
"loss": 0.0002, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 3.8151394808213873e-05, |
|
"loss": 0.0002, |
|
"step": 30130 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 3.7594440139480824e-05, |
|
"loss": 0.0002, |
|
"step": 30360 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.7037485470747774e-05, |
|
"loss": 0.0002, |
|
"step": 30590 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.000787499884609133, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.7843, |
|
"eval_samples_per_second": 19.133, |
|
"eval_steps_per_second": 0.13, |
|
"step": 30590 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 3.6480530802014724e-05, |
|
"loss": 0.0002, |
|
"step": 30820 |
|
}, |
|
{ |
|
"epoch": 10.15, |
|
"learning_rate": 3.5923576133281675e-05, |
|
"loss": 0.0002, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 3.5366621464548625e-05, |
|
"loss": 0.0002, |
|
"step": 31280 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 3.4809666795815576e-05, |
|
"loss": 0.0002, |
|
"step": 31510 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 3.4252712127082526e-05, |
|
"loss": 0.0002, |
|
"step": 31740 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 3.369575745834948e-05, |
|
"loss": 0.0002, |
|
"step": 31970 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 3.313880278961643e-05, |
|
"loss": 0.0002, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 3.258184812088338e-05, |
|
"loss": 0.0002, |
|
"step": 32430 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 3.202489345215033e-05, |
|
"loss": 0.0002, |
|
"step": 32660 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 3.1467938783417284e-05, |
|
"loss": 0.0002, |
|
"step": 32890 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 3.0910984114684235e-05, |
|
"loss": 0.0002, |
|
"step": 33120 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 3.0354029445951182e-05, |
|
"loss": 0.0002, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 2.9797074777218132e-05, |
|
"loss": 0.0002, |
|
"step": 33580 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.000802784226834774, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.8756, |
|
"eval_samples_per_second": 19.077, |
|
"eval_steps_per_second": 0.13, |
|
"step": 33649 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 2.9240120108485086e-05, |
|
"loss": 0.0002, |
|
"step": 33810 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 2.8683165439752036e-05, |
|
"loss": 0.0002, |
|
"step": 34040 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 2.8126210771018983e-05, |
|
"loss": 0.0002, |
|
"step": 34270 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 2.756925610228594e-05, |
|
"loss": 0.0002, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 2.7012301433552887e-05, |
|
"loss": 0.0002, |
|
"step": 34730 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 2.6455346764819838e-05, |
|
"loss": 0.0002, |
|
"step": 34960 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 2.5898392096086788e-05, |
|
"loss": 0.0002, |
|
"step": 35190 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 2.5341437427353742e-05, |
|
"loss": 0.0002, |
|
"step": 35420 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 2.4784482758620692e-05, |
|
"loss": 0.0002, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 2.4227528089887643e-05, |
|
"loss": 0.0002, |
|
"step": 35880 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 2.3670573421154593e-05, |
|
"loss": 0.0002, |
|
"step": 36110 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 2.3113618752421543e-05, |
|
"loss": 0.0002, |
|
"step": 36340 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 2.2556664083688494e-05, |
|
"loss": 0.0002, |
|
"step": 36570 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.0007881763740442693, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.7194, |
|
"eval_samples_per_second": 19.174, |
|
"eval_steps_per_second": 0.13, |
|
"step": 36708 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 2.1999709414955444e-05, |
|
"loss": 0.0002, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 2.1442754746222398e-05, |
|
"loss": 0.0002, |
|
"step": 37030 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 2.0885800077489345e-05, |
|
"loss": 0.0002, |
|
"step": 37260 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 2.03288454087563e-05, |
|
"loss": 0.0002, |
|
"step": 37490 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 1.9771890740023245e-05, |
|
"loss": 0.0002, |
|
"step": 37720 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 1.92149360712902e-05, |
|
"loss": 0.0002, |
|
"step": 37950 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 1.865798140255715e-05, |
|
"loss": 0.0002, |
|
"step": 38180 |
|
}, |
|
{ |
|
"epoch": 12.56, |
|
"learning_rate": 1.81010267338241e-05, |
|
"loss": 0.0002, |
|
"step": 38410 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 1.754407206509105e-05, |
|
"loss": 0.0002, |
|
"step": 38640 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 1.6987117396358e-05, |
|
"loss": 0.0002, |
|
"step": 38870 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 1.6430162727624954e-05, |
|
"loss": 0.0002, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 1.5873208058891905e-05, |
|
"loss": 0.0002, |
|
"step": 39330 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 1.5316253390158855e-05, |
|
"loss": 0.0002, |
|
"step": 39560 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.000838827807456255, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.7487, |
|
"eval_samples_per_second": 19.155, |
|
"eval_steps_per_second": 0.13, |
|
"step": 39767 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 1.4759298721425804e-05, |
|
"loss": 0.0002, |
|
"step": 39790 |
|
}, |
|
{ |
|
"epoch": 13.08, |
|
"learning_rate": 1.4202344052692756e-05, |
|
"loss": 0.0002, |
|
"step": 40020 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 1.3645389383959706e-05, |
|
"loss": 0.0002, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 13.23, |
|
"learning_rate": 1.3088434715226658e-05, |
|
"loss": 0.0002, |
|
"step": 40480 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 1.2531480046493607e-05, |
|
"loss": 0.0002, |
|
"step": 40710 |
|
}, |
|
{ |
|
"epoch": 13.38, |
|
"learning_rate": 1.1974525377760557e-05, |
|
"loss": 0.0002, |
|
"step": 40940 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 1.141757070902751e-05, |
|
"loss": 0.0002, |
|
"step": 41170 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 1.0860616040294461e-05, |
|
"loss": 0.0002, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 1.0303661371561412e-05, |
|
"loss": 0.0002, |
|
"step": 41630 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 9.746706702828362e-06, |
|
"loss": 0.0002, |
|
"step": 41860 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 9.189752034095312e-06, |
|
"loss": 0.0002, |
|
"step": 42090 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 8.632797365362263e-06, |
|
"loss": 0.0002, |
|
"step": 42320 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 8.075842696629215e-06, |
|
"loss": 0.0002, |
|
"step": 42550 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"learning_rate": 7.518888027896165e-06, |
|
"loss": 0.0002, |
|
"step": 42780 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.0008361997315660119, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.9876, |
|
"eval_samples_per_second": 19.008, |
|
"eval_steps_per_second": 0.129, |
|
"step": 42826 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 6.9619333591631155e-06, |
|
"loss": 0.0002, |
|
"step": 43010 |
|
}, |
|
{ |
|
"epoch": 14.14, |
|
"learning_rate": 6.404978690430067e-06, |
|
"loss": 0.0002, |
|
"step": 43240 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 5.848024021697017e-06, |
|
"loss": 0.0002, |
|
"step": 43470 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 5.291069352963967e-06, |
|
"loss": 0.0002, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 4.7341146842309186e-06, |
|
"loss": 0.0002, |
|
"step": 43930 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 4.177160015497869e-06, |
|
"loss": 0.0002, |
|
"step": 44160 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 3.6202053467648197e-06, |
|
"loss": 0.0002, |
|
"step": 44390 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 3.063250678031771e-06, |
|
"loss": 0.0002, |
|
"step": 44620 |
|
}, |
|
{ |
|
"epoch": 14.66, |
|
"learning_rate": 2.5062960092987217e-06, |
|
"loss": 0.0002, |
|
"step": 44850 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 1.949341340565672e-06, |
|
"loss": 0.0002, |
|
"step": 45080 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 1.392386671832623e-06, |
|
"loss": 0.0002, |
|
"step": 45310 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 8.354320030995738e-07, |
|
"loss": 0.0002, |
|
"step": 45540 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 2.784773343665246e-07, |
|
"loss": 0.0002, |
|
"step": 45770 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.0008401814266107976, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 30.8673, |
|
"eval_samples_per_second": 19.082, |
|
"eval_steps_per_second": 0.13, |
|
"step": 45885 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 45885, |
|
"total_flos": 7.988818341631795e+16, |
|
"train_loss": 0.0002746593983470409, |
|
"train_runtime": 6476.6732, |
|
"train_samples_per_second": 531.266, |
|
"train_steps_per_second": 7.085 |
|
} |
|
], |
|
"logging_steps": 230, |
|
"max_steps": 45885, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 4589, |
|
"total_flos": 7.988818341631795e+16, |
|
"train_batch_size": 75, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|