|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 18.900343642611684, |
|
"eval_steps": 500, |
|
"global_step": 11000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01718213058419244, |
|
"grad_norm": 6.171707630157471, |
|
"learning_rate": 1.7182130584192443e-06, |
|
"loss": 0.9772, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03436426116838488, |
|
"grad_norm": 8.092323303222656, |
|
"learning_rate": 3.4364261168384886e-06, |
|
"loss": 0.8776, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05154639175257732, |
|
"grad_norm": 4.56458044052124, |
|
"learning_rate": 5.154639175257732e-06, |
|
"loss": 0.9078, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06872852233676977, |
|
"grad_norm": 4.5016679763793945, |
|
"learning_rate": 6.872852233676977e-06, |
|
"loss": 0.6541, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0859106529209622, |
|
"grad_norm": 3.0442380905151367, |
|
"learning_rate": 8.591065292096221e-06, |
|
"loss": 0.4879, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10309278350515463, |
|
"grad_norm": 3.105207681655884, |
|
"learning_rate": 1.0309278350515464e-05, |
|
"loss": 0.4411, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12027491408934708, |
|
"grad_norm": 2.3539834022521973, |
|
"learning_rate": 1.2027491408934708e-05, |
|
"loss": 0.326, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13745704467353953, |
|
"grad_norm": 2.5519046783447266, |
|
"learning_rate": 1.3745704467353954e-05, |
|
"loss": 0.285, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.15463917525773196, |
|
"grad_norm": 4.304392337799072, |
|
"learning_rate": 1.5463917525773197e-05, |
|
"loss": 0.2539, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1718213058419244, |
|
"grad_norm": 2.098220109939575, |
|
"learning_rate": 1.7182130584192442e-05, |
|
"loss": 0.2023, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18900343642611683, |
|
"grad_norm": 1.672839879989624, |
|
"learning_rate": 1.8900343642611683e-05, |
|
"loss": 0.1724, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.20618556701030927, |
|
"grad_norm": 1.0948542356491089, |
|
"learning_rate": 2.0618556701030927e-05, |
|
"loss": 0.1859, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.22336769759450173, |
|
"grad_norm": 1.5368601083755493, |
|
"learning_rate": 2.2336769759450175e-05, |
|
"loss": 0.1309, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.24054982817869416, |
|
"grad_norm": 1.4471988677978516, |
|
"learning_rate": 2.4054982817869417e-05, |
|
"loss": 0.134, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.25773195876288657, |
|
"grad_norm": 1.2296886444091797, |
|
"learning_rate": 2.5773195876288658e-05, |
|
"loss": 0.1439, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.27491408934707906, |
|
"grad_norm": 4.241024017333984, |
|
"learning_rate": 2.749140893470791e-05, |
|
"loss": 0.1214, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2920962199312715, |
|
"grad_norm": 0.9207940697669983, |
|
"learning_rate": 2.920962199312715e-05, |
|
"loss": 0.1141, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.30927835051546393, |
|
"grad_norm": 0.7457907199859619, |
|
"learning_rate": 3.0927835051546395e-05, |
|
"loss": 0.0938, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.32646048109965636, |
|
"grad_norm": 2.0314087867736816, |
|
"learning_rate": 3.2646048109965636e-05, |
|
"loss": 0.1287, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3436426116838488, |
|
"grad_norm": 1.9431196451187134, |
|
"learning_rate": 3.4364261168384884e-05, |
|
"loss": 0.112, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.36082474226804123, |
|
"grad_norm": 1.023748755455017, |
|
"learning_rate": 3.6082474226804125e-05, |
|
"loss": 0.1039, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.37800687285223367, |
|
"grad_norm": 1.6307401657104492, |
|
"learning_rate": 3.7800687285223366e-05, |
|
"loss": 0.1076, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3951890034364261, |
|
"grad_norm": 1.2871341705322266, |
|
"learning_rate": 3.9518900343642614e-05, |
|
"loss": 0.1007, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.41237113402061853, |
|
"grad_norm": 1.1707206964492798, |
|
"learning_rate": 4.1237113402061855e-05, |
|
"loss": 0.0978, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.42955326460481097, |
|
"grad_norm": 1.1074360609054565, |
|
"learning_rate": 4.2955326460481096e-05, |
|
"loss": 0.0853, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.44673539518900346, |
|
"grad_norm": 1.1564663648605347, |
|
"learning_rate": 4.467353951890035e-05, |
|
"loss": 0.0928, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4639175257731959, |
|
"grad_norm": 0.8830773830413818, |
|
"learning_rate": 4.639175257731959e-05, |
|
"loss": 0.0852, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.48109965635738833, |
|
"grad_norm": 0.8775057792663574, |
|
"learning_rate": 4.810996563573883e-05, |
|
"loss": 0.0897, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.49828178694158076, |
|
"grad_norm": 0.5895084142684937, |
|
"learning_rate": 4.982817869415808e-05, |
|
"loss": 0.0741, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5154639175257731, |
|
"grad_norm": 1.0802148580551147, |
|
"learning_rate": 5.1546391752577315e-05, |
|
"loss": 0.0829, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5326460481099656, |
|
"grad_norm": 1.7113603353500366, |
|
"learning_rate": 5.326460481099656e-05, |
|
"loss": 0.0781, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5498281786941581, |
|
"grad_norm": 1.0171607732772827, |
|
"learning_rate": 5.498281786941582e-05, |
|
"loss": 0.0794, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5670103092783505, |
|
"grad_norm": 1.2694463729858398, |
|
"learning_rate": 5.670103092783505e-05, |
|
"loss": 0.0828, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.584192439862543, |
|
"grad_norm": 1.6448224782943726, |
|
"learning_rate": 5.84192439862543e-05, |
|
"loss": 0.0738, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6013745704467354, |
|
"grad_norm": 1.3152124881744385, |
|
"learning_rate": 6.013745704467354e-05, |
|
"loss": 0.0805, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6185567010309279, |
|
"grad_norm": 0.9917396306991577, |
|
"learning_rate": 6.185567010309279e-05, |
|
"loss": 0.0719, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6357388316151202, |
|
"grad_norm": 1.0059962272644043, |
|
"learning_rate": 6.357388316151203e-05, |
|
"loss": 0.0648, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6529209621993127, |
|
"grad_norm": 0.8844972848892212, |
|
"learning_rate": 6.529209621993127e-05, |
|
"loss": 0.0778, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6701030927835051, |
|
"grad_norm": 0.7589945793151855, |
|
"learning_rate": 6.701030927835051e-05, |
|
"loss": 0.0924, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6872852233676976, |
|
"grad_norm": 1.062225580215454, |
|
"learning_rate": 6.872852233676977e-05, |
|
"loss": 0.0637, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7044673539518901, |
|
"grad_norm": 1.1478430032730103, |
|
"learning_rate": 7.044673539518901e-05, |
|
"loss": 0.0878, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7216494845360825, |
|
"grad_norm": 1.2245433330535889, |
|
"learning_rate": 7.216494845360825e-05, |
|
"loss": 0.0856, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.738831615120275, |
|
"grad_norm": 0.8035943508148193, |
|
"learning_rate": 7.38831615120275e-05, |
|
"loss": 0.0789, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7560137457044673, |
|
"grad_norm": 1.5007230043411255, |
|
"learning_rate": 7.560137457044673e-05, |
|
"loss": 0.099, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7731958762886598, |
|
"grad_norm": 0.8082581162452698, |
|
"learning_rate": 7.731958762886599e-05, |
|
"loss": 0.0818, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7903780068728522, |
|
"grad_norm": 1.0343904495239258, |
|
"learning_rate": 7.903780068728523e-05, |
|
"loss": 0.0622, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8075601374570447, |
|
"grad_norm": 0.7941983342170715, |
|
"learning_rate": 8.075601374570447e-05, |
|
"loss": 0.077, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8247422680412371, |
|
"grad_norm": 0.7006020545959473, |
|
"learning_rate": 8.247422680412371e-05, |
|
"loss": 0.056, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8419243986254296, |
|
"grad_norm": 0.5468656420707703, |
|
"learning_rate": 8.419243986254296e-05, |
|
"loss": 0.0611, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8591065292096219, |
|
"grad_norm": 0.581874668598175, |
|
"learning_rate": 8.591065292096219e-05, |
|
"loss": 0.0544, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8762886597938144, |
|
"grad_norm": 0.7868462800979614, |
|
"learning_rate": 8.762886597938145e-05, |
|
"loss": 0.0639, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8934707903780069, |
|
"grad_norm": 0.9123062491416931, |
|
"learning_rate": 8.93470790378007e-05, |
|
"loss": 0.0529, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9106529209621993, |
|
"grad_norm": 0.9630204439163208, |
|
"learning_rate": 9.106529209621993e-05, |
|
"loss": 0.0599, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9278350515463918, |
|
"grad_norm": 1.0028278827667236, |
|
"learning_rate": 9.278350515463918e-05, |
|
"loss": 0.0746, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9450171821305842, |
|
"grad_norm": 0.8045145869255066, |
|
"learning_rate": 9.450171821305843e-05, |
|
"loss": 0.0604, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9621993127147767, |
|
"grad_norm": 0.5860382914543152, |
|
"learning_rate": 9.621993127147767e-05, |
|
"loss": 0.0635, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.979381443298969, |
|
"grad_norm": 0.9446794986724854, |
|
"learning_rate": 9.793814432989691e-05, |
|
"loss": 0.0711, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.9965635738831615, |
|
"grad_norm": 0.9152433276176453, |
|
"learning_rate": 9.965635738831616e-05, |
|
"loss": 0.0651, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.013745704467354, |
|
"grad_norm": 0.7524177432060242, |
|
"learning_rate": 9.999987081161148e-05, |
|
"loss": 0.0593, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.0309278350515463, |
|
"grad_norm": 1.0932648181915283, |
|
"learning_rate": 9.999934598492723e-05, |
|
"loss": 0.0585, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0481099656357389, |
|
"grad_norm": 0.5448580384254456, |
|
"learning_rate": 9.999841744990731e-05, |
|
"loss": 0.0705, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0652920962199313, |
|
"grad_norm": 0.8481371402740479, |
|
"learning_rate": 9.999708521404896e-05, |
|
"loss": 0.0763, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0824742268041236, |
|
"grad_norm": 0.8610166311264038, |
|
"learning_rate": 9.999534928810904e-05, |
|
"loss": 0.0598, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0996563573883162, |
|
"grad_norm": 0.807761549949646, |
|
"learning_rate": 9.999320968610386e-05, |
|
"loss": 0.0567, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.1168384879725086, |
|
"grad_norm": 0.4783917963504791, |
|
"learning_rate": 9.999066642530917e-05, |
|
"loss": 0.056, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.134020618556701, |
|
"grad_norm": 0.6751272678375244, |
|
"learning_rate": 9.998771952625992e-05, |
|
"loss": 0.0498, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.1512027491408934, |
|
"grad_norm": 0.8272377848625183, |
|
"learning_rate": 9.998436901275022e-05, |
|
"loss": 0.0449, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.168384879725086, |
|
"grad_norm": 0.8059535026550293, |
|
"learning_rate": 9.998061491183297e-05, |
|
"loss": 0.0624, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1855670103092784, |
|
"grad_norm": 0.7479894757270813, |
|
"learning_rate": 9.997645725381986e-05, |
|
"loss": 0.0471, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.2027491408934707, |
|
"grad_norm": 0.6483791470527649, |
|
"learning_rate": 9.997189607228092e-05, |
|
"loss": 0.0497, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.2199312714776633, |
|
"grad_norm": 0.8845646381378174, |
|
"learning_rate": 9.99669314040444e-05, |
|
"loss": 0.0617, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.2371134020618557, |
|
"grad_norm": 0.8434107303619385, |
|
"learning_rate": 9.996156328919635e-05, |
|
"loss": 0.0447, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.254295532646048, |
|
"grad_norm": 0.6829891800880432, |
|
"learning_rate": 9.995579177108041e-05, |
|
"loss": 0.059, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.2714776632302405, |
|
"grad_norm": 0.5923603773117065, |
|
"learning_rate": 9.994961689629738e-05, |
|
"loss": 0.0483, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.2886597938144329, |
|
"grad_norm": 0.48384591937065125, |
|
"learning_rate": 9.994303871470489e-05, |
|
"loss": 0.0565, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.3058419243986255, |
|
"grad_norm": 0.7825417518615723, |
|
"learning_rate": 9.993605727941697e-05, |
|
"loss": 0.0545, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.3230240549828178, |
|
"grad_norm": 0.9657111167907715, |
|
"learning_rate": 9.992867264680361e-05, |
|
"loss": 0.0532, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.3402061855670104, |
|
"grad_norm": 1.0996328592300415, |
|
"learning_rate": 9.992088487649038e-05, |
|
"loss": 0.0637, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.3573883161512028, |
|
"grad_norm": 0.8697621822357178, |
|
"learning_rate": 9.991269403135783e-05, |
|
"loss": 0.0445, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.3745704467353952, |
|
"grad_norm": 0.4780273735523224, |
|
"learning_rate": 9.990410017754108e-05, |
|
"loss": 0.0509, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3917525773195876, |
|
"grad_norm": 0.386453777551651, |
|
"learning_rate": 9.989510338442925e-05, |
|
"loss": 0.0465, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.40893470790378, |
|
"grad_norm": 0.7011645436286926, |
|
"learning_rate": 9.98857037246649e-05, |
|
"loss": 0.0659, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.4261168384879725, |
|
"grad_norm": 0.47305113077163696, |
|
"learning_rate": 9.987590127414344e-05, |
|
"loss": 0.0391, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.443298969072165, |
|
"grad_norm": 0.6128239035606384, |
|
"learning_rate": 9.986569611201251e-05, |
|
"loss": 0.0433, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.4604810996563573, |
|
"grad_norm": 0.6045581698417664, |
|
"learning_rate": 9.985508832067139e-05, |
|
"loss": 0.0485, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.47766323024055, |
|
"grad_norm": 0.6033497452735901, |
|
"learning_rate": 9.984407798577027e-05, |
|
"loss": 0.049, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4948453608247423, |
|
"grad_norm": 0.47953736782073975, |
|
"learning_rate": 9.98326651962096e-05, |
|
"loss": 0.0539, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.5120274914089347, |
|
"grad_norm": 0.8113358020782471, |
|
"learning_rate": 9.982085004413933e-05, |
|
"loss": 0.0481, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.529209621993127, |
|
"grad_norm": 0.5726741552352905, |
|
"learning_rate": 9.980863262495821e-05, |
|
"loss": 0.0512, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.5463917525773194, |
|
"grad_norm": 0.6560239195823669, |
|
"learning_rate": 9.979601303731306e-05, |
|
"loss": 0.0464, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.563573883161512, |
|
"grad_norm": 0.5235106348991394, |
|
"learning_rate": 9.978299138309781e-05, |
|
"loss": 0.0486, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.5807560137457046, |
|
"grad_norm": 0.6439309120178223, |
|
"learning_rate": 9.976956776745287e-05, |
|
"loss": 0.0536, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.597938144329897, |
|
"grad_norm": 0.8001301884651184, |
|
"learning_rate": 9.975574229876417e-05, |
|
"loss": 0.0641, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.6151202749140894, |
|
"grad_norm": 0.6167306900024414, |
|
"learning_rate": 9.974151508866231e-05, |
|
"loss": 0.0372, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.6323024054982818, |
|
"grad_norm": 0.5872222781181335, |
|
"learning_rate": 9.972688625202164e-05, |
|
"loss": 0.0452, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.6494845360824741, |
|
"grad_norm": 0.4873111844062805, |
|
"learning_rate": 9.97118559069594e-05, |
|
"loss": 0.0524, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.4051572382450104, |
|
"learning_rate": 9.969642417483466e-05, |
|
"loss": 0.0407, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.6838487972508591, |
|
"grad_norm": 0.43837177753448486, |
|
"learning_rate": 9.968059118024744e-05, |
|
"loss": 0.0471, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.7010309278350515, |
|
"grad_norm": 0.5712767243385315, |
|
"learning_rate": 9.966435705103765e-05, |
|
"loss": 0.0398, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.718213058419244, |
|
"grad_norm": 0.6849848628044128, |
|
"learning_rate": 9.964772191828407e-05, |
|
"loss": 0.0428, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.7353951890034365, |
|
"grad_norm": 0.3726734220981598, |
|
"learning_rate": 9.96306859163033e-05, |
|
"loss": 0.0584, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.7525773195876289, |
|
"grad_norm": 0.3805288076400757, |
|
"learning_rate": 9.961324918264865e-05, |
|
"loss": 0.0414, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.7697594501718212, |
|
"grad_norm": 0.45931509137153625, |
|
"learning_rate": 9.959541185810906e-05, |
|
"loss": 0.0438, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.7869415807560136, |
|
"grad_norm": 0.4892406761646271, |
|
"learning_rate": 9.957717408670793e-05, |
|
"loss": 0.0402, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.8041237113402062, |
|
"grad_norm": 0.6873617768287659, |
|
"learning_rate": 9.955853601570202e-05, |
|
"loss": 0.049, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.8213058419243986, |
|
"grad_norm": 0.8491326570510864, |
|
"learning_rate": 9.953949779558017e-05, |
|
"loss": 0.0532, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.8384879725085912, |
|
"grad_norm": 0.45153722167015076, |
|
"learning_rate": 9.952005958006217e-05, |
|
"loss": 0.0403, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.8556701030927836, |
|
"grad_norm": 0.608630359172821, |
|
"learning_rate": 9.950022152609745e-05, |
|
"loss": 0.0479, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.872852233676976, |
|
"grad_norm": 0.5155346989631653, |
|
"learning_rate": 9.947998379386388e-05, |
|
"loss": 0.045, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.8900343642611683, |
|
"grad_norm": 0.2339087724685669, |
|
"learning_rate": 9.945934654676639e-05, |
|
"loss": 0.0361, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.9072164948453607, |
|
"grad_norm": 0.4478403329849243, |
|
"learning_rate": 9.943830995143577e-05, |
|
"loss": 0.0355, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.9243986254295533, |
|
"grad_norm": 0.3183349072933197, |
|
"learning_rate": 9.941687417772718e-05, |
|
"loss": 0.0349, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.9415807560137457, |
|
"grad_norm": 0.3854424059391022, |
|
"learning_rate": 9.939503939871893e-05, |
|
"loss": 0.0329, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.9587628865979383, |
|
"grad_norm": 0.7198600172996521, |
|
"learning_rate": 9.937280579071095e-05, |
|
"loss": 0.0372, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.9759450171821307, |
|
"grad_norm": 0.5371730923652649, |
|
"learning_rate": 9.935017353322347e-05, |
|
"loss": 0.0388, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.993127147766323, |
|
"grad_norm": 0.6025398373603821, |
|
"learning_rate": 9.932714280899547e-05, |
|
"loss": 0.0334, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.0103092783505154, |
|
"grad_norm": 0.4118864834308624, |
|
"learning_rate": 9.930371380398331e-05, |
|
"loss": 0.0429, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.027491408934708, |
|
"grad_norm": 0.5828579664230347, |
|
"learning_rate": 9.927988670735915e-05, |
|
"loss": 0.0377, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.0446735395189, |
|
"grad_norm": 0.45602017641067505, |
|
"learning_rate": 9.925566171150945e-05, |
|
"loss": 0.0364, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.0618556701030926, |
|
"grad_norm": 0.236759752035141, |
|
"learning_rate": 9.923103901203343e-05, |
|
"loss": 0.0456, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.0790378006872854, |
|
"grad_norm": 0.5670115947723389, |
|
"learning_rate": 9.920601880774148e-05, |
|
"loss": 0.0514, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.0962199312714778, |
|
"grad_norm": 0.5565935373306274, |
|
"learning_rate": 9.918060130065354e-05, |
|
"loss": 0.0413, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.11340206185567, |
|
"grad_norm": 0.28620976209640503, |
|
"learning_rate": 9.915478669599747e-05, |
|
"loss": 0.0345, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.1305841924398625, |
|
"grad_norm": 0.598374605178833, |
|
"learning_rate": 9.912857520220743e-05, |
|
"loss": 0.0409, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.147766323024055, |
|
"grad_norm": 0.4782467186450958, |
|
"learning_rate": 9.910196703092216e-05, |
|
"loss": 0.0341, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.1649484536082473, |
|
"grad_norm": 0.3740648925304413, |
|
"learning_rate": 9.907496239698327e-05, |
|
"loss": 0.0334, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.1821305841924397, |
|
"grad_norm": 0.514352560043335, |
|
"learning_rate": 9.904756151843353e-05, |
|
"loss": 0.033, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.1993127147766325, |
|
"grad_norm": 0.6190779209136963, |
|
"learning_rate": 9.90197646165151e-05, |
|
"loss": 0.0382, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.216494845360825, |
|
"grad_norm": 0.45846354961395264, |
|
"learning_rate": 9.899157191566775e-05, |
|
"loss": 0.0352, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.2336769759450172, |
|
"grad_norm": 0.3824189603328705, |
|
"learning_rate": 9.8962983643527e-05, |
|
"loss": 0.0401, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.2508591065292096, |
|
"grad_norm": 0.3505632281303406, |
|
"learning_rate": 9.893400003092237e-05, |
|
"loss": 0.0335, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.268041237113402, |
|
"grad_norm": 0.55964595079422, |
|
"learning_rate": 9.890462131187543e-05, |
|
"loss": 0.0349, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.2852233676975944, |
|
"grad_norm": 0.41852259635925293, |
|
"learning_rate": 9.887484772359795e-05, |
|
"loss": 0.0489, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.3024054982817868, |
|
"grad_norm": 0.7132606506347656, |
|
"learning_rate": 9.884467950648998e-05, |
|
"loss": 0.0531, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.319587628865979, |
|
"grad_norm": 0.5213425755500793, |
|
"learning_rate": 9.881411690413796e-05, |
|
"loss": 0.034, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.336769759450172, |
|
"grad_norm": 0.6458540558815002, |
|
"learning_rate": 9.878316016331262e-05, |
|
"loss": 0.0473, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.3539518900343643, |
|
"grad_norm": 0.41432708501815796, |
|
"learning_rate": 9.875180953396714e-05, |
|
"loss": 0.0321, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.3711340206185567, |
|
"grad_norm": 0.3965621888637543, |
|
"learning_rate": 9.872006526923503e-05, |
|
"loss": 0.0351, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.388316151202749, |
|
"grad_norm": 0.2506723999977112, |
|
"learning_rate": 9.868792762542814e-05, |
|
"loss": 0.0402, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.4054982817869415, |
|
"grad_norm": 0.2948648929595947, |
|
"learning_rate": 9.865539686203455e-05, |
|
"loss": 0.0335, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.422680412371134, |
|
"grad_norm": 0.5881168842315674, |
|
"learning_rate": 9.862247324171652e-05, |
|
"loss": 0.0473, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.4398625429553267, |
|
"grad_norm": 0.5597307085990906, |
|
"learning_rate": 9.858915703030829e-05, |
|
"loss": 0.0387, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.457044673539519, |
|
"grad_norm": 0.3447171449661255, |
|
"learning_rate": 9.855544849681404e-05, |
|
"loss": 0.0395, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.4742268041237114, |
|
"grad_norm": 0.675528347492218, |
|
"learning_rate": 9.852134791340567e-05, |
|
"loss": 0.0303, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.491408934707904, |
|
"grad_norm": 0.4080379903316498, |
|
"learning_rate": 9.848685555542055e-05, |
|
"loss": 0.0414, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.508591065292096, |
|
"grad_norm": 0.34045320749282837, |
|
"learning_rate": 9.845197170135939e-05, |
|
"loss": 0.0291, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.5257731958762886, |
|
"grad_norm": 0.34041810035705566, |
|
"learning_rate": 9.841669663288391e-05, |
|
"loss": 0.0287, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.542955326460481, |
|
"grad_norm": 0.35550206899642944, |
|
"learning_rate": 9.838103063481464e-05, |
|
"loss": 0.035, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.5601374570446733, |
|
"grad_norm": 0.5085458755493164, |
|
"learning_rate": 9.834497399512855e-05, |
|
"loss": 0.0286, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.5773195876288657, |
|
"grad_norm": 0.3794465959072113, |
|
"learning_rate": 9.830852700495676e-05, |
|
"loss": 0.0383, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.5945017182130585, |
|
"grad_norm": 0.20820270478725433, |
|
"learning_rate": 9.82716899585822e-05, |
|
"loss": 0.0229, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.611683848797251, |
|
"grad_norm": 0.31715983152389526, |
|
"learning_rate": 9.823446315343723e-05, |
|
"loss": 0.0267, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.6288659793814433, |
|
"grad_norm": 0.518182635307312, |
|
"learning_rate": 9.819684689010119e-05, |
|
"loss": 0.0328, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.6460481099656357, |
|
"grad_norm": 0.3830466568470001, |
|
"learning_rate": 9.815884147229804e-05, |
|
"loss": 0.0289, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.663230240549828, |
|
"grad_norm": 0.4509371817111969, |
|
"learning_rate": 9.812044720689387e-05, |
|
"loss": 0.0369, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.680412371134021, |
|
"grad_norm": 0.5616033673286438, |
|
"learning_rate": 9.808166440389446e-05, |
|
"loss": 0.0264, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.6975945017182132, |
|
"grad_norm": 0.5223531723022461, |
|
"learning_rate": 9.80424933764427e-05, |
|
"loss": 0.0265, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.7147766323024056, |
|
"grad_norm": 0.5588796734809875, |
|
"learning_rate": 9.800293444081612e-05, |
|
"loss": 0.0298, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.731958762886598, |
|
"grad_norm": 0.5224287509918213, |
|
"learning_rate": 9.796298791642435e-05, |
|
"loss": 0.0334, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.7491408934707904, |
|
"grad_norm": 0.510735809803009, |
|
"learning_rate": 9.792265412580654e-05, |
|
"loss": 0.0344, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.7663230240549828, |
|
"grad_norm": 0.46988189220428467, |
|
"learning_rate": 9.788193339462866e-05, |
|
"loss": 0.034, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.783505154639175, |
|
"grad_norm": 0.43194422125816345, |
|
"learning_rate": 9.7840826051681e-05, |
|
"loss": 0.033, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.8006872852233675, |
|
"grad_norm": 0.5727249383926392, |
|
"learning_rate": 9.779933242887542e-05, |
|
"loss": 0.0321, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.81786941580756, |
|
"grad_norm": 0.3941832482814789, |
|
"learning_rate": 9.775745286124277e-05, |
|
"loss": 0.0286, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.8350515463917527, |
|
"grad_norm": 0.5706576704978943, |
|
"learning_rate": 9.771518768693004e-05, |
|
"loss": 0.0271, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.852233676975945, |
|
"grad_norm": 0.5128160715103149, |
|
"learning_rate": 9.76725372471978e-05, |
|
"loss": 0.0434, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.8694158075601375, |
|
"grad_norm": 0.34409016370773315, |
|
"learning_rate": 9.762950188641728e-05, |
|
"loss": 0.0314, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.88659793814433, |
|
"grad_norm": 0.532747209072113, |
|
"learning_rate": 9.758608195206771e-05, |
|
"loss": 0.0369, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.9037800687285222, |
|
"grad_norm": 0.5421701073646545, |
|
"learning_rate": 9.754227779473349e-05, |
|
"loss": 0.0404, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.9209621993127146, |
|
"grad_norm": 0.36500459909439087, |
|
"learning_rate": 9.749808976810128e-05, |
|
"loss": 0.0332, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.9381443298969074, |
|
"grad_norm": 0.5636774897575378, |
|
"learning_rate": 9.745351822895727e-05, |
|
"loss": 0.0309, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.9553264604811, |
|
"grad_norm": 0.408263236284256, |
|
"learning_rate": 9.740856353718419e-05, |
|
"loss": 0.033, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.972508591065292, |
|
"grad_norm": 0.4448431432247162, |
|
"learning_rate": 9.736322605575845e-05, |
|
"loss": 0.0248, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.9896907216494846, |
|
"grad_norm": 0.3676033020019531, |
|
"learning_rate": 9.731750615074724e-05, |
|
"loss": 0.036, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 3.006872852233677, |
|
"grad_norm": 0.3884856104850769, |
|
"learning_rate": 9.727140419130553e-05, |
|
"loss": 0.0256, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 3.0240549828178693, |
|
"grad_norm": 0.4114404320716858, |
|
"learning_rate": 9.72249205496731e-05, |
|
"loss": 0.0273, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 3.0412371134020617, |
|
"grad_norm": 0.5628842711448669, |
|
"learning_rate": 9.717805560117149e-05, |
|
"loss": 0.0254, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 3.058419243986254, |
|
"grad_norm": 0.34935763478279114, |
|
"learning_rate": 9.71308097242011e-05, |
|
"loss": 0.0246, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 3.075601374570447, |
|
"grad_norm": 0.8378509283065796, |
|
"learning_rate": 9.708318330023798e-05, |
|
"loss": 0.0358, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 3.0927835051546393, |
|
"grad_norm": 0.4501832127571106, |
|
"learning_rate": 9.703517671383086e-05, |
|
"loss": 0.0314, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.1099656357388317, |
|
"grad_norm": 0.5251947641372681, |
|
"learning_rate": 9.698679035259801e-05, |
|
"loss": 0.0291, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 3.127147766323024, |
|
"grad_norm": 0.36063244938850403, |
|
"learning_rate": 9.693802460722405e-05, |
|
"loss": 0.0244, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 3.1443298969072164, |
|
"grad_norm": 0.2640397548675537, |
|
"learning_rate": 9.688887987145691e-05, |
|
"loss": 0.0291, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 3.161512027491409, |
|
"grad_norm": 0.35009852051734924, |
|
"learning_rate": 9.683935654210457e-05, |
|
"loss": 0.0355, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 3.178694158075601, |
|
"grad_norm": 0.455991268157959, |
|
"learning_rate": 9.678945501903188e-05, |
|
"loss": 0.0244, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 3.195876288659794, |
|
"grad_norm": 0.2577104866504669, |
|
"learning_rate": 9.673917570515732e-05, |
|
"loss": 0.0277, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 3.2130584192439864, |
|
"grad_norm": 0.46351000666618347, |
|
"learning_rate": 9.668851900644975e-05, |
|
"loss": 0.0249, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 3.2302405498281788, |
|
"grad_norm": 0.4203677773475647, |
|
"learning_rate": 9.663748533192516e-05, |
|
"loss": 0.0251, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 3.247422680412371, |
|
"grad_norm": 0.24778026342391968, |
|
"learning_rate": 9.658607509364337e-05, |
|
"loss": 0.0286, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 3.2646048109965635, |
|
"grad_norm": 0.5941663980484009, |
|
"learning_rate": 9.653428870670459e-05, |
|
"loss": 0.0375, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.281786941580756, |
|
"grad_norm": 0.6710448265075684, |
|
"learning_rate": 9.648212658924625e-05, |
|
"loss": 0.0268, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 3.2989690721649483, |
|
"grad_norm": 0.40934911370277405, |
|
"learning_rate": 9.642958916243946e-05, |
|
"loss": 0.0187, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 3.3161512027491407, |
|
"grad_norm": 0.3697362542152405, |
|
"learning_rate": 9.637667685048575e-05, |
|
"loss": 0.0286, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.40777045488357544, |
|
"learning_rate": 9.63233900806135e-05, |
|
"loss": 0.0232, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 3.350515463917526, |
|
"grad_norm": 0.2753160893917084, |
|
"learning_rate": 9.62697292830746e-05, |
|
"loss": 0.0305, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 3.3676975945017182, |
|
"grad_norm": 0.5245633721351624, |
|
"learning_rate": 9.6215694891141e-05, |
|
"loss": 0.0268, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 3.3848797250859106, |
|
"grad_norm": 0.4454520344734192, |
|
"learning_rate": 9.616128734110103e-05, |
|
"loss": 0.0334, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 3.402061855670103, |
|
"grad_norm": 0.40832188725471497, |
|
"learning_rate": 9.61065070722561e-05, |
|
"loss": 0.0375, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 3.4192439862542954, |
|
"grad_norm": 0.4421581029891968, |
|
"learning_rate": 9.6051354526917e-05, |
|
"loss": 0.0291, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 3.436426116838488, |
|
"grad_norm": 0.3832218050956726, |
|
"learning_rate": 9.59958301504004e-05, |
|
"loss": 0.0348, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.4536082474226806, |
|
"grad_norm": 0.2825784683227539, |
|
"learning_rate": 9.593993439102526e-05, |
|
"loss": 0.0285, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 3.470790378006873, |
|
"grad_norm": 0.4989912211894989, |
|
"learning_rate": 9.588366770010914e-05, |
|
"loss": 0.0298, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 3.4879725085910653, |
|
"grad_norm": 0.38946759700775146, |
|
"learning_rate": 9.582703053196464e-05, |
|
"loss": 0.0294, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 3.5051546391752577, |
|
"grad_norm": 0.3553588092327118, |
|
"learning_rate": 9.577002334389569e-05, |
|
"loss": 0.0281, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 3.52233676975945, |
|
"grad_norm": 0.48752427101135254, |
|
"learning_rate": 9.571264659619382e-05, |
|
"loss": 0.026, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 3.5395189003436425, |
|
"grad_norm": 0.3820585310459137, |
|
"learning_rate": 9.565490075213452e-05, |
|
"loss": 0.0259, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 3.556701030927835, |
|
"grad_norm": 0.35598281025886536, |
|
"learning_rate": 9.55967862779735e-05, |
|
"loss": 0.0343, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 3.5738831615120272, |
|
"grad_norm": 0.4193035364151001, |
|
"learning_rate": 9.55383036429428e-05, |
|
"loss": 0.0296, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 3.59106529209622, |
|
"grad_norm": 0.4993601441383362, |
|
"learning_rate": 9.547945331924717e-05, |
|
"loss": 0.0236, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 3.6082474226804124, |
|
"grad_norm": 0.3591003119945526, |
|
"learning_rate": 9.542023578206015e-05, |
|
"loss": 0.0301, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.625429553264605, |
|
"grad_norm": 0.30369478464126587, |
|
"learning_rate": 9.536065150952025e-05, |
|
"loss": 0.0327, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 3.642611683848797, |
|
"grad_norm": 0.37964117527008057, |
|
"learning_rate": 9.530070098272712e-05, |
|
"loss": 0.0351, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 3.6597938144329896, |
|
"grad_norm": 0.4031108617782593, |
|
"learning_rate": 9.524038468573764e-05, |
|
"loss": 0.0334, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 3.6769759450171824, |
|
"grad_norm": 0.24876996874809265, |
|
"learning_rate": 9.517970310556202e-05, |
|
"loss": 0.0238, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 3.6941580756013748, |
|
"grad_norm": 0.5632336139678955, |
|
"learning_rate": 9.511865673215986e-05, |
|
"loss": 0.0245, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 3.711340206185567, |
|
"grad_norm": 0.4374890625476837, |
|
"learning_rate": 9.50572460584362e-05, |
|
"loss": 0.0364, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 3.7285223367697595, |
|
"grad_norm": 0.4703497588634491, |
|
"learning_rate": 9.499547158023755e-05, |
|
"loss": 0.0248, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 3.745704467353952, |
|
"grad_norm": 0.3067072927951813, |
|
"learning_rate": 9.493333379634786e-05, |
|
"loss": 0.0203, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 3.7628865979381443, |
|
"grad_norm": 0.5396534204483032, |
|
"learning_rate": 9.487083320848454e-05, |
|
"loss": 0.0296, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 3.7800687285223367, |
|
"grad_norm": 0.2977238595485687, |
|
"learning_rate": 9.480797032129432e-05, |
|
"loss": 0.0243, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.797250859106529, |
|
"grad_norm": 0.35456737875938416, |
|
"learning_rate": 9.474474564234931e-05, |
|
"loss": 0.0331, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 3.8144329896907214, |
|
"grad_norm": 0.4030454456806183, |
|
"learning_rate": 9.468115968214276e-05, |
|
"loss": 0.0271, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 3.8316151202749142, |
|
"grad_norm": 0.4009501039981842, |
|
"learning_rate": 9.461721295408505e-05, |
|
"loss": 0.025, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 3.8487972508591066, |
|
"grad_norm": 0.34113046526908875, |
|
"learning_rate": 9.455290597449945e-05, |
|
"loss": 0.0298, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 3.865979381443299, |
|
"grad_norm": 0.4473305344581604, |
|
"learning_rate": 9.448823926261805e-05, |
|
"loss": 0.0293, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.8831615120274914, |
|
"grad_norm": 0.4152556359767914, |
|
"learning_rate": 9.442321334057748e-05, |
|
"loss": 0.0365, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 3.9003436426116838, |
|
"grad_norm": 0.5801966190338135, |
|
"learning_rate": 9.435782873341474e-05, |
|
"loss": 0.0283, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 3.917525773195876, |
|
"grad_norm": 0.5143575668334961, |
|
"learning_rate": 9.429208596906296e-05, |
|
"loss": 0.0276, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 3.934707903780069, |
|
"grad_norm": 0.28408244252204895, |
|
"learning_rate": 9.422598557834712e-05, |
|
"loss": 0.0266, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 3.9518900343642613, |
|
"grad_norm": 0.30861398577690125, |
|
"learning_rate": 9.415952809497979e-05, |
|
"loss": 0.0307, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.9690721649484537, |
|
"grad_norm": 0.5015305280685425, |
|
"learning_rate": 9.409271405555677e-05, |
|
"loss": 0.0238, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 3.986254295532646, |
|
"grad_norm": 0.42114853858947754, |
|
"learning_rate": 9.402554399955281e-05, |
|
"loss": 0.0297, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 4.0034364261168385, |
|
"grad_norm": 0.38618704676628113, |
|
"learning_rate": 9.395801846931726e-05, |
|
"loss": 0.0274, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 4.020618556701031, |
|
"grad_norm": 0.44997620582580566, |
|
"learning_rate": 9.389013801006961e-05, |
|
"loss": 0.0294, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 4.037800687285223, |
|
"grad_norm": 0.4600159227848053, |
|
"learning_rate": 9.382190316989518e-05, |
|
"loss": 0.0286, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 4.054982817869416, |
|
"grad_norm": 0.35218673944473267, |
|
"learning_rate": 9.375331449974066e-05, |
|
"loss": 0.0248, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 4.072164948453608, |
|
"grad_norm": 0.26790571212768555, |
|
"learning_rate": 9.368437255340965e-05, |
|
"loss": 0.0287, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 4.0893470790378, |
|
"grad_norm": 0.356351375579834, |
|
"learning_rate": 9.361507788755818e-05, |
|
"loss": 0.0207, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 4.106529209621993, |
|
"grad_norm": 0.3762167692184448, |
|
"learning_rate": 9.354543106169029e-05, |
|
"loss": 0.0303, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 4.123711340206185, |
|
"grad_norm": 0.21644559502601624, |
|
"learning_rate": 9.347543263815339e-05, |
|
"loss": 0.0262, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.140893470790378, |
|
"grad_norm": 0.3905271887779236, |
|
"learning_rate": 9.340508318213383e-05, |
|
"loss": 0.0267, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 4.158075601374571, |
|
"grad_norm": 0.25276127457618713, |
|
"learning_rate": 9.333438326165227e-05, |
|
"loss": 0.0292, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 4.175257731958763, |
|
"grad_norm": 0.21106575429439545, |
|
"learning_rate": 9.326333344755912e-05, |
|
"loss": 0.0218, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 4.1924398625429555, |
|
"grad_norm": 0.37403470277786255, |
|
"learning_rate": 9.319193431352993e-05, |
|
"loss": 0.0261, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 4.209621993127148, |
|
"grad_norm": 0.23083224892616272, |
|
"learning_rate": 9.312018643606074e-05, |
|
"loss": 0.0268, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 4.22680412371134, |
|
"grad_norm": 0.29775136709213257, |
|
"learning_rate": 9.304809039446347e-05, |
|
"loss": 0.0286, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 4.243986254295533, |
|
"grad_norm": 0.39073804020881653, |
|
"learning_rate": 9.297564677086118e-05, |
|
"loss": 0.0231, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 4.261168384879725, |
|
"grad_norm": 0.3536919951438904, |
|
"learning_rate": 9.290285615018342e-05, |
|
"loss": 0.0269, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 4.278350515463917, |
|
"grad_norm": 0.37961915135383606, |
|
"learning_rate": 9.282971912016149e-05, |
|
"loss": 0.0312, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 4.29553264604811, |
|
"grad_norm": 0.444950670003891, |
|
"learning_rate": 9.275623627132368e-05, |
|
"loss": 0.0275, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.312714776632302, |
|
"grad_norm": 0.3781861364841461, |
|
"learning_rate": 9.268240819699054e-05, |
|
"loss": 0.0285, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 4.329896907216495, |
|
"grad_norm": 0.2931497395038605, |
|
"learning_rate": 9.260823549327002e-05, |
|
"loss": 0.0258, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 4.347079037800687, |
|
"grad_norm": 0.26529255509376526, |
|
"learning_rate": 9.253371875905274e-05, |
|
"loss": 0.026, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 4.364261168384879, |
|
"grad_norm": 0.6221164464950562, |
|
"learning_rate": 9.245885859600712e-05, |
|
"loss": 0.0366, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 4.381443298969073, |
|
"grad_norm": 0.39402952790260315, |
|
"learning_rate": 9.238365560857447e-05, |
|
"loss": 0.0237, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 4.398625429553265, |
|
"grad_norm": 0.33800095319747925, |
|
"learning_rate": 9.230811040396423e-05, |
|
"loss": 0.0328, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 4.415807560137457, |
|
"grad_norm": 0.21751320362091064, |
|
"learning_rate": 9.223222359214891e-05, |
|
"loss": 0.0315, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 4.43298969072165, |
|
"grad_norm": 0.34266844391822815, |
|
"learning_rate": 9.215599578585936e-05, |
|
"loss": 0.0374, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 4.450171821305842, |
|
"grad_norm": 0.3306879997253418, |
|
"learning_rate": 9.207942760057958e-05, |
|
"loss": 0.023, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 4.4673539518900345, |
|
"grad_norm": 0.3142191767692566, |
|
"learning_rate": 9.200251965454199e-05, |
|
"loss": 0.0263, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.484536082474227, |
|
"grad_norm": 0.4040457010269165, |
|
"learning_rate": 9.192527256872226e-05, |
|
"loss": 0.0269, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 4.501718213058419, |
|
"grad_norm": 0.3506450951099396, |
|
"learning_rate": 9.184768696683443e-05, |
|
"loss": 0.0227, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 4.518900343642612, |
|
"grad_norm": 0.4573422074317932, |
|
"learning_rate": 9.176976347532575e-05, |
|
"loss": 0.0312, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 4.536082474226804, |
|
"grad_norm": 0.2337106168270111, |
|
"learning_rate": 9.169150272337172e-05, |
|
"loss": 0.028, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 4.553264604810996, |
|
"grad_norm": 0.28833648562431335, |
|
"learning_rate": 9.161290534287099e-05, |
|
"loss": 0.0245, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 4.570446735395189, |
|
"grad_norm": 0.24830326437950134, |
|
"learning_rate": 9.153397196844017e-05, |
|
"loss": 0.0218, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 4.587628865979381, |
|
"grad_norm": 0.55474853515625, |
|
"learning_rate": 9.145470323740885e-05, |
|
"loss": 0.0247, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 4.6048109965635735, |
|
"grad_norm": 0.49275097250938416, |
|
"learning_rate": 9.137509978981435e-05, |
|
"loss": 0.0276, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 4.621993127147766, |
|
"grad_norm": 0.30603882670402527, |
|
"learning_rate": 9.129516226839658e-05, |
|
"loss": 0.0208, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 4.639175257731958, |
|
"grad_norm": 0.32763534784317017, |
|
"learning_rate": 9.121489131859286e-05, |
|
"loss": 0.0267, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.6563573883161515, |
|
"grad_norm": 0.41010305285453796, |
|
"learning_rate": 9.113428758853268e-05, |
|
"loss": 0.0223, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 4.673539518900344, |
|
"grad_norm": 0.2709559500217438, |
|
"learning_rate": 9.105335172903253e-05, |
|
"loss": 0.0253, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 4.690721649484536, |
|
"grad_norm": 0.23412011563777924, |
|
"learning_rate": 9.097208439359057e-05, |
|
"loss": 0.0146, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 4.707903780068729, |
|
"grad_norm": 0.4020395576953888, |
|
"learning_rate": 9.08904862383814e-05, |
|
"loss": 0.0188, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 4.725085910652921, |
|
"grad_norm": 0.2301657497882843, |
|
"learning_rate": 9.080855792225076e-05, |
|
"loss": 0.0227, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 4.742268041237113, |
|
"grad_norm": 0.3554215133190155, |
|
"learning_rate": 9.072630010671015e-05, |
|
"loss": 0.0213, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 4.759450171821306, |
|
"grad_norm": 0.39176028966903687, |
|
"learning_rate": 9.064371345593161e-05, |
|
"loss": 0.0208, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 4.776632302405498, |
|
"grad_norm": 0.304556280374527, |
|
"learning_rate": 9.056079863674223e-05, |
|
"loss": 0.0211, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 4.793814432989691, |
|
"grad_norm": 0.3643367290496826, |
|
"learning_rate": 9.047755631861884e-05, |
|
"loss": 0.0237, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 4.810996563573883, |
|
"grad_norm": 0.379891961812973, |
|
"learning_rate": 9.039398717368259e-05, |
|
"loss": 0.025, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.828178694158075, |
|
"grad_norm": 0.3559863269329071, |
|
"learning_rate": 9.031009187669353e-05, |
|
"loss": 0.0204, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 4.845360824742268, |
|
"grad_norm": 0.43560439348220825, |
|
"learning_rate": 9.02258711050451e-05, |
|
"loss": 0.0254, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 4.862542955326461, |
|
"grad_norm": 0.28781425952911377, |
|
"learning_rate": 9.014132553875878e-05, |
|
"loss": 0.0319, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 4.879725085910653, |
|
"grad_norm": 0.2920217514038086, |
|
"learning_rate": 9.005645586047847e-05, |
|
"loss": 0.0259, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 4.896907216494846, |
|
"grad_norm": 0.28286901116371155, |
|
"learning_rate": 8.997126275546509e-05, |
|
"loss": 0.0213, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 4.914089347079038, |
|
"grad_norm": 0.3962228298187256, |
|
"learning_rate": 8.988574691159095e-05, |
|
"loss": 0.0257, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 4.9312714776632305, |
|
"grad_norm": 0.43689677119255066, |
|
"learning_rate": 8.979990901933428e-05, |
|
"loss": 0.0296, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 4.948453608247423, |
|
"grad_norm": 0.35005268454551697, |
|
"learning_rate": 8.971374977177356e-05, |
|
"loss": 0.0259, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 4.965635738831615, |
|
"grad_norm": 0.6228940486907959, |
|
"learning_rate": 8.962726986458207e-05, |
|
"loss": 0.0307, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 4.982817869415808, |
|
"grad_norm": 0.2236226499080658, |
|
"learning_rate": 8.954046999602211e-05, |
|
"loss": 0.0249, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.42723384499549866, |
|
"learning_rate": 8.945335086693942e-05, |
|
"loss": 0.0244, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 5.017182130584192, |
|
"grad_norm": 0.2944800853729248, |
|
"learning_rate": 8.936591318075764e-05, |
|
"loss": 0.0248, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 5.034364261168385, |
|
"grad_norm": 0.30557361245155334, |
|
"learning_rate": 8.927815764347242e-05, |
|
"loss": 0.0204, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 5.051546391752577, |
|
"grad_norm": 0.3732447922229767, |
|
"learning_rate": 8.919008496364587e-05, |
|
"loss": 0.0308, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 5.0687285223367695, |
|
"grad_norm": 0.30933091044425964, |
|
"learning_rate": 8.910169585240078e-05, |
|
"loss": 0.029, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 5.085910652920962, |
|
"grad_norm": 0.554576575756073, |
|
"learning_rate": 8.901299102341494e-05, |
|
"loss": 0.03, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 5.103092783505154, |
|
"grad_norm": 0.43166640400886536, |
|
"learning_rate": 8.892397119291526e-05, |
|
"loss": 0.0241, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 5.120274914089347, |
|
"grad_norm": 0.4924606680870056, |
|
"learning_rate": 8.883463707967211e-05, |
|
"loss": 0.0224, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 5.13745704467354, |
|
"grad_norm": 0.3466743230819702, |
|
"learning_rate": 8.874498940499346e-05, |
|
"loss": 0.0234, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 5.154639175257732, |
|
"grad_norm": 0.5283942222595215, |
|
"learning_rate": 8.865502889271901e-05, |
|
"loss": 0.0416, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.171821305841925, |
|
"grad_norm": 0.3286329209804535, |
|
"learning_rate": 8.85647562692145e-05, |
|
"loss": 0.0249, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 5.189003436426117, |
|
"grad_norm": 0.5245858430862427, |
|
"learning_rate": 8.847417226336561e-05, |
|
"loss": 0.0272, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 5.206185567010309, |
|
"grad_norm": 0.3810178339481354, |
|
"learning_rate": 8.83832776065723e-05, |
|
"loss": 0.0238, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 5.223367697594502, |
|
"grad_norm": 0.48333027958869934, |
|
"learning_rate": 8.829207303274279e-05, |
|
"loss": 0.0262, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 5.240549828178694, |
|
"grad_norm": 0.4119150638580322, |
|
"learning_rate": 8.820055927828762e-05, |
|
"loss": 0.0243, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 5.257731958762887, |
|
"grad_norm": 0.26957616209983826, |
|
"learning_rate": 8.810873708211383e-05, |
|
"loss": 0.0228, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 5.274914089347079, |
|
"grad_norm": 0.502048134803772, |
|
"learning_rate": 8.801660718561875e-05, |
|
"loss": 0.0215, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 5.292096219931271, |
|
"grad_norm": 0.3506264090538025, |
|
"learning_rate": 8.79241703326843e-05, |
|
"loss": 0.0335, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 5.309278350515464, |
|
"grad_norm": 0.42758750915527344, |
|
"learning_rate": 8.78314272696708e-05, |
|
"loss": 0.0294, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 5.326460481099656, |
|
"grad_norm": 0.25186318159103394, |
|
"learning_rate": 8.773837874541099e-05, |
|
"loss": 0.0319, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 5.3436426116838485, |
|
"grad_norm": 0.310088187456131, |
|
"learning_rate": 8.7645025511204e-05, |
|
"loss": 0.0263, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 5.360824742268041, |
|
"grad_norm": 0.3250679075717926, |
|
"learning_rate": 8.755136832080927e-05, |
|
"loss": 0.027, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 5.378006872852234, |
|
"grad_norm": 0.3429087698459625, |
|
"learning_rate": 8.745740793044046e-05, |
|
"loss": 0.024, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 5.3951890034364265, |
|
"grad_norm": 0.2694869637489319, |
|
"learning_rate": 8.736314509875934e-05, |
|
"loss": 0.0256, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 5.412371134020619, |
|
"grad_norm": 0.32267141342163086, |
|
"learning_rate": 8.726858058686968e-05, |
|
"loss": 0.0269, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 5.429553264604811, |
|
"grad_norm": 0.3753204643726349, |
|
"learning_rate": 8.717371515831112e-05, |
|
"loss": 0.0209, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 5.446735395189004, |
|
"grad_norm": 0.258056640625, |
|
"learning_rate": 8.707854957905294e-05, |
|
"loss": 0.0236, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 5.463917525773196, |
|
"grad_norm": 0.19171011447906494, |
|
"learning_rate": 8.698308461748799e-05, |
|
"loss": 0.0241, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 5.481099656357388, |
|
"grad_norm": 0.20204898715019226, |
|
"learning_rate": 8.688732104442632e-05, |
|
"loss": 0.0166, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 5.498281786941581, |
|
"grad_norm": 0.4373096227645874, |
|
"learning_rate": 8.679125963308909e-05, |
|
"loss": 0.0271, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.515463917525773, |
|
"grad_norm": 0.24470694363117218, |
|
"learning_rate": 8.669490115910234e-05, |
|
"loss": 0.0193, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 5.5326460481099655, |
|
"grad_norm": 0.3045484125614166, |
|
"learning_rate": 8.659824640049063e-05, |
|
"loss": 0.0203, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 5.549828178694158, |
|
"grad_norm": 0.18822188675403595, |
|
"learning_rate": 8.650129613767075e-05, |
|
"loss": 0.0179, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 5.56701030927835, |
|
"grad_norm": 0.2681497037410736, |
|
"learning_rate": 8.640405115344557e-05, |
|
"loss": 0.0179, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 5.584192439862543, |
|
"grad_norm": 0.2358640730381012, |
|
"learning_rate": 8.630651223299755e-05, |
|
"loss": 0.0231, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 5.601374570446735, |
|
"grad_norm": 0.33891239762306213, |
|
"learning_rate": 8.620868016388252e-05, |
|
"loss": 0.02, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 5.618556701030927, |
|
"grad_norm": 0.5138821005821228, |
|
"learning_rate": 8.611055573602323e-05, |
|
"loss": 0.0258, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 5.63573883161512, |
|
"grad_norm": 0.20864839851856232, |
|
"learning_rate": 8.601213974170303e-05, |
|
"loss": 0.0178, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 5.652920962199313, |
|
"grad_norm": 0.1877431720495224, |
|
"learning_rate": 8.591343297555947e-05, |
|
"loss": 0.0208, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 5.670103092783505, |
|
"grad_norm": 0.38843151926994324, |
|
"learning_rate": 8.581443623457785e-05, |
|
"loss": 0.0277, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.687285223367698, |
|
"grad_norm": 0.22977161407470703, |
|
"learning_rate": 8.571515031808484e-05, |
|
"loss": 0.0169, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 5.70446735395189, |
|
"grad_norm": 0.39261528849601746, |
|
"learning_rate": 8.561557602774196e-05, |
|
"loss": 0.0151, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 5.721649484536083, |
|
"grad_norm": 0.20939397811889648, |
|
"learning_rate": 8.551571416753912e-05, |
|
"loss": 0.0247, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 5.738831615120275, |
|
"grad_norm": 0.3138323724269867, |
|
"learning_rate": 8.54155655437882e-05, |
|
"loss": 0.0202, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 5.756013745704467, |
|
"grad_norm": 0.2958749830722809, |
|
"learning_rate": 8.531513096511646e-05, |
|
"loss": 0.0239, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 5.77319587628866, |
|
"grad_norm": 0.43186619877815247, |
|
"learning_rate": 8.521441124246002e-05, |
|
"loss": 0.028, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 5.790378006872852, |
|
"grad_norm": 0.3215327262878418, |
|
"learning_rate": 8.511340718905737e-05, |
|
"loss": 0.0273, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 5.8075601374570445, |
|
"grad_norm": 0.584010899066925, |
|
"learning_rate": 8.501211962044275e-05, |
|
"loss": 0.0241, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 5.824742268041237, |
|
"grad_norm": 0.4198577404022217, |
|
"learning_rate": 8.491054935443954e-05, |
|
"loss": 0.0197, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 5.841924398625429, |
|
"grad_norm": 0.4115603566169739, |
|
"learning_rate": 8.480869721115375e-05, |
|
"loss": 0.0201, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.859106529209622, |
|
"grad_norm": 0.19503287971019745, |
|
"learning_rate": 8.470656401296732e-05, |
|
"loss": 0.0226, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 5.876288659793815, |
|
"grad_norm": 0.3533823490142822, |
|
"learning_rate": 8.460415058453153e-05, |
|
"loss": 0.0245, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 5.893470790378007, |
|
"grad_norm": 0.22459329664707184, |
|
"learning_rate": 8.450145775276024e-05, |
|
"loss": 0.0203, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 5.9106529209622, |
|
"grad_norm": 0.5531524419784546, |
|
"learning_rate": 8.439848634682337e-05, |
|
"loss": 0.0347, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 5.927835051546392, |
|
"grad_norm": 0.3939720690250397, |
|
"learning_rate": 8.429523719814008e-05, |
|
"loss": 0.0217, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.945017182130584, |
|
"grad_norm": 0.26560521125793457, |
|
"learning_rate": 8.419171114037214e-05, |
|
"loss": 0.0249, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 5.962199312714777, |
|
"grad_norm": 0.26765570044517517, |
|
"learning_rate": 8.40879090094171e-05, |
|
"loss": 0.022, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 5.979381443298969, |
|
"grad_norm": 0.32663553953170776, |
|
"learning_rate": 8.398383164340167e-05, |
|
"loss": 0.0234, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 5.9965635738831615, |
|
"grad_norm": 0.3831205368041992, |
|
"learning_rate": 8.387947988267482e-05, |
|
"loss": 0.0265, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 6.013745704467354, |
|
"grad_norm": 0.35195666551589966, |
|
"learning_rate": 8.37748545698011e-05, |
|
"loss": 0.0182, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 6.030927835051546, |
|
"grad_norm": 0.3900887370109558, |
|
"learning_rate": 8.366995654955375e-05, |
|
"loss": 0.0234, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 6.048109965635739, |
|
"grad_norm": 0.41412341594696045, |
|
"learning_rate": 8.356478666890798e-05, |
|
"loss": 0.0254, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 6.065292096219931, |
|
"grad_norm": 0.2592662572860718, |
|
"learning_rate": 8.345934577703403e-05, |
|
"loss": 0.0163, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 6.082474226804123, |
|
"grad_norm": 0.3936319947242737, |
|
"learning_rate": 8.335363472529038e-05, |
|
"loss": 0.0266, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 6.099656357388316, |
|
"grad_norm": 0.3583790957927704, |
|
"learning_rate": 8.324765436721688e-05, |
|
"loss": 0.0178, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 6.116838487972508, |
|
"grad_norm": 0.4558425843715668, |
|
"learning_rate": 8.314140555852777e-05, |
|
"loss": 0.0259, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 6.134020618556701, |
|
"grad_norm": 0.3604467213153839, |
|
"learning_rate": 8.303488915710484e-05, |
|
"loss": 0.027, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 6.151202749140894, |
|
"grad_norm": 0.22830836474895477, |
|
"learning_rate": 8.292810602299059e-05, |
|
"loss": 0.0239, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 6.168384879725086, |
|
"grad_norm": 0.18954436480998993, |
|
"learning_rate": 8.282105701838106e-05, |
|
"loss": 0.0203, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 6.185567010309279, |
|
"grad_norm": 0.25453153252601624, |
|
"learning_rate": 8.271374300761911e-05, |
|
"loss": 0.0247, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 6.202749140893471, |
|
"grad_norm": 0.3951425552368164, |
|
"learning_rate": 8.260616485718727e-05, |
|
"loss": 0.0256, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 6.219931271477663, |
|
"grad_norm": 0.3867959976196289, |
|
"learning_rate": 8.249832343570082e-05, |
|
"loss": 0.0218, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 6.237113402061856, |
|
"grad_norm": 0.24521775543689728, |
|
"learning_rate": 8.239021961390078e-05, |
|
"loss": 0.0258, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 6.254295532646048, |
|
"grad_norm": 0.3367408215999603, |
|
"learning_rate": 8.228185426464684e-05, |
|
"loss": 0.0184, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 6.2714776632302405, |
|
"grad_norm": 0.28449004888534546, |
|
"learning_rate": 8.217322826291032e-05, |
|
"loss": 0.0235, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 6.288659793814433, |
|
"grad_norm": 0.23285141587257385, |
|
"learning_rate": 8.206434248576718e-05, |
|
"loss": 0.0249, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 6.305841924398625, |
|
"grad_norm": 0.4478093087673187, |
|
"learning_rate": 8.195519781239079e-05, |
|
"loss": 0.023, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 6.323024054982818, |
|
"grad_norm": 0.3469564914703369, |
|
"learning_rate": 8.1845795124045e-05, |
|
"loss": 0.022, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 6.34020618556701, |
|
"grad_norm": 0.24919480085372925, |
|
"learning_rate": 8.173613530407691e-05, |
|
"loss": 0.0191, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 6.357388316151202, |
|
"grad_norm": 0.27461591362953186, |
|
"learning_rate": 8.162621923790974e-05, |
|
"loss": 0.0222, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 6.374570446735396, |
|
"grad_norm": 0.35929545760154724, |
|
"learning_rate": 8.151604781303577e-05, |
|
"loss": 0.021, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 6.391752577319588, |
|
"grad_norm": 0.4438592791557312, |
|
"learning_rate": 8.140562191900909e-05, |
|
"loss": 0.0266, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 6.40893470790378, |
|
"grad_norm": 0.35622140765190125, |
|
"learning_rate": 8.129494244743842e-05, |
|
"loss": 0.0227, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 6.426116838487973, |
|
"grad_norm": 0.3558623790740967, |
|
"learning_rate": 8.118401029197996e-05, |
|
"loss": 0.0294, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 6.443298969072165, |
|
"grad_norm": 0.30522775650024414, |
|
"learning_rate": 8.107282634833015e-05, |
|
"loss": 0.0221, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 6.4604810996563575, |
|
"grad_norm": 0.3831705152988434, |
|
"learning_rate": 8.096139151421842e-05, |
|
"loss": 0.0198, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 6.47766323024055, |
|
"grad_norm": 0.2840515673160553, |
|
"learning_rate": 8.084970668939998e-05, |
|
"loss": 0.0215, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 6.494845360824742, |
|
"grad_norm": 0.30647212266921997, |
|
"learning_rate": 8.07377727756485e-05, |
|
"loss": 0.0179, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 6.512027491408935, |
|
"grad_norm": 0.2785893380641937, |
|
"learning_rate": 8.06255906767489e-05, |
|
"loss": 0.0205, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 6.529209621993127, |
|
"grad_norm": 0.30499890446662903, |
|
"learning_rate": 8.051316129849e-05, |
|
"loss": 0.0273, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 6.546391752577319, |
|
"grad_norm": 0.26266801357269287, |
|
"learning_rate": 8.04004855486572e-05, |
|
"loss": 0.0264, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 6.563573883161512, |
|
"grad_norm": 0.24231982231140137, |
|
"learning_rate": 8.02875643370252e-05, |
|
"loss": 0.0213, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 6.580756013745704, |
|
"grad_norm": 0.21040430665016174, |
|
"learning_rate": 8.01743985753506e-05, |
|
"loss": 0.0182, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 6.597938144329897, |
|
"grad_norm": 0.30289342999458313, |
|
"learning_rate": 8.006098917736461e-05, |
|
"loss": 0.0231, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 6.615120274914089, |
|
"grad_norm": 0.3555678427219391, |
|
"learning_rate": 7.994733705876558e-05, |
|
"loss": 0.0188, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 6.632302405498281, |
|
"grad_norm": 0.2579226493835449, |
|
"learning_rate": 7.983344313721166e-05, |
|
"loss": 0.0218, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 6.649484536082475, |
|
"grad_norm": 0.3365667164325714, |
|
"learning_rate": 7.971930833231338e-05, |
|
"loss": 0.0202, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 0.14933505654335022, |
|
"learning_rate": 7.960493356562624e-05, |
|
"loss": 0.0208, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 6.683848797250859, |
|
"grad_norm": 0.1804179847240448, |
|
"learning_rate": 7.949031976064327e-05, |
|
"loss": 0.0237, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 6.701030927835052, |
|
"grad_norm": 0.3504364788532257, |
|
"learning_rate": 7.937546784278753e-05, |
|
"loss": 0.0185, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 6.718213058419244, |
|
"grad_norm": 0.2884169816970825, |
|
"learning_rate": 7.926037873940469e-05, |
|
"loss": 0.0166, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 6.7353951890034365, |
|
"grad_norm": 0.3002106547355652, |
|
"learning_rate": 7.91450533797555e-05, |
|
"loss": 0.0198, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 6.752577319587629, |
|
"grad_norm": 0.4687197506427765, |
|
"learning_rate": 7.902949269500835e-05, |
|
"loss": 0.0222, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 6.769759450171821, |
|
"grad_norm": 0.3774946331977844, |
|
"learning_rate": 7.891369761823164e-05, |
|
"loss": 0.0245, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 6.786941580756014, |
|
"grad_norm": 0.43464595079421997, |
|
"learning_rate": 7.879766908438638e-05, |
|
"loss": 0.0238, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 6.804123711340206, |
|
"grad_norm": 0.338309109210968, |
|
"learning_rate": 7.868140803031853e-05, |
|
"loss": 0.0322, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 6.821305841924398, |
|
"grad_norm": 0.4015257954597473, |
|
"learning_rate": 7.85649153947515e-05, |
|
"loss": 0.0219, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 6.838487972508591, |
|
"grad_norm": 0.45906925201416016, |
|
"learning_rate": 7.844819211827861e-05, |
|
"loss": 0.0219, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 6.855670103092783, |
|
"grad_norm": 0.25680992007255554, |
|
"learning_rate": 7.83312391433553e-05, |
|
"loss": 0.0191, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 6.872852233676976, |
|
"grad_norm": 0.35143017768859863, |
|
"learning_rate": 7.821405741429179e-05, |
|
"loss": 0.0172, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.890034364261169, |
|
"grad_norm": 0.2712146043777466, |
|
"learning_rate": 7.809664787724527e-05, |
|
"loss": 0.0207, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 6.907216494845361, |
|
"grad_norm": 0.29727092385292053, |
|
"learning_rate": 7.79790114802123e-05, |
|
"loss": 0.0156, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 6.9243986254295535, |
|
"grad_norm": 0.2169165462255478, |
|
"learning_rate": 7.786114917302118e-05, |
|
"loss": 0.0213, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 6.941580756013746, |
|
"grad_norm": 0.27955862879753113, |
|
"learning_rate": 7.77430619073243e-05, |
|
"loss": 0.0205, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 6.958762886597938, |
|
"grad_norm": 0.1645563244819641, |
|
"learning_rate": 7.762475063659038e-05, |
|
"loss": 0.0233, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 6.975945017182131, |
|
"grad_norm": 0.22075164318084717, |
|
"learning_rate": 7.750621631609684e-05, |
|
"loss": 0.0229, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 6.993127147766323, |
|
"grad_norm": 0.19816423952579498, |
|
"learning_rate": 7.738745990292208e-05, |
|
"loss": 0.0269, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 7.010309278350515, |
|
"grad_norm": 0.2696300148963928, |
|
"learning_rate": 7.726848235593771e-05, |
|
"loss": 0.0209, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 7.027491408934708, |
|
"grad_norm": 0.3369583785533905, |
|
"learning_rate": 7.714928463580084e-05, |
|
"loss": 0.0192, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 7.0446735395189, |
|
"grad_norm": 0.20987503230571747, |
|
"learning_rate": 7.702986770494633e-05, |
|
"loss": 0.0203, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 7.061855670103093, |
|
"grad_norm": 0.220789834856987, |
|
"learning_rate": 7.691023252757901e-05, |
|
"loss": 0.0176, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 7.079037800687285, |
|
"grad_norm": 0.37767869234085083, |
|
"learning_rate": 7.679038006966587e-05, |
|
"loss": 0.0208, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 7.096219931271477, |
|
"grad_norm": 0.3345067799091339, |
|
"learning_rate": 7.66703112989283e-05, |
|
"loss": 0.0168, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 7.11340206185567, |
|
"grad_norm": 0.3052999973297119, |
|
"learning_rate": 7.655002718483424e-05, |
|
"loss": 0.0143, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 7.130584192439863, |
|
"grad_norm": 0.3365825414657593, |
|
"learning_rate": 7.64295286985904e-05, |
|
"loss": 0.0206, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 7.147766323024055, |
|
"grad_norm": 0.19445881247520447, |
|
"learning_rate": 7.630881681313436e-05, |
|
"loss": 0.0164, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 7.164948453608248, |
|
"grad_norm": 0.3136243224143982, |
|
"learning_rate": 7.618789250312675e-05, |
|
"loss": 0.0141, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 7.18213058419244, |
|
"grad_norm": 0.19267341494560242, |
|
"learning_rate": 7.606675674494341e-05, |
|
"loss": 0.0178, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 7.1993127147766325, |
|
"grad_norm": 0.1413758099079132, |
|
"learning_rate": 7.594541051666742e-05, |
|
"loss": 0.0179, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 7.216494845360825, |
|
"grad_norm": 0.19496262073516846, |
|
"learning_rate": 7.582385479808127e-05, |
|
"loss": 0.0141, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 7.233676975945017, |
|
"grad_norm": 0.27552464604377747, |
|
"learning_rate": 7.570209057065894e-05, |
|
"loss": 0.0184, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 7.25085910652921, |
|
"grad_norm": 0.19228124618530273, |
|
"learning_rate": 7.558011881755797e-05, |
|
"loss": 0.0144, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 7.268041237113402, |
|
"grad_norm": 0.29144996404647827, |
|
"learning_rate": 7.545794052361149e-05, |
|
"loss": 0.0172, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 7.285223367697594, |
|
"grad_norm": 0.21185532212257385, |
|
"learning_rate": 7.533555667532035e-05, |
|
"loss": 0.0126, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 7.302405498281787, |
|
"grad_norm": 0.4170054495334625, |
|
"learning_rate": 7.521296826084503e-05, |
|
"loss": 0.0231, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 7.319587628865979, |
|
"grad_norm": 0.26662755012512207, |
|
"learning_rate": 7.50901762699978e-05, |
|
"loss": 0.0161, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 7.3367697594501715, |
|
"grad_norm": 0.323034405708313, |
|
"learning_rate": 7.496718169423462e-05, |
|
"loss": 0.0172, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 7.353951890034364, |
|
"grad_norm": 0.22829285264015198, |
|
"learning_rate": 7.484398552664722e-05, |
|
"loss": 0.018, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 7.371134020618557, |
|
"grad_norm": 0.4607219099998474, |
|
"learning_rate": 7.472058876195496e-05, |
|
"loss": 0.0302, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 7.3883161512027495, |
|
"grad_norm": 0.3345796763896942, |
|
"learning_rate": 7.459699239649696e-05, |
|
"loss": 0.0177, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 7.405498281786942, |
|
"grad_norm": 0.35420554876327515, |
|
"learning_rate": 7.447319742822392e-05, |
|
"loss": 0.0166, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 7.422680412371134, |
|
"grad_norm": 0.2642367482185364, |
|
"learning_rate": 7.43492048566901e-05, |
|
"loss": 0.0186, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 7.439862542955327, |
|
"grad_norm": 0.3100736141204834, |
|
"learning_rate": 7.422501568304535e-05, |
|
"loss": 0.0242, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 7.457044673539519, |
|
"grad_norm": 0.34664222598075867, |
|
"learning_rate": 7.410063091002682e-05, |
|
"loss": 0.0139, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 7.474226804123711, |
|
"grad_norm": 0.2938918471336365, |
|
"learning_rate": 7.397605154195106e-05, |
|
"loss": 0.016, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 7.491408934707904, |
|
"grad_norm": 0.34358811378479004, |
|
"learning_rate": 7.385127858470582e-05, |
|
"loss": 0.0178, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 7.508591065292096, |
|
"grad_norm": 0.2687462568283081, |
|
"learning_rate": 7.372631304574194e-05, |
|
"loss": 0.0186, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 7.525773195876289, |
|
"grad_norm": 0.24897870421409607, |
|
"learning_rate": 7.36011559340652e-05, |
|
"loss": 0.0178, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 7.542955326460481, |
|
"grad_norm": 0.36554020643234253, |
|
"learning_rate": 7.347580826022821e-05, |
|
"loss": 0.0218, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 7.560137457044673, |
|
"grad_norm": 0.25389084219932556, |
|
"learning_rate": 7.335027103632223e-05, |
|
"loss": 0.016, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.577319587628866, |
|
"grad_norm": 0.3902638256549835, |
|
"learning_rate": 7.322454527596898e-05, |
|
"loss": 0.0179, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 7.594501718213058, |
|
"grad_norm": 0.220624178647995, |
|
"learning_rate": 7.30986319943125e-05, |
|
"loss": 0.0147, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 7.6116838487972505, |
|
"grad_norm": 0.3258158266544342, |
|
"learning_rate": 7.29725322080109e-05, |
|
"loss": 0.0179, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 7.628865979381443, |
|
"grad_norm": 0.34806087613105774, |
|
"learning_rate": 7.28462469352282e-05, |
|
"loss": 0.0218, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 7.646048109965636, |
|
"grad_norm": 0.28883498907089233, |
|
"learning_rate": 7.271977719562611e-05, |
|
"loss": 0.017, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 7.6632302405498285, |
|
"grad_norm": 0.17664246261119843, |
|
"learning_rate": 7.259312401035572e-05, |
|
"loss": 0.0154, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 7.680412371134021, |
|
"grad_norm": 0.3173231780529022, |
|
"learning_rate": 7.246628840204935e-05, |
|
"loss": 0.0209, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 7.697594501718213, |
|
"grad_norm": 0.34185221791267395, |
|
"learning_rate": 7.233927139481224e-05, |
|
"loss": 0.0174, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 7.714776632302406, |
|
"grad_norm": 0.3024695813655853, |
|
"learning_rate": 7.221207401421428e-05, |
|
"loss": 0.021, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 7.731958762886598, |
|
"grad_norm": 0.3330129086971283, |
|
"learning_rate": 7.208469728728178e-05, |
|
"loss": 0.0295, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.74914089347079, |
|
"grad_norm": 0.29602715373039246, |
|
"learning_rate": 7.195714224248912e-05, |
|
"loss": 0.0182, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 7.766323024054983, |
|
"grad_norm": 0.18014559149742126, |
|
"learning_rate": 7.182940990975048e-05, |
|
"loss": 0.0196, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 7.783505154639175, |
|
"grad_norm": 0.2823367714881897, |
|
"learning_rate": 7.170150132041146e-05, |
|
"loss": 0.0233, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 7.8006872852233675, |
|
"grad_norm": 0.31760045886039734, |
|
"learning_rate": 7.15734175072409e-05, |
|
"loss": 0.0155, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 7.81786941580756, |
|
"grad_norm": 0.2565371096134186, |
|
"learning_rate": 7.144515950442232e-05, |
|
"loss": 0.0221, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 7.835051546391752, |
|
"grad_norm": 0.39871808886528015, |
|
"learning_rate": 7.131672834754582e-05, |
|
"loss": 0.0184, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 7.852233676975945, |
|
"grad_norm": 0.3175216615200043, |
|
"learning_rate": 7.11881250735995e-05, |
|
"loss": 0.0184, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 7.869415807560138, |
|
"grad_norm": 0.37690746784210205, |
|
"learning_rate": 7.105935072096125e-05, |
|
"loss": 0.0198, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 7.88659793814433, |
|
"grad_norm": 0.24447882175445557, |
|
"learning_rate": 7.093040632939023e-05, |
|
"loss": 0.0151, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 7.903780068728523, |
|
"grad_norm": 0.2845030725002289, |
|
"learning_rate": 7.08012929400186e-05, |
|
"loss": 0.0168, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 7.920962199312715, |
|
"grad_norm": 0.2334176003932953, |
|
"learning_rate": 7.067201159534299e-05, |
|
"loss": 0.0238, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 7.938144329896907, |
|
"grad_norm": 0.2806495726108551, |
|
"learning_rate": 7.054256333921623e-05, |
|
"loss": 0.021, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 7.9553264604811, |
|
"grad_norm": 0.25240814685821533, |
|
"learning_rate": 7.041294921683876e-05, |
|
"loss": 0.0153, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 7.972508591065292, |
|
"grad_norm": 0.4571000337600708, |
|
"learning_rate": 7.02831702747503e-05, |
|
"loss": 0.0175, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 7.989690721649485, |
|
"grad_norm": 0.27207332849502563, |
|
"learning_rate": 7.01532275608214e-05, |
|
"loss": 0.0198, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 8.006872852233677, |
|
"grad_norm": 0.3235473930835724, |
|
"learning_rate": 7.002312212424488e-05, |
|
"loss": 0.0243, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 8.02405498281787, |
|
"grad_norm": 0.20169375836849213, |
|
"learning_rate": 6.989285501552751e-05, |
|
"loss": 0.0254, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 8.041237113402062, |
|
"grad_norm": 0.23877793550491333, |
|
"learning_rate": 6.976242728648137e-05, |
|
"loss": 0.0125, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 8.058419243986254, |
|
"grad_norm": 0.2380063384771347, |
|
"learning_rate": 6.963183999021546e-05, |
|
"loss": 0.0293, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 8.075601374570446, |
|
"grad_norm": 0.27434396743774414, |
|
"learning_rate": 6.95010941811272e-05, |
|
"loss": 0.0203, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 8.092783505154639, |
|
"grad_norm": 0.24492555856704712, |
|
"learning_rate": 6.93701909148938e-05, |
|
"loss": 0.0196, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 8.109965635738831, |
|
"grad_norm": 0.22814416885375977, |
|
"learning_rate": 6.923913124846397e-05, |
|
"loss": 0.0174, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 8.127147766323024, |
|
"grad_norm": 0.2595348358154297, |
|
"learning_rate": 6.910791624004907e-05, |
|
"loss": 0.0151, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 8.144329896907216, |
|
"grad_norm": 0.40572383999824524, |
|
"learning_rate": 6.897654694911486e-05, |
|
"loss": 0.021, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 8.161512027491408, |
|
"grad_norm": 0.36821913719177246, |
|
"learning_rate": 6.884502443637273e-05, |
|
"loss": 0.0167, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 8.1786941580756, |
|
"grad_norm": 0.2500125467777252, |
|
"learning_rate": 6.871334976377132e-05, |
|
"loss": 0.016, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 8.195876288659793, |
|
"grad_norm": 0.2473415732383728, |
|
"learning_rate": 6.858152399448773e-05, |
|
"loss": 0.0187, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 8.213058419243985, |
|
"grad_norm": 0.2067149579524994, |
|
"learning_rate": 6.844954819291918e-05, |
|
"loss": 0.0264, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 8.230240549828178, |
|
"grad_norm": 0.24544283747673035, |
|
"learning_rate": 6.831742342467418e-05, |
|
"loss": 0.0207, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 8.24742268041237, |
|
"grad_norm": 0.30843910574913025, |
|
"learning_rate": 6.818515075656412e-05, |
|
"loss": 0.017, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 8.264604810996564, |
|
"grad_norm": 0.3309854567050934, |
|
"learning_rate": 6.805273125659455e-05, |
|
"loss": 0.0179, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 8.281786941580757, |
|
"grad_norm": 0.21837979555130005, |
|
"learning_rate": 6.792016599395655e-05, |
|
"loss": 0.011, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 8.29896907216495, |
|
"grad_norm": 0.3258560597896576, |
|
"learning_rate": 6.778745603901817e-05, |
|
"loss": 0.0168, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 8.316151202749142, |
|
"grad_norm": 0.3291252553462982, |
|
"learning_rate": 6.765460246331573e-05, |
|
"loss": 0.0197, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 0.33732980489730835, |
|
"learning_rate": 6.752160633954515e-05, |
|
"loss": 0.0138, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 8.350515463917526, |
|
"grad_norm": 0.2825522720813751, |
|
"learning_rate": 6.73884687415534e-05, |
|
"loss": 0.0156, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 8.367697594501719, |
|
"grad_norm": 0.28338858485221863, |
|
"learning_rate": 6.725519074432965e-05, |
|
"loss": 0.0215, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 8.384879725085911, |
|
"grad_norm": 0.258777916431427, |
|
"learning_rate": 6.712177342399679e-05, |
|
"loss": 0.0197, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 8.402061855670103, |
|
"grad_norm": 0.3022059202194214, |
|
"learning_rate": 6.698821785780257e-05, |
|
"loss": 0.0177, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 8.419243986254296, |
|
"grad_norm": 0.23812155425548553, |
|
"learning_rate": 6.685452512411102e-05, |
|
"loss": 0.0179, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 8.436426116838488, |
|
"grad_norm": 0.1747688353061676, |
|
"learning_rate": 6.672069630239366e-05, |
|
"loss": 0.0207, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 8.45360824742268, |
|
"grad_norm": 0.38623926043510437, |
|
"learning_rate": 6.658673247322086e-05, |
|
"loss": 0.02, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 8.470790378006873, |
|
"grad_norm": 0.2657296657562256, |
|
"learning_rate": 6.645263471825303e-05, |
|
"loss": 0.0139, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 8.487972508591065, |
|
"grad_norm": 0.3186751902103424, |
|
"learning_rate": 6.631840412023201e-05, |
|
"loss": 0.0163, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 8.505154639175258, |
|
"grad_norm": 0.22730350494384766, |
|
"learning_rate": 6.618404176297217e-05, |
|
"loss": 0.015, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 8.52233676975945, |
|
"grad_norm": 0.4089230000972748, |
|
"learning_rate": 6.604954873135178e-05, |
|
"loss": 0.017, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 8.539518900343642, |
|
"grad_norm": 0.2689635753631592, |
|
"learning_rate": 6.591492611130421e-05, |
|
"loss": 0.0166, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 8.556701030927835, |
|
"grad_norm": 0.2454978972673416, |
|
"learning_rate": 6.578017498980913e-05, |
|
"loss": 0.0133, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 8.573883161512027, |
|
"grad_norm": 0.17714907228946686, |
|
"learning_rate": 6.564529645488383e-05, |
|
"loss": 0.018, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 8.59106529209622, |
|
"grad_norm": 0.26513901352882385, |
|
"learning_rate": 6.551029159557431e-05, |
|
"loss": 0.0194, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.608247422680412, |
|
"grad_norm": 0.20744717121124268, |
|
"learning_rate": 6.537516150194656e-05, |
|
"loss": 0.0187, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 8.625429553264604, |
|
"grad_norm": 0.21573315560817719, |
|
"learning_rate": 6.523990726507777e-05, |
|
"loss": 0.0178, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 8.642611683848797, |
|
"grad_norm": 0.1612836867570877, |
|
"learning_rate": 6.510452997704748e-05, |
|
"loss": 0.0195, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 8.65979381443299, |
|
"grad_norm": 0.23485371470451355, |
|
"learning_rate": 6.496903073092878e-05, |
|
"loss": 0.016, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 8.676975945017182, |
|
"grad_norm": 0.25484392046928406, |
|
"learning_rate": 6.483341062077948e-05, |
|
"loss": 0.0141, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 8.694158075601374, |
|
"grad_norm": 0.24695904552936554, |
|
"learning_rate": 6.46976707416333e-05, |
|
"loss": 0.0122, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 8.711340206185566, |
|
"grad_norm": 0.10241147875785828, |
|
"learning_rate": 6.456181218949096e-05, |
|
"loss": 0.0175, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 8.728522336769759, |
|
"grad_norm": 0.31217408180236816, |
|
"learning_rate": 6.442583606131143e-05, |
|
"loss": 0.0151, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 8.745704467353953, |
|
"grad_norm": 0.3693694472312927, |
|
"learning_rate": 6.428974345500299e-05, |
|
"loss": 0.0199, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 8.762886597938145, |
|
"grad_norm": 0.23745276033878326, |
|
"learning_rate": 6.415353546941441e-05, |
|
"loss": 0.0221, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 8.780068728522338, |
|
"grad_norm": 0.20179122686386108, |
|
"learning_rate": 6.401721320432604e-05, |
|
"loss": 0.0155, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 8.79725085910653, |
|
"grad_norm": 0.36349353194236755, |
|
"learning_rate": 6.388077776044102e-05, |
|
"loss": 0.0183, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 8.814432989690722, |
|
"grad_norm": 0.274783194065094, |
|
"learning_rate": 6.374423023937621e-05, |
|
"loss": 0.0153, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 8.831615120274915, |
|
"grad_norm": 0.29849973320961, |
|
"learning_rate": 6.360757174365355e-05, |
|
"loss": 0.0174, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 8.848797250859107, |
|
"grad_norm": 0.21367676556110382, |
|
"learning_rate": 6.34708033766909e-05, |
|
"loss": 0.0181, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 8.8659793814433, |
|
"grad_norm": 0.23595260083675385, |
|
"learning_rate": 6.333392624279333e-05, |
|
"loss": 0.0174, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 8.883161512027492, |
|
"grad_norm": 0.16049842536449432, |
|
"learning_rate": 6.319694144714407e-05, |
|
"loss": 0.0151, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 8.900343642611684, |
|
"grad_norm": 0.2062782198190689, |
|
"learning_rate": 6.30598500957957e-05, |
|
"loss": 0.0211, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 8.917525773195877, |
|
"grad_norm": 0.3576521873474121, |
|
"learning_rate": 6.292265329566108e-05, |
|
"loss": 0.0149, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 8.934707903780069, |
|
"grad_norm": 0.28101012110710144, |
|
"learning_rate": 6.278535215450458e-05, |
|
"loss": 0.0162, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 8.951890034364261, |
|
"grad_norm": 0.2609540522098541, |
|
"learning_rate": 6.264794778093297e-05, |
|
"loss": 0.0171, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 8.969072164948454, |
|
"grad_norm": 0.27727997303009033, |
|
"learning_rate": 6.25104412843866e-05, |
|
"loss": 0.0132, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 8.986254295532646, |
|
"grad_norm": 0.21067747473716736, |
|
"learning_rate": 6.237283377513036e-05, |
|
"loss": 0.0168, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 9.003436426116838, |
|
"grad_norm": 0.41480588912963867, |
|
"learning_rate": 6.223512636424478e-05, |
|
"loss": 0.0197, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 9.02061855670103, |
|
"grad_norm": 0.2617255449295044, |
|
"learning_rate": 6.209732016361696e-05, |
|
"loss": 0.0106, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 9.037800687285223, |
|
"grad_norm": 0.1343929022550583, |
|
"learning_rate": 6.19594162859317e-05, |
|
"loss": 0.0186, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 9.054982817869416, |
|
"grad_norm": 0.22022658586502075, |
|
"learning_rate": 6.182141584466247e-05, |
|
"loss": 0.0152, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 9.072164948453608, |
|
"grad_norm": 0.19647003710269928, |
|
"learning_rate": 6.168331995406244e-05, |
|
"loss": 0.0124, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 9.0893470790378, |
|
"grad_norm": 0.225993350148201, |
|
"learning_rate": 6.154512972915542e-05, |
|
"loss": 0.0182, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 9.106529209621993, |
|
"grad_norm": 0.2652854919433594, |
|
"learning_rate": 6.140684628572688e-05, |
|
"loss": 0.0203, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 9.123711340206185, |
|
"grad_norm": 0.18200494349002838, |
|
"learning_rate": 6.126847074031507e-05, |
|
"loss": 0.0241, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 9.140893470790378, |
|
"grad_norm": 0.24488599598407745, |
|
"learning_rate": 6.113000421020176e-05, |
|
"loss": 0.0178, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 9.15807560137457, |
|
"grad_norm": 0.28431186079978943, |
|
"learning_rate": 6.099144781340347e-05, |
|
"loss": 0.0231, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 9.175257731958762, |
|
"grad_norm": 0.2814132869243622, |
|
"learning_rate": 6.0852802668662256e-05, |
|
"loss": 0.0191, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 9.192439862542955, |
|
"grad_norm": 0.33205386996269226, |
|
"learning_rate": 6.071406989543678e-05, |
|
"loss": 0.0177, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 9.209621993127147, |
|
"grad_norm": 0.24390940368175507, |
|
"learning_rate": 6.057525061389324e-05, |
|
"loss": 0.0217, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 9.22680412371134, |
|
"grad_norm": 0.18197228014469147, |
|
"learning_rate": 6.04363459448963e-05, |
|
"loss": 0.0126, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 9.243986254295532, |
|
"grad_norm": 0.2006153017282486, |
|
"learning_rate": 6.0297357010000124e-05, |
|
"loss": 0.0171, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 9.261168384879726, |
|
"grad_norm": 0.199944868683815, |
|
"learning_rate": 6.0158284931439177e-05, |
|
"loss": 0.0165, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 9.278350515463918, |
|
"grad_norm": 0.1962256133556366, |
|
"learning_rate": 6.001913083211932e-05, |
|
"loss": 0.0198, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 9.29553264604811, |
|
"grad_norm": 0.2808385491371155, |
|
"learning_rate": 5.987989583560864e-05, |
|
"loss": 0.0164, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 9.312714776632303, |
|
"grad_norm": 0.24396586418151855, |
|
"learning_rate": 5.9740581066128435e-05, |
|
"loss": 0.0202, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 9.329896907216495, |
|
"grad_norm": 0.28668099641799927, |
|
"learning_rate": 5.9601187648544056e-05, |
|
"loss": 0.0156, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 9.347079037800688, |
|
"grad_norm": 0.25964459776878357, |
|
"learning_rate": 5.946171670835594e-05, |
|
"loss": 0.0197, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 9.36426116838488, |
|
"grad_norm": 0.3509371876716614, |
|
"learning_rate": 5.932216937169044e-05, |
|
"loss": 0.0229, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 9.381443298969073, |
|
"grad_norm": 0.29809918999671936, |
|
"learning_rate": 5.918254676529076e-05, |
|
"loss": 0.0134, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 9.398625429553265, |
|
"grad_norm": 0.20090153813362122, |
|
"learning_rate": 5.904285001650783e-05, |
|
"loss": 0.0184, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 9.415807560137457, |
|
"grad_norm": 0.3226790726184845, |
|
"learning_rate": 5.890308025329125e-05, |
|
"loss": 0.017, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 9.43298969072165, |
|
"grad_norm": 0.2159719467163086, |
|
"learning_rate": 5.876323860418016e-05, |
|
"loss": 0.0133, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 9.450171821305842, |
|
"grad_norm": 0.2575574219226837, |
|
"learning_rate": 5.8623326198294116e-05, |
|
"loss": 0.0156, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 9.467353951890034, |
|
"grad_norm": 0.2184896171092987, |
|
"learning_rate": 5.8483344165323975e-05, |
|
"loss": 0.0156, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 9.484536082474227, |
|
"grad_norm": 0.2843054533004761, |
|
"learning_rate": 5.834329363552279e-05, |
|
"loss": 0.0163, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 9.50171821305842, |
|
"grad_norm": 0.3006589710712433, |
|
"learning_rate": 5.820317573969669e-05, |
|
"loss": 0.0155, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 9.518900343642612, |
|
"grad_norm": 0.23060756921768188, |
|
"learning_rate": 5.806299160919573e-05, |
|
"loss": 0.0127, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 9.536082474226804, |
|
"grad_norm": 0.23474593460559845, |
|
"learning_rate": 5.792274237590471e-05, |
|
"loss": 0.0151, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 9.553264604810996, |
|
"grad_norm": 0.2775484621524811, |
|
"learning_rate": 5.7782429172234206e-05, |
|
"loss": 0.0194, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 9.570446735395189, |
|
"grad_norm": 0.20381006598472595, |
|
"learning_rate": 5.7642053131111186e-05, |
|
"loss": 0.0205, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 9.587628865979381, |
|
"grad_norm": 0.2642858028411865, |
|
"learning_rate": 5.7501615385970044e-05, |
|
"loss": 0.012, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 9.604810996563574, |
|
"grad_norm": 0.14698222279548645, |
|
"learning_rate": 5.7361117070743374e-05, |
|
"loss": 0.0151, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 9.621993127147766, |
|
"grad_norm": 0.2586089074611664, |
|
"learning_rate": 5.722055931985285e-05, |
|
"loss": 0.0173, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 9.639175257731958, |
|
"grad_norm": 0.2857683002948761, |
|
"learning_rate": 5.707994326820002e-05, |
|
"loss": 0.0173, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 9.65635738831615, |
|
"grad_norm": 0.25553369522094727, |
|
"learning_rate": 5.693927005115719e-05, |
|
"loss": 0.0193, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 9.673539518900343, |
|
"grad_norm": 0.2712913453578949, |
|
"learning_rate": 5.679854080455821e-05, |
|
"loss": 0.0104, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 9.690721649484535, |
|
"grad_norm": 0.2559773325920105, |
|
"learning_rate": 5.665775666468933e-05, |
|
"loss": 0.0144, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 9.707903780068728, |
|
"grad_norm": 0.3383992910385132, |
|
"learning_rate": 5.651691876828007e-05, |
|
"loss": 0.0144, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 9.72508591065292, |
|
"grad_norm": 0.3301098048686981, |
|
"learning_rate": 5.637602825249394e-05, |
|
"loss": 0.0157, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 9.742268041237114, |
|
"grad_norm": 0.22163395583629608, |
|
"learning_rate": 5.6235086254919324e-05, |
|
"loss": 0.0112, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 9.759450171821307, |
|
"grad_norm": 0.10947784781455994, |
|
"learning_rate": 5.609409391356031e-05, |
|
"loss": 0.0212, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 9.776632302405499, |
|
"grad_norm": 0.17621196806430817, |
|
"learning_rate": 5.595305236682743e-05, |
|
"loss": 0.0099, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 9.793814432989691, |
|
"grad_norm": 0.1827089488506317, |
|
"learning_rate": 5.581196275352858e-05, |
|
"loss": 0.018, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 9.810996563573884, |
|
"grad_norm": 0.20134034752845764, |
|
"learning_rate": 5.567082621285969e-05, |
|
"loss": 0.02, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 9.828178694158076, |
|
"grad_norm": 0.2766471803188324, |
|
"learning_rate": 5.5529643884395654e-05, |
|
"loss": 0.0125, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 9.845360824742269, |
|
"grad_norm": 0.1543634682893753, |
|
"learning_rate": 5.538841690808101e-05, |
|
"loss": 0.0166, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 9.862542955326461, |
|
"grad_norm": 0.2152809351682663, |
|
"learning_rate": 5.524714642422084e-05, |
|
"loss": 0.01, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 9.879725085910653, |
|
"grad_norm": 0.32943928241729736, |
|
"learning_rate": 5.510583357347149e-05, |
|
"loss": 0.0166, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 9.896907216494846, |
|
"grad_norm": 0.24659444391727448, |
|
"learning_rate": 5.4964479496831425e-05, |
|
"loss": 0.0173, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 9.914089347079038, |
|
"grad_norm": 0.173888698220253, |
|
"learning_rate": 5.482308533563193e-05, |
|
"loss": 0.0094, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 9.93127147766323, |
|
"grad_norm": 0.19505925476551056, |
|
"learning_rate": 5.468165223152798e-05, |
|
"loss": 0.0142, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 9.948453608247423, |
|
"grad_norm": 0.25433164834976196, |
|
"learning_rate": 5.454018132648897e-05, |
|
"loss": 0.015, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 9.965635738831615, |
|
"grad_norm": 0.26114964485168457, |
|
"learning_rate": 5.439867376278952e-05, |
|
"loss": 0.0136, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 9.982817869415808, |
|
"grad_norm": 0.36945995688438416, |
|
"learning_rate": 5.425713068300022e-05, |
|
"loss": 0.0213, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.4136284291744232, |
|
"learning_rate": 5.411555322997846e-05, |
|
"loss": 0.0235, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 10.017182130584192, |
|
"grad_norm": 0.2530066967010498, |
|
"learning_rate": 5.3973942546859145e-05, |
|
"loss": 0.0159, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 10.034364261168385, |
|
"grad_norm": 0.3279346227645874, |
|
"learning_rate": 5.3832299777045495e-05, |
|
"loss": 0.0123, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 10.051546391752577, |
|
"grad_norm": 0.2813730239868164, |
|
"learning_rate": 5.36906260641998e-05, |
|
"loss": 0.0152, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 10.06872852233677, |
|
"grad_norm": 0.2074098438024521, |
|
"learning_rate": 5.354892255223421e-05, |
|
"loss": 0.0134, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 10.085910652920962, |
|
"grad_norm": 0.2736356854438782, |
|
"learning_rate": 5.3407190385301456e-05, |
|
"loss": 0.0104, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 10.103092783505154, |
|
"grad_norm": 0.25040575861930847, |
|
"learning_rate": 5.3265430707785666e-05, |
|
"loss": 0.0172, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 10.120274914089347, |
|
"grad_norm": 0.3141660988330841, |
|
"learning_rate": 5.312364466429307e-05, |
|
"loss": 0.0125, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 10.137457044673539, |
|
"grad_norm": 0.16908888518810272, |
|
"learning_rate": 5.298183339964281e-05, |
|
"loss": 0.0117, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 10.154639175257731, |
|
"grad_norm": 0.2192607969045639, |
|
"learning_rate": 5.283999805885764e-05, |
|
"loss": 0.0212, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 10.171821305841924, |
|
"grad_norm": 0.14075499773025513, |
|
"learning_rate": 5.269813978715474e-05, |
|
"loss": 0.0171, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 10.189003436426116, |
|
"grad_norm": 0.15797455608844757, |
|
"learning_rate": 5.255625972993642e-05, |
|
"loss": 0.0143, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 10.206185567010309, |
|
"grad_norm": 0.22639349102973938, |
|
"learning_rate": 5.24143590327809e-05, |
|
"loss": 0.0174, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 10.223367697594501, |
|
"grad_norm": 0.22572936117649078, |
|
"learning_rate": 5.227243884143306e-05, |
|
"loss": 0.0123, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 10.240549828178693, |
|
"grad_norm": 0.24433186650276184, |
|
"learning_rate": 5.213050030179515e-05, |
|
"loss": 0.0152, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 10.257731958762886, |
|
"grad_norm": 0.2180275022983551, |
|
"learning_rate": 5.198854455991763e-05, |
|
"loss": 0.0136, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 10.27491408934708, |
|
"grad_norm": 0.1412176787853241, |
|
"learning_rate": 5.184657276198978e-05, |
|
"loss": 0.0083, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 10.292096219931272, |
|
"grad_norm": 0.23186911642551422, |
|
"learning_rate": 5.170458605433059e-05, |
|
"loss": 0.0128, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 10.309278350515465, |
|
"grad_norm": 0.2739560604095459, |
|
"learning_rate": 5.15625855833794e-05, |
|
"loss": 0.0212, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 10.326460481099657, |
|
"grad_norm": 0.2591661512851715, |
|
"learning_rate": 5.1420572495686646e-05, |
|
"loss": 0.0153, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 10.34364261168385, |
|
"grad_norm": 0.301039457321167, |
|
"learning_rate": 5.127854793790473e-05, |
|
"loss": 0.0128, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 10.360824742268042, |
|
"grad_norm": 0.30792465806007385, |
|
"learning_rate": 5.113651305677856e-05, |
|
"loss": 0.0206, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 10.378006872852234, |
|
"grad_norm": 0.20730407536029816, |
|
"learning_rate": 5.099446899913648e-05, |
|
"loss": 0.0184, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 10.395189003436426, |
|
"grad_norm": 0.2361646145582199, |
|
"learning_rate": 5.085241691188086e-05, |
|
"loss": 0.0142, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 10.412371134020619, |
|
"grad_norm": 0.15994442999362946, |
|
"learning_rate": 5.071035794197898e-05, |
|
"loss": 0.0128, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 10.429553264604811, |
|
"grad_norm": 0.1956380158662796, |
|
"learning_rate": 5.0568293236453614e-05, |
|
"loss": 0.0139, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 10.446735395189004, |
|
"grad_norm": 0.14793916046619415, |
|
"learning_rate": 5.042622394237391e-05, |
|
"loss": 0.01, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 10.463917525773196, |
|
"grad_norm": 0.23033088445663452, |
|
"learning_rate": 5.0284151206845996e-05, |
|
"loss": 0.0104, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 10.481099656357388, |
|
"grad_norm": 0.21595941483974457, |
|
"learning_rate": 5.014207617700388e-05, |
|
"loss": 0.0208, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 10.49828178694158, |
|
"grad_norm": 0.325511634349823, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0226, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 10.515463917525773, |
|
"grad_norm": 0.24100159108638763, |
|
"learning_rate": 4.985792382299614e-05, |
|
"loss": 0.013, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 10.532646048109966, |
|
"grad_norm": 0.2464800477027893, |
|
"learning_rate": 4.9715848793154e-05, |
|
"loss": 0.0171, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 10.549828178694158, |
|
"grad_norm": 0.24693673849105835, |
|
"learning_rate": 4.957377605762611e-05, |
|
"loss": 0.015, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 10.56701030927835, |
|
"grad_norm": 0.13398700952529907, |
|
"learning_rate": 4.94317067635464e-05, |
|
"loss": 0.0126, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 10.584192439862543, |
|
"grad_norm": 0.2720285654067993, |
|
"learning_rate": 4.9289642058021043e-05, |
|
"loss": 0.0161, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 10.601374570446735, |
|
"grad_norm": 0.2861359119415283, |
|
"learning_rate": 4.914758308811913e-05, |
|
"loss": 0.0137, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 10.618556701030927, |
|
"grad_norm": 0.15878301858901978, |
|
"learning_rate": 4.900553100086353e-05, |
|
"loss": 0.0173, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 10.63573883161512, |
|
"grad_norm": 0.33061495423316956, |
|
"learning_rate": 4.886348694322145e-05, |
|
"loss": 0.013, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 10.652920962199312, |
|
"grad_norm": 0.30866488814353943, |
|
"learning_rate": 4.8721452062095294e-05, |
|
"loss": 0.0168, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 10.670103092783505, |
|
"grad_norm": 0.24568206071853638, |
|
"learning_rate": 4.8579427504313366e-05, |
|
"loss": 0.0152, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 10.687285223367697, |
|
"grad_norm": 0.24803771078586578, |
|
"learning_rate": 4.843741441662062e-05, |
|
"loss": 0.0178, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 10.70446735395189, |
|
"grad_norm": 0.17046746611595154, |
|
"learning_rate": 4.829541394566942e-05, |
|
"loss": 0.0124, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 10.721649484536082, |
|
"grad_norm": 0.22589251399040222, |
|
"learning_rate": 4.8153427238010227e-05, |
|
"loss": 0.014, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 10.738831615120276, |
|
"grad_norm": 0.24486307799816132, |
|
"learning_rate": 4.801145544008239e-05, |
|
"loss": 0.014, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 10.756013745704468, |
|
"grad_norm": 0.13196790218353271, |
|
"learning_rate": 4.7869499698204864e-05, |
|
"loss": 0.0144, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 10.77319587628866, |
|
"grad_norm": 0.20505741238594055, |
|
"learning_rate": 4.772756115856695e-05, |
|
"loss": 0.0146, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 10.790378006872853, |
|
"grad_norm": 0.22166849672794342, |
|
"learning_rate": 4.758564096721911e-05, |
|
"loss": 0.0143, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 10.807560137457045, |
|
"grad_norm": 0.27348771691322327, |
|
"learning_rate": 4.7443740270063584e-05, |
|
"loss": 0.0137, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 10.824742268041238, |
|
"grad_norm": 0.2516573667526245, |
|
"learning_rate": 4.7301860212845264e-05, |
|
"loss": 0.0142, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 10.84192439862543, |
|
"grad_norm": 0.28733956813812256, |
|
"learning_rate": 4.7160001941142365e-05, |
|
"loss": 0.0123, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 10.859106529209622, |
|
"grad_norm": 0.3413456678390503, |
|
"learning_rate": 4.7018166600357204e-05, |
|
"loss": 0.0142, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 10.876288659793815, |
|
"grad_norm": 0.3347049653530121, |
|
"learning_rate": 4.687635533570693e-05, |
|
"loss": 0.0137, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 10.893470790378007, |
|
"grad_norm": 0.3175305426120758, |
|
"learning_rate": 4.673456929221434e-05, |
|
"loss": 0.0205, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 10.9106529209622, |
|
"grad_norm": 0.1658443957567215, |
|
"learning_rate": 4.6592809614698556e-05, |
|
"loss": 0.013, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 10.927835051546392, |
|
"grad_norm": 0.12746182084083557, |
|
"learning_rate": 4.645107744776581e-05, |
|
"loss": 0.0126, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 10.945017182130584, |
|
"grad_norm": 0.20812661945819855, |
|
"learning_rate": 4.6309373935800205e-05, |
|
"loss": 0.0149, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 10.962199312714777, |
|
"grad_norm": 0.18740630149841309, |
|
"learning_rate": 4.616770022295451e-05, |
|
"loss": 0.0115, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 10.97938144329897, |
|
"grad_norm": 0.18948382139205933, |
|
"learning_rate": 4.602605745314087e-05, |
|
"loss": 0.0197, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 10.996563573883162, |
|
"grad_norm": 0.4297175109386444, |
|
"learning_rate": 4.5884446770021555e-05, |
|
"loss": 0.016, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 11.013745704467354, |
|
"grad_norm": 0.2623024880886078, |
|
"learning_rate": 4.574286931699978e-05, |
|
"loss": 0.0142, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 11.030927835051546, |
|
"grad_norm": 0.2243795096874237, |
|
"learning_rate": 4.560132623721049e-05, |
|
"loss": 0.0156, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 11.048109965635739, |
|
"grad_norm": 0.20103001594543457, |
|
"learning_rate": 4.545981867351104e-05, |
|
"loss": 0.0116, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 11.065292096219931, |
|
"grad_norm": 0.11890780925750732, |
|
"learning_rate": 4.5318347768472035e-05, |
|
"loss": 0.0081, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 11.082474226804123, |
|
"grad_norm": 0.26694929599761963, |
|
"learning_rate": 4.517691466436807e-05, |
|
"loss": 0.0155, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 11.099656357388316, |
|
"grad_norm": 0.18821591138839722, |
|
"learning_rate": 4.5035520503168586e-05, |
|
"loss": 0.0104, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 11.116838487972508, |
|
"grad_norm": 0.27548283338546753, |
|
"learning_rate": 4.4894166426528524e-05, |
|
"loss": 0.0114, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 11.1340206185567, |
|
"grad_norm": 0.1965043544769287, |
|
"learning_rate": 4.4752853575779185e-05, |
|
"loss": 0.0104, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 11.151202749140893, |
|
"grad_norm": 0.21741580963134766, |
|
"learning_rate": 4.4611583091919e-05, |
|
"loss": 0.0117, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 11.168384879725085, |
|
"grad_norm": 0.1215846836566925, |
|
"learning_rate": 4.4470356115604364e-05, |
|
"loss": 0.0093, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 11.185567010309278, |
|
"grad_norm": 0.1946978121995926, |
|
"learning_rate": 4.432917378714032e-05, |
|
"loss": 0.0194, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 11.20274914089347, |
|
"grad_norm": 0.22516775131225586, |
|
"learning_rate": 4.418803724647144e-05, |
|
"loss": 0.0149, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 11.219931271477662, |
|
"grad_norm": 0.22346094250679016, |
|
"learning_rate": 4.4046947633172566e-05, |
|
"loss": 0.0091, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 11.237113402061855, |
|
"grad_norm": 0.23928742110729218, |
|
"learning_rate": 4.3905906086439704e-05, |
|
"loss": 0.0164, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 11.254295532646047, |
|
"grad_norm": 0.34528031945228577, |
|
"learning_rate": 4.3764913745080695e-05, |
|
"loss": 0.0145, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 11.271477663230241, |
|
"grad_norm": 0.134693905711174, |
|
"learning_rate": 4.362397174750608e-05, |
|
"loss": 0.0076, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 11.288659793814434, |
|
"grad_norm": 0.35505372285842896, |
|
"learning_rate": 4.348308123171994e-05, |
|
"loss": 0.0138, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 11.305841924398626, |
|
"grad_norm": 0.17052118480205536, |
|
"learning_rate": 4.334224333531068e-05, |
|
"loss": 0.012, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 11.323024054982818, |
|
"grad_norm": 0.19103099405765533, |
|
"learning_rate": 4.32014591954418e-05, |
|
"loss": 0.013, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 11.34020618556701, |
|
"grad_norm": 0.20789751410484314, |
|
"learning_rate": 4.306072994884282e-05, |
|
"loss": 0.0091, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 11.357388316151203, |
|
"grad_norm": 0.2590029537677765, |
|
"learning_rate": 4.292005673179998e-05, |
|
"loss": 0.008, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 11.374570446735396, |
|
"grad_norm": 0.16030985116958618, |
|
"learning_rate": 4.277944068014716e-05, |
|
"loss": 0.0142, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 11.391752577319588, |
|
"grad_norm": 0.34259387850761414, |
|
"learning_rate": 4.263888292925664e-05, |
|
"loss": 0.0115, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 11.40893470790378, |
|
"grad_norm": 0.24973253905773163, |
|
"learning_rate": 4.249838461402997e-05, |
|
"loss": 0.0112, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 11.426116838487973, |
|
"grad_norm": 0.40062564611434937, |
|
"learning_rate": 4.235794686888882e-05, |
|
"loss": 0.0111, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 11.443298969072165, |
|
"grad_norm": 0.23818433284759521, |
|
"learning_rate": 4.22175708277658e-05, |
|
"loss": 0.0124, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 11.460481099656358, |
|
"grad_norm": 0.17521892488002777, |
|
"learning_rate": 4.207725762409529e-05, |
|
"loss": 0.0186, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 11.47766323024055, |
|
"grad_norm": 0.2232678085565567, |
|
"learning_rate": 4.19370083908043e-05, |
|
"loss": 0.012, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 11.494845360824742, |
|
"grad_norm": 0.1600189507007599, |
|
"learning_rate": 4.179682426030331e-05, |
|
"loss": 0.0107, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 11.512027491408935, |
|
"grad_norm": 0.3540445566177368, |
|
"learning_rate": 4.1656706364477214e-05, |
|
"loss": 0.0182, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 11.529209621993127, |
|
"grad_norm": 0.39657342433929443, |
|
"learning_rate": 4.151665583467604e-05, |
|
"loss": 0.0157, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 11.54639175257732, |
|
"grad_norm": 0.35762307047843933, |
|
"learning_rate": 4.137667380170591e-05, |
|
"loss": 0.0115, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 11.563573883161512, |
|
"grad_norm": 0.28293389081954956, |
|
"learning_rate": 4.123676139581984e-05, |
|
"loss": 0.0194, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 11.580756013745704, |
|
"grad_norm": 0.1835634410381317, |
|
"learning_rate": 4.1096919746708754e-05, |
|
"loss": 0.0143, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 11.597938144329897, |
|
"grad_norm": 0.1975705772638321, |
|
"learning_rate": 4.095714998349218e-05, |
|
"loss": 0.016, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 11.615120274914089, |
|
"grad_norm": 0.17618152499198914, |
|
"learning_rate": 4.081745323470926e-05, |
|
"loss": 0.0198, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 11.632302405498281, |
|
"grad_norm": 0.1503658890724182, |
|
"learning_rate": 4.067783062830955e-05, |
|
"loss": 0.0156, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 11.649484536082474, |
|
"grad_norm": 0.28605377674102783, |
|
"learning_rate": 4.053828329164407e-05, |
|
"loss": 0.0146, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 11.666666666666666, |
|
"grad_norm": 0.3132267892360687, |
|
"learning_rate": 4.0398812351455955e-05, |
|
"loss": 0.0102, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 11.683848797250858, |
|
"grad_norm": 0.2057536542415619, |
|
"learning_rate": 4.025941893387159e-05, |
|
"loss": 0.0176, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 11.70103092783505, |
|
"grad_norm": 0.2427815943956375, |
|
"learning_rate": 4.012010416439136e-05, |
|
"loss": 0.0132, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 11.718213058419243, |
|
"grad_norm": 0.2931414246559143, |
|
"learning_rate": 3.998086916788069e-05, |
|
"loss": 0.0108, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 11.735395189003437, |
|
"grad_norm": 0.2122270166873932, |
|
"learning_rate": 3.9841715068560835e-05, |
|
"loss": 0.0146, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 11.75257731958763, |
|
"grad_norm": 0.3742753565311432, |
|
"learning_rate": 3.970264298999991e-05, |
|
"loss": 0.0128, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 11.769759450171822, |
|
"grad_norm": 0.13350647687911987, |
|
"learning_rate": 3.956365405510369e-05, |
|
"loss": 0.0105, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 11.786941580756015, |
|
"grad_norm": 0.2694711685180664, |
|
"learning_rate": 3.942474938610677e-05, |
|
"loss": 0.0117, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 11.804123711340207, |
|
"grad_norm": 0.2818795144557953, |
|
"learning_rate": 3.9285930104563234e-05, |
|
"loss": 0.0086, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 11.8213058419244, |
|
"grad_norm": 0.2870750427246094, |
|
"learning_rate": 3.914719733133776e-05, |
|
"loss": 0.012, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 11.838487972508592, |
|
"grad_norm": 0.14688880741596222, |
|
"learning_rate": 3.900855218659655e-05, |
|
"loss": 0.0169, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 11.855670103092784, |
|
"grad_norm": 0.1673170030117035, |
|
"learning_rate": 3.886999578979824e-05, |
|
"loss": 0.011, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 11.872852233676976, |
|
"grad_norm": 0.3427187502384186, |
|
"learning_rate": 3.873152925968495e-05, |
|
"loss": 0.0172, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 11.890034364261169, |
|
"grad_norm": 0.32928958535194397, |
|
"learning_rate": 3.859315371427312e-05, |
|
"loss": 0.0157, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 11.907216494845361, |
|
"grad_norm": 0.2496093362569809, |
|
"learning_rate": 3.8454870270844593e-05, |
|
"loss": 0.0119, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 11.924398625429554, |
|
"grad_norm": 0.15401820838451385, |
|
"learning_rate": 3.831668004593756e-05, |
|
"loss": 0.0115, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 11.941580756013746, |
|
"grad_norm": 0.14115320146083832, |
|
"learning_rate": 3.8178584155337525e-05, |
|
"loss": 0.0106, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 11.958762886597938, |
|
"grad_norm": 0.20622394979000092, |
|
"learning_rate": 3.804058371406831e-05, |
|
"loss": 0.0138, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 11.97594501718213, |
|
"grad_norm": 0.11186587810516357, |
|
"learning_rate": 3.790267983638305e-05, |
|
"loss": 0.0152, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 11.993127147766323, |
|
"grad_norm": 0.18001288175582886, |
|
"learning_rate": 3.776487363575524e-05, |
|
"loss": 0.0098, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 12.010309278350515, |
|
"grad_norm": 0.3391369879245758, |
|
"learning_rate": 3.762716622486965e-05, |
|
"loss": 0.0234, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 12.027491408934708, |
|
"grad_norm": 0.19333554804325104, |
|
"learning_rate": 3.748955871561341e-05, |
|
"loss": 0.0127, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 12.0446735395189, |
|
"grad_norm": 0.2803151607513428, |
|
"learning_rate": 3.735205221906703e-05, |
|
"loss": 0.0141, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 12.061855670103093, |
|
"grad_norm": 0.28076592087745667, |
|
"learning_rate": 3.721464784549543e-05, |
|
"loss": 0.0116, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 12.079037800687285, |
|
"grad_norm": 0.3014523386955261, |
|
"learning_rate": 3.7077346704338935e-05, |
|
"loss": 0.014, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 12.096219931271477, |
|
"grad_norm": 0.15294674038887024, |
|
"learning_rate": 3.694014990420433e-05, |
|
"loss": 0.0133, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 12.11340206185567, |
|
"grad_norm": 0.21652719378471375, |
|
"learning_rate": 3.680305855285593e-05, |
|
"loss": 0.0106, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 12.130584192439862, |
|
"grad_norm": 0.24568617343902588, |
|
"learning_rate": 3.6666073757206686e-05, |
|
"loss": 0.0114, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 12.147766323024054, |
|
"grad_norm": 0.2690240144729614, |
|
"learning_rate": 3.6529196623309115e-05, |
|
"loss": 0.016, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 12.164948453608247, |
|
"grad_norm": 0.21522220969200134, |
|
"learning_rate": 3.6392428256346475e-05, |
|
"loss": 0.0136, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 12.18213058419244, |
|
"grad_norm": 0.25682464241981506, |
|
"learning_rate": 3.625576976062379e-05, |
|
"loss": 0.0119, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 12.199312714776632, |
|
"grad_norm": 0.15618295967578888, |
|
"learning_rate": 3.6119222239559e-05, |
|
"loss": 0.0131, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 12.216494845360824, |
|
"grad_norm": 0.21718665957450867, |
|
"learning_rate": 3.598278679567397e-05, |
|
"loss": 0.0153, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 12.233676975945016, |
|
"grad_norm": 0.17358386516571045, |
|
"learning_rate": 3.5846464530585624e-05, |
|
"loss": 0.0105, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 12.250859106529209, |
|
"grad_norm": 0.2519778907299042, |
|
"learning_rate": 3.571025654499702e-05, |
|
"loss": 0.0157, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 12.268041237113403, |
|
"grad_norm": 0.26433685421943665, |
|
"learning_rate": 3.557416393868859e-05, |
|
"loss": 0.0131, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 12.285223367697595, |
|
"grad_norm": 0.2645297646522522, |
|
"learning_rate": 3.543818781050906e-05, |
|
"loss": 0.0098, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 12.302405498281788, |
|
"grad_norm": 0.23010118305683136, |
|
"learning_rate": 3.530232925836673e-05, |
|
"loss": 0.018, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 12.31958762886598, |
|
"grad_norm": 0.08610416948795319, |
|
"learning_rate": 3.516658937922051e-05, |
|
"loss": 0.0095, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 12.336769759450172, |
|
"grad_norm": 0.14161959290504456, |
|
"learning_rate": 3.503096926907123e-05, |
|
"loss": 0.0153, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 12.353951890034365, |
|
"grad_norm": 0.3274645209312439, |
|
"learning_rate": 3.4895470022952536e-05, |
|
"loss": 0.0118, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 12.371134020618557, |
|
"grad_norm": 0.16021353006362915, |
|
"learning_rate": 3.476009273492225e-05, |
|
"loss": 0.0138, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 12.38831615120275, |
|
"grad_norm": 0.2030124068260193, |
|
"learning_rate": 3.462483849805346e-05, |
|
"loss": 0.0106, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 12.405498281786942, |
|
"grad_norm": 0.15385638177394867, |
|
"learning_rate": 3.4489708404425704e-05, |
|
"loss": 0.0102, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 12.422680412371134, |
|
"grad_norm": 0.10668976604938507, |
|
"learning_rate": 3.4354703545116185e-05, |
|
"loss": 0.0109, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 12.439862542955327, |
|
"grad_norm": 0.16402071714401245, |
|
"learning_rate": 3.421982501019087e-05, |
|
"loss": 0.0108, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 12.457044673539519, |
|
"grad_norm": 0.10426975041627884, |
|
"learning_rate": 3.4085073888695804e-05, |
|
"loss": 0.0103, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 12.474226804123711, |
|
"grad_norm": 0.23913106322288513, |
|
"learning_rate": 3.3950451268648235e-05, |
|
"loss": 0.0103, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 12.491408934707904, |
|
"grad_norm": 0.1630750596523285, |
|
"learning_rate": 3.381595823702784e-05, |
|
"loss": 0.018, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 12.508591065292096, |
|
"grad_norm": 0.3311632573604584, |
|
"learning_rate": 3.368159587976799e-05, |
|
"loss": 0.0089, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 12.525773195876289, |
|
"grad_norm": 0.45006489753723145, |
|
"learning_rate": 3.354736528174696e-05, |
|
"loss": 0.0124, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 12.542955326460481, |
|
"grad_norm": 0.23996764421463013, |
|
"learning_rate": 3.341326752677916e-05, |
|
"loss": 0.0179, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 12.560137457044673, |
|
"grad_norm": 0.14841718971729279, |
|
"learning_rate": 3.3279303697606354e-05, |
|
"loss": 0.0063, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 12.577319587628866, |
|
"grad_norm": 0.09983796626329422, |
|
"learning_rate": 3.314547487588901e-05, |
|
"loss": 0.0096, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 12.594501718213058, |
|
"grad_norm": 0.17602872848510742, |
|
"learning_rate": 3.301178214219744e-05, |
|
"loss": 0.009, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 12.61168384879725, |
|
"grad_norm": 0.24939224123954773, |
|
"learning_rate": 3.2878226576003225e-05, |
|
"loss": 0.013, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 12.628865979381443, |
|
"grad_norm": 0.17927074432373047, |
|
"learning_rate": 3.274480925567036e-05, |
|
"loss": 0.011, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 12.646048109965635, |
|
"grad_norm": 0.17862512171268463, |
|
"learning_rate": 3.261153125844663e-05, |
|
"loss": 0.0149, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 12.663230240549828, |
|
"grad_norm": 0.2447875738143921, |
|
"learning_rate": 3.247839366045485e-05, |
|
"loss": 0.0137, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 12.68041237113402, |
|
"grad_norm": 0.2494247555732727, |
|
"learning_rate": 3.2345397536684286e-05, |
|
"loss": 0.0126, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 12.697594501718212, |
|
"grad_norm": 0.1975736767053604, |
|
"learning_rate": 3.2212543960981845e-05, |
|
"loss": 0.0104, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 12.714776632302405, |
|
"grad_norm": 0.18755072355270386, |
|
"learning_rate": 3.207983400604347e-05, |
|
"loss": 0.009, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 12.731958762886597, |
|
"grad_norm": 0.2701716423034668, |
|
"learning_rate": 3.1947268743405457e-05, |
|
"loss": 0.0136, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 12.749140893470791, |
|
"grad_norm": 0.2599675953388214, |
|
"learning_rate": 3.1814849243435886e-05, |
|
"loss": 0.0217, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 12.766323024054984, |
|
"grad_norm": 0.25833481550216675, |
|
"learning_rate": 3.168257657532584e-05, |
|
"loss": 0.0135, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 12.783505154639176, |
|
"grad_norm": 0.33644336462020874, |
|
"learning_rate": 3.155045180708085e-05, |
|
"loss": 0.0098, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 12.800687285223368, |
|
"grad_norm": 0.12960873544216156, |
|
"learning_rate": 3.1418476005512265e-05, |
|
"loss": 0.0099, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 12.81786941580756, |
|
"grad_norm": 0.09624414145946503, |
|
"learning_rate": 3.1286650236228696e-05, |
|
"loss": 0.0084, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 12.835051546391753, |
|
"grad_norm": 0.175624817609787, |
|
"learning_rate": 3.115497556362727e-05, |
|
"loss": 0.0137, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 12.852233676975946, |
|
"grad_norm": 0.11060360819101334, |
|
"learning_rate": 3.102345305088516e-05, |
|
"loss": 0.0136, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 12.869415807560138, |
|
"grad_norm": 0.1332932859659195, |
|
"learning_rate": 3.089208375995092e-05, |
|
"loss": 0.0141, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 12.88659793814433, |
|
"grad_norm": 0.1730755716562271, |
|
"learning_rate": 3.0760868751536045e-05, |
|
"loss": 0.0111, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 12.903780068728523, |
|
"grad_norm": 0.16571182012557983, |
|
"learning_rate": 3.06298090851062e-05, |
|
"loss": 0.0078, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 12.920962199312715, |
|
"grad_norm": 0.2591513395309448, |
|
"learning_rate": 3.0498905818872836e-05, |
|
"loss": 0.0148, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 12.938144329896907, |
|
"grad_norm": 0.1701243668794632, |
|
"learning_rate": 3.036816000978455e-05, |
|
"loss": 0.0159, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 12.9553264604811, |
|
"grad_norm": 0.29323557019233704, |
|
"learning_rate": 3.0237572713518647e-05, |
|
"loss": 0.0127, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 12.972508591065292, |
|
"grad_norm": 0.2534872889518738, |
|
"learning_rate": 3.0107144984472502e-05, |
|
"loss": 0.0163, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 12.989690721649485, |
|
"grad_norm": 0.1676417738199234, |
|
"learning_rate": 2.9976877875755128e-05, |
|
"loss": 0.0083, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 13.006872852233677, |
|
"grad_norm": 0.11713390052318573, |
|
"learning_rate": 2.984677243917861e-05, |
|
"loss": 0.0082, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 13.02405498281787, |
|
"grad_norm": 0.35955625772476196, |
|
"learning_rate": 2.9716829725249707e-05, |
|
"loss": 0.0125, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 13.041237113402062, |
|
"grad_norm": 0.1874362677335739, |
|
"learning_rate": 2.9587050783161252e-05, |
|
"loss": 0.0112, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 13.058419243986254, |
|
"grad_norm": 0.06738214194774628, |
|
"learning_rate": 2.9457436660783784e-05, |
|
"loss": 0.0138, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 13.075601374570446, |
|
"grad_norm": 0.22004689276218414, |
|
"learning_rate": 2.9327988404657002e-05, |
|
"loss": 0.0105, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 13.092783505154639, |
|
"grad_norm": 0.11634822189807892, |
|
"learning_rate": 2.9198707059981413e-05, |
|
"loss": 0.0073, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 13.109965635738831, |
|
"grad_norm": 0.08798322826623917, |
|
"learning_rate": 2.9069593670609775e-05, |
|
"loss": 0.018, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 13.127147766323024, |
|
"grad_norm": 0.11149155348539352, |
|
"learning_rate": 2.8940649279038768e-05, |
|
"loss": 0.0091, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 13.144329896907216, |
|
"grad_norm": 0.1387196183204651, |
|
"learning_rate": 2.8811874926400483e-05, |
|
"loss": 0.0101, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 13.161512027491408, |
|
"grad_norm": 0.10784903913736343, |
|
"learning_rate": 2.868327165245419e-05, |
|
"loss": 0.0125, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 13.1786941580756, |
|
"grad_norm": 0.293300598859787, |
|
"learning_rate": 2.8554840495577682e-05, |
|
"loss": 0.0099, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 13.195876288659793, |
|
"grad_norm": 0.1339499056339264, |
|
"learning_rate": 2.8426582492759134e-05, |
|
"loss": 0.0089, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 13.213058419243985, |
|
"grad_norm": 0.1549367606639862, |
|
"learning_rate": 2.8298498679588525e-05, |
|
"loss": 0.0108, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 13.230240549828178, |
|
"grad_norm": 0.20458447933197021, |
|
"learning_rate": 2.817059009024953e-05, |
|
"loss": 0.0081, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 13.24742268041237, |
|
"grad_norm": 0.17270691692829132, |
|
"learning_rate": 2.8042857757510877e-05, |
|
"loss": 0.0094, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 13.264604810996564, |
|
"grad_norm": 0.17686305940151215, |
|
"learning_rate": 2.7915302712718227e-05, |
|
"loss": 0.0143, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 13.281786941580757, |
|
"grad_norm": 0.2391350120306015, |
|
"learning_rate": 2.7787925985785733e-05, |
|
"loss": 0.0127, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 13.29896907216495, |
|
"grad_norm": 0.21285896003246307, |
|
"learning_rate": 2.7660728605187776e-05, |
|
"loss": 0.0092, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 13.316151202749142, |
|
"grad_norm": 0.2621266841888428, |
|
"learning_rate": 2.753371159795065e-05, |
|
"loss": 0.0128, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 0.13718031346797943, |
|
"learning_rate": 2.740687598964429e-05, |
|
"loss": 0.0113, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 13.350515463917526, |
|
"grad_norm": 0.10009155422449112, |
|
"learning_rate": 2.7280222804373895e-05, |
|
"loss": 0.0088, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 13.367697594501719, |
|
"grad_norm": 0.18854975700378418, |
|
"learning_rate": 2.7153753064771792e-05, |
|
"loss": 0.0102, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 13.384879725085911, |
|
"grad_norm": 0.3908763825893402, |
|
"learning_rate": 2.702746779198912e-05, |
|
"loss": 0.0139, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 13.402061855670103, |
|
"grad_norm": 0.08939257264137268, |
|
"learning_rate": 2.690136800568752e-05, |
|
"loss": 0.0083, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 13.419243986254296, |
|
"grad_norm": 0.2188216745853424, |
|
"learning_rate": 2.6775454724031036e-05, |
|
"loss": 0.0114, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 13.436426116838488, |
|
"grad_norm": 0.13271217048168182, |
|
"learning_rate": 2.6649728963677783e-05, |
|
"loss": 0.0088, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 13.45360824742268, |
|
"grad_norm": 0.2332095205783844, |
|
"learning_rate": 2.6524191739771815e-05, |
|
"loss": 0.0105, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 13.470790378006873, |
|
"grad_norm": 0.11752445995807648, |
|
"learning_rate": 2.639884406593482e-05, |
|
"loss": 0.0099, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 13.487972508591065, |
|
"grad_norm": 0.22209575772285461, |
|
"learning_rate": 2.627368695425808e-05, |
|
"loss": 0.0098, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 13.505154639175258, |
|
"grad_norm": 0.18378068506717682, |
|
"learning_rate": 2.6148721415294186e-05, |
|
"loss": 0.0099, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 13.52233676975945, |
|
"grad_norm": 0.14152808487415314, |
|
"learning_rate": 2.6023948458048965e-05, |
|
"loss": 0.0102, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 13.539518900343642, |
|
"grad_norm": 0.17036782205104828, |
|
"learning_rate": 2.589936908997321e-05, |
|
"loss": 0.0096, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 13.556701030927835, |
|
"grad_norm": 0.16514594852924347, |
|
"learning_rate": 2.5774984316954676e-05, |
|
"loss": 0.0117, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 13.573883161512027, |
|
"grad_norm": 0.23391450941562653, |
|
"learning_rate": 2.5650795143309902e-05, |
|
"loss": 0.0136, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 13.59106529209622, |
|
"grad_norm": 0.22688448429107666, |
|
"learning_rate": 2.552680257177611e-05, |
|
"loss": 0.0099, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 13.608247422680412, |
|
"grad_norm": 0.15916913747787476, |
|
"learning_rate": 2.5403007603503053e-05, |
|
"loss": 0.0089, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 13.625429553264604, |
|
"grad_norm": 0.31960004568099976, |
|
"learning_rate": 2.527941123804504e-05, |
|
"loss": 0.0105, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 13.642611683848797, |
|
"grad_norm": 0.17471857368946075, |
|
"learning_rate": 2.5156014473352785e-05, |
|
"loss": 0.0133, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 13.65979381443299, |
|
"grad_norm": 0.16793015599250793, |
|
"learning_rate": 2.5032818305765383e-05, |
|
"loss": 0.0084, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 13.676975945017182, |
|
"grad_norm": 0.21041658520698547, |
|
"learning_rate": 2.4909823730002203e-05, |
|
"loss": 0.0088, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 13.694158075601374, |
|
"grad_norm": 0.2167925089597702, |
|
"learning_rate": 2.478703173915497e-05, |
|
"loss": 0.0097, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 13.711340206185566, |
|
"grad_norm": 0.16766490042209625, |
|
"learning_rate": 2.4664443324679653e-05, |
|
"loss": 0.0059, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 13.728522336769759, |
|
"grad_norm": 0.16163405776023865, |
|
"learning_rate": 2.454205947638852e-05, |
|
"loss": 0.0122, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 13.745704467353953, |
|
"grad_norm": 0.2345849871635437, |
|
"learning_rate": 2.4419881182442038e-05, |
|
"loss": 0.0115, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 13.762886597938145, |
|
"grad_norm": 0.10330498963594437, |
|
"learning_rate": 2.429790942934106e-05, |
|
"loss": 0.0097, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 13.780068728522338, |
|
"grad_norm": 0.1268969476222992, |
|
"learning_rate": 2.4176145201918726e-05, |
|
"loss": 0.0094, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 13.79725085910653, |
|
"grad_norm": 0.160488098859787, |
|
"learning_rate": 2.4054589483332597e-05, |
|
"loss": 0.0067, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 13.814432989690722, |
|
"grad_norm": 0.26570194959640503, |
|
"learning_rate": 2.3933243255056597e-05, |
|
"loss": 0.0092, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 13.831615120274915, |
|
"grad_norm": 0.3354252278804779, |
|
"learning_rate": 2.3812107496873248e-05, |
|
"loss": 0.0101, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 13.848797250859107, |
|
"grad_norm": 0.1483275294303894, |
|
"learning_rate": 2.3691183186865668e-05, |
|
"loss": 0.0101, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 13.8659793814433, |
|
"grad_norm": 0.26341909170150757, |
|
"learning_rate": 2.3570471301409618e-05, |
|
"loss": 0.0097, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 13.883161512027492, |
|
"grad_norm": 0.16232207417488098, |
|
"learning_rate": 2.3449972815165773e-05, |
|
"loss": 0.0154, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 13.900343642611684, |
|
"grad_norm": 0.19188156723976135, |
|
"learning_rate": 2.332968870107171e-05, |
|
"loss": 0.0069, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 13.917525773195877, |
|
"grad_norm": 0.14537520706653595, |
|
"learning_rate": 2.320961993033415e-05, |
|
"loss": 0.0079, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 13.934707903780069, |
|
"grad_norm": 0.10598124563694, |
|
"learning_rate": 2.3089767472421e-05, |
|
"loss": 0.0117, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 13.951890034364261, |
|
"grad_norm": 0.15896451473236084, |
|
"learning_rate": 2.297013229505367e-05, |
|
"loss": 0.0134, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 13.969072164948454, |
|
"grad_norm": 0.21453918516635895, |
|
"learning_rate": 2.285071536419916e-05, |
|
"loss": 0.0091, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 13.986254295532646, |
|
"grad_norm": 0.17623427510261536, |
|
"learning_rate": 2.2731517644062312e-05, |
|
"loss": 0.012, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 14.003436426116838, |
|
"grad_norm": 0.10579323023557663, |
|
"learning_rate": 2.2612540097077935e-05, |
|
"loss": 0.0067, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 14.02061855670103, |
|
"grad_norm": 0.1269347220659256, |
|
"learning_rate": 2.2493783683903185e-05, |
|
"loss": 0.0092, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 14.037800687285223, |
|
"grad_norm": 0.11808303743600845, |
|
"learning_rate": 2.237524936340963e-05, |
|
"loss": 0.0105, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 14.054982817869416, |
|
"grad_norm": 0.10431456565856934, |
|
"learning_rate": 2.2256938092675722e-05, |
|
"loss": 0.0069, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 14.072164948453608, |
|
"grad_norm": 0.3295063078403473, |
|
"learning_rate": 2.213885082697883e-05, |
|
"loss": 0.0107, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 14.0893470790378, |
|
"grad_norm": 0.19644266366958618, |
|
"learning_rate": 2.2020988519787733e-05, |
|
"loss": 0.0109, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 14.106529209621993, |
|
"grad_norm": 0.14295251667499542, |
|
"learning_rate": 2.1903352122754732e-05, |
|
"loss": 0.0095, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 14.123711340206185, |
|
"grad_norm": 0.1610773205757141, |
|
"learning_rate": 2.178594258570822e-05, |
|
"loss": 0.0092, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 14.140893470790378, |
|
"grad_norm": 0.18880592286586761, |
|
"learning_rate": 2.1668760856644703e-05, |
|
"loss": 0.0082, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 14.15807560137457, |
|
"grad_norm": 0.1384887844324112, |
|
"learning_rate": 2.1551807881721425e-05, |
|
"loss": 0.0087, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 14.175257731958762, |
|
"grad_norm": 0.19572345912456512, |
|
"learning_rate": 2.1435084605248484e-05, |
|
"loss": 0.0122, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 14.192439862542955, |
|
"grad_norm": 0.11073683947324753, |
|
"learning_rate": 2.131859196968149e-05, |
|
"loss": 0.0079, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 14.209621993127147, |
|
"grad_norm": 0.1309373676776886, |
|
"learning_rate": 2.1202330915613638e-05, |
|
"loss": 0.0089, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 14.22680412371134, |
|
"grad_norm": 0.11186233907938004, |
|
"learning_rate": 2.1086302381768385e-05, |
|
"loss": 0.0109, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 14.243986254295532, |
|
"grad_norm": 0.23258423805236816, |
|
"learning_rate": 2.0970507304991656e-05, |
|
"loss": 0.0145, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 14.261168384879726, |
|
"grad_norm": 0.195637047290802, |
|
"learning_rate": 2.0854946620244502e-05, |
|
"loss": 0.0054, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 14.278350515463918, |
|
"grad_norm": 0.17508986592292786, |
|
"learning_rate": 2.0739621260595315e-05, |
|
"loss": 0.0071, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 14.29553264604811, |
|
"grad_norm": 0.07197950035333633, |
|
"learning_rate": 2.0624532157212483e-05, |
|
"loss": 0.005, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 14.312714776632303, |
|
"grad_norm": 0.167429119348526, |
|
"learning_rate": 2.0509680239356728e-05, |
|
"loss": 0.0119, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 14.329896907216495, |
|
"grad_norm": 0.1402851641178131, |
|
"learning_rate": 2.0395066434373767e-05, |
|
"loss": 0.0142, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 14.347079037800688, |
|
"grad_norm": 0.08017238229513168, |
|
"learning_rate": 2.028069166768663e-05, |
|
"loss": 0.0086, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 14.36426116838488, |
|
"grad_norm": 0.3312987685203552, |
|
"learning_rate": 2.016655686278836e-05, |
|
"loss": 0.0123, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 14.381443298969073, |
|
"grad_norm": 0.1905941665172577, |
|
"learning_rate": 2.005266294123443e-05, |
|
"loss": 0.0119, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 14.398625429553265, |
|
"grad_norm": 0.17473655939102173, |
|
"learning_rate": 1.9939010822635384e-05, |
|
"loss": 0.0098, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 14.415807560137457, |
|
"grad_norm": 0.2841387689113617, |
|
"learning_rate": 1.982560142464939e-05, |
|
"loss": 0.007, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 14.43298969072165, |
|
"grad_norm": 0.15247130393981934, |
|
"learning_rate": 1.9712435662974816e-05, |
|
"loss": 0.008, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 14.450171821305842, |
|
"grad_norm": 0.11507634073495865, |
|
"learning_rate": 1.9599514451342816e-05, |
|
"loss": 0.0076, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 14.467353951890034, |
|
"grad_norm": 0.20362359285354614, |
|
"learning_rate": 1.9486838701510012e-05, |
|
"loss": 0.0072, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 14.484536082474227, |
|
"grad_norm": 0.30303680896759033, |
|
"learning_rate": 1.937440932325112e-05, |
|
"loss": 0.0061, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 14.50171821305842, |
|
"grad_norm": 0.18510393798351288, |
|
"learning_rate": 1.926222722435152e-05, |
|
"loss": 0.008, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 14.518900343642612, |
|
"grad_norm": 0.20415428280830383, |
|
"learning_rate": 1.9150293310600042e-05, |
|
"loss": 0.0094, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 14.536082474226804, |
|
"grad_norm": 0.17331425845623016, |
|
"learning_rate": 1.903860848578159e-05, |
|
"loss": 0.006, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 14.553264604810996, |
|
"grad_norm": 0.18844066560268402, |
|
"learning_rate": 1.8927173651669877e-05, |
|
"loss": 0.008, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 14.570446735395189, |
|
"grad_norm": 0.14389431476593018, |
|
"learning_rate": 1.8815989708020055e-05, |
|
"loss": 0.0097, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 14.587628865979381, |
|
"grad_norm": 0.10053612291812897, |
|
"learning_rate": 1.8705057552561595e-05, |
|
"loss": 0.0081, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 14.604810996563574, |
|
"grad_norm": 0.1731092631816864, |
|
"learning_rate": 1.8594378080990915e-05, |
|
"loss": 0.0077, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 14.621993127147766, |
|
"grad_norm": 0.1470867097377777, |
|
"learning_rate": 1.8483952186964237e-05, |
|
"loss": 0.0165, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 14.639175257731958, |
|
"grad_norm": 0.1546664535999298, |
|
"learning_rate": 1.8373780762090266e-05, |
|
"loss": 0.012, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 14.65635738831615, |
|
"grad_norm": 0.2409757524728775, |
|
"learning_rate": 1.82638646959231e-05, |
|
"loss": 0.0074, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 14.673539518900343, |
|
"grad_norm": 0.1771342009305954, |
|
"learning_rate": 1.8154204875955e-05, |
|
"loss": 0.013, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 14.690721649484535, |
|
"grad_norm": 0.11487053334712982, |
|
"learning_rate": 1.804480218760922e-05, |
|
"loss": 0.0074, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 14.707903780068728, |
|
"grad_norm": 0.18041536211967468, |
|
"learning_rate": 1.793565751423284e-05, |
|
"loss": 0.0074, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 14.72508591065292, |
|
"grad_norm": 0.1591220200061798, |
|
"learning_rate": 1.782677173708968e-05, |
|
"loss": 0.0066, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 14.742268041237114, |
|
"grad_norm": 0.21568642556667328, |
|
"learning_rate": 1.771814573535317e-05, |
|
"loss": 0.0079, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 14.759450171821307, |
|
"grad_norm": 0.12885995209217072, |
|
"learning_rate": 1.7609780386099234e-05, |
|
"loss": 0.0092, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 14.776632302405499, |
|
"grad_norm": 0.158493772149086, |
|
"learning_rate": 1.750167656429918e-05, |
|
"loss": 0.0073, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 14.793814432989691, |
|
"grad_norm": 0.14277073740959167, |
|
"learning_rate": 1.739383514281273e-05, |
|
"loss": 0.0096, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 14.810996563573884, |
|
"grad_norm": 0.1386091113090515, |
|
"learning_rate": 1.7286256992380888e-05, |
|
"loss": 0.0069, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 14.828178694158076, |
|
"grad_norm": 0.12749871611595154, |
|
"learning_rate": 1.7178942981618945e-05, |
|
"loss": 0.0127, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 14.845360824742269, |
|
"grad_norm": 0.18113838136196136, |
|
"learning_rate": 1.707189397700943e-05, |
|
"loss": 0.0057, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 14.862542955326461, |
|
"grad_norm": 0.2324298769235611, |
|
"learning_rate": 1.696511084289516e-05, |
|
"loss": 0.0078, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 14.879725085910653, |
|
"grad_norm": 0.09419187903404236, |
|
"learning_rate": 1.6858594441472242e-05, |
|
"loss": 0.0049, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 14.896907216494846, |
|
"grad_norm": 0.17267560958862305, |
|
"learning_rate": 1.6752345632783135e-05, |
|
"loss": 0.0081, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 14.914089347079038, |
|
"grad_norm": 0.24412667751312256, |
|
"learning_rate": 1.664636527470961e-05, |
|
"loss": 0.0075, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 14.93127147766323, |
|
"grad_norm": 0.10746461153030396, |
|
"learning_rate": 1.6540654222965973e-05, |
|
"loss": 0.0081, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 14.948453608247423, |
|
"grad_norm": 0.2459520846605301, |
|
"learning_rate": 1.6435213331092027e-05, |
|
"loss": 0.0083, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 14.965635738831615, |
|
"grad_norm": 0.26834601163864136, |
|
"learning_rate": 1.6330043450446265e-05, |
|
"loss": 0.0061, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 14.982817869415808, |
|
"grad_norm": 0.20477992296218872, |
|
"learning_rate": 1.6225145430198918e-05, |
|
"loss": 0.008, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.19616572558879852, |
|
"learning_rate": 1.6120520117325184e-05, |
|
"loss": 0.0074, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 15.017182130584192, |
|
"grad_norm": 0.14601653814315796, |
|
"learning_rate": 1.6016168356598343e-05, |
|
"loss": 0.005, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 15.034364261168385, |
|
"grad_norm": 0.11024006456136703, |
|
"learning_rate": 1.59120909905829e-05, |
|
"loss": 0.0068, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 15.051546391752577, |
|
"grad_norm": 0.21115554869174957, |
|
"learning_rate": 1.580828885962787e-05, |
|
"loss": 0.0092, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 15.06872852233677, |
|
"grad_norm": 0.23726613819599152, |
|
"learning_rate": 1.5704762801859916e-05, |
|
"loss": 0.0091, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 15.085910652920962, |
|
"grad_norm": 0.08648664504289627, |
|
"learning_rate": 1.560151365317665e-05, |
|
"loss": 0.0112, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 15.103092783505154, |
|
"grad_norm": 0.14515356719493866, |
|
"learning_rate": 1.549854224723978e-05, |
|
"loss": 0.0142, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 15.120274914089347, |
|
"grad_norm": 0.28362399339675903, |
|
"learning_rate": 1.5395849415468505e-05, |
|
"loss": 0.0138, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 15.137457044673539, |
|
"grad_norm": 0.16832397878170013, |
|
"learning_rate": 1.529343598703267e-05, |
|
"loss": 0.0146, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 15.154639175257731, |
|
"grad_norm": 0.16828805208206177, |
|
"learning_rate": 1.5191302788846256e-05, |
|
"loss": 0.0061, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 15.171821305841924, |
|
"grad_norm": 0.12565237283706665, |
|
"learning_rate": 1.508945064556047e-05, |
|
"loss": 0.0054, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 15.189003436426116, |
|
"grad_norm": 0.06071079894900322, |
|
"learning_rate": 1.498788037955728e-05, |
|
"loss": 0.0131, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 15.206185567010309, |
|
"grad_norm": 0.10003683716058731, |
|
"learning_rate": 1.4886592810942629e-05, |
|
"loss": 0.0128, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 15.223367697594501, |
|
"grad_norm": 0.30496570467948914, |
|
"learning_rate": 1.4785588757539991e-05, |
|
"loss": 0.0083, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 15.240549828178693, |
|
"grad_norm": 0.24895748496055603, |
|
"learning_rate": 1.4684869034883554e-05, |
|
"loss": 0.0106, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 15.257731958762886, |
|
"grad_norm": 0.21190665662288666, |
|
"learning_rate": 1.458443445621182e-05, |
|
"loss": 0.0116, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 15.27491408934708, |
|
"grad_norm": 0.29413625597953796, |
|
"learning_rate": 1.448428583246088e-05, |
|
"loss": 0.0078, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 15.292096219931272, |
|
"grad_norm": 0.11148206889629364, |
|
"learning_rate": 1.4384423972258055e-05, |
|
"loss": 0.0073, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 15.309278350515465, |
|
"grad_norm": 0.12171918898820877, |
|
"learning_rate": 1.4284849681915158e-05, |
|
"loss": 0.0073, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 15.326460481099657, |
|
"grad_norm": 0.13666512072086334, |
|
"learning_rate": 1.4185563765422155e-05, |
|
"loss": 0.0072, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 15.34364261168385, |
|
"grad_norm": 0.10317881405353546, |
|
"learning_rate": 1.4086567024440527e-05, |
|
"loss": 0.0059, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 15.360824742268042, |
|
"grad_norm": 0.12942399084568024, |
|
"learning_rate": 1.398786025829698e-05, |
|
"loss": 0.006, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 15.378006872852234, |
|
"grad_norm": 0.22688932716846466, |
|
"learning_rate": 1.3889444263976786e-05, |
|
"loss": 0.0074, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 15.395189003436426, |
|
"grad_norm": 0.184346541762352, |
|
"learning_rate": 1.3791319836117506e-05, |
|
"loss": 0.0094, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 15.412371134020619, |
|
"grad_norm": 0.3060619533061981, |
|
"learning_rate": 1.3693487767002445e-05, |
|
"loss": 0.0066, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 15.429553264604811, |
|
"grad_norm": 0.2948829233646393, |
|
"learning_rate": 1.3595948846554446e-05, |
|
"loss": 0.0098, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 15.446735395189004, |
|
"grad_norm": 0.22396834194660187, |
|
"learning_rate": 1.3498703862329254e-05, |
|
"loss": 0.0088, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 15.463917525773196, |
|
"grad_norm": 0.10084215551614761, |
|
"learning_rate": 1.3401753599509397e-05, |
|
"loss": 0.0075, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 15.481099656357388, |
|
"grad_norm": 0.24087756872177124, |
|
"learning_rate": 1.3305098840897646e-05, |
|
"loss": 0.0113, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 15.49828178694158, |
|
"grad_norm": 0.10929588228464127, |
|
"learning_rate": 1.3208740366910904e-05, |
|
"loss": 0.0079, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 15.515463917525773, |
|
"grad_norm": 0.10467175394296646, |
|
"learning_rate": 1.3112678955573693e-05, |
|
"loss": 0.0078, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 15.532646048109966, |
|
"grad_norm": 0.16169194877147675, |
|
"learning_rate": 1.3016915382512029e-05, |
|
"loss": 0.0073, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 15.549828178694158, |
|
"grad_norm": 0.23535805940628052, |
|
"learning_rate": 1.2921450420947057e-05, |
|
"loss": 0.009, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 15.56701030927835, |
|
"grad_norm": 0.14566457271575928, |
|
"learning_rate": 1.2826284841688885e-05, |
|
"loss": 0.0091, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 15.584192439862543, |
|
"grad_norm": 0.05960577726364136, |
|
"learning_rate": 1.2731419413130325e-05, |
|
"loss": 0.0077, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 15.601374570446735, |
|
"grad_norm": 0.12475630640983582, |
|
"learning_rate": 1.2636854901240681e-05, |
|
"loss": 0.0071, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 15.618556701030927, |
|
"grad_norm": 0.08590589463710785, |
|
"learning_rate": 1.2542592069559556e-05, |
|
"loss": 0.0058, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 15.63573883161512, |
|
"grad_norm": 0.13284145295619965, |
|
"learning_rate": 1.2448631679190736e-05, |
|
"loss": 0.0061, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 15.652920962199312, |
|
"grad_norm": 0.13162577152252197, |
|
"learning_rate": 1.2354974488796017e-05, |
|
"loss": 0.0059, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 15.670103092783505, |
|
"grad_norm": 0.09567181766033173, |
|
"learning_rate": 1.2261621254589022e-05, |
|
"loss": 0.0066, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 15.687285223367697, |
|
"grad_norm": 0.0930383950471878, |
|
"learning_rate": 1.2168572730329214e-05, |
|
"loss": 0.0095, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 15.70446735395189, |
|
"grad_norm": 0.09626749902963638, |
|
"learning_rate": 1.2075829667315708e-05, |
|
"loss": 0.0073, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 15.721649484536082, |
|
"grad_norm": 0.18719489872455597, |
|
"learning_rate": 1.1983392814381273e-05, |
|
"loss": 0.007, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 15.738831615120276, |
|
"grad_norm": 0.1957959532737732, |
|
"learning_rate": 1.1891262917886198e-05, |
|
"loss": 0.0083, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 15.756013745704468, |
|
"grad_norm": 0.21053771674633026, |
|
"learning_rate": 1.1799440721712368e-05, |
|
"loss": 0.0083, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 15.77319587628866, |
|
"grad_norm": 0.2002599984407425, |
|
"learning_rate": 1.170792696725721e-05, |
|
"loss": 0.0065, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 15.790378006872853, |
|
"grad_norm": 0.19797247648239136, |
|
"learning_rate": 1.1616722393427704e-05, |
|
"loss": 0.0062, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 15.807560137457045, |
|
"grad_norm": 0.24263660609722137, |
|
"learning_rate": 1.1525827736634398e-05, |
|
"loss": 0.0067, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 15.824742268041238, |
|
"grad_norm": 0.30493855476379395, |
|
"learning_rate": 1.1435243730785511e-05, |
|
"loss": 0.0128, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 15.84192439862543, |
|
"grad_norm": 0.09943480044603348, |
|
"learning_rate": 1.1344971107280978e-05, |
|
"loss": 0.0042, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 15.859106529209622, |
|
"grad_norm": 0.08174432069063187, |
|
"learning_rate": 1.125501059500656e-05, |
|
"loss": 0.0062, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 15.876288659793815, |
|
"grad_norm": 0.15180669724941254, |
|
"learning_rate": 1.1165362920327898e-05, |
|
"loss": 0.012, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 15.893470790378007, |
|
"grad_norm": 0.09688429534435272, |
|
"learning_rate": 1.1076028807084748e-05, |
|
"loss": 0.0074, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 15.9106529209622, |
|
"grad_norm": 0.0951455757021904, |
|
"learning_rate": 1.0987008976585073e-05, |
|
"loss": 0.0083, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 15.927835051546392, |
|
"grad_norm": 0.08295347541570663, |
|
"learning_rate": 1.0898304147599231e-05, |
|
"loss": 0.0066, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 15.945017182130584, |
|
"grad_norm": 0.20688819885253906, |
|
"learning_rate": 1.0809915036354152e-05, |
|
"loss": 0.0095, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 15.962199312714777, |
|
"grad_norm": 0.09953152388334274, |
|
"learning_rate": 1.0721842356527595e-05, |
|
"loss": 0.0052, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 15.97938144329897, |
|
"grad_norm": 0.27744606137275696, |
|
"learning_rate": 1.063408681924236e-05, |
|
"loss": 0.0105, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 15.996563573883162, |
|
"grad_norm": 0.217054545879364, |
|
"learning_rate": 1.0546649133060583e-05, |
|
"loss": 0.0076, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 16.013745704467354, |
|
"grad_norm": 0.15550248324871063, |
|
"learning_rate": 1.0459530003977908e-05, |
|
"loss": 0.0195, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 16.030927835051546, |
|
"grad_norm": 0.3384007513523102, |
|
"learning_rate": 1.0372730135417936e-05, |
|
"loss": 0.0066, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 16.04810996563574, |
|
"grad_norm": 0.23240487277507782, |
|
"learning_rate": 1.0286250228226434e-05, |
|
"loss": 0.0064, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 16.06529209621993, |
|
"grad_norm": 0.13849616050720215, |
|
"learning_rate": 1.0200090980665739e-05, |
|
"loss": 0.006, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 16.082474226804123, |
|
"grad_norm": 0.09946257621049881, |
|
"learning_rate": 1.0114253088409054e-05, |
|
"loss": 0.0058, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 16.099656357388316, |
|
"grad_norm": 0.17231334745883942, |
|
"learning_rate": 1.0028737244534914e-05, |
|
"loss": 0.0123, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 16.116838487972508, |
|
"grad_norm": 0.23527809977531433, |
|
"learning_rate": 9.943544139521521e-06, |
|
"loss": 0.0047, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 16.1340206185567, |
|
"grad_norm": 0.11022651195526123, |
|
"learning_rate": 9.858674461241229e-06, |
|
"loss": 0.0127, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 16.151202749140893, |
|
"grad_norm": 0.09669523686170578, |
|
"learning_rate": 9.774128894954904e-06, |
|
"loss": 0.0137, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 16.168384879725085, |
|
"grad_norm": 0.21540790796279907, |
|
"learning_rate": 9.68990812330648e-06, |
|
"loss": 0.0063, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 16.185567010309278, |
|
"grad_norm": 0.10020115226507187, |
|
"learning_rate": 9.606012826317417e-06, |
|
"loss": 0.0051, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 16.20274914089347, |
|
"grad_norm": 0.21819765865802765, |
|
"learning_rate": 9.522443681381172e-06, |
|
"loss": 0.0073, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 16.219931271477662, |
|
"grad_norm": 0.15638329088687897, |
|
"learning_rate": 9.439201363257778e-06, |
|
"loss": 0.008, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 16.237113402061855, |
|
"grad_norm": 0.23262475430965424, |
|
"learning_rate": 9.356286544068394e-06, |
|
"loss": 0.0093, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 16.254295532646047, |
|
"grad_norm": 0.05870979651808739, |
|
"learning_rate": 9.273699893289862e-06, |
|
"loss": 0.0041, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 16.27147766323024, |
|
"grad_norm": 0.1267542541027069, |
|
"learning_rate": 9.191442077749257e-06, |
|
"loss": 0.0068, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 16.288659793814432, |
|
"grad_norm": 0.11667685955762863, |
|
"learning_rate": 9.10951376161861e-06, |
|
"loss": 0.0037, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 16.305841924398624, |
|
"grad_norm": 0.13009728491306305, |
|
"learning_rate": 9.027915606409427e-06, |
|
"loss": 0.0088, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 16.323024054982817, |
|
"grad_norm": 0.2052125632762909, |
|
"learning_rate": 8.946648270967473e-06, |
|
"loss": 0.0058, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 16.34020618556701, |
|
"grad_norm": 0.06314068287611008, |
|
"learning_rate": 8.86571241146732e-06, |
|
"loss": 0.0074, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 16.3573883161512, |
|
"grad_norm": 0.1686325967311859, |
|
"learning_rate": 8.785108681407156e-06, |
|
"loss": 0.0075, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 16.374570446735394, |
|
"grad_norm": 0.07260609418153763, |
|
"learning_rate": 8.704837731603415e-06, |
|
"loss": 0.0055, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 16.391752577319586, |
|
"grad_norm": 0.1643422245979309, |
|
"learning_rate": 8.624900210185648e-06, |
|
"loss": 0.0089, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 16.40893470790378, |
|
"grad_norm": 0.20184507966041565, |
|
"learning_rate": 8.545296762591144e-06, |
|
"loss": 0.01, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 16.42611683848797, |
|
"grad_norm": 0.07979606091976166, |
|
"learning_rate": 8.466028031559836e-06, |
|
"loss": 0.0054, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 16.443298969072163, |
|
"grad_norm": 0.10305003076791763, |
|
"learning_rate": 8.387094657129013e-06, |
|
"loss": 0.0101, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 16.460481099656356, |
|
"grad_norm": 0.14057329297065735, |
|
"learning_rate": 8.308497276628279e-06, |
|
"loss": 0.0038, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 16.477663230240548, |
|
"grad_norm": 0.20156535506248474, |
|
"learning_rate": 8.230236524674256e-06, |
|
"loss": 0.0077, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 16.49484536082474, |
|
"grad_norm": 0.12216826528310776, |
|
"learning_rate": 8.152313033165592e-06, |
|
"loss": 0.0114, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 16.512027491408936, |
|
"grad_norm": 0.22437122464179993, |
|
"learning_rate": 8.074727431277745e-06, |
|
"loss": 0.006, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 16.52920962199313, |
|
"grad_norm": 0.21979045867919922, |
|
"learning_rate": 7.99748034545803e-06, |
|
"loss": 0.0063, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 16.54639175257732, |
|
"grad_norm": 0.13207381963729858, |
|
"learning_rate": 7.920572399420428e-06, |
|
"loss": 0.0069, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 16.563573883161514, |
|
"grad_norm": 0.3022846579551697, |
|
"learning_rate": 7.844004214140665e-06, |
|
"loss": 0.0051, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 16.580756013745706, |
|
"grad_norm": 0.16219815611839294, |
|
"learning_rate": 7.76777640785108e-06, |
|
"loss": 0.0094, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 16.5979381443299, |
|
"grad_norm": 0.2360786646604538, |
|
"learning_rate": 7.691889596035784e-06, |
|
"loss": 0.0075, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 16.61512027491409, |
|
"grad_norm": 0.2144838273525238, |
|
"learning_rate": 7.616344391425534e-06, |
|
"loss": 0.0158, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 16.632302405498283, |
|
"grad_norm": 0.17959503829479218, |
|
"learning_rate": 7.541141403992902e-06, |
|
"loss": 0.0168, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 16.649484536082475, |
|
"grad_norm": 0.06356091052293777, |
|
"learning_rate": 7.4662812409472705e-06, |
|
"loss": 0.0054, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 0.16825449466705322, |
|
"learning_rate": 7.391764506729992e-06, |
|
"loss": 0.0133, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 16.68384879725086, |
|
"grad_norm": 0.06855875998735428, |
|
"learning_rate": 7.317591803009472e-06, |
|
"loss": 0.0053, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 16.701030927835053, |
|
"grad_norm": 0.07593529671430588, |
|
"learning_rate": 7.243763728676328e-06, |
|
"loss": 0.0102, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 16.718213058419245, |
|
"grad_norm": 0.2639977037906647, |
|
"learning_rate": 7.170280879838515e-06, |
|
"loss": 0.0063, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 16.735395189003437, |
|
"grad_norm": 0.1285523623228073, |
|
"learning_rate": 7.097143849816584e-06, |
|
"loss": 0.0066, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 16.75257731958763, |
|
"grad_norm": 0.07977993786334991, |
|
"learning_rate": 7.024353229138836e-06, |
|
"loss": 0.0098, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 16.769759450171822, |
|
"grad_norm": 0.08578211069107056, |
|
"learning_rate": 6.951909605536544e-06, |
|
"loss": 0.0085, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 16.786941580756015, |
|
"grad_norm": 0.12852418422698975, |
|
"learning_rate": 6.879813563939269e-06, |
|
"loss": 0.0053, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 16.804123711340207, |
|
"grad_norm": 0.16339828073978424, |
|
"learning_rate": 6.808065686470083e-06, |
|
"loss": 0.0065, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 16.8213058419244, |
|
"grad_norm": 0.14644475281238556, |
|
"learning_rate": 6.736666552440896e-06, |
|
"loss": 0.0091, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 16.83848797250859, |
|
"grad_norm": 0.1779240071773529, |
|
"learning_rate": 6.665616738347741e-06, |
|
"loss": 0.0051, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 16.855670103092784, |
|
"grad_norm": 0.2106473445892334, |
|
"learning_rate": 6.5949168178661755e-06, |
|
"loss": 0.0063, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 16.872852233676976, |
|
"grad_norm": 0.09443774074316025, |
|
"learning_rate": 6.524567361846612e-06, |
|
"loss": 0.007, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 16.89003436426117, |
|
"grad_norm": 0.1403152197599411, |
|
"learning_rate": 6.454568938309724e-06, |
|
"loss": 0.0055, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 16.90721649484536, |
|
"grad_norm": 0.2038644254207611, |
|
"learning_rate": 6.384922112441821e-06, |
|
"loss": 0.0047, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 16.924398625429554, |
|
"grad_norm": 0.22020995616912842, |
|
"learning_rate": 6.315627446590367e-06, |
|
"loss": 0.0052, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 16.941580756013746, |
|
"grad_norm": 0.07635272294282913, |
|
"learning_rate": 6.246685500259352e-06, |
|
"loss": 0.0073, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 16.95876288659794, |
|
"grad_norm": 0.18454794585704803, |
|
"learning_rate": 6.1780968301048406e-06, |
|
"loss": 0.0072, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 16.97594501718213, |
|
"grad_norm": 0.3649043142795563, |
|
"learning_rate": 6.10986198993041e-06, |
|
"loss": 0.0048, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 16.993127147766323, |
|
"grad_norm": 0.09747838973999023, |
|
"learning_rate": 6.041981530682756e-06, |
|
"loss": 0.0071, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 17.010309278350515, |
|
"grad_norm": 0.170127272605896, |
|
"learning_rate": 5.9744560004471874e-06, |
|
"loss": 0.0089, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 17.027491408934708, |
|
"grad_norm": 0.11401594430208206, |
|
"learning_rate": 5.907285944443241e-06, |
|
"loss": 0.0064, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 17.0446735395189, |
|
"grad_norm": 0.09278935939073563, |
|
"learning_rate": 5.840471905020223e-06, |
|
"loss": 0.0038, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 17.061855670103093, |
|
"grad_norm": 0.09396050870418549, |
|
"learning_rate": 5.774014421652879e-06, |
|
"loss": 0.0088, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 17.079037800687285, |
|
"grad_norm": 0.22349518537521362, |
|
"learning_rate": 5.707914030937045e-06, |
|
"loss": 0.0116, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 17.096219931271477, |
|
"grad_norm": 0.11371111124753952, |
|
"learning_rate": 5.642171266585272e-06, |
|
"loss": 0.0071, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 17.11340206185567, |
|
"grad_norm": 0.11897526681423187, |
|
"learning_rate": 5.576786659422534e-06, |
|
"loss": 0.0124, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 17.130584192439862, |
|
"grad_norm": 0.11207327991724014, |
|
"learning_rate": 5.51176073738196e-06, |
|
"loss": 0.0056, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 17.147766323024054, |
|
"grad_norm": 0.11493603140115738, |
|
"learning_rate": 5.447094025500554e-06, |
|
"loss": 0.0057, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 17.164948453608247, |
|
"grad_norm": 0.15954163670539856, |
|
"learning_rate": 5.3827870459149665e-06, |
|
"loss": 0.0062, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 17.18213058419244, |
|
"grad_norm": 0.14759565889835358, |
|
"learning_rate": 5.318840317857248e-06, |
|
"loss": 0.0072, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 17.19931271477663, |
|
"grad_norm": 0.12291218340396881, |
|
"learning_rate": 5.2552543576506965e-06, |
|
"loss": 0.0049, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 17.216494845360824, |
|
"grad_norm": 0.15934151411056519, |
|
"learning_rate": 5.192029678705679e-06, |
|
"loss": 0.0117, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 17.233676975945016, |
|
"grad_norm": 0.06648825109004974, |
|
"learning_rate": 5.1291667915154774e-06, |
|
"loss": 0.0056, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 17.25085910652921, |
|
"grad_norm": 0.08082503825426102, |
|
"learning_rate": 5.066666203652148e-06, |
|
"loss": 0.0076, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 17.2680412371134, |
|
"grad_norm": 0.06852155178785324, |
|
"learning_rate": 5.004528419762455e-06, |
|
"loss": 0.0075, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 17.285223367697593, |
|
"grad_norm": 0.22112947702407837, |
|
"learning_rate": 4.9427539415638106e-06, |
|
"loss": 0.0064, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 17.302405498281786, |
|
"grad_norm": 0.11393307894468307, |
|
"learning_rate": 4.88134326784015e-06, |
|
"loss": 0.0054, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 17.31958762886598, |
|
"grad_norm": 0.07953692972660065, |
|
"learning_rate": 4.8202968944379865e-06, |
|
"loss": 0.0069, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 17.33676975945017, |
|
"grad_norm": 0.12307379394769669, |
|
"learning_rate": 4.759615314262361e-06, |
|
"loss": 0.0086, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 17.353951890034363, |
|
"grad_norm": 0.2868382930755615, |
|
"learning_rate": 4.6992990172728846e-06, |
|
"loss": 0.0067, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 17.371134020618555, |
|
"grad_norm": 0.13150477409362793, |
|
"learning_rate": 4.639348490479755e-06, |
|
"loss": 0.0052, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 17.388316151202748, |
|
"grad_norm": 0.13385729491710663, |
|
"learning_rate": 4.579764217939863e-06, |
|
"loss": 0.0085, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 17.40549828178694, |
|
"grad_norm": 0.05345318466424942, |
|
"learning_rate": 4.5205466807528294e-06, |
|
"loss": 0.0053, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 17.422680412371133, |
|
"grad_norm": 0.09881814569234848, |
|
"learning_rate": 4.4616963570572105e-06, |
|
"loss": 0.0037, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 17.439862542955325, |
|
"grad_norm": 0.15886837244033813, |
|
"learning_rate": 4.403213722026516e-06, |
|
"loss": 0.0094, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 17.457044673539517, |
|
"grad_norm": 0.046078938990831375, |
|
"learning_rate": 4.345099247865486e-06, |
|
"loss": 0.0066, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 17.47422680412371, |
|
"grad_norm": 0.31892022490501404, |
|
"learning_rate": 4.287353403806188e-06, |
|
"loss": 0.0071, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 17.491408934707902, |
|
"grad_norm": 0.23924608528614044, |
|
"learning_rate": 4.229976656104323e-06, |
|
"loss": 0.0065, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 17.508591065292094, |
|
"grad_norm": 0.10865090042352676, |
|
"learning_rate": 4.172969468035359e-06, |
|
"loss": 0.0059, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 17.52577319587629, |
|
"grad_norm": 0.10951688885688782, |
|
"learning_rate": 4.116332299890868e-06, |
|
"loss": 0.0114, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 17.542955326460483, |
|
"grad_norm": 0.08340981602668762, |
|
"learning_rate": 4.060065608974744e-06, |
|
"loss": 0.006, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 17.560137457044675, |
|
"grad_norm": 0.2836835980415344, |
|
"learning_rate": 4.0041698495996095e-06, |
|
"loss": 0.006, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 17.577319587628867, |
|
"grad_norm": 0.34195101261138916, |
|
"learning_rate": 3.948645473083018e-06, |
|
"loss": 0.0111, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 17.59450171821306, |
|
"grad_norm": 0.136323943734169, |
|
"learning_rate": 3.893492927743925e-06, |
|
"loss": 0.0095, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 17.611683848797252, |
|
"grad_norm": 0.19916993379592896, |
|
"learning_rate": 3.838712658898974e-06, |
|
"loss": 0.0065, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 17.628865979381445, |
|
"grad_norm": 0.10761203616857529, |
|
"learning_rate": 3.7843051088590153e-06, |
|
"loss": 0.0042, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 17.646048109965637, |
|
"grad_norm": 0.08006154000759125, |
|
"learning_rate": 3.730270716925394e-06, |
|
"loss": 0.0062, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 17.66323024054983, |
|
"grad_norm": 0.07104959338903427, |
|
"learning_rate": 3.67660991938652e-06, |
|
"loss": 0.005, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 17.68041237113402, |
|
"grad_norm": 0.19979846477508545, |
|
"learning_rate": 3.6233231495142626e-06, |
|
"loss": 0.0053, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 17.697594501718214, |
|
"grad_norm": 0.20565828680992126, |
|
"learning_rate": 3.5704108375605448e-06, |
|
"loss": 0.0055, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 17.714776632302407, |
|
"grad_norm": 0.17104719579219818, |
|
"learning_rate": 3.5178734107537637e-06, |
|
"loss": 0.0105, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 17.7319587628866, |
|
"grad_norm": 0.08832945674657822, |
|
"learning_rate": 3.4657112932954204e-06, |
|
"loss": 0.0056, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 17.74914089347079, |
|
"grad_norm": 0.053497232496738434, |
|
"learning_rate": 3.4139249063566415e-06, |
|
"loss": 0.0037, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 17.766323024054984, |
|
"grad_norm": 0.05350416153669357, |
|
"learning_rate": 3.36251466807484e-06, |
|
"loss": 0.0058, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 17.783505154639176, |
|
"grad_norm": 0.19712647795677185, |
|
"learning_rate": 3.311480993550259e-06, |
|
"loss": 0.0059, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 17.80068728522337, |
|
"grad_norm": 0.1699119359254837, |
|
"learning_rate": 3.2608242948427017e-06, |
|
"loss": 0.008, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 17.81786941580756, |
|
"grad_norm": 0.09976794570684433, |
|
"learning_rate": 3.2105449809681334e-06, |
|
"loss": 0.01, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 17.835051546391753, |
|
"grad_norm": 0.12520617246627808, |
|
"learning_rate": 3.160643457895435e-06, |
|
"loss": 0.0057, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 17.852233676975946, |
|
"grad_norm": 0.23560845851898193, |
|
"learning_rate": 3.111120128543088e-06, |
|
"loss": 0.0127, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 17.869415807560138, |
|
"grad_norm": 0.134457528591156, |
|
"learning_rate": 3.0619753927759565e-06, |
|
"loss": 0.0067, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 17.88659793814433, |
|
"grad_norm": 0.20700249075889587, |
|
"learning_rate": 3.013209647401999e-06, |
|
"loss": 0.0053, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 17.903780068728523, |
|
"grad_norm": 0.1971113532781601, |
|
"learning_rate": 2.964823286169133e-06, |
|
"loss": 0.0073, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 17.920962199312715, |
|
"grad_norm": 0.2505052089691162, |
|
"learning_rate": 2.9168166997620263e-06, |
|
"loss": 0.0066, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 17.938144329896907, |
|
"grad_norm": 0.1978893280029297, |
|
"learning_rate": 2.869190275798911e-06, |
|
"loss": 0.0067, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 17.9553264604811, |
|
"grad_norm": 0.09993084520101547, |
|
"learning_rate": 2.821944398828519e-06, |
|
"loss": 0.005, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 17.972508591065292, |
|
"grad_norm": 0.0836280807852745, |
|
"learning_rate": 2.775079450326917e-06, |
|
"loss": 0.0076, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 17.989690721649485, |
|
"grad_norm": 0.20113223791122437, |
|
"learning_rate": 2.7285958086944786e-06, |
|
"loss": 0.0039, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 18.006872852233677, |
|
"grad_norm": 0.10885003954172134, |
|
"learning_rate": 2.6824938492527595e-06, |
|
"loss": 0.0093, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 18.02405498281787, |
|
"grad_norm": 0.05460460111498833, |
|
"learning_rate": 2.636773944241555e-06, |
|
"loss": 0.003, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 18.04123711340206, |
|
"grad_norm": 0.08809126913547516, |
|
"learning_rate": 2.5914364628158217e-06, |
|
"loss": 0.0044, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 18.058419243986254, |
|
"grad_norm": 0.1788802295923233, |
|
"learning_rate": 2.5464817710427414e-06, |
|
"loss": 0.0072, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 18.075601374570446, |
|
"grad_norm": 0.08905819058418274, |
|
"learning_rate": 2.501910231898724e-06, |
|
"loss": 0.0066, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 18.09278350515464, |
|
"grad_norm": 0.08635038882493973, |
|
"learning_rate": 2.457722205266516e-06, |
|
"loss": 0.0048, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 18.10996563573883, |
|
"grad_norm": 0.3244889974594116, |
|
"learning_rate": 2.413918047932284e-06, |
|
"loss": 0.0084, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 18.127147766323024, |
|
"grad_norm": 0.1447770893573761, |
|
"learning_rate": 2.370498113582731e-06, |
|
"loss": 0.0054, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 18.144329896907216, |
|
"grad_norm": 0.13908503949642181, |
|
"learning_rate": 2.327462752802212e-06, |
|
"loss": 0.0045, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 18.16151202749141, |
|
"grad_norm": 0.15273341536521912, |
|
"learning_rate": 2.2848123130699562e-06, |
|
"loss": 0.0074, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 18.1786941580756, |
|
"grad_norm": 0.09530580788850784, |
|
"learning_rate": 2.2425471387572337e-06, |
|
"loss": 0.0054, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 18.195876288659793, |
|
"grad_norm": 0.0629500150680542, |
|
"learning_rate": 2.2006675711245818e-06, |
|
"loss": 0.0053, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 18.213058419243985, |
|
"grad_norm": 0.13536952435970306, |
|
"learning_rate": 2.15917394831901e-06, |
|
"loss": 0.0076, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 18.230240549828178, |
|
"grad_norm": 0.1433546394109726, |
|
"learning_rate": 2.118066605371344e-06, |
|
"loss": 0.0062, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 18.24742268041237, |
|
"grad_norm": 0.07788240164518356, |
|
"learning_rate": 2.0773458741934646e-06, |
|
"loss": 0.0043, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 18.264604810996563, |
|
"grad_norm": 0.2006695419549942, |
|
"learning_rate": 2.0370120835756513e-06, |
|
"loss": 0.0064, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 18.281786941580755, |
|
"grad_norm": 0.09646660089492798, |
|
"learning_rate": 1.9970655591838917e-06, |
|
"loss": 0.0066, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 18.298969072164947, |
|
"grad_norm": 0.08818981796503067, |
|
"learning_rate": 1.9575066235573205e-06, |
|
"loss": 0.0061, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 18.31615120274914, |
|
"grad_norm": 0.10929910838603973, |
|
"learning_rate": 1.918335596105553e-06, |
|
"loss": 0.0054, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 18.333333333333332, |
|
"grad_norm": 0.23234823346138, |
|
"learning_rate": 1.8795527931061374e-06, |
|
"loss": 0.0088, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 18.350515463917525, |
|
"grad_norm": 0.06758160889148712, |
|
"learning_rate": 1.841158527701975e-06, |
|
"loss": 0.0074, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 18.367697594501717, |
|
"grad_norm": 0.186759814620018, |
|
"learning_rate": 1.8031531098988252e-06, |
|
"loss": 0.0064, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 18.38487972508591, |
|
"grad_norm": 0.12347927689552307, |
|
"learning_rate": 1.765536846562782e-06, |
|
"loss": 0.0066, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 18.4020618556701, |
|
"grad_norm": 0.05850611999630928, |
|
"learning_rate": 1.7283100414178078e-06, |
|
"loss": 0.0071, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 18.419243986254294, |
|
"grad_norm": 0.22844308614730835, |
|
"learning_rate": 1.6914729950432474e-06, |
|
"loss": 0.0052, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 18.436426116838486, |
|
"grad_norm": 0.23077848553657532, |
|
"learning_rate": 1.6550260048714628e-06, |
|
"loss": 0.0094, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 18.45360824742268, |
|
"grad_norm": 0.04616402089595795, |
|
"learning_rate": 1.6189693651853687e-06, |
|
"loss": 0.0079, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 18.47079037800687, |
|
"grad_norm": 0.1235852912068367, |
|
"learning_rate": 1.58330336711609e-06, |
|
"loss": 0.0055, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 18.487972508591064, |
|
"grad_norm": 0.06739755719900131, |
|
"learning_rate": 1.5480282986406136e-06, |
|
"loss": 0.0088, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 18.50515463917526, |
|
"grad_norm": 0.08690838515758514, |
|
"learning_rate": 1.5131444445794506e-06, |
|
"loss": 0.004, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 18.522336769759452, |
|
"grad_norm": 0.12189892679452896, |
|
"learning_rate": 1.4786520865943344e-06, |
|
"loss": 0.0053, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 18.539518900343644, |
|
"grad_norm": 0.10416446626186371, |
|
"learning_rate": 1.4445515031859591e-06, |
|
"loss": 0.0042, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 18.556701030927837, |
|
"grad_norm": 0.13094037771224976, |
|
"learning_rate": 1.4108429696917225e-06, |
|
"loss": 0.0063, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 18.57388316151203, |
|
"grad_norm": 0.07218027114868164, |
|
"learning_rate": 1.3775267582834928e-06, |
|
"loss": 0.0068, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 18.59106529209622, |
|
"grad_norm": 0.10305195301771164, |
|
"learning_rate": 1.34460313796545e-06, |
|
"loss": 0.0059, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 18.608247422680414, |
|
"grad_norm": 0.18571843206882477, |
|
"learning_rate": 1.31207237457186e-06, |
|
"loss": 0.0036, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 18.625429553264606, |
|
"grad_norm": 0.09622086584568024, |
|
"learning_rate": 1.2799347307649756e-06, |
|
"loss": 0.0031, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 18.6426116838488, |
|
"grad_norm": 0.06238604336977005, |
|
"learning_rate": 1.248190466032867e-06, |
|
"loss": 0.0041, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 18.65979381443299, |
|
"grad_norm": 0.07316755503416061, |
|
"learning_rate": 1.2168398366873946e-06, |
|
"loss": 0.0048, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 18.676975945017183, |
|
"grad_norm": 0.28695550560951233, |
|
"learning_rate": 1.1858830958620559e-06, |
|
"loss": 0.0064, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 18.694158075601376, |
|
"grad_norm": 0.18788480758666992, |
|
"learning_rate": 1.155320493510026e-06, |
|
"loss": 0.008, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 18.711340206185568, |
|
"grad_norm": 0.06678231060504913, |
|
"learning_rate": 1.1251522764020638e-06, |
|
"loss": 0.0058, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 18.72852233676976, |
|
"grad_norm": 0.0773329809308052, |
|
"learning_rate": 1.095378688124582e-06, |
|
"loss": 0.0093, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 18.745704467353953, |
|
"grad_norm": 0.12073255330324173, |
|
"learning_rate": 1.0659999690776302e-06, |
|
"loss": 0.0047, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 18.762886597938145, |
|
"grad_norm": 0.09898494184017181, |
|
"learning_rate": 1.0370163564729974e-06, |
|
"loss": 0.007, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 18.780068728522338, |
|
"grad_norm": 0.0713253766298294, |
|
"learning_rate": 1.008428084332247e-06, |
|
"loss": 0.0061, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 18.79725085910653, |
|
"grad_norm": 0.1340664029121399, |
|
"learning_rate": 9.802353834848953e-07, |
|
"loss": 0.0045, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 18.814432989690722, |
|
"grad_norm": 0.08532940596342087, |
|
"learning_rate": 9.524384815664699e-07, |
|
"loss": 0.0038, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 18.831615120274915, |
|
"grad_norm": 0.12806546688079834, |
|
"learning_rate": 9.250376030167429e-07, |
|
"loss": 0.009, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 18.848797250859107, |
|
"grad_norm": 0.07002566009759903, |
|
"learning_rate": 8.980329690778499e-07, |
|
"loss": 0.0035, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 18.8659793814433, |
|
"grad_norm": 0.09975923597812653, |
|
"learning_rate": 8.714247977925749e-07, |
|
"loss": 0.004, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 18.883161512027492, |
|
"grad_norm": 0.1369011402130127, |
|
"learning_rate": 8.452133040025345e-07, |
|
"loss": 0.004, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 18.900343642611684, |
|
"grad_norm": 0.16670043766498566, |
|
"learning_rate": 8.193986993464686e-07, |
|
"loss": 0.0049, |
|
"step": 11000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 11638, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.7686827385256666e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|