|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"eval_steps": 500, |
|
"global_step": 322050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0776276975624903, |
|
"grad_norm": 2.2886219024658203, |
|
"learning_rate": 4.992237230243752e-05, |
|
"loss": 6.7024, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1552553951249806, |
|
"grad_norm": 2.4521589279174805, |
|
"learning_rate": 4.9844744604875024e-05, |
|
"loss": 5.9478, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2328830926874709, |
|
"grad_norm": 2.662937641143799, |
|
"learning_rate": 4.976711690731253e-05, |
|
"loss": 5.6144, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3105107902499612, |
|
"grad_norm": 2.6923885345458984, |
|
"learning_rate": 4.968948920975004e-05, |
|
"loss": 5.3722, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38813848781245147, |
|
"grad_norm": 3.0573277473449707, |
|
"learning_rate": 4.9611861512187554e-05, |
|
"loss": 5.2099, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.4657661853749418, |
|
"grad_norm": 2.492630958557129, |
|
"learning_rate": 4.953423381462506e-05, |
|
"loss": 5.0823, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5433938829374321, |
|
"grad_norm": 2.724647045135498, |
|
"learning_rate": 4.945660611706257e-05, |
|
"loss": 4.9667, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6210215804999224, |
|
"grad_norm": 2.4619314670562744, |
|
"learning_rate": 4.937897841950008e-05, |
|
"loss": 4.8775, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6986492780624126, |
|
"grad_norm": 2.758463144302368, |
|
"learning_rate": 4.930135072193759e-05, |
|
"loss": 4.7939, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.7762769756249029, |
|
"grad_norm": 3.0213730335235596, |
|
"learning_rate": 4.92237230243751e-05, |
|
"loss": 4.7288, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8539046731873933, |
|
"grad_norm": 2.7506508827209473, |
|
"learning_rate": 4.9146095326812606e-05, |
|
"loss": 4.6724, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.9315323707498836, |
|
"grad_norm": 3.3897273540496826, |
|
"learning_rate": 4.906846762925012e-05, |
|
"loss": 4.6203, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.0091600683123738, |
|
"grad_norm": 3.2612226009368896, |
|
"learning_rate": 4.899083993168763e-05, |
|
"loss": 4.564, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.0867877658748641, |
|
"grad_norm": 2.9909706115722656, |
|
"learning_rate": 4.891321223412514e-05, |
|
"loss": 4.4841, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.1644154634373545, |
|
"grad_norm": 3.00471830368042, |
|
"learning_rate": 4.883558453656264e-05, |
|
"loss": 4.4436, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.2420431609998448, |
|
"grad_norm": 3.588019609451294, |
|
"learning_rate": 4.875795683900016e-05, |
|
"loss": 4.4248, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.319670858562335, |
|
"grad_norm": 3.1261277198791504, |
|
"learning_rate": 4.868032914143767e-05, |
|
"loss": 4.3914, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.3972985561248255, |
|
"grad_norm": 3.248203754425049, |
|
"learning_rate": 4.860270144387518e-05, |
|
"loss": 4.3539, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.4749262536873156, |
|
"grad_norm": 3.6183948516845703, |
|
"learning_rate": 4.8525073746312687e-05, |
|
"loss": 4.3439, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.5525539512498059, |
|
"grad_norm": 3.6323795318603516, |
|
"learning_rate": 4.8447446048750194e-05, |
|
"loss": 4.3104, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.6301816488122962, |
|
"grad_norm": 3.8555796146392822, |
|
"learning_rate": 4.836981835118771e-05, |
|
"loss": 4.2775, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.7078093463747865, |
|
"grad_norm": 3.804065465927124, |
|
"learning_rate": 4.8292190653625216e-05, |
|
"loss": 4.2645, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.7854370439372769, |
|
"grad_norm": 3.5051915645599365, |
|
"learning_rate": 4.8214562956062723e-05, |
|
"loss": 4.2555, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.863064741499767, |
|
"grad_norm": 3.28206205368042, |
|
"learning_rate": 4.813693525850024e-05, |
|
"loss": 4.2254, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.9406924390622575, |
|
"grad_norm": 3.6532084941864014, |
|
"learning_rate": 4.8059307560937745e-05, |
|
"loss": 4.2142, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.0183201366247476, |
|
"grad_norm": 3.8629403114318848, |
|
"learning_rate": 4.798167986337525e-05, |
|
"loss": 4.1695, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.095947834187238, |
|
"grad_norm": 3.7742209434509277, |
|
"learning_rate": 4.790405216581276e-05, |
|
"loss": 4.1056, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.1735755317497283, |
|
"grad_norm": 3.638509750366211, |
|
"learning_rate": 4.7826424468250275e-05, |
|
"loss": 4.0926, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.2512032293122184, |
|
"grad_norm": 3.4432594776153564, |
|
"learning_rate": 4.774879677068778e-05, |
|
"loss": 4.0826, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.328830926874709, |
|
"grad_norm": 3.252643346786499, |
|
"learning_rate": 4.76711690731253e-05, |
|
"loss": 4.073, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.406458624437199, |
|
"grad_norm": 3.611611843109131, |
|
"learning_rate": 4.7593541375562804e-05, |
|
"loss": 4.0556, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.4840863219996896, |
|
"grad_norm": 3.842820644378662, |
|
"learning_rate": 4.751591367800031e-05, |
|
"loss": 4.0538, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.5617140195621797, |
|
"grad_norm": 4.127362251281738, |
|
"learning_rate": 4.7438285980437826e-05, |
|
"loss": 4.0186, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.63934171712467, |
|
"grad_norm": 3.498431921005249, |
|
"learning_rate": 4.7360658282875334e-05, |
|
"loss": 3.9995, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.7169694146871604, |
|
"grad_norm": 3.7191123962402344, |
|
"learning_rate": 4.728303058531284e-05, |
|
"loss": 4.0059, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.794597112249651, |
|
"grad_norm": 3.748997688293457, |
|
"learning_rate": 4.720540288775035e-05, |
|
"loss": 3.9807, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.872224809812141, |
|
"grad_norm": 3.91758394241333, |
|
"learning_rate": 4.712777519018786e-05, |
|
"loss": 3.9752, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.949852507374631, |
|
"grad_norm": 4.286660671234131, |
|
"learning_rate": 4.705014749262537e-05, |
|
"loss": 3.9597, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.0274802049371217, |
|
"grad_norm": 4.166433334350586, |
|
"learning_rate": 4.697251979506288e-05, |
|
"loss": 3.9264, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.1051079024996118, |
|
"grad_norm": 4.093895435333252, |
|
"learning_rate": 4.689489209750039e-05, |
|
"loss": 3.8771, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.1827356000621023, |
|
"grad_norm": 3.8036608695983887, |
|
"learning_rate": 4.68172643999379e-05, |
|
"loss": 3.8691, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.2603632976245924, |
|
"grad_norm": 3.8469622135162354, |
|
"learning_rate": 4.6739636702375414e-05, |
|
"loss": 3.8456, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.3379909951870825, |
|
"grad_norm": 4.524165630340576, |
|
"learning_rate": 4.6662009004812915e-05, |
|
"loss": 3.8516, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.415618692749573, |
|
"grad_norm": 4.203705310821533, |
|
"learning_rate": 4.658438130725043e-05, |
|
"loss": 3.8486, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.493246390312063, |
|
"grad_norm": 3.79025936126709, |
|
"learning_rate": 4.650675360968794e-05, |
|
"loss": 3.8466, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.5708740878745537, |
|
"grad_norm": 4.120058059692383, |
|
"learning_rate": 4.642912591212545e-05, |
|
"loss": 3.8195, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.648501785437044, |
|
"grad_norm": 4.125455379486084, |
|
"learning_rate": 4.635149821456296e-05, |
|
"loss": 3.7975, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.7261294829995344, |
|
"grad_norm": 4.129229545593262, |
|
"learning_rate": 4.6273870517000466e-05, |
|
"loss": 3.8115, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.8037571805620245, |
|
"grad_norm": 4.444260597229004, |
|
"learning_rate": 4.619624281943798e-05, |
|
"loss": 3.8045, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.881384878124515, |
|
"grad_norm": 4.36641788482666, |
|
"learning_rate": 4.611861512187549e-05, |
|
"loss": 3.813, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.959012575687005, |
|
"grad_norm": 4.3214802742004395, |
|
"learning_rate": 4.6040987424312996e-05, |
|
"loss": 3.7778, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.036640273249495, |
|
"grad_norm": 4.101747989654541, |
|
"learning_rate": 4.5963359726750503e-05, |
|
"loss": 3.7416, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.114267970811985, |
|
"grad_norm": 4.384554386138916, |
|
"learning_rate": 4.588573202918802e-05, |
|
"loss": 3.7074, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 4.191895668374476, |
|
"grad_norm": 4.370575904846191, |
|
"learning_rate": 4.580810433162553e-05, |
|
"loss": 3.7012, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.2695233659369665, |
|
"grad_norm": 4.443875789642334, |
|
"learning_rate": 4.573047663406303e-05, |
|
"loss": 3.691, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.347151063499457, |
|
"grad_norm": 4.347660064697266, |
|
"learning_rate": 4.565284893650055e-05, |
|
"loss": 3.6706, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.424778761061947, |
|
"grad_norm": 4.289429187774658, |
|
"learning_rate": 4.5575221238938055e-05, |
|
"loss": 3.698, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 4.502406458624437, |
|
"grad_norm": 4.255033016204834, |
|
"learning_rate": 4.549759354137557e-05, |
|
"loss": 3.6576, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.580034156186928, |
|
"grad_norm": 4.466300010681152, |
|
"learning_rate": 4.541996584381307e-05, |
|
"loss": 3.6684, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 4.657661853749418, |
|
"grad_norm": 4.410152435302734, |
|
"learning_rate": 4.5342338146250584e-05, |
|
"loss": 3.6477, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.735289551311908, |
|
"grad_norm": 4.257645130157471, |
|
"learning_rate": 4.52647104486881e-05, |
|
"loss": 3.6531, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 4.812917248874398, |
|
"grad_norm": 4.475682258605957, |
|
"learning_rate": 4.5187082751125606e-05, |
|
"loss": 3.6587, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.890544946436888, |
|
"grad_norm": 4.372265338897705, |
|
"learning_rate": 4.5109455053563114e-05, |
|
"loss": 3.632, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 4.968172643999379, |
|
"grad_norm": 4.2151360511779785, |
|
"learning_rate": 4.503182735600062e-05, |
|
"loss": 3.6336, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 5.045800341561869, |
|
"grad_norm": 4.397316932678223, |
|
"learning_rate": 4.4954199658438135e-05, |
|
"loss": 3.566, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 5.123428039124359, |
|
"grad_norm": 4.471977710723877, |
|
"learning_rate": 4.487657196087564e-05, |
|
"loss": 3.5522, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 5.2010557366868495, |
|
"grad_norm": 4.2865471839904785, |
|
"learning_rate": 4.479894426331315e-05, |
|
"loss": 3.5675, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 5.2786834342493405, |
|
"grad_norm": 4.559909343719482, |
|
"learning_rate": 4.472131656575066e-05, |
|
"loss": 3.54, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 5.356311131811831, |
|
"grad_norm": 4.453431606292725, |
|
"learning_rate": 4.464368886818817e-05, |
|
"loss": 3.5392, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 5.433938829374321, |
|
"grad_norm": 4.54495906829834, |
|
"learning_rate": 4.456606117062569e-05, |
|
"loss": 3.5424, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 5.511566526936811, |
|
"grad_norm": 4.494850158691406, |
|
"learning_rate": 4.448843347306319e-05, |
|
"loss": 3.5414, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 5.589194224499302, |
|
"grad_norm": 4.8761162757873535, |
|
"learning_rate": 4.44108057755007e-05, |
|
"loss": 3.525, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.666821922061792, |
|
"grad_norm": 4.575265884399414, |
|
"learning_rate": 4.433317807793821e-05, |
|
"loss": 3.5405, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 5.744449619624282, |
|
"grad_norm": 4.597631454467773, |
|
"learning_rate": 4.4255550380375724e-05, |
|
"loss": 3.5259, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.822077317186772, |
|
"grad_norm": 4.326088905334473, |
|
"learning_rate": 4.4177922682813225e-05, |
|
"loss": 3.4985, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 5.899705014749262, |
|
"grad_norm": 4.202051639556885, |
|
"learning_rate": 4.410029498525074e-05, |
|
"loss": 3.5087, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.977332712311753, |
|
"grad_norm": 4.386417388916016, |
|
"learning_rate": 4.402266728768825e-05, |
|
"loss": 3.4926, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 6.054960409874243, |
|
"grad_norm": 4.612489700317383, |
|
"learning_rate": 4.394503959012576e-05, |
|
"loss": 3.456, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 6.132588107436733, |
|
"grad_norm": 4.2950286865234375, |
|
"learning_rate": 4.386741189256327e-05, |
|
"loss": 3.4195, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 6.2102158049992235, |
|
"grad_norm": 4.728135585784912, |
|
"learning_rate": 4.3789784195000776e-05, |
|
"loss": 3.422, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 6.287843502561714, |
|
"grad_norm": 4.690753936767578, |
|
"learning_rate": 4.371215649743829e-05, |
|
"loss": 3.4147, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 6.365471200124205, |
|
"grad_norm": 4.528134346008301, |
|
"learning_rate": 4.36345287998758e-05, |
|
"loss": 3.4115, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 6.443098897686695, |
|
"grad_norm": 4.323470592498779, |
|
"learning_rate": 4.3556901102313305e-05, |
|
"loss": 3.4058, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 6.520726595249185, |
|
"grad_norm": 4.374230861663818, |
|
"learning_rate": 4.347927340475082e-05, |
|
"loss": 3.4112, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 6.598354292811675, |
|
"grad_norm": 4.312314033508301, |
|
"learning_rate": 4.340164570718833e-05, |
|
"loss": 3.3881, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 6.675981990374165, |
|
"grad_norm": 4.178228378295898, |
|
"learning_rate": 4.332401800962584e-05, |
|
"loss": 3.4044, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 6.753609687936656, |
|
"grad_norm": 4.638906002044678, |
|
"learning_rate": 4.324639031206334e-05, |
|
"loss": 3.3954, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 6.831237385499146, |
|
"grad_norm": 4.238986492156982, |
|
"learning_rate": 4.3168762614500857e-05, |
|
"loss": 3.4013, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 6.908865083061636, |
|
"grad_norm": 4.471828460693359, |
|
"learning_rate": 4.3091134916938364e-05, |
|
"loss": 3.3806, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 6.986492780624126, |
|
"grad_norm": 4.4187912940979, |
|
"learning_rate": 4.301350721937588e-05, |
|
"loss": 3.3834, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 7.064120478186617, |
|
"grad_norm": 5.066268444061279, |
|
"learning_rate": 4.293587952181338e-05, |
|
"loss": 3.3064, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 7.1417481757491075, |
|
"grad_norm": 4.942110538482666, |
|
"learning_rate": 4.2858251824250894e-05, |
|
"loss": 3.2971, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 7.219375873311598, |
|
"grad_norm": 5.294034957885742, |
|
"learning_rate": 4.278062412668841e-05, |
|
"loss": 3.2643, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 7.297003570874088, |
|
"grad_norm": 4.650871753692627, |
|
"learning_rate": 4.2702996429125915e-05, |
|
"loss": 3.2768, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 7.374631268436578, |
|
"grad_norm": 5.170124053955078, |
|
"learning_rate": 4.262536873156342e-05, |
|
"loss": 3.2832, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 7.452258965999069, |
|
"grad_norm": 4.852886199951172, |
|
"learning_rate": 4.254774103400093e-05, |
|
"loss": 3.2779, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 7.529886663561559, |
|
"grad_norm": 5.047275543212891, |
|
"learning_rate": 4.2470113336438445e-05, |
|
"loss": 3.273, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 7.607514361124049, |
|
"grad_norm": 4.9860520362854, |
|
"learning_rate": 4.239248563887595e-05, |
|
"loss": 3.2538, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 7.685142058686539, |
|
"grad_norm": 4.9074859619140625, |
|
"learning_rate": 4.231485794131346e-05, |
|
"loss": 3.248, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 7.76276975624903, |
|
"grad_norm": 4.936252593994141, |
|
"learning_rate": 4.2237230243750974e-05, |
|
"loss": 3.2492, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 7.84039745381152, |
|
"grad_norm": 4.652443885803223, |
|
"learning_rate": 4.215960254618848e-05, |
|
"loss": 3.2412, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 7.91802515137401, |
|
"grad_norm": 4.407495021820068, |
|
"learning_rate": 4.2081974848625996e-05, |
|
"loss": 3.2372, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 7.9956528489365, |
|
"grad_norm": 4.413294792175293, |
|
"learning_rate": 4.20043471510635e-05, |
|
"loss": 3.2131, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 8.07328054649899, |
|
"grad_norm": 4.42469596862793, |
|
"learning_rate": 4.192671945350101e-05, |
|
"loss": 3.1377, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 8.150908244061482, |
|
"grad_norm": 4.906301498413086, |
|
"learning_rate": 4.184909175593852e-05, |
|
"loss": 3.1072, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 8.22853594162397, |
|
"grad_norm": 5.0347900390625, |
|
"learning_rate": 4.177146405837603e-05, |
|
"loss": 3.1374, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 8.306163639186462, |
|
"grad_norm": 5.217957496643066, |
|
"learning_rate": 4.169383636081354e-05, |
|
"loss": 3.1124, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 8.383791336748953, |
|
"grad_norm": 4.475755214691162, |
|
"learning_rate": 4.161620866325105e-05, |
|
"loss": 3.1194, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 8.461419034311442, |
|
"grad_norm": 5.22430419921875, |
|
"learning_rate": 4.153858096568856e-05, |
|
"loss": 3.1201, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 8.539046731873933, |
|
"grad_norm": 6.327775955200195, |
|
"learning_rate": 4.146095326812607e-05, |
|
"loss": 3.1031, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 8.616674429436422, |
|
"grad_norm": 4.703291893005371, |
|
"learning_rate": 4.138332557056358e-05, |
|
"loss": 3.1043, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 8.694302126998913, |
|
"grad_norm": 5.288379192352295, |
|
"learning_rate": 4.1305697873001085e-05, |
|
"loss": 3.1024, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"grad_norm": 4.9670090675354, |
|
"learning_rate": 4.12280701754386e-05, |
|
"loss": 3.0797, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 8.849557522123893, |
|
"grad_norm": 4.910192012786865, |
|
"learning_rate": 4.115044247787611e-05, |
|
"loss": 3.0869, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 8.927185219686384, |
|
"grad_norm": 4.804894924163818, |
|
"learning_rate": 4.1072814780313615e-05, |
|
"loss": 3.0885, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 9.004812917248874, |
|
"grad_norm": 5.052229404449463, |
|
"learning_rate": 4.099518708275113e-05, |
|
"loss": 3.0821, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 9.082440614811365, |
|
"grad_norm": 5.419916152954102, |
|
"learning_rate": 4.0917559385188637e-05, |
|
"loss": 2.9879, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 9.160068312373856, |
|
"grad_norm": 5.0662078857421875, |
|
"learning_rate": 4.083993168762615e-05, |
|
"loss": 2.9825, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 9.237696009936345, |
|
"grad_norm": 4.776367664337158, |
|
"learning_rate": 4.076230399006365e-05, |
|
"loss": 2.977, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 9.315323707498836, |
|
"grad_norm": 4.7674031257629395, |
|
"learning_rate": 4.0684676292501166e-05, |
|
"loss": 2.9971, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 9.392951405061325, |
|
"grad_norm": 4.947634696960449, |
|
"learning_rate": 4.0607048594938673e-05, |
|
"loss": 2.9651, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 9.470579102623816, |
|
"grad_norm": 4.943103790283203, |
|
"learning_rate": 4.052942089737619e-05, |
|
"loss": 2.9781, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 9.548206800186307, |
|
"grad_norm": 5.14945125579834, |
|
"learning_rate": 4.0451793199813695e-05, |
|
"loss": 2.9702, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 9.625834497748796, |
|
"grad_norm": 5.054744243621826, |
|
"learning_rate": 4.03741655022512e-05, |
|
"loss": 2.9553, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 9.703462195311287, |
|
"grad_norm": 5.338235855102539, |
|
"learning_rate": 4.029653780468872e-05, |
|
"loss": 2.9489, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 9.781089892873778, |
|
"grad_norm": 4.819457530975342, |
|
"learning_rate": 4.0218910107126225e-05, |
|
"loss": 2.9676, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 9.858717590436267, |
|
"grad_norm": 4.814851760864258, |
|
"learning_rate": 4.014128240956373e-05, |
|
"loss": 2.9374, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 9.936345287998758, |
|
"grad_norm": 4.723858833312988, |
|
"learning_rate": 4.006365471200124e-05, |
|
"loss": 2.9474, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 10.013972985561248, |
|
"grad_norm": 4.435904026031494, |
|
"learning_rate": 3.9986027014438754e-05, |
|
"loss": 2.9094, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 10.091600683123739, |
|
"grad_norm": 4.80678129196167, |
|
"learning_rate": 3.990839931687627e-05, |
|
"loss": 2.8467, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 10.16922838068623, |
|
"grad_norm": 5.187747001647949, |
|
"learning_rate": 3.983077161931377e-05, |
|
"loss": 2.8237, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 10.246856078248719, |
|
"grad_norm": 4.363202095031738, |
|
"learning_rate": 3.9753143921751284e-05, |
|
"loss": 2.8334, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 10.32448377581121, |
|
"grad_norm": 5.085516929626465, |
|
"learning_rate": 3.967551622418879e-05, |
|
"loss": 2.8284, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 10.402111473373699, |
|
"grad_norm": 4.973574638366699, |
|
"learning_rate": 3.9597888526626306e-05, |
|
"loss": 2.8194, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 10.47973917093619, |
|
"grad_norm": 4.629599094390869, |
|
"learning_rate": 3.952026082906381e-05, |
|
"loss": 2.8284, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 10.557366868498681, |
|
"grad_norm": 4.970963001251221, |
|
"learning_rate": 3.944263313150132e-05, |
|
"loss": 2.8285, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 10.63499456606117, |
|
"grad_norm": 4.869990348815918, |
|
"learning_rate": 3.936500543393883e-05, |
|
"loss": 2.8048, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 10.712622263623661, |
|
"grad_norm": 5.26320743560791, |
|
"learning_rate": 3.928737773637634e-05, |
|
"loss": 2.803, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 10.79024996118615, |
|
"grad_norm": 4.8318352699279785, |
|
"learning_rate": 3.920975003881385e-05, |
|
"loss": 2.7984, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 10.867877658748641, |
|
"grad_norm": 4.917919158935547, |
|
"learning_rate": 3.913212234125136e-05, |
|
"loss": 2.8091, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 10.945505356311132, |
|
"grad_norm": 4.485991954803467, |
|
"learning_rate": 3.905449464368887e-05, |
|
"loss": 2.7917, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 11.023133053873622, |
|
"grad_norm": 4.8984246253967285, |
|
"learning_rate": 3.897686694612638e-05, |
|
"loss": 2.7501, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 11.100760751436113, |
|
"grad_norm": 4.431053161621094, |
|
"learning_rate": 3.889923924856389e-05, |
|
"loss": 2.6896, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 11.178388448998602, |
|
"grad_norm": 4.597928524017334, |
|
"learning_rate": 3.8821611551001395e-05, |
|
"loss": 2.6874, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 11.256016146561093, |
|
"grad_norm": 4.701462268829346, |
|
"learning_rate": 3.874398385343891e-05, |
|
"loss": 2.679, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 11.333643844123584, |
|
"grad_norm": 4.706751346588135, |
|
"learning_rate": 3.866635615587642e-05, |
|
"loss": 2.6799, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 11.411271541686073, |
|
"grad_norm": 4.8909430503845215, |
|
"learning_rate": 3.858872845831393e-05, |
|
"loss": 2.6779, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 11.488899239248564, |
|
"grad_norm": 4.814470291137695, |
|
"learning_rate": 3.851110076075144e-05, |
|
"loss": 2.6723, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 11.566526936811055, |
|
"grad_norm": 4.277644157409668, |
|
"learning_rate": 3.8433473063188946e-05, |
|
"loss": 2.6787, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 11.644154634373544, |
|
"grad_norm": 4.709313869476318, |
|
"learning_rate": 3.835584536562646e-05, |
|
"loss": 2.6672, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 11.721782331936035, |
|
"grad_norm": 4.462389945983887, |
|
"learning_rate": 3.827821766806397e-05, |
|
"loss": 2.66, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 11.799410029498524, |
|
"grad_norm": 4.836484909057617, |
|
"learning_rate": 3.8200589970501475e-05, |
|
"loss": 2.6646, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 11.877037727061015, |
|
"grad_norm": 4.758359909057617, |
|
"learning_rate": 3.812296227293899e-05, |
|
"loss": 2.6561, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 11.954665424623506, |
|
"grad_norm": 4.208640098571777, |
|
"learning_rate": 3.80453345753765e-05, |
|
"loss": 2.6659, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 12.032293122185996, |
|
"grad_norm": 4.91511344909668, |
|
"learning_rate": 3.7967706877814005e-05, |
|
"loss": 2.5897, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 12.109920819748487, |
|
"grad_norm": 4.086484909057617, |
|
"learning_rate": 3.789007918025151e-05, |
|
"loss": 2.5594, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 12.187548517310976, |
|
"grad_norm": 4.583057880401611, |
|
"learning_rate": 3.781245148268903e-05, |
|
"loss": 2.5543, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 12.265176214873467, |
|
"grad_norm": 4.570094585418701, |
|
"learning_rate": 3.7734823785126534e-05, |
|
"loss": 2.5503, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 12.342803912435958, |
|
"grad_norm": 4.889599800109863, |
|
"learning_rate": 3.765719608756404e-05, |
|
"loss": 2.5416, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 12.420431609998447, |
|
"grad_norm": 4.4805426597595215, |
|
"learning_rate": 3.757956839000155e-05, |
|
"loss": 2.5589, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 12.498059307560938, |
|
"grad_norm": 4.407408237457275, |
|
"learning_rate": 3.7501940692439064e-05, |
|
"loss": 2.5315, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 12.575687005123427, |
|
"grad_norm": 4.637092113494873, |
|
"learning_rate": 3.742431299487658e-05, |
|
"loss": 2.5454, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 12.653314702685918, |
|
"grad_norm": 4.7181854248046875, |
|
"learning_rate": 3.7346685297314085e-05, |
|
"loss": 2.5383, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 12.73094240024841, |
|
"grad_norm": 4.588499546051025, |
|
"learning_rate": 3.726905759975159e-05, |
|
"loss": 2.5267, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 12.808570097810899, |
|
"grad_norm": 4.137992858886719, |
|
"learning_rate": 3.71914299021891e-05, |
|
"loss": 2.5345, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 12.88619779537339, |
|
"grad_norm": 4.400317668914795, |
|
"learning_rate": 3.7113802204626615e-05, |
|
"loss": 2.5259, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 12.963825492935879, |
|
"grad_norm": 4.139917850494385, |
|
"learning_rate": 3.703617450706412e-05, |
|
"loss": 2.5335, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 13.04145319049837, |
|
"grad_norm": 4.182736396789551, |
|
"learning_rate": 3.695854680950163e-05, |
|
"loss": 2.4574, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 13.11908088806086, |
|
"grad_norm": 4.659245491027832, |
|
"learning_rate": 3.6880919111939144e-05, |
|
"loss": 2.4193, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 13.19670858562335, |
|
"grad_norm": 4.163915157318115, |
|
"learning_rate": 3.680329141437665e-05, |
|
"loss": 2.4169, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 13.274336283185841, |
|
"grad_norm": 4.518395900726318, |
|
"learning_rate": 3.672566371681416e-05, |
|
"loss": 2.4161, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 13.35196398074833, |
|
"grad_norm": 4.277214050292969, |
|
"learning_rate": 3.664803601925167e-05, |
|
"loss": 2.4169, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 13.429591678310821, |
|
"grad_norm": 4.701220989227295, |
|
"learning_rate": 3.657040832168918e-05, |
|
"loss": 2.424, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 13.507219375873312, |
|
"grad_norm": 4.375713348388672, |
|
"learning_rate": 3.649278062412669e-05, |
|
"loss": 2.4193, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 13.584847073435801, |
|
"grad_norm": 4.191773891448975, |
|
"learning_rate": 3.64151529265642e-05, |
|
"loss": 2.4188, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 13.662474770998292, |
|
"grad_norm": 4.385691165924072, |
|
"learning_rate": 3.633752522900171e-05, |
|
"loss": 2.4149, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 13.740102468560782, |
|
"grad_norm": 4.488534927368164, |
|
"learning_rate": 3.625989753143922e-05, |
|
"loss": 2.3998, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 13.817730166123273, |
|
"grad_norm": 4.578937530517578, |
|
"learning_rate": 3.618226983387673e-05, |
|
"loss": 2.4065, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 13.895357863685764, |
|
"grad_norm": 4.423867702484131, |
|
"learning_rate": 3.610464213631424e-05, |
|
"loss": 2.4004, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 13.972985561248253, |
|
"grad_norm": 4.474419116973877, |
|
"learning_rate": 3.602701443875175e-05, |
|
"loss": 2.4044, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 14.050613258810744, |
|
"grad_norm": 4.806559085845947, |
|
"learning_rate": 3.5949386741189255e-05, |
|
"loss": 2.3339, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 14.128240956373235, |
|
"grad_norm": 4.276415824890137, |
|
"learning_rate": 3.587175904362677e-05, |
|
"loss": 2.2801, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 14.205868653935724, |
|
"grad_norm": 4.825454235076904, |
|
"learning_rate": 3.579413134606428e-05, |
|
"loss": 2.297, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 14.283496351498215, |
|
"grad_norm": 4.838090896606445, |
|
"learning_rate": 3.5716503648501785e-05, |
|
"loss": 2.299, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 14.361124049060704, |
|
"grad_norm": 4.015684604644775, |
|
"learning_rate": 3.56388759509393e-05, |
|
"loss": 2.2892, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 14.438751746623195, |
|
"grad_norm": 4.386364459991455, |
|
"learning_rate": 3.5561248253376807e-05, |
|
"loss": 2.3058, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 14.516379444185686, |
|
"grad_norm": 4.3224968910217285, |
|
"learning_rate": 3.548362055581432e-05, |
|
"loss": 2.3027, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 14.594007141748175, |
|
"grad_norm": 4.265476226806641, |
|
"learning_rate": 3.540599285825182e-05, |
|
"loss": 2.2993, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 14.671634839310666, |
|
"grad_norm": 4.053600311279297, |
|
"learning_rate": 3.5328365160689336e-05, |
|
"loss": 2.2942, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 14.749262536873156, |
|
"grad_norm": 4.602315902709961, |
|
"learning_rate": 3.5250737463126844e-05, |
|
"loss": 2.2906, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 14.826890234435647, |
|
"grad_norm": 4.402678489685059, |
|
"learning_rate": 3.517310976556436e-05, |
|
"loss": 2.2702, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 14.904517931998138, |
|
"grad_norm": 4.164185523986816, |
|
"learning_rate": 3.5095482068001865e-05, |
|
"loss": 2.2815, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 14.982145629560627, |
|
"grad_norm": 3.9488399028778076, |
|
"learning_rate": 3.501785437043937e-05, |
|
"loss": 2.2949, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 15.059773327123118, |
|
"grad_norm": 4.283924102783203, |
|
"learning_rate": 3.494022667287689e-05, |
|
"loss": 2.2053, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 15.137401024685607, |
|
"grad_norm": 4.1038923263549805, |
|
"learning_rate": 3.4862598975314395e-05, |
|
"loss": 2.1718, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 15.215028722248098, |
|
"grad_norm": 3.826446533203125, |
|
"learning_rate": 3.47849712777519e-05, |
|
"loss": 2.1859, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 15.292656419810589, |
|
"grad_norm": 4.282005310058594, |
|
"learning_rate": 3.470734358018941e-05, |
|
"loss": 2.1854, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 15.370284117373078, |
|
"grad_norm": 4.259530067443848, |
|
"learning_rate": 3.4629715882626924e-05, |
|
"loss": 2.188, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 15.44791181493557, |
|
"grad_norm": 4.105893135070801, |
|
"learning_rate": 3.455208818506443e-05, |
|
"loss": 2.1824, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 15.52553951249806, |
|
"grad_norm": 4.21387243270874, |
|
"learning_rate": 3.447446048750194e-05, |
|
"loss": 2.1729, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 15.60316721006055, |
|
"grad_norm": 4.400328636169434, |
|
"learning_rate": 3.4396832789939454e-05, |
|
"loss": 2.1831, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 15.68079490762304, |
|
"grad_norm": 4.224130153656006, |
|
"learning_rate": 3.431920509237696e-05, |
|
"loss": 2.1936, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 15.75842260518553, |
|
"grad_norm": 3.9993326663970947, |
|
"learning_rate": 3.4241577394814476e-05, |
|
"loss": 2.1838, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 15.83605030274802, |
|
"grad_norm": 4.2306671142578125, |
|
"learning_rate": 3.4163949697251976e-05, |
|
"loss": 2.1838, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 15.913678000310512, |
|
"grad_norm": 4.4622368812561035, |
|
"learning_rate": 3.408632199968949e-05, |
|
"loss": 2.1836, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 15.991305697873, |
|
"grad_norm": 4.376685619354248, |
|
"learning_rate": 3.4008694302127005e-05, |
|
"loss": 2.1779, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 16.06893339543549, |
|
"grad_norm": 4.104698657989502, |
|
"learning_rate": 3.393106660456451e-05, |
|
"loss": 2.0854, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 16.14656109299798, |
|
"grad_norm": 3.761953353881836, |
|
"learning_rate": 3.385343890700202e-05, |
|
"loss": 2.0603, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 16.224188790560472, |
|
"grad_norm": 4.365135192871094, |
|
"learning_rate": 3.377581120943953e-05, |
|
"loss": 2.0572, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 16.301816488122963, |
|
"grad_norm": 4.137313365936279, |
|
"learning_rate": 3.369818351187704e-05, |
|
"loss": 2.0691, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 16.379444185685454, |
|
"grad_norm": 4.869952201843262, |
|
"learning_rate": 3.362055581431455e-05, |
|
"loss": 2.0935, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 16.45707188324794, |
|
"grad_norm": 4.275235652923584, |
|
"learning_rate": 3.354292811675206e-05, |
|
"loss": 2.077, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 16.534699580810432, |
|
"grad_norm": 4.092933177947998, |
|
"learning_rate": 3.3465300419189565e-05, |
|
"loss": 2.0977, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 16.612327278372923, |
|
"grad_norm": 3.9494364261627197, |
|
"learning_rate": 3.338767272162708e-05, |
|
"loss": 2.095, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 16.689954975935414, |
|
"grad_norm": 3.6660993099212646, |
|
"learning_rate": 3.331004502406459e-05, |
|
"loss": 2.0867, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 16.767582673497905, |
|
"grad_norm": 4.6808977127075195, |
|
"learning_rate": 3.3232417326502094e-05, |
|
"loss": 2.0856, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 16.845210371060393, |
|
"grad_norm": 3.951265335083008, |
|
"learning_rate": 3.315478962893961e-05, |
|
"loss": 2.0786, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 16.922838068622884, |
|
"grad_norm": 3.390282392501831, |
|
"learning_rate": 3.3077161931377116e-05, |
|
"loss": 2.0756, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 17.000465766185375, |
|
"grad_norm": 3.9212212562561035, |
|
"learning_rate": 3.299953423381463e-05, |
|
"loss": 2.0858, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 17.078093463747866, |
|
"grad_norm": 4.350470542907715, |
|
"learning_rate": 3.292190653625213e-05, |
|
"loss": 1.969, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 17.155721161310357, |
|
"grad_norm": 4.253689765930176, |
|
"learning_rate": 3.2844278838689645e-05, |
|
"loss": 1.9756, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 17.233348858872844, |
|
"grad_norm": 4.202712059020996, |
|
"learning_rate": 3.276665114112716e-05, |
|
"loss": 1.9793, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 17.310976556435335, |
|
"grad_norm": 4.103579998016357, |
|
"learning_rate": 3.268902344356467e-05, |
|
"loss": 1.9825, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 17.388604253997826, |
|
"grad_norm": 4.335016250610352, |
|
"learning_rate": 3.2611395746002175e-05, |
|
"loss": 1.978, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 17.466231951560317, |
|
"grad_norm": 4.291495323181152, |
|
"learning_rate": 3.253376804843968e-05, |
|
"loss": 1.9884, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 17.54385964912281, |
|
"grad_norm": 4.035206317901611, |
|
"learning_rate": 3.24561403508772e-05, |
|
"loss": 2.0041, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 17.621487346685296, |
|
"grad_norm": 3.9616289138793945, |
|
"learning_rate": 3.2378512653314704e-05, |
|
"loss": 1.9928, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 17.699115044247787, |
|
"grad_norm": 4.101945400238037, |
|
"learning_rate": 3.230088495575221e-05, |
|
"loss": 1.9906, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 17.776742741810278, |
|
"grad_norm": 4.0245490074157715, |
|
"learning_rate": 3.2223257258189726e-05, |
|
"loss": 1.9873, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 17.85437043937277, |
|
"grad_norm": 4.1350908279418945, |
|
"learning_rate": 3.2145629560627234e-05, |
|
"loss": 1.9917, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 17.93199813693526, |
|
"grad_norm": 4.366165637969971, |
|
"learning_rate": 3.206800186306475e-05, |
|
"loss": 1.9897, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 18.009625834497747, |
|
"grad_norm": 4.272118091583252, |
|
"learning_rate": 3.199037416550225e-05, |
|
"loss": 1.9837, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 18.087253532060238, |
|
"grad_norm": 4.427468776702881, |
|
"learning_rate": 3.191274646793976e-05, |
|
"loss": 1.8798, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 18.16488122962273, |
|
"grad_norm": 4.1292033195495605, |
|
"learning_rate": 3.183511877037727e-05, |
|
"loss": 1.8857, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 18.24250892718522, |
|
"grad_norm": 4.270112037658691, |
|
"learning_rate": 3.1757491072814785e-05, |
|
"loss": 1.8921, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 18.32013662474771, |
|
"grad_norm": 4.079245567321777, |
|
"learning_rate": 3.1679863375252286e-05, |
|
"loss": 1.8984, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 18.3977643223102, |
|
"grad_norm": 3.783048391342163, |
|
"learning_rate": 3.16022356776898e-05, |
|
"loss": 1.9001, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 18.47539201987269, |
|
"grad_norm": 3.9977831840515137, |
|
"learning_rate": 3.1524607980127314e-05, |
|
"loss": 1.9026, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 18.55301971743518, |
|
"grad_norm": 5.004773139953613, |
|
"learning_rate": 3.144698028256482e-05, |
|
"loss": 1.9027, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 18.63064741499767, |
|
"grad_norm": 4.3422417640686035, |
|
"learning_rate": 3.136935258500233e-05, |
|
"loss": 1.9084, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 18.708275112560163, |
|
"grad_norm": 3.9378857612609863, |
|
"learning_rate": 3.129172488743984e-05, |
|
"loss": 1.9038, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 18.78590281012265, |
|
"grad_norm": 4.138620853424072, |
|
"learning_rate": 3.121409718987735e-05, |
|
"loss": 1.9133, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 18.86353050768514, |
|
"grad_norm": 4.3769659996032715, |
|
"learning_rate": 3.113646949231486e-05, |
|
"loss": 1.9109, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 18.941158205247632, |
|
"grad_norm": 3.955392837524414, |
|
"learning_rate": 3.1058841794752366e-05, |
|
"loss": 1.913, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 19.018785902810123, |
|
"grad_norm": 4.047823905944824, |
|
"learning_rate": 3.098121409718988e-05, |
|
"loss": 1.8897, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 19.096413600372614, |
|
"grad_norm": 4.446326732635498, |
|
"learning_rate": 3.090358639962739e-05, |
|
"loss": 1.7936, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 19.174041297935105, |
|
"grad_norm": 3.9434542655944824, |
|
"learning_rate": 3.08259587020649e-05, |
|
"loss": 1.8065, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 19.251668995497592, |
|
"grad_norm": 4.108802318572998, |
|
"learning_rate": 3.0748331004502403e-05, |
|
"loss": 1.8157, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 19.329296693060083, |
|
"grad_norm": 4.374671459197998, |
|
"learning_rate": 3.067070330693992e-05, |
|
"loss": 1.8276, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 19.406924390622574, |
|
"grad_norm": 3.985368013381958, |
|
"learning_rate": 3.0593075609377425e-05, |
|
"loss": 1.8246, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 19.484552088185065, |
|
"grad_norm": 3.956395149230957, |
|
"learning_rate": 3.0515447911814936e-05, |
|
"loss": 1.8263, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 19.562179785747556, |
|
"grad_norm": 3.358553886413574, |
|
"learning_rate": 3.043782021425245e-05, |
|
"loss": 1.8227, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 19.639807483310044, |
|
"grad_norm": 4.203612804412842, |
|
"learning_rate": 3.0360192516689955e-05, |
|
"loss": 1.8225, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 19.717435180872535, |
|
"grad_norm": 3.790905714035034, |
|
"learning_rate": 3.028256481912747e-05, |
|
"loss": 1.8433, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 19.795062878435026, |
|
"grad_norm": 4.040520191192627, |
|
"learning_rate": 3.0204937121564973e-05, |
|
"loss": 1.8336, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 19.872690575997517, |
|
"grad_norm": 4.027768135070801, |
|
"learning_rate": 3.0127309424002488e-05, |
|
"loss": 1.8314, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 19.950318273560008, |
|
"grad_norm": 3.8109354972839355, |
|
"learning_rate": 3.0049681726439992e-05, |
|
"loss": 1.8425, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 20.027945971122495, |
|
"grad_norm": 3.751999855041504, |
|
"learning_rate": 2.9972054028877506e-05, |
|
"loss": 1.7967, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 20.105573668684986, |
|
"grad_norm": 3.9639225006103516, |
|
"learning_rate": 2.9894426331315014e-05, |
|
"loss": 1.7213, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 20.183201366247477, |
|
"grad_norm": 4.027946472167969, |
|
"learning_rate": 2.9816798633752525e-05, |
|
"loss": 1.7408, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 20.260829063809968, |
|
"grad_norm": 4.050852298736572, |
|
"learning_rate": 2.9739170936190035e-05, |
|
"loss": 1.7305, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 20.33845676137246, |
|
"grad_norm": 4.3804216384887695, |
|
"learning_rate": 2.9661543238627543e-05, |
|
"loss": 1.7499, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 20.416084458934947, |
|
"grad_norm": 4.021152019500732, |
|
"learning_rate": 2.9583915541065054e-05, |
|
"loss": 1.7484, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 20.493712156497438, |
|
"grad_norm": 3.7631611824035645, |
|
"learning_rate": 2.950628784350256e-05, |
|
"loss": 1.7531, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 20.57133985405993, |
|
"grad_norm": 4.4973249435424805, |
|
"learning_rate": 2.9428660145940072e-05, |
|
"loss": 1.767, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 20.64896755162242, |
|
"grad_norm": 4.386341571807861, |
|
"learning_rate": 2.935103244837758e-05, |
|
"loss": 1.7621, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 20.72659524918491, |
|
"grad_norm": 4.0129499435424805, |
|
"learning_rate": 2.927340475081509e-05, |
|
"loss": 1.7637, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 20.804222946747398, |
|
"grad_norm": 4.22186279296875, |
|
"learning_rate": 2.9195777053252605e-05, |
|
"loss": 1.7643, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 20.88185064430989, |
|
"grad_norm": 4.511717319488525, |
|
"learning_rate": 2.911814935569011e-05, |
|
"loss": 1.7761, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 20.95947834187238, |
|
"grad_norm": 4.100383281707764, |
|
"learning_rate": 2.9040521658127624e-05, |
|
"loss": 1.7625, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 21.03710603943487, |
|
"grad_norm": 4.241291046142578, |
|
"learning_rate": 2.8962893960565128e-05, |
|
"loss": 1.7083, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 21.114733736997362, |
|
"grad_norm": 3.8240482807159424, |
|
"learning_rate": 2.8885266263002642e-05, |
|
"loss": 1.6514, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 21.19236143455985, |
|
"grad_norm": 3.9241297245025635, |
|
"learning_rate": 2.880763856544015e-05, |
|
"loss": 1.662, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 21.26998913212234, |
|
"grad_norm": 3.836834669113159, |
|
"learning_rate": 2.873001086787766e-05, |
|
"loss": 1.6674, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 21.34761682968483, |
|
"grad_norm": 4.176065921783447, |
|
"learning_rate": 2.865238317031517e-05, |
|
"loss": 1.6754, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 21.425244527247322, |
|
"grad_norm": 4.702647686004639, |
|
"learning_rate": 2.857475547275268e-05, |
|
"loss": 1.6841, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 21.502872224809813, |
|
"grad_norm": 3.71679425239563, |
|
"learning_rate": 2.849712777519019e-05, |
|
"loss": 1.6918, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 21.5804999223723, |
|
"grad_norm": 4.379159450531006, |
|
"learning_rate": 2.8419500077627698e-05, |
|
"loss": 1.6845, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 21.658127619934792, |
|
"grad_norm": 3.984041213989258, |
|
"learning_rate": 2.834187238006521e-05, |
|
"loss": 1.7042, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 21.735755317497283, |
|
"grad_norm": 4.80483865737915, |
|
"learning_rate": 2.8264244682502716e-05, |
|
"loss": 1.7063, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 21.813383015059774, |
|
"grad_norm": 3.897512674331665, |
|
"learning_rate": 2.8186616984940227e-05, |
|
"loss": 1.697, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 21.891010712622265, |
|
"grad_norm": 3.8755526542663574, |
|
"learning_rate": 2.8108989287377735e-05, |
|
"loss": 1.6936, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 21.968638410184752, |
|
"grad_norm": 4.30952262878418, |
|
"learning_rate": 2.8031361589815246e-05, |
|
"loss": 1.7112, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 22.046266107747243, |
|
"grad_norm": 4.38576602935791, |
|
"learning_rate": 2.795373389225276e-05, |
|
"loss": 1.644, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 22.123893805309734, |
|
"grad_norm": 4.09429931640625, |
|
"learning_rate": 2.7876106194690264e-05, |
|
"loss": 1.6035, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 22.201521502872225, |
|
"grad_norm": 4.038272857666016, |
|
"learning_rate": 2.779847849712778e-05, |
|
"loss": 1.6024, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 22.279149200434716, |
|
"grad_norm": 4.369879245758057, |
|
"learning_rate": 2.7720850799565286e-05, |
|
"loss": 1.6185, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 22.356776897997204, |
|
"grad_norm": 4.589230537414551, |
|
"learning_rate": 2.7643223102002797e-05, |
|
"loss": 1.6199, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 22.434404595559695, |
|
"grad_norm": 4.705469608306885, |
|
"learning_rate": 2.7565595404440304e-05, |
|
"loss": 1.6101, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 22.512032293122186, |
|
"grad_norm": 4.487303256988525, |
|
"learning_rate": 2.7487967706877815e-05, |
|
"loss": 1.6163, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 22.589659990684677, |
|
"grad_norm": 3.795254945755005, |
|
"learning_rate": 2.7410340009315326e-05, |
|
"loss": 1.6382, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 22.667287688247168, |
|
"grad_norm": 3.8786396980285645, |
|
"learning_rate": 2.7332712311752834e-05, |
|
"loss": 1.6223, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 22.744915385809655, |
|
"grad_norm": 4.308375835418701, |
|
"learning_rate": 2.7255084614190345e-05, |
|
"loss": 1.6447, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 22.822543083372146, |
|
"grad_norm": 4.034188747406006, |
|
"learning_rate": 2.7177456916627852e-05, |
|
"loss": 1.6351, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 22.900170780934637, |
|
"grad_norm": 4.602024555206299, |
|
"learning_rate": 2.7099829219065363e-05, |
|
"loss": 1.6344, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 22.977798478497128, |
|
"grad_norm": 4.131753921508789, |
|
"learning_rate": 2.702220152150287e-05, |
|
"loss": 1.6437, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 23.05542617605962, |
|
"grad_norm": 3.612490177154541, |
|
"learning_rate": 2.6944573823940382e-05, |
|
"loss": 1.5592, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 23.13305387362211, |
|
"grad_norm": 4.134332656860352, |
|
"learning_rate": 2.6866946126377896e-05, |
|
"loss": 1.5415, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 23.210681571184598, |
|
"grad_norm": 4.3021321296691895, |
|
"learning_rate": 2.67893184288154e-05, |
|
"loss": 1.5512, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 23.28830926874709, |
|
"grad_norm": 4.436678886413574, |
|
"learning_rate": 2.6711690731252915e-05, |
|
"loss": 1.5472, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 23.36593696630958, |
|
"grad_norm": 4.172628402709961, |
|
"learning_rate": 2.6634063033690422e-05, |
|
"loss": 1.5494, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 23.44356466387207, |
|
"grad_norm": 4.578736782073975, |
|
"learning_rate": 2.6556435336127933e-05, |
|
"loss": 1.5561, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 23.52119236143456, |
|
"grad_norm": 4.1252336502075195, |
|
"learning_rate": 2.647880763856544e-05, |
|
"loss": 1.5626, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 23.59882005899705, |
|
"grad_norm": 3.929494619369507, |
|
"learning_rate": 2.640117994100295e-05, |
|
"loss": 1.5769, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 23.67644775655954, |
|
"grad_norm": 4.310312271118164, |
|
"learning_rate": 2.6323552243440463e-05, |
|
"loss": 1.5716, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 23.75407545412203, |
|
"grad_norm": 3.970519781112671, |
|
"learning_rate": 2.624592454587797e-05, |
|
"loss": 1.5764, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 23.831703151684522, |
|
"grad_norm": 3.880556583404541, |
|
"learning_rate": 2.616829684831548e-05, |
|
"loss": 1.5871, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 23.909330849247013, |
|
"grad_norm": 4.146645545959473, |
|
"learning_rate": 2.609066915075299e-05, |
|
"loss": 1.5869, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 23.9869585468095, |
|
"grad_norm": 4.036287784576416, |
|
"learning_rate": 2.60130414531905e-05, |
|
"loss": 1.583, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 24.06458624437199, |
|
"grad_norm": 4.351132869720459, |
|
"learning_rate": 2.5935413755628007e-05, |
|
"loss": 1.4982, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 24.142213941934482, |
|
"grad_norm": 4.366822242736816, |
|
"learning_rate": 2.5857786058065518e-05, |
|
"loss": 1.4897, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 24.219841639496973, |
|
"grad_norm": 4.432433128356934, |
|
"learning_rate": 2.5780158360503026e-05, |
|
"loss": 1.4969, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 24.297469337059464, |
|
"grad_norm": 4.0283613204956055, |
|
"learning_rate": 2.570253066294054e-05, |
|
"loss": 1.4992, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 24.37509703462195, |
|
"grad_norm": 4.035061359405518, |
|
"learning_rate": 2.562490296537805e-05, |
|
"loss": 1.4968, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 24.452724732184443, |
|
"grad_norm": 3.834836006164551, |
|
"learning_rate": 2.554727526781556e-05, |
|
"loss": 1.5156, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 24.530352429746934, |
|
"grad_norm": 4.057690143585205, |
|
"learning_rate": 2.546964757025307e-05, |
|
"loss": 1.5052, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 24.607980127309425, |
|
"grad_norm": 4.63842248916626, |
|
"learning_rate": 2.5392019872690577e-05, |
|
"loss": 1.5107, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 24.685607824871916, |
|
"grad_norm": 3.624314069747925, |
|
"learning_rate": 2.5314392175128088e-05, |
|
"loss": 1.5185, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 24.763235522434403, |
|
"grad_norm": 4.338582515716553, |
|
"learning_rate": 2.5236764477565595e-05, |
|
"loss": 1.5187, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 24.840863219996894, |
|
"grad_norm": 3.9074742794036865, |
|
"learning_rate": 2.5159136780003106e-05, |
|
"loss": 1.524, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 24.918490917559385, |
|
"grad_norm": 3.97880482673645, |
|
"learning_rate": 2.5081509082440617e-05, |
|
"loss": 1.5278, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 24.996118615121876, |
|
"grad_norm": 4.298096656799316, |
|
"learning_rate": 2.5003881384878125e-05, |
|
"loss": 1.5267, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 25.073746312684367, |
|
"grad_norm": 3.85455322265625, |
|
"learning_rate": 2.4926253687315636e-05, |
|
"loss": 1.442, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 25.151374010246855, |
|
"grad_norm": 3.907085418701172, |
|
"learning_rate": 2.4848625989753147e-05, |
|
"loss": 1.4262, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 25.229001707809346, |
|
"grad_norm": 4.488945484161377, |
|
"learning_rate": 2.4770998292190654e-05, |
|
"loss": 1.4391, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 25.306629405371837, |
|
"grad_norm": 4.565778732299805, |
|
"learning_rate": 2.4693370594628165e-05, |
|
"loss": 1.447, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 25.384257102934328, |
|
"grad_norm": 4.2508015632629395, |
|
"learning_rate": 2.4615742897065676e-05, |
|
"loss": 1.4442, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 25.46188480049682, |
|
"grad_norm": 4.572117328643799, |
|
"learning_rate": 2.4538115199503184e-05, |
|
"loss": 1.4495, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 25.539512498059306, |
|
"grad_norm": 4.516686916351318, |
|
"learning_rate": 2.4460487501940695e-05, |
|
"loss": 1.4625, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 25.617140195621797, |
|
"grad_norm": 4.200167655944824, |
|
"learning_rate": 2.4382859804378202e-05, |
|
"loss": 1.4614, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 25.694767893184288, |
|
"grad_norm": 3.777397632598877, |
|
"learning_rate": 2.4305232106815713e-05, |
|
"loss": 1.4632, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 25.77239559074678, |
|
"grad_norm": 4.383970737457275, |
|
"learning_rate": 2.4227604409253224e-05, |
|
"loss": 1.4773, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 25.85002328830927, |
|
"grad_norm": 4.216927528381348, |
|
"learning_rate": 2.4149976711690735e-05, |
|
"loss": 1.4794, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 25.927650985871757, |
|
"grad_norm": 5.53390645980835, |
|
"learning_rate": 2.4072349014128243e-05, |
|
"loss": 1.4685, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 26.00527868343425, |
|
"grad_norm": 3.9746012687683105, |
|
"learning_rate": 2.3994721316565753e-05, |
|
"loss": 1.4873, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 26.08290638099674, |
|
"grad_norm": 4.278408527374268, |
|
"learning_rate": 2.391709361900326e-05, |
|
"loss": 1.3877, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 26.16053407855923, |
|
"grad_norm": 4.082756042480469, |
|
"learning_rate": 2.3839465921440772e-05, |
|
"loss": 1.3938, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 26.23816177612172, |
|
"grad_norm": 3.929353713989258, |
|
"learning_rate": 2.376183822387828e-05, |
|
"loss": 1.3903, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 26.31578947368421, |
|
"grad_norm": 4.400444030761719, |
|
"learning_rate": 2.368421052631579e-05, |
|
"loss": 1.4032, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 26.3934171712467, |
|
"grad_norm": 4.266624450683594, |
|
"learning_rate": 2.36065828287533e-05, |
|
"loss": 1.4028, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 26.47104486880919, |
|
"grad_norm": 4.547267913818359, |
|
"learning_rate": 2.3528955131190812e-05, |
|
"loss": 1.4043, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 26.548672566371682, |
|
"grad_norm": 4.04599666595459, |
|
"learning_rate": 2.345132743362832e-05, |
|
"loss": 1.4047, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 26.626300263934173, |
|
"grad_norm": 4.308363437652588, |
|
"learning_rate": 2.337369973606583e-05, |
|
"loss": 1.4154, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 26.70392796149666, |
|
"grad_norm": 3.774397373199463, |
|
"learning_rate": 2.329607203850334e-05, |
|
"loss": 1.4127, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 26.78155565905915, |
|
"grad_norm": 4.222719669342041, |
|
"learning_rate": 2.321844434094085e-05, |
|
"loss": 1.4149, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 26.859183356621642, |
|
"grad_norm": 4.3920135498046875, |
|
"learning_rate": 2.3140816643378357e-05, |
|
"loss": 1.4238, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 26.936811054184133, |
|
"grad_norm": 4.5161213874816895, |
|
"learning_rate": 2.306318894581587e-05, |
|
"loss": 1.4232, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 27.014438751746624, |
|
"grad_norm": 4.091419696807861, |
|
"learning_rate": 2.298556124825338e-05, |
|
"loss": 1.412, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 27.092066449309115, |
|
"grad_norm": 4.063779830932617, |
|
"learning_rate": 2.290793355069089e-05, |
|
"loss": 1.3344, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 27.169694146871603, |
|
"grad_norm": 4.165656089782715, |
|
"learning_rate": 2.2830305853128397e-05, |
|
"loss": 1.3348, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 27.247321844434094, |
|
"grad_norm": 4.288286209106445, |
|
"learning_rate": 2.2752678155565908e-05, |
|
"loss": 1.3389, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 27.324949541996585, |
|
"grad_norm": 4.2835211753845215, |
|
"learning_rate": 2.2675050458003416e-05, |
|
"loss": 1.3493, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 27.402577239559076, |
|
"grad_norm": 4.381802558898926, |
|
"learning_rate": 2.2597422760440927e-05, |
|
"loss": 1.358, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 27.480204937121567, |
|
"grad_norm": 4.263532638549805, |
|
"learning_rate": 2.2519795062878434e-05, |
|
"loss": 1.3632, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 27.557832634684054, |
|
"grad_norm": 4.2341742515563965, |
|
"learning_rate": 2.244216736531595e-05, |
|
"loss": 1.3734, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 27.635460332246545, |
|
"grad_norm": 3.9163522720336914, |
|
"learning_rate": 2.2364539667753456e-05, |
|
"loss": 1.3658, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 27.713088029809036, |
|
"grad_norm": 4.0479841232299805, |
|
"learning_rate": 2.2286911970190967e-05, |
|
"loss": 1.3593, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 27.790715727371527, |
|
"grad_norm": 5.027287483215332, |
|
"learning_rate": 2.2209284272628475e-05, |
|
"loss": 1.3869, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 27.868343424934018, |
|
"grad_norm": 4.199400424957275, |
|
"learning_rate": 2.2131656575065985e-05, |
|
"loss": 1.3882, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 27.945971122496506, |
|
"grad_norm": 3.9147210121154785, |
|
"learning_rate": 2.2054028877503493e-05, |
|
"loss": 1.3781, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 28.023598820058996, |
|
"grad_norm": 4.450961112976074, |
|
"learning_rate": 2.1976401179941004e-05, |
|
"loss": 1.3514, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 28.101226517621487, |
|
"grad_norm": 4.467356204986572, |
|
"learning_rate": 2.189877348237851e-05, |
|
"loss": 1.2839, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 28.17885421518398, |
|
"grad_norm": 4.179466247558594, |
|
"learning_rate": 2.1821145784816026e-05, |
|
"loss": 1.3017, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 28.25648191274647, |
|
"grad_norm": 3.7988483905792236, |
|
"learning_rate": 2.1743518087253533e-05, |
|
"loss": 1.3177, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 28.334109610308957, |
|
"grad_norm": 3.9721014499664307, |
|
"learning_rate": 2.1665890389691044e-05, |
|
"loss": 1.302, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 28.411737307871448, |
|
"grad_norm": 4.474249362945557, |
|
"learning_rate": 2.1588262692128552e-05, |
|
"loss": 1.3053, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 28.48936500543394, |
|
"grad_norm": 4.546684741973877, |
|
"learning_rate": 2.1510634994566063e-05, |
|
"loss": 1.3231, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 28.56699270299643, |
|
"grad_norm": 4.715445518493652, |
|
"learning_rate": 2.143300729700357e-05, |
|
"loss": 1.3305, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 28.64462040055892, |
|
"grad_norm": 4.777371406555176, |
|
"learning_rate": 2.135537959944108e-05, |
|
"loss": 1.3231, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 28.72224809812141, |
|
"grad_norm": 4.404980182647705, |
|
"learning_rate": 2.1277751901878592e-05, |
|
"loss": 1.3266, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 28.7998757956839, |
|
"grad_norm": 4.121158599853516, |
|
"learning_rate": 2.1200124204316103e-05, |
|
"loss": 1.3326, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 28.87750349324639, |
|
"grad_norm": 4.212721824645996, |
|
"learning_rate": 2.112249650675361e-05, |
|
"loss": 1.3239, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 28.95513119080888, |
|
"grad_norm": 3.941192626953125, |
|
"learning_rate": 2.104486880919112e-05, |
|
"loss": 1.337, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 29.032758888371372, |
|
"grad_norm": 4.226070404052734, |
|
"learning_rate": 2.096724111162863e-05, |
|
"loss": 1.2999, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 29.11038658593386, |
|
"grad_norm": 4.37491512298584, |
|
"learning_rate": 2.088961341406614e-05, |
|
"loss": 1.2449, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 29.18801428349635, |
|
"grad_norm": 4.1313347816467285, |
|
"learning_rate": 2.0811985716503648e-05, |
|
"loss": 1.2655, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 29.26564198105884, |
|
"grad_norm": 4.144821643829346, |
|
"learning_rate": 2.073435801894116e-05, |
|
"loss": 1.2701, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 29.343269678621333, |
|
"grad_norm": 4.262469291687012, |
|
"learning_rate": 2.065673032137867e-05, |
|
"loss": 1.2671, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 29.420897376183824, |
|
"grad_norm": 4.0824761390686035, |
|
"learning_rate": 2.057910262381618e-05, |
|
"loss": 1.2757, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 29.49852507374631, |
|
"grad_norm": 4.00981330871582, |
|
"learning_rate": 2.0501474926253688e-05, |
|
"loss": 1.275, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 29.576152771308802, |
|
"grad_norm": 4.502607822418213, |
|
"learning_rate": 2.04238472286912e-05, |
|
"loss": 1.278, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 29.653780468871293, |
|
"grad_norm": 4.623337745666504, |
|
"learning_rate": 2.0346219531128707e-05, |
|
"loss": 1.2805, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 29.731408166433784, |
|
"grad_norm": 4.471139430999756, |
|
"learning_rate": 2.0268591833566218e-05, |
|
"loss": 1.2761, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 29.809035863996275, |
|
"grad_norm": 4.283520698547363, |
|
"learning_rate": 2.0190964136003725e-05, |
|
"loss": 1.2907, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 29.886663561558763, |
|
"grad_norm": 4.755760192871094, |
|
"learning_rate": 2.011333643844124e-05, |
|
"loss": 1.2887, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 29.964291259121254, |
|
"grad_norm": 4.386314392089844, |
|
"learning_rate": 2.0035708740878747e-05, |
|
"loss": 1.2949, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 30.041918956683745, |
|
"grad_norm": 4.468728542327881, |
|
"learning_rate": 1.9958081043316258e-05, |
|
"loss": 1.2377, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 30.119546654246236, |
|
"grad_norm": 4.082640171051025, |
|
"learning_rate": 1.9880453345753765e-05, |
|
"loss": 1.2118, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 30.197174351808727, |
|
"grad_norm": 4.6380205154418945, |
|
"learning_rate": 1.9802825648191276e-05, |
|
"loss": 1.2211, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 30.274802049371214, |
|
"grad_norm": 4.422779083251953, |
|
"learning_rate": 1.9725197950628784e-05, |
|
"loss": 1.2255, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 30.352429746933705, |
|
"grad_norm": 4.414443016052246, |
|
"learning_rate": 1.9647570253066295e-05, |
|
"loss": 1.2277, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 30.430057444496196, |
|
"grad_norm": 4.212508201599121, |
|
"learning_rate": 1.9569942555503802e-05, |
|
"loss": 1.236, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 30.507685142058687, |
|
"grad_norm": 4.3478803634643555, |
|
"learning_rate": 1.9492314857941317e-05, |
|
"loss": 1.2387, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 30.585312839621178, |
|
"grad_norm": 5.213949203491211, |
|
"learning_rate": 1.9414687160378824e-05, |
|
"loss": 1.2434, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 30.662940537183665, |
|
"grad_norm": 3.907501459121704, |
|
"learning_rate": 1.9337059462816335e-05, |
|
"loss": 1.2415, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 30.740568234746156, |
|
"grad_norm": 4.092105865478516, |
|
"learning_rate": 1.9259431765253843e-05, |
|
"loss": 1.2515, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 30.818195932308647, |
|
"grad_norm": 4.422701835632324, |
|
"learning_rate": 1.9181804067691354e-05, |
|
"loss": 1.2554, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 30.89582362987114, |
|
"grad_norm": 4.132325172424316, |
|
"learning_rate": 1.910417637012886e-05, |
|
"loss": 1.2607, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 30.97345132743363, |
|
"grad_norm": 4.294840335845947, |
|
"learning_rate": 1.9026548672566372e-05, |
|
"loss": 1.2457, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 31.05107902499612, |
|
"grad_norm": 4.593545913696289, |
|
"learning_rate": 1.894892097500388e-05, |
|
"loss": 1.1969, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 31.128706722558608, |
|
"grad_norm": 3.965829610824585, |
|
"learning_rate": 1.8871293277441394e-05, |
|
"loss": 1.1812, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 31.2063344201211, |
|
"grad_norm": 4.391860008239746, |
|
"learning_rate": 1.87936655798789e-05, |
|
"loss": 1.1764, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 31.28396211768359, |
|
"grad_norm": 4.370110511779785, |
|
"learning_rate": 1.8716037882316413e-05, |
|
"loss": 1.1804, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 31.36158981524608, |
|
"grad_norm": 4.167665958404541, |
|
"learning_rate": 1.863841018475392e-05, |
|
"loss": 1.1993, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 31.439217512808572, |
|
"grad_norm": 4.17106294631958, |
|
"learning_rate": 1.856078248719143e-05, |
|
"loss": 1.1915, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 31.51684521037106, |
|
"grad_norm": 4.328006267547607, |
|
"learning_rate": 1.848315478962894e-05, |
|
"loss": 1.2023, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 31.59447290793355, |
|
"grad_norm": 4.033382415771484, |
|
"learning_rate": 1.840552709206645e-05, |
|
"loss": 1.2049, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 31.67210060549604, |
|
"grad_norm": 4.497017860412598, |
|
"learning_rate": 1.832789939450396e-05, |
|
"loss": 1.2005, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 31.749728303058532, |
|
"grad_norm": 4.34217643737793, |
|
"learning_rate": 1.825027169694147e-05, |
|
"loss": 1.1972, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 31.827356000621023, |
|
"grad_norm": 4.198293209075928, |
|
"learning_rate": 1.817264399937898e-05, |
|
"loss": 1.2119, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 31.90498369818351, |
|
"grad_norm": 4.584846019744873, |
|
"learning_rate": 1.809501630181649e-05, |
|
"loss": 1.2265, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 31.982611395746, |
|
"grad_norm": 4.147974014282227, |
|
"learning_rate": 1.8017388604253997e-05, |
|
"loss": 1.231, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 32.06023909330849, |
|
"grad_norm": 4.133516311645508, |
|
"learning_rate": 1.793976090669151e-05, |
|
"loss": 1.1624, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 32.13786679087098, |
|
"grad_norm": 3.903019905090332, |
|
"learning_rate": 1.7862133209129016e-05, |
|
"loss": 1.1447, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 32.21549448843347, |
|
"grad_norm": 4.349834442138672, |
|
"learning_rate": 1.7784505511566527e-05, |
|
"loss": 1.1472, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 32.29312218599596, |
|
"grad_norm": 5.044727325439453, |
|
"learning_rate": 1.7706877814004038e-05, |
|
"loss": 1.1497, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 32.37074988355845, |
|
"grad_norm": 4.564863681793213, |
|
"learning_rate": 1.762925011644155e-05, |
|
"loss": 1.1568, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 32.448377581120944, |
|
"grad_norm": 4.659034252166748, |
|
"learning_rate": 1.7551622418879056e-05, |
|
"loss": 1.1652, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 32.526005278683435, |
|
"grad_norm": 4.484036445617676, |
|
"learning_rate": 1.7473994721316567e-05, |
|
"loss": 1.1689, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 32.603632976245926, |
|
"grad_norm": 3.8715898990631104, |
|
"learning_rate": 1.7396367023754075e-05, |
|
"loss": 1.1625, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 32.68126067380842, |
|
"grad_norm": 4.791990280151367, |
|
"learning_rate": 1.7318739326191586e-05, |
|
"loss": 1.1649, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 32.75888837137091, |
|
"grad_norm": 4.657315254211426, |
|
"learning_rate": 1.7241111628629093e-05, |
|
"loss": 1.1658, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 32.83651606893339, |
|
"grad_norm": 4.780379295349121, |
|
"learning_rate": 1.7163483931066604e-05, |
|
"loss": 1.1789, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 32.91414376649588, |
|
"grad_norm": 4.298798561096191, |
|
"learning_rate": 1.7085856233504115e-05, |
|
"loss": 1.1873, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 32.991771464058374, |
|
"grad_norm": 4.570270538330078, |
|
"learning_rate": 1.7008228535941626e-05, |
|
"loss": 1.1736, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 33.069399161620865, |
|
"grad_norm": 4.421665191650391, |
|
"learning_rate": 1.6930600838379134e-05, |
|
"loss": 1.1079, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 33.147026859183356, |
|
"grad_norm": 4.232321739196777, |
|
"learning_rate": 1.6852973140816645e-05, |
|
"loss": 1.0986, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 33.22465455674585, |
|
"grad_norm": 4.439553737640381, |
|
"learning_rate": 1.6775345443254152e-05, |
|
"loss": 1.114, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 33.30228225430834, |
|
"grad_norm": 3.9282166957855225, |
|
"learning_rate": 1.6697717745691663e-05, |
|
"loss": 1.1229, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 33.37990995187083, |
|
"grad_norm": 4.5075907707214355, |
|
"learning_rate": 1.662009004812917e-05, |
|
"loss": 1.1298, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 33.45753764943332, |
|
"grad_norm": 4.296872138977051, |
|
"learning_rate": 1.6542462350566685e-05, |
|
"loss": 1.1271, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 33.53516534699581, |
|
"grad_norm": 3.8833069801330566, |
|
"learning_rate": 1.6464834653004193e-05, |
|
"loss": 1.1334, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 33.6127930445583, |
|
"grad_norm": 4.518033027648926, |
|
"learning_rate": 1.6387206955441703e-05, |
|
"loss": 1.1251, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 33.690420742120786, |
|
"grad_norm": 4.618717670440674, |
|
"learning_rate": 1.630957925787921e-05, |
|
"loss": 1.137, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 33.76804843968328, |
|
"grad_norm": 4.346001148223877, |
|
"learning_rate": 1.6231951560316722e-05, |
|
"loss": 1.1439, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 33.84567613724577, |
|
"grad_norm": 4.203965663909912, |
|
"learning_rate": 1.615432386275423e-05, |
|
"loss": 1.1424, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 33.92330383480826, |
|
"grad_norm": 4.829082489013672, |
|
"learning_rate": 1.607669616519174e-05, |
|
"loss": 1.1476, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 34.00093153237075, |
|
"grad_norm": 4.414132118225098, |
|
"learning_rate": 1.5999068467629248e-05, |
|
"loss": 1.1452, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 34.07855922993324, |
|
"grad_norm": 4.220102787017822, |
|
"learning_rate": 1.5921440770066762e-05, |
|
"loss": 1.0785, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 34.15618692749573, |
|
"grad_norm": 4.156444549560547, |
|
"learning_rate": 1.584381307250427e-05, |
|
"loss": 1.0781, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 34.23381462505822, |
|
"grad_norm": 3.997420072555542, |
|
"learning_rate": 1.576618537494178e-05, |
|
"loss": 1.0911, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 34.311442322620714, |
|
"grad_norm": 4.4925537109375, |
|
"learning_rate": 1.568855767737929e-05, |
|
"loss": 1.0861, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 34.389070020183205, |
|
"grad_norm": 4.4098615646362305, |
|
"learning_rate": 1.56109299798168e-05, |
|
"loss": 1.0984, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 34.46669771774569, |
|
"grad_norm": 4.235119819641113, |
|
"learning_rate": 1.5533302282254307e-05, |
|
"loss": 1.0945, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 34.54432541530818, |
|
"grad_norm": 4.796499729156494, |
|
"learning_rate": 1.5455674584691818e-05, |
|
"loss": 1.0973, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 34.62195311287067, |
|
"grad_norm": 4.959954261779785, |
|
"learning_rate": 1.537804688712933e-05, |
|
"loss": 1.0978, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 34.69958081043316, |
|
"grad_norm": 4.675489902496338, |
|
"learning_rate": 1.530041918956684e-05, |
|
"loss": 1.1047, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 34.77720850799565, |
|
"grad_norm": 4.466859340667725, |
|
"learning_rate": 1.5222791492004349e-05, |
|
"loss": 1.093, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 34.85483620555814, |
|
"grad_norm": 4.607345104217529, |
|
"learning_rate": 1.5145163794441858e-05, |
|
"loss": 1.1098, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 34.932463903120635, |
|
"grad_norm": 3.9733870029449463, |
|
"learning_rate": 1.5067536096879367e-05, |
|
"loss": 1.1199, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 35.010091600683126, |
|
"grad_norm": 4.052885055541992, |
|
"learning_rate": 1.4989908399316877e-05, |
|
"loss": 1.1009, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 35.08771929824562, |
|
"grad_norm": 4.508426189422607, |
|
"learning_rate": 1.4912280701754386e-05, |
|
"loss": 1.0394, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 35.16534699580811, |
|
"grad_norm": 4.186591148376465, |
|
"learning_rate": 1.4834653004191895e-05, |
|
"loss": 1.0526, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 35.24297469337059, |
|
"grad_norm": 4.583897590637207, |
|
"learning_rate": 1.4757025306629408e-05, |
|
"loss": 1.0492, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 35.32060239093308, |
|
"grad_norm": 4.202432155609131, |
|
"learning_rate": 1.4679397609066917e-05, |
|
"loss": 1.0575, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 35.39823008849557, |
|
"grad_norm": 4.248536586761475, |
|
"learning_rate": 1.4601769911504426e-05, |
|
"loss": 1.0694, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 35.475857786058064, |
|
"grad_norm": 4.490120887756348, |
|
"learning_rate": 1.4524142213941935e-05, |
|
"loss": 1.0661, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 35.553485483620555, |
|
"grad_norm": 4.558992862701416, |
|
"learning_rate": 1.4446514516379445e-05, |
|
"loss": 1.0683, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 35.631113181183046, |
|
"grad_norm": 4.340649127960205, |
|
"learning_rate": 1.4368886818816954e-05, |
|
"loss": 1.0733, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 35.70874087874554, |
|
"grad_norm": 4.814639091491699, |
|
"learning_rate": 1.4291259121254463e-05, |
|
"loss": 1.0699, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 35.78636857630803, |
|
"grad_norm": 5.107011795043945, |
|
"learning_rate": 1.4213631423691972e-05, |
|
"loss": 1.0785, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 35.86399627387052, |
|
"grad_norm": 4.92033052444458, |
|
"learning_rate": 1.4136003726129485e-05, |
|
"loss": 1.0779, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 35.94162397143301, |
|
"grad_norm": 5.033237457275391, |
|
"learning_rate": 1.4058376028566994e-05, |
|
"loss": 1.0863, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 36.019251668995494, |
|
"grad_norm": 4.0776591300964355, |
|
"learning_rate": 1.3980748331004504e-05, |
|
"loss": 1.0703, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 36.096879366557985, |
|
"grad_norm": 4.491557598114014, |
|
"learning_rate": 1.3903120633442013e-05, |
|
"loss": 1.0207, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 36.174507064120476, |
|
"grad_norm": 4.444462299346924, |
|
"learning_rate": 1.3825492935879522e-05, |
|
"loss": 1.0357, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 36.25213476168297, |
|
"grad_norm": 4.559656143188477, |
|
"learning_rate": 1.3747865238317031e-05, |
|
"loss": 1.0295, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 36.32976245924546, |
|
"grad_norm": 4.09979248046875, |
|
"learning_rate": 1.367023754075454e-05, |
|
"loss": 1.0142, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 36.40739015680795, |
|
"grad_norm": 4.5045084953308105, |
|
"learning_rate": 1.3592609843192053e-05, |
|
"loss": 1.0292, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 36.48501785437044, |
|
"grad_norm": 5.544869422912598, |
|
"learning_rate": 1.3514982145629562e-05, |
|
"loss": 1.0371, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 36.56264555193293, |
|
"grad_norm": 4.618766784667969, |
|
"learning_rate": 1.3437354448067072e-05, |
|
"loss": 1.0376, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 36.64027324949542, |
|
"grad_norm": 4.791065216064453, |
|
"learning_rate": 1.3359726750504581e-05, |
|
"loss": 1.0438, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 36.71790094705791, |
|
"grad_norm": 4.122102737426758, |
|
"learning_rate": 1.328209905294209e-05, |
|
"loss": 1.0462, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 36.7955286446204, |
|
"grad_norm": 4.137369632720947, |
|
"learning_rate": 1.32044713553796e-05, |
|
"loss": 1.0444, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 36.87315634218289, |
|
"grad_norm": 4.59998083114624, |
|
"learning_rate": 1.3126843657817109e-05, |
|
"loss": 1.0508, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 36.95078403974538, |
|
"grad_norm": 4.751966953277588, |
|
"learning_rate": 1.3049215960254618e-05, |
|
"loss": 1.0474, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 37.02841173730787, |
|
"grad_norm": 4.363110065460205, |
|
"learning_rate": 1.297158826269213e-05, |
|
"loss": 1.026, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 37.10603943487036, |
|
"grad_norm": 5.005125045776367, |
|
"learning_rate": 1.289396056512964e-05, |
|
"loss": 0.9971, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 37.18366713243285, |
|
"grad_norm": 4.143869400024414, |
|
"learning_rate": 1.2816332867567149e-05, |
|
"loss": 0.9877, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 37.26129482999534, |
|
"grad_norm": 4.527329444885254, |
|
"learning_rate": 1.2738705170004658e-05, |
|
"loss": 0.9914, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 37.338922527557834, |
|
"grad_norm": 3.8393781185150146, |
|
"learning_rate": 1.2661077472442168e-05, |
|
"loss": 1.0098, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 37.416550225120325, |
|
"grad_norm": 4.1036295890808105, |
|
"learning_rate": 1.2583449774879677e-05, |
|
"loss": 1.0058, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 37.494177922682816, |
|
"grad_norm": 4.97705078125, |
|
"learning_rate": 1.2505822077317186e-05, |
|
"loss": 1.0098, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 37.57180562024531, |
|
"grad_norm": 4.289205074310303, |
|
"learning_rate": 1.2428194379754697e-05, |
|
"loss": 1.0117, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 37.64943331780779, |
|
"grad_norm": 4.353816509246826, |
|
"learning_rate": 1.2350566682192206e-05, |
|
"loss": 1.0162, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 37.72706101537028, |
|
"grad_norm": 4.447281837463379, |
|
"learning_rate": 1.2272938984629717e-05, |
|
"loss": 1.0202, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 37.80468871293277, |
|
"grad_norm": 4.254565715789795, |
|
"learning_rate": 1.2195311287067226e-05, |
|
"loss": 1.0252, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 37.882316410495264, |
|
"grad_norm": 4.382399559020996, |
|
"learning_rate": 1.2117683589504736e-05, |
|
"loss": 1.023, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 37.959944108057755, |
|
"grad_norm": 4.591485977172852, |
|
"learning_rate": 1.2040055891942245e-05, |
|
"loss": 1.024, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 38.037571805620246, |
|
"grad_norm": 4.238889217376709, |
|
"learning_rate": 1.1962428194379756e-05, |
|
"loss": 0.996, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 38.11519950318274, |
|
"grad_norm": 5.276005268096924, |
|
"learning_rate": 1.1884800496817265e-05, |
|
"loss": 0.97, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 38.19282720074523, |
|
"grad_norm": 4.318702697753906, |
|
"learning_rate": 1.1807172799254774e-05, |
|
"loss": 0.9679, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 38.27045489830772, |
|
"grad_norm": 4.6534504890441895, |
|
"learning_rate": 1.1729545101692284e-05, |
|
"loss": 0.9754, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 38.34808259587021, |
|
"grad_norm": 4.487671375274658, |
|
"learning_rate": 1.1651917404129794e-05, |
|
"loss": 0.9771, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 38.425710293432694, |
|
"grad_norm": 4.206161975860596, |
|
"learning_rate": 1.1574289706567304e-05, |
|
"loss": 0.9824, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 38.503337990995185, |
|
"grad_norm": 4.533993721008301, |
|
"learning_rate": 1.1496662009004813e-05, |
|
"loss": 0.98, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 38.580965688557676, |
|
"grad_norm": 4.58768892288208, |
|
"learning_rate": 1.1419034311442322e-05, |
|
"loss": 0.9891, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 38.65859338612017, |
|
"grad_norm": 4.578085422515869, |
|
"learning_rate": 1.1341406613879833e-05, |
|
"loss": 0.9912, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 38.73622108368266, |
|
"grad_norm": 4.549184799194336, |
|
"learning_rate": 1.1263778916317342e-05, |
|
"loss": 0.998, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 38.81384878124515, |
|
"grad_norm": 4.277008056640625, |
|
"learning_rate": 1.1186151218754852e-05, |
|
"loss": 0.9872, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 38.89147647880764, |
|
"grad_norm": 4.436850070953369, |
|
"learning_rate": 1.1108523521192361e-05, |
|
"loss": 0.9902, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 38.96910417637013, |
|
"grad_norm": 4.574080944061279, |
|
"learning_rate": 1.1030895823629872e-05, |
|
"loss": 1.0062, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 39.04673187393262, |
|
"grad_norm": 4.431211471557617, |
|
"learning_rate": 1.0953268126067381e-05, |
|
"loss": 0.9653, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 39.12435957149511, |
|
"grad_norm": 4.642630100250244, |
|
"learning_rate": 1.087564042850489e-05, |
|
"loss": 0.9415, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 39.2019872690576, |
|
"grad_norm": 4.911776065826416, |
|
"learning_rate": 1.0798012730942401e-05, |
|
"loss": 0.9479, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 39.27961496662009, |
|
"grad_norm": 4.803096771240234, |
|
"learning_rate": 1.072038503337991e-05, |
|
"loss": 0.9548, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 39.35724266418258, |
|
"grad_norm": 4.382226943969727, |
|
"learning_rate": 1.064275733581742e-05, |
|
"loss": 0.9501, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 39.43487036174507, |
|
"grad_norm": 4.663143634796143, |
|
"learning_rate": 1.0565129638254929e-05, |
|
"loss": 0.9671, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 39.51249805930756, |
|
"grad_norm": 4.334278106689453, |
|
"learning_rate": 1.048750194069244e-05, |
|
"loss": 0.9637, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 39.59012575687005, |
|
"grad_norm": 4.499300956726074, |
|
"learning_rate": 1.040987424312995e-05, |
|
"loss": 0.959, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 39.66775345443254, |
|
"grad_norm": 4.04175329208374, |
|
"learning_rate": 1.0332246545567458e-05, |
|
"loss": 0.9625, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 39.74538115199503, |
|
"grad_norm": 4.483138084411621, |
|
"learning_rate": 1.0254618848004968e-05, |
|
"loss": 0.9654, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 39.823008849557525, |
|
"grad_norm": 4.5711140632629395, |
|
"learning_rate": 1.0176991150442479e-05, |
|
"loss": 0.9705, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 39.900636547120016, |
|
"grad_norm": 4.339575290679932, |
|
"learning_rate": 1.0099363452879988e-05, |
|
"loss": 0.971, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 39.9782642446825, |
|
"grad_norm": 4.528174877166748, |
|
"learning_rate": 1.0021735755317497e-05, |
|
"loss": 0.9714, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 40.05589194224499, |
|
"grad_norm": 4.42559289932251, |
|
"learning_rate": 9.944108057755006e-06, |
|
"loss": 0.9325, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 40.13351963980748, |
|
"grad_norm": 4.588589191436768, |
|
"learning_rate": 9.866480360192517e-06, |
|
"loss": 0.9248, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 40.21114733736997, |
|
"grad_norm": 5.253052711486816, |
|
"learning_rate": 9.788852662630027e-06, |
|
"loss": 0.9285, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 40.28877503493246, |
|
"grad_norm": 4.5551042556762695, |
|
"learning_rate": 9.711224965067536e-06, |
|
"loss": 0.9384, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 40.366402732494954, |
|
"grad_norm": 4.9546990394592285, |
|
"learning_rate": 9.633597267505045e-06, |
|
"loss": 0.9332, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 40.444030430057445, |
|
"grad_norm": 4.840395450592041, |
|
"learning_rate": 9.555969569942556e-06, |
|
"loss": 0.9321, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 40.521658127619936, |
|
"grad_norm": 4.765369415283203, |
|
"learning_rate": 9.478341872380065e-06, |
|
"loss": 0.9366, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 40.59928582518243, |
|
"grad_norm": 4.869214057922363, |
|
"learning_rate": 9.400714174817574e-06, |
|
"loss": 0.9419, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 40.67691352274492, |
|
"grad_norm": 4.868770599365234, |
|
"learning_rate": 9.323086477255084e-06, |
|
"loss": 0.9431, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 40.7545412203074, |
|
"grad_norm": 5.142333030700684, |
|
"learning_rate": 9.245458779692595e-06, |
|
"loss": 0.9455, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 40.83216891786989, |
|
"grad_norm": 4.263994216918945, |
|
"learning_rate": 9.167831082130104e-06, |
|
"loss": 0.9497, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 40.909796615432384, |
|
"grad_norm": 4.486149311065674, |
|
"learning_rate": 9.090203384567613e-06, |
|
"loss": 0.9484, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 40.987424312994875, |
|
"grad_norm": 4.359130859375, |
|
"learning_rate": 9.012575687005124e-06, |
|
"loss": 0.9441, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 41.065052010557366, |
|
"grad_norm": 4.38929557800293, |
|
"learning_rate": 8.934947989442633e-06, |
|
"loss": 0.9057, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 41.14267970811986, |
|
"grad_norm": 4.379587650299072, |
|
"learning_rate": 8.857320291880143e-06, |
|
"loss": 0.9024, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 41.22030740568235, |
|
"grad_norm": 4.549973964691162, |
|
"learning_rate": 8.779692594317652e-06, |
|
"loss": 0.9116, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 41.29793510324484, |
|
"grad_norm": 4.387326240539551, |
|
"learning_rate": 8.702064896755163e-06, |
|
"loss": 0.9132, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 41.37556280080733, |
|
"grad_norm": 4.824013710021973, |
|
"learning_rate": 8.624437199192672e-06, |
|
"loss": 0.9048, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 41.45319049836982, |
|
"grad_norm": 4.79560661315918, |
|
"learning_rate": 8.546809501630181e-06, |
|
"loss": 0.9142, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 41.53081819593231, |
|
"grad_norm": 4.503738880157471, |
|
"learning_rate": 8.46918180406769e-06, |
|
"loss": 0.92, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 41.608445893494796, |
|
"grad_norm": 4.430568218231201, |
|
"learning_rate": 8.391554106505201e-06, |
|
"loss": 0.9258, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 41.68607359105729, |
|
"grad_norm": 4.630665302276611, |
|
"learning_rate": 8.31392640894271e-06, |
|
"loss": 0.9226, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 41.76370128861978, |
|
"grad_norm": 4.298410415649414, |
|
"learning_rate": 8.23629871138022e-06, |
|
"loss": 0.9264, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 41.84132898618227, |
|
"grad_norm": 4.575562000274658, |
|
"learning_rate": 8.15867101381773e-06, |
|
"loss": 0.9194, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 41.91895668374476, |
|
"grad_norm": 4.254932880401611, |
|
"learning_rate": 8.08104331625524e-06, |
|
"loss": 0.9339, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 41.99658438130725, |
|
"grad_norm": 4.799808502197266, |
|
"learning_rate": 8.00341561869275e-06, |
|
"loss": 0.9262, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 42.07421207886974, |
|
"grad_norm": 4.432214260101318, |
|
"learning_rate": 7.925787921130259e-06, |
|
"loss": 0.8875, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 42.15183977643223, |
|
"grad_norm": 4.276678085327148, |
|
"learning_rate": 7.84816022356777e-06, |
|
"loss": 0.8923, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 42.229467473994724, |
|
"grad_norm": 5.178389072418213, |
|
"learning_rate": 7.770532526005279e-06, |
|
"loss": 0.8835, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 42.307095171557215, |
|
"grad_norm": 4.696712017059326, |
|
"learning_rate": 7.692904828442788e-06, |
|
"loss": 0.8872, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 42.3847228691197, |
|
"grad_norm": 4.507452011108398, |
|
"learning_rate": 7.615277130880298e-06, |
|
"loss": 0.892, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 42.46235056668219, |
|
"grad_norm": 4.397420883178711, |
|
"learning_rate": 7.537649433317809e-06, |
|
"loss": 0.9004, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 42.53997826424468, |
|
"grad_norm": 4.42085599899292, |
|
"learning_rate": 7.460021735755318e-06, |
|
"loss": 0.9006, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 42.61760596180717, |
|
"grad_norm": 4.6971306800842285, |
|
"learning_rate": 7.3823940381928275e-06, |
|
"loss": 0.8923, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 42.69523365936966, |
|
"grad_norm": 4.580519199371338, |
|
"learning_rate": 7.304766340630337e-06, |
|
"loss": 0.8984, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 42.772861356932154, |
|
"grad_norm": 4.263189315795898, |
|
"learning_rate": 7.227138643067848e-06, |
|
"loss": 0.9049, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 42.850489054494645, |
|
"grad_norm": 4.588529586791992, |
|
"learning_rate": 7.149510945505357e-06, |
|
"loss": 0.9078, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 42.928116752057136, |
|
"grad_norm": 4.9102559089660645, |
|
"learning_rate": 7.071883247942866e-06, |
|
"loss": 0.9073, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 43.00574444961963, |
|
"grad_norm": 4.7918853759765625, |
|
"learning_rate": 6.994255550380375e-06, |
|
"loss": 0.9072, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 43.08337214718212, |
|
"grad_norm": 3.824863910675049, |
|
"learning_rate": 6.916627852817886e-06, |
|
"loss": 0.8697, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 43.1609998447446, |
|
"grad_norm": 4.692780017852783, |
|
"learning_rate": 6.839000155255396e-06, |
|
"loss": 0.8758, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 43.23862754230709, |
|
"grad_norm": 5.024048805236816, |
|
"learning_rate": 6.761372457692905e-06, |
|
"loss": 0.8725, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 43.316255239869584, |
|
"grad_norm": 4.9430975914001465, |
|
"learning_rate": 6.683744760130414e-06, |
|
"loss": 0.8739, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 43.393882937432075, |
|
"grad_norm": 4.70835542678833, |
|
"learning_rate": 6.606117062567925e-06, |
|
"loss": 0.8774, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 43.471510634994566, |
|
"grad_norm": 4.474407196044922, |
|
"learning_rate": 6.528489365005434e-06, |
|
"loss": 0.8788, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 43.54913833255706, |
|
"grad_norm": 4.508847713470459, |
|
"learning_rate": 6.4508616674429435e-06, |
|
"loss": 0.8812, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 43.62676603011955, |
|
"grad_norm": 4.584230422973633, |
|
"learning_rate": 6.373233969880453e-06, |
|
"loss": 0.8787, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 43.70439372768204, |
|
"grad_norm": 4.892379283905029, |
|
"learning_rate": 6.295606272317964e-06, |
|
"loss": 0.8883, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 43.78202142524453, |
|
"grad_norm": 4.759417533874512, |
|
"learning_rate": 6.217978574755473e-06, |
|
"loss": 0.885, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 43.85964912280702, |
|
"grad_norm": 4.658566474914551, |
|
"learning_rate": 6.140350877192982e-06, |
|
"loss": 0.8799, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 43.937276820369505, |
|
"grad_norm": 4.660683631896973, |
|
"learning_rate": 6.062723179630492e-06, |
|
"loss": 0.8899, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 44.014904517931996, |
|
"grad_norm": 4.208764553070068, |
|
"learning_rate": 5.985095482068002e-06, |
|
"loss": 0.8801, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 44.09253221549449, |
|
"grad_norm": 4.277160167694092, |
|
"learning_rate": 5.907467784505512e-06, |
|
"loss": 0.854, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 44.17015991305698, |
|
"grad_norm": 4.98652982711792, |
|
"learning_rate": 5.829840086943022e-06, |
|
"loss": 0.8548, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 44.24778761061947, |
|
"grad_norm": 4.677061557769775, |
|
"learning_rate": 5.752212389380531e-06, |
|
"loss": 0.8661, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 44.32541530818196, |
|
"grad_norm": 4.650174617767334, |
|
"learning_rate": 5.674584691818041e-06, |
|
"loss": 0.8626, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 44.40304300574445, |
|
"grad_norm": 4.145635604858398, |
|
"learning_rate": 5.59695699425555e-06, |
|
"loss": 0.8635, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 44.48067070330694, |
|
"grad_norm": 4.334202766418457, |
|
"learning_rate": 5.51932929669306e-06, |
|
"loss": 0.8633, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 44.55829840086943, |
|
"grad_norm": 4.45126485824585, |
|
"learning_rate": 5.44170159913057e-06, |
|
"loss": 0.863, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 44.63592609843192, |
|
"grad_norm": 4.916016578674316, |
|
"learning_rate": 5.36407390156808e-06, |
|
"loss": 0.8687, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 44.71355379599441, |
|
"grad_norm": 4.656139373779297, |
|
"learning_rate": 5.286446204005589e-06, |
|
"loss": 0.8665, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 44.7911814935569, |
|
"grad_norm": 4.845007419586182, |
|
"learning_rate": 5.208818506443099e-06, |
|
"loss": 0.8681, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 44.86880919111939, |
|
"grad_norm": 4.315593242645264, |
|
"learning_rate": 5.131190808880608e-06, |
|
"loss": 0.863, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 44.94643688868188, |
|
"grad_norm": 4.265692710876465, |
|
"learning_rate": 5.053563111318118e-06, |
|
"loss": 0.8605, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 45.02406458624437, |
|
"grad_norm": 4.859785079956055, |
|
"learning_rate": 4.975935413755628e-06, |
|
"loss": 0.8637, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 45.10169228380686, |
|
"grad_norm": 4.233875751495361, |
|
"learning_rate": 4.898307716193138e-06, |
|
"loss": 0.8408, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 45.17931998136935, |
|
"grad_norm": 4.796300411224365, |
|
"learning_rate": 4.820680018630647e-06, |
|
"loss": 0.85, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 45.256947678931844, |
|
"grad_norm": 4.32379150390625, |
|
"learning_rate": 4.743052321068157e-06, |
|
"loss": 0.8455, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 45.334575376494335, |
|
"grad_norm": 4.826063632965088, |
|
"learning_rate": 4.665424623505667e-06, |
|
"loss": 0.853, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 45.412203074056826, |
|
"grad_norm": 4.197807312011719, |
|
"learning_rate": 4.587796925943176e-06, |
|
"loss": 0.8563, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 45.48983077161931, |
|
"grad_norm": 4.949887275695801, |
|
"learning_rate": 4.5101692283806865e-06, |
|
"loss": 0.8478, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 45.5674584691818, |
|
"grad_norm": 4.073297023773193, |
|
"learning_rate": 4.432541530818196e-06, |
|
"loss": 0.8502, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 45.64508616674429, |
|
"grad_norm": 4.890108108520508, |
|
"learning_rate": 4.354913833255706e-06, |
|
"loss": 0.8482, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 45.72271386430678, |
|
"grad_norm": 4.2948079109191895, |
|
"learning_rate": 4.277286135693216e-06, |
|
"loss": 0.847, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 45.800341561869274, |
|
"grad_norm": 4.1356425285339355, |
|
"learning_rate": 4.199658438130725e-06, |
|
"loss": 0.8543, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 45.877969259431765, |
|
"grad_norm": 4.8358001708984375, |
|
"learning_rate": 4.122030740568235e-06, |
|
"loss": 0.8519, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 45.955596956994256, |
|
"grad_norm": 4.316599369049072, |
|
"learning_rate": 4.0444030430057445e-06, |
|
"loss": 0.8518, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 46.03322465455675, |
|
"grad_norm": 5.166982173919678, |
|
"learning_rate": 3.9667753454432546e-06, |
|
"loss": 0.837, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 46.11085235211924, |
|
"grad_norm": 5.095579624176025, |
|
"learning_rate": 3.889147647880765e-06, |
|
"loss": 0.8304, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 46.18848004968173, |
|
"grad_norm": 4.376230716705322, |
|
"learning_rate": 3.8115199503182735e-06, |
|
"loss": 0.8317, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 46.26610774724422, |
|
"grad_norm": 4.394167900085449, |
|
"learning_rate": 3.7338922527557836e-06, |
|
"loss": 0.8334, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 46.343735444806704, |
|
"grad_norm": 4.203426361083984, |
|
"learning_rate": 3.656264555193293e-06, |
|
"loss": 0.8282, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 46.421363142369195, |
|
"grad_norm": 4.700695991516113, |
|
"learning_rate": 3.578636857630803e-06, |
|
"loss": 0.8387, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 46.498990839931686, |
|
"grad_norm": 4.512545585632324, |
|
"learning_rate": 3.501009160068312e-06, |
|
"loss": 0.8371, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 46.57661853749418, |
|
"grad_norm": 4.69306755065918, |
|
"learning_rate": 3.4233814625058222e-06, |
|
"loss": 0.8328, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 46.65424623505667, |
|
"grad_norm": 4.748707294464111, |
|
"learning_rate": 3.3457537649433315e-06, |
|
"loss": 0.8387, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 46.73187393261916, |
|
"grad_norm": 4.850402355194092, |
|
"learning_rate": 3.2681260673808416e-06, |
|
"loss": 0.8433, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 46.80950163018165, |
|
"grad_norm": 4.6922197341918945, |
|
"learning_rate": 3.1904983698183512e-06, |
|
"loss": 0.8437, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 46.88712932774414, |
|
"grad_norm": 4.400567054748535, |
|
"learning_rate": 3.112870672255861e-06, |
|
"loss": 0.8395, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 46.96475702530663, |
|
"grad_norm": 4.891355037689209, |
|
"learning_rate": 3.0352429746933706e-06, |
|
"loss": 0.8376, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 47.04238472286912, |
|
"grad_norm": 4.655758857727051, |
|
"learning_rate": 2.9576152771308803e-06, |
|
"loss": 0.8284, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 47.12001242043161, |
|
"grad_norm": 4.718132972717285, |
|
"learning_rate": 2.87998757956839e-06, |
|
"loss": 0.8187, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 47.1976401179941, |
|
"grad_norm": 4.415502071380615, |
|
"learning_rate": 2.8023598820059e-06, |
|
"loss": 0.8213, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 47.27526781555659, |
|
"grad_norm": 5.419862270355225, |
|
"learning_rate": 2.7247321844434097e-06, |
|
"loss": 0.8256, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 47.35289551311908, |
|
"grad_norm": 4.600099563598633, |
|
"learning_rate": 2.6471044868809193e-06, |
|
"loss": 0.8259, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 47.43052321068157, |
|
"grad_norm": 5.056214332580566, |
|
"learning_rate": 2.569476789318429e-06, |
|
"loss": 0.8232, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 47.50815090824406, |
|
"grad_norm": 4.458391189575195, |
|
"learning_rate": 2.4918490917559387e-06, |
|
"loss": 0.8297, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 47.58577860580655, |
|
"grad_norm": 4.724514961242676, |
|
"learning_rate": 2.4142213941934484e-06, |
|
"loss": 0.8257, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 47.663406303369044, |
|
"grad_norm": 4.462941646575928, |
|
"learning_rate": 2.336593696630958e-06, |
|
"loss": 0.8265, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 47.741034000931535, |
|
"grad_norm": 4.594760417938232, |
|
"learning_rate": 2.2589659990684677e-06, |
|
"loss": 0.8285, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 47.818661698494026, |
|
"grad_norm": 4.6404032707214355, |
|
"learning_rate": 2.1813383015059778e-06, |
|
"loss": 0.8261, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 47.89628939605651, |
|
"grad_norm": 3.944291830062866, |
|
"learning_rate": 2.1037106039434874e-06, |
|
"loss": 0.834, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 47.973917093619, |
|
"grad_norm": 4.836678504943848, |
|
"learning_rate": 2.026082906380997e-06, |
|
"loss": 0.827, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 48.05154479118149, |
|
"grad_norm": 4.680452823638916, |
|
"learning_rate": 1.9484552088185068e-06, |
|
"loss": 0.8142, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 48.12917248874398, |
|
"grad_norm": 5.229122161865234, |
|
"learning_rate": 1.8708275112560162e-06, |
|
"loss": 0.8151, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 48.206800186306474, |
|
"grad_norm": 4.585724353790283, |
|
"learning_rate": 1.7931998136935261e-06, |
|
"loss": 0.8188, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 48.284427883868965, |
|
"grad_norm": 4.325538158416748, |
|
"learning_rate": 1.7155721161310358e-06, |
|
"loss": 0.8115, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 48.362055581431456, |
|
"grad_norm": 4.884690761566162, |
|
"learning_rate": 1.6379444185685455e-06, |
|
"loss": 0.8105, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 48.43968327899395, |
|
"grad_norm": 4.815389633178711, |
|
"learning_rate": 1.5603167210060551e-06, |
|
"loss": 0.814, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 48.51731097655644, |
|
"grad_norm": 4.258877277374268, |
|
"learning_rate": 1.4826890234435648e-06, |
|
"loss": 0.814, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 48.59493867411893, |
|
"grad_norm": 4.596804618835449, |
|
"learning_rate": 1.4050613258810745e-06, |
|
"loss": 0.8168, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 48.67256637168141, |
|
"grad_norm": 4.754199504852295, |
|
"learning_rate": 1.3274336283185841e-06, |
|
"loss": 0.8205, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 48.7501940692439, |
|
"grad_norm": 4.652686595916748, |
|
"learning_rate": 1.2498059307560938e-06, |
|
"loss": 0.818, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 48.827821766806395, |
|
"grad_norm": 4.778179168701172, |
|
"learning_rate": 1.1721782331936035e-06, |
|
"loss": 0.8215, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 48.905449464368886, |
|
"grad_norm": 4.835714817047119, |
|
"learning_rate": 1.0945505356311131e-06, |
|
"loss": 0.8184, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 48.98307716193138, |
|
"grad_norm": 4.331784725189209, |
|
"learning_rate": 1.016922838068623e-06, |
|
"loss": 0.8149, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 49.06070485949387, |
|
"grad_norm": 4.657207012176514, |
|
"learning_rate": 9.392951405061327e-07, |
|
"loss": 0.8137, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 49.13833255705636, |
|
"grad_norm": 4.450284481048584, |
|
"learning_rate": 8.616674429436423e-07, |
|
"loss": 0.8115, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 49.21596025461885, |
|
"grad_norm": 3.921935558319092, |
|
"learning_rate": 7.84039745381152e-07, |
|
"loss": 0.8102, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 49.29358795218134, |
|
"grad_norm": 4.742419719696045, |
|
"learning_rate": 7.064120478186618e-07, |
|
"loss": 0.8069, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 49.37121564974383, |
|
"grad_norm": 4.7592387199401855, |
|
"learning_rate": 6.287843502561715e-07, |
|
"loss": 0.8111, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 49.44884334730632, |
|
"grad_norm": 4.364270210266113, |
|
"learning_rate": 5.511566526936811e-07, |
|
"loss": 0.8044, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 49.526471044868806, |
|
"grad_norm": 4.5575337409973145, |
|
"learning_rate": 4.735289551311908e-07, |
|
"loss": 0.8007, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 49.6040987424313, |
|
"grad_norm": 4.399910926818848, |
|
"learning_rate": 3.9590125756870057e-07, |
|
"loss": 0.8097, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 49.68172643999379, |
|
"grad_norm": 4.863783836364746, |
|
"learning_rate": 3.1827356000621023e-07, |
|
"loss": 0.8093, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 49.75935413755628, |
|
"grad_norm": 4.700865745544434, |
|
"learning_rate": 2.4064586244371996e-07, |
|
"loss": 0.812, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 49.83698183511877, |
|
"grad_norm": 4.929879188537598, |
|
"learning_rate": 1.6301816488122962e-07, |
|
"loss": 0.8121, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 49.91460953268126, |
|
"grad_norm": 4.459561347961426, |
|
"learning_rate": 8.539046731873933e-08, |
|
"loss": 0.8108, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 49.99223723024375, |
|
"grad_norm": 4.53715181350708, |
|
"learning_rate": 7.76276975624903e-09, |
|
"loss": 0.8126, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 322050, |
|
"total_flos": 9.94521893679661e+17, |
|
"train_loss": 1.8893472661618176, |
|
"train_runtime": 93675.3384, |
|
"train_samples_per_second": 110.014, |
|
"train_steps_per_second": 3.438 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 322050, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.94521893679661e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|