{
  "best_metric": 0.6869426704202687,
  "best_model_checkpoint": "/userstorage/modernbert-llm-grader/checkpoint-31216",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 31216,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.012813941568426449, "grad_norm": 5.321617603302002, "learning_rate": 4.987186058431574e-05, "loss": 1.4033, "step": 100},
    {"epoch": 0.025627883136852898, "grad_norm": 3.621730089187622, "learning_rate": 4.974372116863147e-05, "loss": 1.3035, "step": 200},
    {"epoch": 0.03844182470527934, "grad_norm": 7.95962381362915, "learning_rate": 4.961558175294721e-05, "loss": 1.2506, "step": 300},
    {"epoch": 0.051255766273705795, "grad_norm": 3.631398916244507, "learning_rate": 4.9487442337262944e-05, "loss": 1.2354, "step": 400},
    {"epoch": 0.06406970784213224, "grad_norm": 2.6680333614349365, "learning_rate": 4.935930292157868e-05, "loss": 1.2397, "step": 500},
    {"epoch": 0.07688364941055868, "grad_norm": 6.042360305786133, "learning_rate": 4.9231163505894415e-05, "loss": 1.1811, "step": 600},
    {"epoch": 0.08969759097898514, "grad_norm": 6.7501959800720215, "learning_rate": 4.9103024090210154e-05, "loss": 1.1947, "step": 700},
    {"epoch": 0.10251153254741159, "grad_norm": 3.4089736938476562, "learning_rate": 4.8974884674525886e-05, "loss": 1.1812, "step": 800},
    {"epoch": 0.11532547411583803, "grad_norm": 5.0775604248046875, "learning_rate": 4.884674525884162e-05, "loss": 1.1752, "step": 900},
    {"epoch": 0.12813941568426448, "grad_norm": 4.529630184173584, "learning_rate": 4.8718605843157357e-05, "loss": 1.1867, "step": 1000},
    {"epoch": 0.14095335725269093, "grad_norm": 4.961220741271973, "learning_rate": 4.859046642747309e-05, "loss": 1.2129, "step": 1100},
    {"epoch": 0.15376729882111737, "grad_norm": 4.113813400268555, "learning_rate": 4.846232701178883e-05, "loss": 1.1293, "step": 1200},
    {"epoch": 0.16658124038954383, "grad_norm": 7.25917387008667, "learning_rate": 4.8334187596104566e-05, "loss": 1.1008, "step": 1300},
    {"epoch": 0.17939518195797027, "grad_norm": 5.579372882843018, "learning_rate": 4.82060481804203e-05, "loss": 1.1327, "step": 1400},
    {"epoch": 0.1922091235263967, "grad_norm": 9.794898986816406, "learning_rate": 4.807790876473604e-05, "loss": 1.1355, "step": 1500},
    {"epoch": 0.20502306509482318, "grad_norm": 9.875951766967773, "learning_rate": 4.794976934905177e-05, "loss": 1.0057, "step": 1600},
    {"epoch": 0.21783700666324962, "grad_norm": 7.271333694458008, "learning_rate": 4.782162993336751e-05, "loss": 1.1101, "step": 1700},
    {"epoch": 0.23065094823167606, "grad_norm": 6.730026721954346, "learning_rate": 4.769349051768324e-05, "loss": 1.0762, "step": 1800},
    {"epoch": 0.2434648898001025, "grad_norm": 5.596224784851074, "learning_rate": 4.756535110199898e-05, "loss": 1.0413, "step": 1900},
    {"epoch": 0.25627883136852897, "grad_norm": 4.591865539550781, "learning_rate": 4.743721168631472e-05, "loss": 1.0593, "step": 2000},
    {"epoch": 0.2690927729369554, "grad_norm": 6.357232570648193, "learning_rate": 4.730907227063045e-05, "loss": 1.0434, "step": 2100},
    {"epoch": 0.28190671450538185, "grad_norm": 5.185873508453369, "learning_rate": 4.718093285494619e-05, "loss": 1.021, "step": 2200},
    {"epoch": 0.2947206560738083, "grad_norm": 8.19482135772705, "learning_rate": 4.705279343926192e-05, "loss": 1.1102, "step": 2300},
    {"epoch": 0.30753459764223473, "grad_norm": 6.1499176025390625, "learning_rate": 4.692465402357765e-05, "loss": 1.0115, "step": 2400},
    {"epoch": 0.3203485392106612, "grad_norm": 11.092570304870605, "learning_rate": 4.679651460789339e-05, "loss": 0.9576, "step": 2500},
    {"epoch": 0.33316248077908767, "grad_norm": 5.10243034362793, "learning_rate": 4.666837519220912e-05, "loss": 1.0674, "step": 2600},
    {"epoch": 0.3459764223475141, "grad_norm": 4.633431434631348, "learning_rate": 4.654023577652486e-05, "loss": 1.0004, "step": 2700},
    {"epoch": 0.35879036391594055, "grad_norm": 5.507874488830566, "learning_rate": 4.6412096360840594e-05, "loss": 1.0439, "step": 2800},
    {"epoch": 0.371604305484367, "grad_norm": 5.591798305511475, "learning_rate": 4.628395694515633e-05, "loss": 1.0509, "step": 2900},
    {"epoch": 0.3844182470527934, "grad_norm": 4.341959476470947, "learning_rate": 4.6155817529472065e-05, "loss": 0.9864, "step": 3000},
    {"epoch": 0.3972321886212199, "grad_norm": 3.6542482376098633, "learning_rate": 4.6027678113787804e-05, "loss": 0.9897, "step": 3100},
    {"epoch": 0.41004613018964636, "grad_norm": 6.769758701324463, "learning_rate": 4.589953869810354e-05, "loss": 1.0528, "step": 3200},
    {"epoch": 0.4228600717580728, "grad_norm": 5.762277603149414, "learning_rate": 4.5771399282419274e-05, "loss": 1.0036, "step": 3300},
    {"epoch": 0.43567401332649924, "grad_norm": 7.389179229736328, "learning_rate": 4.564325986673501e-05, "loss": 1.0304, "step": 3400},
    {"epoch": 0.44848795489492566, "grad_norm": 3.9039294719696045, "learning_rate": 4.5515120451050745e-05, "loss": 0.9962, "step": 3500},
    {"epoch": 0.4613018964633521, "grad_norm": 3.0561447143554688, "learning_rate": 4.5386981035366484e-05, "loss": 0.9777, "step": 3600},
    {"epoch": 0.4741158380317786, "grad_norm": 6.340303897857666, "learning_rate": 4.5258841619682216e-05, "loss": 0.9603, "step": 3700},
    {"epoch": 0.486929779600205, "grad_norm": 9.058144569396973, "learning_rate": 4.5130702203997955e-05, "loss": 0.9658, "step": 3800},
    {"epoch": 0.49974372116863147, "grad_norm": 8.219672203063965, "learning_rate": 4.500256278831369e-05, "loss": 0.9856, "step": 3900},
    {"epoch": 0.5125576627370579, "grad_norm": 3.6466543674468994, "learning_rate": 4.487442337262942e-05, "loss": 0.9734, "step": 4000},
    {"epoch": 0.5253716043054844, "grad_norm": 7.289781093597412, "learning_rate": 4.474628395694516e-05, "loss": 0.9045, "step": 4100},
    {"epoch": 0.5381855458739108, "grad_norm": 6.18227481842041, "learning_rate": 4.461814454126089e-05, "loss": 0.9679, "step": 4200},
    {"epoch": 0.5509994874423373, "grad_norm": 3.994476318359375, "learning_rate": 4.449000512557663e-05, "loss": 0.8958, "step": 4300},
    {"epoch": 0.5638134290107637, "grad_norm": 3.913896322250366, "learning_rate": 4.436186570989236e-05, "loss": 0.9453, "step": 4400},
    {"epoch": 0.5766273705791901, "grad_norm": 4.39192008972168, "learning_rate": 4.42337262942081e-05, "loss": 0.9132, "step": 4500},
    {"epoch": 0.5894413121476166, "grad_norm": 5.574671745300293, "learning_rate": 4.410558687852384e-05, "loss": 0.9069, "step": 4600},
    {"epoch": 0.602255253716043, "grad_norm": 4.218778610229492, "learning_rate": 4.397744746283957e-05, "loss": 0.9631, "step": 4700},
    {"epoch": 0.6150691952844695, "grad_norm": 7.804980754852295, "learning_rate": 4.384930804715531e-05, "loss": 0.9121, "step": 4800},
    {"epoch": 0.627883136852896, "grad_norm": 7.064172744750977, "learning_rate": 4.372116863147104e-05, "loss": 0.9387, "step": 4900},
    {"epoch": 0.6406970784213224, "grad_norm": 5.293111324310303, "learning_rate": 4.359302921578678e-05, "loss": 0.9264, "step": 5000},
    {"epoch": 0.6535110199897488, "grad_norm": 7.019448757171631, "learning_rate": 4.346488980010251e-05, "loss": 0.9452, "step": 5100},
    {"epoch": 0.6663249615581753, "grad_norm": 6.714709758758545, "learning_rate": 4.333675038441825e-05, "loss": 0.8648, "step": 5200},
    {"epoch": 0.6791389031266017, "grad_norm": 8.232748031616211, "learning_rate": 4.320861096873399e-05, "loss": 0.904, "step": 5300},
    {"epoch": 0.6919528446950282, "grad_norm": 9.853933334350586, "learning_rate": 4.308047155304972e-05, "loss": 0.8895, "step": 5400},
    {"epoch": 0.7047667862634547, "grad_norm": 6.8710455894470215, "learning_rate": 4.2952332137365454e-05, "loss": 0.86, "step": 5500},
    {"epoch": 0.7175807278318811, "grad_norm": 6.45287561416626, "learning_rate": 4.2824192721681186e-05, "loss": 0.8718, "step": 5600},
    {"epoch": 0.7303946694003075, "grad_norm": 5.772899627685547, "learning_rate": 4.2696053305996924e-05, "loss": 0.8477, "step": 5700},
    {"epoch": 0.743208610968734, "grad_norm": 6.193540573120117, "learning_rate": 4.256791389031266e-05, "loss": 0.9184, "step": 5800},
    {"epoch": 0.7560225525371604, "grad_norm": 2.5397393703460693, "learning_rate": 4.2439774474628395e-05, "loss": 0.9537, "step": 5900},
    {"epoch": 0.7688364941055869, "grad_norm": 8.280569076538086, "learning_rate": 4.2311635058944134e-05, "loss": 0.8988, "step": 6000},
    {"epoch": 0.7816504356740134, "grad_norm": 10.563502311706543, "learning_rate": 4.2183495643259866e-05, "loss": 0.8518, "step": 6100},
    {"epoch": 0.7944643772424398, "grad_norm": 3.090008497238159, "learning_rate": 4.2055356227575605e-05, "loss": 0.8731, "step": 6200},
    {"epoch": 0.8072783188108662, "grad_norm": 4.051167011260986, "learning_rate": 4.192721681189134e-05, "loss": 0.8713, "step": 6300},
    {"epoch": 0.8200922603792927, "grad_norm": 7.207763671875, "learning_rate": 4.1799077396207076e-05, "loss": 0.8781, "step": 6400},
    {"epoch": 0.8329062019477191, "grad_norm": 6.396823883056641, "learning_rate": 4.1670937980522815e-05, "loss": 0.8231, "step": 6500},
    {"epoch": 0.8457201435161456, "grad_norm": 6.260582447052002, "learning_rate": 4.1542798564838547e-05, "loss": 0.8658, "step": 6600},
    {"epoch": 0.858534085084572, "grad_norm": 8.35356616973877, "learning_rate": 4.1414659149154285e-05, "loss": 0.8637, "step": 6700},
    {"epoch": 0.8713480266529985, "grad_norm": 7.236725330352783, "learning_rate": 4.128651973347002e-05, "loss": 0.8525, "step": 6800},
    {"epoch": 0.8841619682214249, "grad_norm": 14.001522064208984, "learning_rate": 4.1158380317785756e-05, "loss": 0.8628, "step": 6900},
    {"epoch": 0.8969759097898513, "grad_norm": 4.257541179656982, "learning_rate": 4.103024090210149e-05, "loss": 0.8443, "step": 7000},
    {"epoch": 0.9097898513582778, "grad_norm": 5.065970420837402, "learning_rate": 4.090210148641722e-05, "loss": 0.8329, "step": 7100},
    {"epoch": 0.9226037929267042, "grad_norm": 6.647068977355957, "learning_rate": 4.077396207073296e-05, "loss": 0.8585, "step": 7200},
    {"epoch": 0.9354177344951307, "grad_norm": 8.440242767333984, "learning_rate": 4.064582265504869e-05, "loss": 0.8749, "step": 7300},
    {"epoch": 0.9482316760635572, "grad_norm": 7.684078216552734, "learning_rate": 4.051768323936443e-05, "loss": 0.7771, "step": 7400},
    {"epoch": 0.9610456176319836, "grad_norm": 6.4709577560424805, "learning_rate": 4.038954382368016e-05, "loss": 0.8597, "step": 7500},
    {"epoch": 0.97385955920041, "grad_norm": 4.3970489501953125, "learning_rate": 4.02614044079959e-05, "loss": 0.7852, "step": 7600},
    {"epoch": 0.9866735007688365, "grad_norm": 9.167794227600098, "learning_rate": 4.013326499231164e-05, "loss": 0.8563, "step": 7700},
    {"epoch": 0.9994874423372629, "grad_norm": 6.251096248626709, "learning_rate": 4.000512557662737e-05, "loss": 0.8243, "step": 7800},
    {"epoch": 1.0, "eval_f1": 0.640692076906927, "eval_loss": 0.8794865608215332, "eval_runtime": 744.6214, "eval_samples_per_second": 10.48, "eval_steps_per_second": 2.62, "step": 7804},
    {"epoch": 1.0123013839056894, "grad_norm": 5.928829669952393, "learning_rate": 3.987698616094311e-05, "loss": 0.7046, "step": 7900},
    {"epoch": 1.0251153254741159, "grad_norm": 2.885106086730957, "learning_rate": 3.974884674525884e-05, "loss": 0.7663, "step": 8000},
    {"epoch": 1.0379292670425422, "grad_norm": 5.951350212097168, "learning_rate": 3.962070732957458e-05, "loss": 0.7374, "step": 8100},
    {"epoch": 1.0507432086109687, "grad_norm": 2.5160486698150635, "learning_rate": 3.949256791389031e-05, "loss": 0.7126, "step": 8200},
    {"epoch": 1.0635571501793952, "grad_norm": 6.847401142120361, "learning_rate": 3.936442849820605e-05, "loss": 0.6785, "step": 8300},
    {"epoch": 1.0763710917478215, "grad_norm": 4.729136943817139, "learning_rate": 3.923628908252179e-05, "loss": 0.7085, "step": 8400},
    {"epoch": 1.089185033316248, "grad_norm": 5.535890102386475, "learning_rate": 3.910814966683752e-05, "loss": 0.7548, "step": 8500},
    {"epoch": 1.1019989748846746, "grad_norm": 6.188892364501953, "learning_rate": 3.8980010251153255e-05, "loss": 0.7193, "step": 8600},
    {"epoch": 1.1148129164531009, "grad_norm": 5.806282997131348, "learning_rate": 3.885187083546899e-05, "loss": 0.7143, "step": 8700},
    {"epoch": 1.1276268580215274, "grad_norm": 10.726571083068848, "learning_rate": 3.8723731419784726e-05, "loss": 0.6892, "step": 8800},
    {"epoch": 1.140440799589954, "grad_norm": 7.0307512283325195, "learning_rate": 3.8595592004100465e-05, "loss": 0.7264, "step": 8900},
    {"epoch": 1.1532547411583802, "grad_norm": 20.715412139892578, "learning_rate": 3.8467452588416197e-05, "loss": 0.6987, "step": 9000},
    {"epoch": 1.1660686827268067, "grad_norm": 6.620629787445068, "learning_rate": 3.8339313172731935e-05, "loss": 0.7041, "step": 9100},
    {"epoch": 1.1788826242952333, "grad_norm": 5.27125883102417, "learning_rate": 3.821117375704767e-05, "loss": 0.67, "step": 9200},
    {"epoch": 1.1916965658636596, "grad_norm": 6.010765552520752, "learning_rate": 3.8083034341363406e-05, "loss": 0.6737, "step": 9300},
    {"epoch": 1.204510507432086, "grad_norm": 14.393863677978516, "learning_rate": 3.795489492567914e-05, "loss": 0.7097, "step": 9400},
    {"epoch": 1.2173244490005126, "grad_norm": 6.37823486328125, "learning_rate": 3.782675550999488e-05, "loss": 0.7157, "step": 9500},
    {"epoch": 1.230138390568939, "grad_norm": 11.626152992248535, "learning_rate": 3.7698616094310616e-05, "loss": 0.7066, "step": 9600},
    {"epoch": 1.2429523321373654, "grad_norm": 5.520190238952637, "learning_rate": 3.757047667862635e-05, "loss": 0.7303, "step": 9700},
    {"epoch": 1.255766273705792, "grad_norm": 9.865089416503906, "learning_rate": 3.744233726294209e-05, "loss": 0.7559, "step": 9800},
    {"epoch": 1.2685802152742183, "grad_norm": 7.075952529907227, "learning_rate": 3.731419784725782e-05, "loss": 0.6941, "step": 9900},
    {"epoch": 1.2813941568426448, "grad_norm": 3.4892656803131104, "learning_rate": 3.718605843157356e-05, "loss": 0.7164, "step": 10000},
    {"epoch": 1.2942080984110713, "grad_norm": 9.843413352966309, "learning_rate": 3.705791901588929e-05, "loss": 0.695, "step": 10100},
    {"epoch": 1.3070220399794976, "grad_norm": 12.128110885620117, "learning_rate": 3.692977960020502e-05, "loss": 0.6563, "step": 10200},
    {"epoch": 1.3198359815479241, "grad_norm": 11.26876163482666, "learning_rate": 3.680164018452076e-05, "loss": 0.6803, "step": 10300},
    {"epoch": 1.3326499231163507, "grad_norm": 12.95758056640625, "learning_rate": 3.667350076883649e-05, "loss": 0.6864, "step": 10400},
    {"epoch": 1.345463864684777, "grad_norm": 4.91602897644043, "learning_rate": 3.654536135315223e-05, "loss": 0.7184, "step": 10500},
    {"epoch": 1.3582778062532035, "grad_norm": 4.799069881439209, "learning_rate": 3.641722193746796e-05, "loss": 0.7558, "step": 10600},
    {"epoch": 1.37109174782163, "grad_norm": 64.9485855102539, "learning_rate": 3.62890825217837e-05, "loss": 0.7292, "step": 10700},
    {"epoch": 1.3839056893900563, "grad_norm": 6.147428512573242, "learning_rate": 3.616094310609944e-05, "loss": 0.6623, "step": 10800},
    {"epoch": 1.3967196309584828, "grad_norm": 7.638481140136719, "learning_rate": 3.603280369041517e-05, "loss": 0.6981, "step": 10900},
    {"epoch": 1.4095335725269091, "grad_norm": 4.798500061035156, "learning_rate": 3.590466427473091e-05, "loss": 0.7569, "step": 11000},
    {"epoch": 1.4223475140953357, "grad_norm": 4.413691520690918, "learning_rate": 3.5776524859046644e-05, "loss": 0.6391, "step": 11100},
    {"epoch": 1.4351614556637622, "grad_norm": 6.2526421546936035, "learning_rate": 3.564838544336238e-05, "loss": 0.7045, "step": 11200},
    {"epoch": 1.4479753972321885, "grad_norm": 6.3732805252075195, "learning_rate": 3.5520246027678114e-05, "loss": 0.6916, "step": 11300},
    {"epoch": 1.460789338800615, "grad_norm": 25.24698829650879, "learning_rate": 3.539210661199385e-05, "loss": 0.7652, "step": 11400},
    {"epoch": 1.4736032803690415, "grad_norm": 4.716599941253662, "learning_rate": 3.5263967196309585e-05, "loss": 0.7199, "step": 11500},
    {"epoch": 1.4864172219374678, "grad_norm": 13.750917434692383, "learning_rate": 3.5135827780625324e-05, "loss": 0.7032, "step": 11600},
    {"epoch": 1.4992311635058944, "grad_norm": 3.6678273677825928, "learning_rate": 3.500768836494106e-05, "loss": 0.6821, "step": 11700},
    {"epoch": 1.5120451050743209, "grad_norm": 7.891080856323242, "learning_rate": 3.487954894925679e-05, "loss": 0.7301, "step": 11800},
    {"epoch": 1.5248590466427472, "grad_norm": 3.25317645072937, "learning_rate": 3.475140953357253e-05, "loss": 0.6665, "step": 11900},
    {"epoch": 1.5376729882111737, "grad_norm": 12.75395679473877, "learning_rate": 3.462327011788826e-05, "loss": 0.733, "step": 12000},
    {"epoch": 1.5504869297796002, "grad_norm": 10.9820556640625, "learning_rate": 3.4495130702204e-05, "loss": 0.7064, "step": 12100},
    {"epoch": 1.5633008713480265, "grad_norm": 6.558383941650391, "learning_rate": 3.4366991286519737e-05, "loss": 0.7105, "step": 12200},
    {"epoch": 1.576114812916453, "grad_norm": 8.5501070022583, "learning_rate": 3.423885187083547e-05, "loss": 0.706, "step": 12300},
    {"epoch": 1.5889287544848796, "grad_norm": 5.319694995880127, "learning_rate": 3.411071245515121e-05, "loss": 0.7239, "step": 12400},
    {"epoch": 1.6017426960533059, "grad_norm": 5.92519474029541, "learning_rate": 3.398257303946694e-05, "loss": 0.7043, "step": 12500},
    {"epoch": 1.6145566376217324, "grad_norm": 8.853275299072266, "learning_rate": 3.385443362378268e-05, "loss": 0.6831, "step": 12600},
    {"epoch": 1.627370579190159, "grad_norm": 9.30588150024414, "learning_rate": 3.372629420809841e-05, "loss": 0.6756, "step": 12700},
    {"epoch": 1.6401845207585852, "grad_norm": 5.903197288513184, "learning_rate": 3.359815479241415e-05, "loss": 0.725, "step": 12800},
    {"epoch": 1.6529984623270118, "grad_norm": 5.500326156616211, "learning_rate": 3.347001537672989e-05, "loss": 0.6801, "step": 12900},
    {"epoch": 1.6658124038954383, "grad_norm": 7.896096229553223, "learning_rate": 3.334187596104562e-05, "loss": 0.6975, "step": 13000},
    {"epoch": 1.6786263454638646, "grad_norm": 6.674001216888428, "learning_rate": 3.321373654536136e-05, "loss": 0.6681, "step": 13100},
    {"epoch": 1.691440287032291, "grad_norm": 21.74435806274414, "learning_rate": 3.308559712967709e-05, "loss": 0.7045, "step": 13200},
    {"epoch": 1.7042542286007176, "grad_norm": 6.329532146453857, "learning_rate": 3.295745771399282e-05, "loss": 0.6885, "step": 13300},
    {"epoch": 1.717068170169144, "grad_norm": 24.047470092773438, "learning_rate": 3.282931829830856e-05, "loss": 0.7003, "step": 13400},
    {"epoch": 1.7298821117375704, "grad_norm": 7.407759666442871, "learning_rate": 3.2701178882624294e-05, "loss": 0.6856, "step": 13500},
    {"epoch": 1.742696053305997, "grad_norm": 5.755215167999268, "learning_rate": 3.257303946694003e-05, "loss": 0.7005, "step": 13600},
    {"epoch": 1.7555099948744233, "grad_norm": 11.444562911987305, "learning_rate": 3.2444900051255764e-05, "loss": 0.7136, "step": 13700},
    {"epoch": 1.7683239364428498, "grad_norm": 8.267853736877441, "learning_rate": 3.23167606355715e-05, "loss": 0.7029, "step": 13800},
    {"epoch": 1.7811378780112763, "grad_norm": 6.73785924911499, "learning_rate": 3.2188621219887235e-05, "loss": 0.6572, "step": 13900},
    {"epoch": 1.7939518195797026, "grad_norm": 5.369395732879639, "learning_rate": 3.2060481804202974e-05, "loss": 0.6617, "step": 14000},
    {"epoch": 1.8067657611481291, "grad_norm": 2.288243293762207, "learning_rate": 3.193234238851871e-05, "loss": 0.6688, "step": 14100},
    {"epoch": 1.8195797027165557, "grad_norm": 14.942804336547852, "learning_rate": 3.1804202972834445e-05, "loss": 0.6792, "step": 14200},
    {"epoch": 1.832393644284982, "grad_norm": 8.988631248474121, "learning_rate": 3.1676063557150184e-05, "loss": 0.6518, "step": 14300},
    {"epoch": 1.8452075858534085, "grad_norm": 7.9590630531311035, "learning_rate": 3.1547924141465916e-05, "loss": 0.6503, "step": 14400},
    {"epoch": 1.858021527421835, "grad_norm": 9.33973503112793, "learning_rate": 3.1419784725781655e-05, "loss": 0.6647, "step": 14500},
    {"epoch": 1.8708354689902613, "grad_norm": 9.39842700958252, "learning_rate": 3.1291645310097387e-05, "loss": 0.6515, "step": 14600},
    {"epoch": 1.8836494105586878, "grad_norm": 10.142439842224121, "learning_rate": 3.1163505894413125e-05, "loss": 0.6794, "step": 14700},
    {"epoch": 1.8964633521271144, "grad_norm": 11.658042907714844, "learning_rate": 3.1035366478728864e-05, "loss": 0.6931, "step": 14800},
    {"epoch": 1.9092772936955407, "grad_norm": 8.672663688659668, "learning_rate": 3.090722706304459e-05, "loss": 0.6377, "step": 14900},
    {"epoch": 1.9220912352639672, "grad_norm": 6.620725631713867, "learning_rate": 3.077908764736033e-05, "loss": 0.7044, "step": 15000},
    {"epoch": 1.9349051768323937, "grad_norm": 8.3103609085083, "learning_rate": 3.065094823167606e-05, "loss": 0.641, "step": 15100},
    {"epoch": 1.94771911840082, "grad_norm": 8.163315773010254, "learning_rate": 3.05228088159918e-05, "loss": 0.7094, "step": 15200},
    {"epoch": 1.9605330599692465, "grad_norm": 3.6365621089935303, "learning_rate": 3.0394669400307534e-05, "loss": 0.7022, "step": 15300},
    {"epoch": 1.973347001537673, "grad_norm": 4.264801502227783, "learning_rate": 3.026652998462327e-05, "loss": 0.6833, "step": 15400},
    {"epoch": 1.9861609431060994, "grad_norm": 6.547428131103516, "learning_rate": 3.0138390568939005e-05, "loss": 0.6126, "step": 15500},
    {"epoch": 1.9989748846745259, "grad_norm": 6.155936241149902, "learning_rate": 3.0010251153254744e-05, "loss": 0.6851, "step": 15600},
    {"epoch": 2.0, "eval_f1": 0.6772664805551888, "eval_loss": 0.781230092048645, "eval_runtime": 778.3436, "eval_samples_per_second": 10.026, "eval_steps_per_second": 2.507, "step": 15608},
    {"epoch": 2.0117888262429524, "grad_norm": 8.777030944824219, "learning_rate": 2.988211173757048e-05, "loss": 0.4707, "step": 15700},
    {"epoch": 2.0246027678113787, "grad_norm": 4.798321723937988, "learning_rate": 2.9753972321886215e-05, "loss": 0.4366, "step": 15800},
    {"epoch": 2.037416709379805, "grad_norm": 2.5244762897491455, "learning_rate": 2.962583290620195e-05, "loss": 0.504, "step": 15900},
    {"epoch": 2.0502306509482318, "grad_norm": 15.636524200439453, "learning_rate": 2.9497693490517686e-05, "loss": 0.4234, "step": 16000},
    {"epoch": 2.063044592516658, "grad_norm": 8.811060905456543, "learning_rate": 2.936955407483342e-05, "loss": 0.3911, "step": 16100},
    {"epoch": 2.0758585340850844, "grad_norm": 4.1310930252075195, "learning_rate": 2.9241414659149157e-05, "loss": 0.4538, "step": 16200},
    {"epoch": 2.088672475653511, "grad_norm": 9.516937255859375, "learning_rate": 2.9113275243464892e-05, "loss": 0.4461, "step": 16300},
    {"epoch": 2.1014864172219374, "grad_norm": 4.6523756980896, "learning_rate": 2.8985135827780624e-05, "loss": 0.4808, "step": 16400},
    {"epoch": 2.1143003587903637, "grad_norm": 4.160647392272949, "learning_rate": 2.885699641209636e-05, "loss": 0.4879, "step": 16500},
    {"epoch": 2.1271143003587905, "grad_norm": 11.32701587677002, "learning_rate": 2.8728856996412095e-05, "loss": 0.4544, "step": 16600},
    {"epoch": 2.1399282419272168, "grad_norm": 4.703444004058838, "learning_rate": 2.860071758072783e-05, "loss": 0.466, "step": 16700},
    {"epoch": 2.152742183495643, "grad_norm": 8.985660552978516, "learning_rate": 2.847257816504357e-05, "loss": 0.4734, "step": 16800},
    {"epoch": 2.16555612506407, "grad_norm": 12.306890487670898, "learning_rate": 2.8344438749359304e-05, "loss": 0.4287, "step": 16900},
    {"epoch": 2.178370066632496, "grad_norm": 5.025609016418457, "learning_rate": 2.821629933367504e-05, "loss": 0.4657, "step": 17000},
    {"epoch": 2.1911840082009224, "grad_norm": 31.554025650024414, "learning_rate": 2.8088159917990775e-05, "loss": 0.4378, "step": 17100},
    {"epoch": 2.203997949769349, "grad_norm": 9.015434265136719, "learning_rate": 2.796002050230651e-05, "loss": 0.4538, "step": 17200},
    {"epoch": 2.2168118913377755, "grad_norm": 15.61099624633789, "learning_rate": 2.7831881086622246e-05, "loss": 0.4134, "step": 17300},
    {"epoch": 2.2296258329062018, "grad_norm": 10.191957473754883, "learning_rate": 2.770374167093798e-05, "loss": 0.5188, "step": 17400},
    {"epoch": 2.2424397744746285, "grad_norm": 2.2506730556488037, "learning_rate": 2.7575602255253717e-05, "loss": 0.4028, "step": 17500},
    {"epoch": 2.255253716043055, "grad_norm": 23.088764190673828, "learning_rate": 2.7447462839569456e-05, "loss": 0.4814, "step": 17600},
    {"epoch": 2.268067657611481, "grad_norm": 4.473659515380859, "learning_rate": 2.731932342388519e-05, "loss": 0.4822, "step": 17700},
    {"epoch": 2.280881599179908, "grad_norm": 2.1489970684051514, "learning_rate": 2.7191184008200927e-05, "loss": 0.4934, "step": 17800},
    {"epoch": 2.293695540748334, "grad_norm": 1.4255170822143555, "learning_rate": 2.7063044592516662e-05, "loss": 0.4314, "step": 17900},
    {"epoch": 2.3065094823167605, "grad_norm": 4.612204074859619, "learning_rate": 2.693490517683239e-05, "loss": 0.4322, "step": 18000},
    {"epoch": 2.319323423885187, "grad_norm": 3.1022679805755615, "learning_rate": 2.680676576114813e-05, "loss": 0.424, "step": 18100},
    {"epoch": 2.3321373654536135, "grad_norm": 3.745171070098877, "learning_rate": 2.6678626345463865e-05, "loss": 0.4269, "step": 18200},
    {"epoch": 2.34495130702204, "grad_norm": 4.0442328453063965, "learning_rate": 2.65504869297796e-05, "loss": 0.4698, "step": 18300},
    {"epoch": 2.3577652485904665, "grad_norm": 21.303607940673828, "learning_rate": 2.6422347514095336e-05, "loss": 0.4909, "step": 18400},
    {"epoch": 2.370579190158893, "grad_norm": 9.175422668457031, "learning_rate": 2.629420809841107e-05, "loss": 0.4598, "step": 18500},
    {"epoch": 2.383393131727319, "grad_norm": 5.787283420562744, "learning_rate": 2.6166068682726807e-05, "loss": 0.4409, "step": 18600},
    {"epoch": 2.396207073295746, "grad_norm": 7.338250637054443, "learning_rate": 2.6037929267042542e-05, "loss": 0.4157, "step": 18700},
    {"epoch": 2.409021014864172, "grad_norm": 13.879666328430176, "learning_rate": 2.590978985135828e-05, "loss": 0.4584, "step": 18800},
    {"epoch": 2.4218349564325985, "grad_norm": 9.484577178955078, "learning_rate": 2.5781650435674016e-05, "loss": 0.4914, "step": 18900},
    {"epoch": 2.4346488980010252, "grad_norm": 10.865300178527832, "learning_rate": 2.565351101998975e-05, "loss": 0.4259, "step": 19000},
    {"epoch": 2.4474628395694515, "grad_norm": 16.69988441467285, "learning_rate": 2.5525371604305487e-05, "loss": 0.4563, "step": 19100},
    {"epoch": 2.460276781137878, "grad_norm": 19.711631774902344, "learning_rate": 2.5397232188621222e-05, "loss": 0.4159, "step": 19200},
    {"epoch": 2.4730907227063046, "grad_norm": 13.3755521774292, "learning_rate": 2.5269092772936958e-05, "loss": 0.537, "step": 19300},
    {"epoch": 2.485904664274731, "grad_norm": 6.953076362609863, "learning_rate": 2.5140953357252693e-05, "loss": 0.4288, "step": 19400},
    {"epoch": 2.498718605843157, "grad_norm": 47.91322708129883, "learning_rate": 2.5012813941568432e-05, "loss": 0.5049, "step": 19500},
    {"epoch": 2.511532547411584, "grad_norm": 1.6553832292556763, "learning_rate": 2.4884674525884164e-05, "loss": 0.4779, "step": 19600},
    {"epoch": 2.5243464889800102, "grad_norm": 12.199808120727539, "learning_rate": 2.47565351101999e-05, "loss": 0.4246, "step": 19700},
    {"epoch": 2.5371604305484365, "grad_norm": 11.326825141906738, "learning_rate": 2.4628395694515635e-05, "loss": 0.4482, "step": 19800},
    {"epoch": 2.5499743721168633, "grad_norm": 9.247246742248535, "learning_rate": 2.450025627883137e-05, "loss": 0.4656, "step": 19900},
    {"epoch": 2.5627883136852896, "grad_norm": 1.773540735244751, "learning_rate": 2.4372116863147106e-05, "loss": 0.4776, "step": 20000},
    {"epoch": 2.575602255253716, "grad_norm": 7.454749584197998, "learning_rate": 2.424397744746284e-05, "loss": 0.4161, "step": 20100},
    {"epoch": 2.5884161968221426, "grad_norm": 19.77891731262207, "learning_rate": 2.4115838031778577e-05, "loss": 0.4609, "step": 20200},
    {"epoch": 2.601230138390569, "grad_norm": 12.208200454711914, "learning_rate": 2.3987698616094312e-05, "loss": 0.453, "step": 20300},
    {"epoch": 2.6140440799589952, "grad_norm": 11.438812255859375, "learning_rate": 2.3859559200410047e-05, "loss": 0.4439, "step": 20400},
    {"epoch": 2.626858021527422, "grad_norm": 1.6863147020339966, "learning_rate": 2.3731419784725783e-05, "loss": 0.3987, "step": 20500},
    {"epoch": 2.6396719630958483, "grad_norm": 1.3637946844100952, "learning_rate": 2.3603280369041518e-05, "loss": 0.4523, "step": 20600},
    {"epoch": 2.6524859046642746, "grad_norm": 21.555208206176758, "learning_rate": 2.3475140953357254e-05, "loss": 0.4624, "step": 20700},
    {"epoch": 2.6652998462327013, "grad_norm": 8.768684387207031, "learning_rate": 2.334700153767299e-05, "loss": 0.4585, "step": 20800},
    {"epoch": 2.6781137878011276, "grad_norm": 3.2959704399108887, "learning_rate": 2.3218862121988724e-05, "loss": 0.4579, "step": 20900},
    {"epoch": 2.690927729369554, "grad_norm": 16.97565269470215, "learning_rate": 2.309072270630446e-05, "loss": 0.4132, "step": 21000},
    {"epoch": 2.7037416709379807, "grad_norm": 14.613641738891602, "learning_rate": 2.2962583290620195e-05, "loss": 0.4297, "step": 21100},
    {"epoch": 2.716555612506407, "grad_norm": 28.61090087890625, "learning_rate": 2.283444387493593e-05, "loss": 0.4479, "step": 21200},
    {"epoch": 2.7293695540748333, "grad_norm": 9.84257984161377, "learning_rate": 2.2706304459251666e-05, "loss": 0.4428, "step": 21300},
    {"epoch": 2.74218349564326, "grad_norm": 8.199345588684082, "learning_rate": 2.2578165043567405e-05, "loss": 0.3999, "step": 21400},
    {"epoch": 2.7549974372116863, "grad_norm": 15.411248207092285, "learning_rate": 2.2450025627883137e-05, "loss": 0.4423, "step": 21500},
    {"epoch": 2.7678113787801126, "grad_norm": 7.122200012207031, "learning_rate": 2.2321886212198872e-05, "loss": 0.4675, "step": 21600},
    {"epoch": 2.7806253203485394, "grad_norm": 11.358266830444336, "learning_rate": 2.2193746796514608e-05, "loss": 0.4885, "step": 21700},
    {"epoch": 2.7934392619169657, "grad_norm": 9.456644058227539, "learning_rate": 2.2065607380830343e-05, "loss": 0.4973, "step": 21800},
    {"epoch": 2.806253203485392, "grad_norm": 28.7235164642334, "learning_rate": 2.193746796514608e-05, "loss": 0.429, "step": 21900},
    {"epoch": 2.8190671450538183, "grad_norm": 14.859136581420898, "learning_rate": 2.1809328549461817e-05, "loss": 0.4867, "step": 22000},
    {"epoch": 2.831881086622245, "grad_norm": 3.089897394180298, "learning_rate": 2.1681189133777553e-05, "loss": 0.4249, "step": 22100},
    {"epoch": 2.8446950281906713, "grad_norm": 14.606719970703125, "learning_rate": 2.1553049718093288e-05, "loss": 0.4429, "step": 22200},
    {"epoch": 2.857508969759098, "grad_norm": 7.761451244354248, "learning_rate": 2.142491030240902e-05, "loss": 0.4639, "step": 22300},
    {"epoch": 2.8703229113275244, "grad_norm": 6.9101362228393555, "learning_rate": 2.1296770886724756e-05, "loss": 0.4606, "step": 22400},
    {"epoch": 2.8831368528959507, "grad_norm": 6.754969120025635, "learning_rate": 2.116863147104049e-05, "loss": 0.4784, "step": 22500},
    {"epoch": 2.895950794464377, "grad_norm": 20.884119033813477, "learning_rate": 2.104049205535623e-05, "loss": 0.4625, "step": 22600},
    {"epoch": 2.9087647360328037, "grad_norm": 18.428529739379883, "learning_rate": 2.0912352639671965e-05, "loss": 0.4121, "step": 22700},
    {"epoch": 2.92157867760123, "grad_norm": 9.211915969848633, "learning_rate": 2.07842132239877e-05, "loss": 0.457, "step": 22800},
    {"epoch": 2.9343926191696568, "grad_norm": 5.744906425476074, "learning_rate": 2.0656073808303436e-05, "loss": 0.4169, "step": 22900},
    {"epoch": 2.947206560738083, "grad_norm": 10.679366111755371, "learning_rate": 2.052793439261917e-05, "loss": 0.4719, "step": 23000},
    {"epoch": 2.9600205023065094, "grad_norm": 8.72630500793457, "learning_rate": 2.0399794976934904e-05, "loss": 0.4743, "step": 23100},
    {"epoch": 2.9728344438749357, "grad_norm": 5.53284215927124, "learning_rate": 2.0271655561250642e-05, "loss": 0.4592, "step": 23200},
    {"epoch": 2.9856483854433624, "grad_norm": 10.75283432006836, "learning_rate": 2.0143516145566378e-05, "loss": 0.3971, "step": 23300},
    {"epoch": 2.9984623270117887, "grad_norm": 10.634764671325684, "learning_rate": 2.0015376729882113e-05, "loss": 0.4295, "step": 23400},
    {"epoch": 3.0, "eval_f1": 0.6853715205850849, "eval_loss": 1.0191140174865723, "eval_runtime": 837.6905, "eval_samples_per_second": 9.316, "eval_steps_per_second": 2.329, "step": 23412},
    {"epoch": 3.0112762685802155, "grad_norm": 3.43902587890625, "learning_rate": 1.988723731419785e-05, "loss": 0.2448, "step": 23500},
    {"epoch": 3.0240902101486418, "grad_norm": 0.5649552941322327, "learning_rate": 1.9759097898513584e-05, "loss": 0.1908, "step": 23600},
    {"epoch": 3.036904151717068, "grad_norm": 1.3035610914230347, "learning_rate": 1.963095848282932e-05, "loss": 0.275, "step": 23700},
    {"epoch": 3.049718093285495, "grad_norm": 27.42232322692871, "learning_rate": 1.9502819067145055e-05, "loss": 0.2727, "step": 23800},
    {"epoch": 3.062532034853921, "grad_norm": 1.675907015800476, "learning_rate": 1.937467965146079e-05, "loss": 0.2916, "step": 23900},
    {"epoch": 3.0753459764223474, "grad_norm": 9.602179527282715, "learning_rate": 1.9246540235776526e-05, "loss": 0.2645, "step": 24000},
    {"epoch": 3.088159917990774, "grad_norm": 16.757831573486328, "learning_rate": 1.911840082009226e-05, "loss": 0.2476, "step": 24100},
    {"epoch": 3.1009738595592005, "grad_norm": 5.842043876647949, "learning_rate": 1.8990261404407997e-05, "loss": 0.2829, "step": 24200},
    {"epoch": 3.1137878011276268, "grad_norm": 0.593449592590332, "learning_rate": 1.8862121988723732e-05, "loss": 0.289, "step": 24300},
    {"epoch": 3.1266017426960535, "grad_norm": 5.712982177734375, "learning_rate": 1.8733982573039467e-05, "loss": 0.2355, "step": 24400},
    {"epoch": 3.13941568426448, "grad_norm": 0.3152589201927185, "learning_rate": 1.8605843157355203e-05, "loss": 0.2491, "step": 24500},
    {"epoch": 3.152229625832906, "grad_norm": 19.951833724975586, "learning_rate": 1.8477703741670938e-05, "loss": 0.271, "step": 24600},
    {"epoch": 3.165043567401333, "grad_norm": 5.257028579711914, "learning_rate": 1.8349564325986674e-05, "loss": 0.277, "step": 24700},
    {"epoch": 3.177857508969759, "grad_norm": 3.6717381477355957, "learning_rate": 1.822142491030241e-05, "loss": 0.2736, "step": 24800},
    {"epoch": 3.1906714505381855, "grad_norm": 38.49631881713867, "learning_rate": 1.8093285494618144e-05, "loss": 0.2789, "step": 24900},
    {"epoch": 3.2034853921066118, "grad_norm": 5.944704055786133, "learning_rate": 1.796514607893388e-05, "loss": 0.3111, "step": 25000},
    {"epoch": 3.2162993336750385, "grad_norm": 3.278078079223633, "learning_rate": 1.7837006663249615e-05, "loss": 0.287, "step": 25100},
    {"epoch": 3.229113275243465, "grad_norm": 13.320869445800781, "learning_rate": 1.7708867247565354e-05, "loss": 0.2708, "step": 25200},
    {"epoch": 3.2419272168118916, "grad_norm": 9.01321029663086, "learning_rate": 1.758072783188109e-05, "loss": 0.2891, "step": 25300},
    {"epoch": 3.254741158380318, "grad_norm": 14.35201644897461, "learning_rate": 1.745258841619682e-05, "loss": 0.1523, "step": 25400},
    {"epoch": 3.267555099948744, "grad_norm": 5.268370628356934, "learning_rate": 1.7324449000512557e-05, "loss": 0.3608, "step": 25500},
    {"epoch": 3.2803690415171705, "grad_norm": 3.338168144226074, "learning_rate": 1.7196309584828292e-05, "loss": 0.2829, "step": 25600},
    {"epoch": 3.293182983085597, "grad_norm": 12.441572189331055, "learning_rate": 1.7068170169144028e-05, "loss": 0.2563, "step": 25700},
    {"epoch": 3.3059969246540235, "grad_norm": 2.870978832244873, "learning_rate": 1.6940030753459767e-05, "loss": 0.2957, "step": 25800},
    {"epoch": 3.3188108662224503, "grad_norm": 10.626642227172852, "learning_rate": 1.6811891337775502e-05, "loss": 0.3493, "step": 25900},
    {"epoch": 3.3316248077908766, "grad_norm": 1.1796225309371948, "learning_rate": 1.6683751922091237e-05, "loss": 0.293, "step": 26000},
    {"epoch": 3.344438749359303, "grad_norm": 46.64753341674805, "learning_rate": 1.6555612506406973e-05, "loss": 0.2739, "step": 26100},
    {"epoch": 3.357252690927729, "grad_norm": 17.778207778930664, "learning_rate": 1.6427473090722705e-05, "loss": 0.2897, "step": 26200},
    {"epoch": 3.370066632496156, "grad_norm": 1.6698403358459473, "learning_rate": 1.629933367503844e-05, "loss": 0.2661, "step": 26300},
    {"epoch": 3.382880574064582, "grad_norm": 0.18206116557121277, "learning_rate": 1.617119425935418e-05, "loss": 0.2847, "step": 26400},
    {"epoch": 3.395694515633009, "grad_norm": 6.839690208435059, "learning_rate": 1.6043054843669915e-05, "loss": 0.3044, "step": 26500},
    {"epoch": 3.4085084572014352, "grad_norm": 0.6313930749893188, "learning_rate": 1.591491542798565e-05, "loss": 0.2623, "step": 26600},
    {"epoch": 3.4213223987698616, "grad_norm": 70.23905181884766, "learning_rate": 1.5786776012301385e-05, "loss": 0.2573, "step": 26700},
    {"epoch": 3.434136340338288, "grad_norm": 16.72913360595703, "learning_rate": 1.565863659661712e-05, "loss": 0.2626, "step": 26800},
    {"epoch": 3.4469502819067146, "grad_norm": 43.662845611572266, "learning_rate": 1.5530497180932856e-05, "loss": 0.2679, "step": 26900},
    {"epoch": 3.459764223475141, "grad_norm": 20.96466064453125, "learning_rate": 1.540235776524859e-05, "loss": 0.3082, "step": 27000},
    {"epoch": 3.4725781650435676, "grad_norm": 45.02407455444336, "learning_rate": 1.5274218349564327e-05, "loss": 0.2492, "step": 27100},
    {"epoch": 3.485392106611994, "grad_norm": 14.404077529907227, "learning_rate": 1.5146078933880062e-05, "loss": 0.2704, "step": 27200},
    {"epoch": 3.4982060481804202, "grad_norm": 19.40283966064453, "learning_rate": 1.5017939518195798e-05, "loss": 0.3089, "step": 27300},
    {"epoch": 3.5110199897488465, "grad_norm": 13.016902923583984, "learning_rate": 1.4889800102511533e-05, "loss": 0.2953, "step": 27400},
    {"epoch": 3.5238339313172733, "grad_norm": 6.934922695159912, "learning_rate": 1.4761660686827269e-05, "loss": 0.2132, "step": 27500},
    {"epoch": 3.5366478728856996, "grad_norm": 49.58895492553711, "learning_rate": 1.4633521271143006e-05, "loss": 0.271, "step": 27600},
    {"epoch": 3.5494618144541263, "grad_norm": 4.814508438110352, "learning_rate": 1.4505381855458741e-05, "loss": 0.3195, "step": 27700},
    {"epoch": 3.5622757560225526, "grad_norm": 28.65342903137207, "learning_rate": 1.4377242439774475e-05, "loss": 0.2869, "step": 27800},
    {"epoch": 3.575089697590979, "grad_norm": 5.931487083435059, "learning_rate": 1.424910302409021e-05, "loss": 0.2982, "step": 27900},
    {"epoch": 3.5879036391594052, "grad_norm": 0.22432470321655273, "learning_rate": 1.4120963608405946e-05, "loss": 0.3167, "step": 28000},
    {"epoch": 3.600717580727832, "grad_norm": 27.89299964904785, "learning_rate": 1.3992824192721681e-05, "loss": 0.2831, "step": 28100},
    {"epoch": 3.6135315222962583, "grad_norm": 6.232203006744385, "learning_rate": 1.3864684777037418e-05, "loss": 0.2328, "step": 28200},
    {"epoch": 3.626345463864685, "grad_norm": 0.3798358738422394, "learning_rate": 1.3736545361353154e-05, "loss": 0.2565, "step": 28300},
    {"epoch": 3.6391594054331113, "grad_norm": 2.3177566528320312, "learning_rate": 1.3608405945668889e-05, "loss": 0.2822, "step": 28400},
    {"epoch": 3.6519733470015376, "grad_norm": 0.9287611246109009, "learning_rate": 1.3480266529984623e-05, "loss": 0.2206, "step": 28500},
    {"epoch": 3.664787288569964, "grad_norm": 19.89398765563965, "learning_rate": 1.3352127114300358e-05, "loss": 0.2934, "step": 28600},
    {"epoch": 3.6776012301383907, "grad_norm": 14.735712051391602, "learning_rate": 1.3223987698616094e-05, "loss": 0.2667, "step": 28700},
    {"epoch": 3.690415171706817, "grad_norm": 2.782954454421997, "learning_rate": 1.309584828293183e-05, "loss": 0.2565, "step": 28800},
    {"epoch": 3.7032291132752437, "grad_norm": 20.082395553588867, "learning_rate": 1.2967708867247566e-05, "loss": 0.3069, "step": 28900},
    {"epoch": 3.71604305484367, "grad_norm": 1.8632967472076416, "learning_rate": 1.2839569451563302e-05, "loss": 0.2484, "step": 29000},
    {"epoch": 3.7288569964120963, "grad_norm": 6.2880330085754395, "learning_rate": 1.2711430035879037e-05, "loss": 0.2769, "step": 29100},
    {"epoch": 3.7416709379805226, "grad_norm": 18.328922271728516, "learning_rate": 1.2583290620194774e-05, "loss": 0.284, "step": 29200},
    {"epoch": 3.7544848795489494, "grad_norm": 0.2658964991569519, "learning_rate": 1.2455151204510508e-05, "loss": 0.2725, "step": 29300},
    {"epoch": 3.7672988211173757, "grad_norm": 7.819123268127441, "learning_rate": 1.2327011788826243e-05, "loss": 0.2513, "step": 29400},
    {"epoch": 3.7801127626858024, "grad_norm": 4.6279144287109375, "learning_rate": 1.2198872373141979e-05, "loss": 0.2573, "step": 29500},
    {"epoch": 3.7929267042542287, "grad_norm": 24.996662139892578, "learning_rate": 1.2070732957457714e-05, "loss": 0.2621, "step": 29600},
    {"epoch": 3.805740645822655, "grad_norm": 20.87746810913086, "learning_rate": 1.194259354177345e-05, "loss": 0.2499, "step": 29700},
    {"epoch": 3.8185545873910813, "grad_norm": 1.5061414241790771, "learning_rate": 1.1814454126089187e-05, "loss": 0.265, "step": 29800},
    {"epoch": 3.831368528959508, "grad_norm": 2.7230064868927, "learning_rate": 1.168631471040492e-05, "loss": 0.2469, "step": 29900},
    {"epoch": 3.8441824705279344, "grad_norm": 0.6768075823783875, "learning_rate": 1.1558175294720656e-05, "loss": 0.2686, "step": 30000},
    {"epoch": 3.8569964120963607, "grad_norm": 0.08343211561441422, "learning_rate": 1.1430035879036393e-05, "loss": 0.2565, "step": 30100},
    {"epoch": 3.8698103536647874, "grad_norm": 25.58348274230957, "learning_rate": 1.1301896463352128e-05, "loss": 0.2967, "step": 30200},
    {"epoch": 3.8826242952332137, "grad_norm": 1.0459709167480469, "learning_rate": 1.1173757047667862e-05, "loss": 0.3028, "step": 30300},
    {"epoch": 3.89543823680164, "grad_norm": 0.33878639340400696, "learning_rate": 1.1045617631983599e-05, "loss": 0.2243, "step": 30400},
    {"epoch": 3.9082521783700668, "grad_norm": 2.021047592163086, "learning_rate": 1.0917478216299335e-05, "loss": 0.3656, "step": 30500},
    {"epoch": 3.921066119938493, "grad_norm": 1.6855653524398804, "learning_rate": 1.078933880061507e-05, "loss": 0.2323, "step": 30600},
    {"epoch": 3.9338800615069194, "grad_norm": 21.66104507446289, "learning_rate": 1.0661199384930805e-05, "loss": 0.2205, "step": 30700},
    {"epoch": 3.946694003075346, "grad_norm": 2.4428458213806152, "learning_rate": 1.053305996924654e-05, "loss": 0.2436, "step": 30800},
    {"epoch": 3.9595079446437724, "grad_norm": 39.37623596191406, "learning_rate": 1.0404920553562276e-05, "loss": 0.2831, "step": 30900},
    {"epoch": 3.9723218862121987, "grad_norm": 44.4313850402832, "learning_rate": 1.0276781137878012e-05, "loss": 0.2522, "step": 31000},
    {"epoch": 3.9851358277806255, "grad_norm": 2.6004929542541504, "learning_rate": 1.0148641722193747e-05, "loss": 0.3209, "step": 31100},
    {"epoch": 3.9979497693490518, "grad_norm": 2.536029815673828, "learning_rate": 1.0020502306509482e-05, "loss": 0.2807, "step": 31200},
    {"epoch": 4.0, "eval_f1": 0.6869426704202687, "eval_loss": 2.03011155128479, "eval_runtime": 825.2142, "eval_samples_per_second": 9.457, "eval_steps_per_second": 2.364, "step": 31216}
  ],
  "logging_steps": 100,
  "max_steps": 39020,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.105948110057636e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
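
A minimal sketch for inspecting this state file, assuming it is saved as `trainer_state.json` next to the checkpoints (the filename, the printed summary, and the optional plot are illustrative, not part of the log itself). Note what the log itself shows: `eval_f1` improves every epoch (0.641 → 0.687) while `eval_loss` rises sharply after epoch 2 (0.78 → 2.03), so the best checkpoint at step 31216 is selected on F1 even as the eval cross-entropy grows, a common overfitting signature.

```python
import json

# Load the Trainer state shown above ("trainer_state.json" is an assumed path).
with open("trainer_state.json") as f:
    state = json.load(f)

print("best eval_f1:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

# Entries with a "loss" key are training logs; entries with "eval_f1" are per-epoch evals.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_f1" in e]

for e in eval_log:
    print(f"epoch {e['epoch']:.0f}: eval_f1={e['eval_f1']:.4f}, eval_loss={e['eval_loss']:.4f}")

# Optional: plot training loss against global step (requires matplotlib).
try:
    import matplotlib.pyplot as plt
    plt.plot([e["step"] for e in train_log], [e["loss"] for e in train_log])
    plt.xlabel("global step")
    plt.ylabel("training loss")
    plt.savefig("loss_curve.png")
except ImportError:
    pass  # plotting is optional; the printed summary above already covers the evals
```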
|