|
{ |
|
"best_metric": 0.5932047750229569, |
|
"best_model_checkpoint": "convnextv2-base-22k-224-finetuned-tekno24/checkpoint-614", |
|
"epoch": 11.956043956043956, |
|
"eval_steps": 500, |
|
"global_step": 816, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 16.747648239135742, |
|
"learning_rate": 6.0975609756097564e-06, |
|
"loss": 1.4185, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 14.544577598571777, |
|
"learning_rate": 1.2195121951219513e-05, |
|
"loss": 1.4153, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 12.55716323852539, |
|
"learning_rate": 1.8292682926829268e-05, |
|
"loss": 1.3581, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5860805860805861, |
|
"grad_norm": 16.329694747924805, |
|
"learning_rate": 2.378048780487805e-05, |
|
"loss": 1.3421, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 9.924437522888184, |
|
"learning_rate": 2.9878048780487805e-05, |
|
"loss": 1.3174, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 8.243997573852539, |
|
"learning_rate": 3.597560975609756e-05, |
|
"loss": 1.2755, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9963369963369964, |
|
"eval_accuracy": 0.4903581267217631, |
|
"eval_f1": 0.39101576597192117, |
|
"eval_loss": 1.2008219957351685, |
|
"eval_precision": 0.45771669331368264, |
|
"eval_recall": 0.4903581267217631, |
|
"eval_runtime": 10.5546, |
|
"eval_samples_per_second": 103.178, |
|
"eval_steps_per_second": 6.537, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.0256410256410255, |
|
"grad_norm": 13.558063507080078, |
|
"learning_rate": 4.207317073170732e-05, |
|
"loss": 1.2711, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.1721611721611722, |
|
"grad_norm": 27.544034957885742, |
|
"learning_rate": 4.817073170731707e-05, |
|
"loss": 1.2731, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.3186813186813187, |
|
"grad_norm": 12.061452865600586, |
|
"learning_rate": 4.952316076294278e-05, |
|
"loss": 1.2582, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.4652014652014653, |
|
"grad_norm": 13.313647270202637, |
|
"learning_rate": 4.884196185286104e-05, |
|
"loss": 1.2729, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6117216117216118, |
|
"grad_norm": 8.85893440246582, |
|
"learning_rate": 4.816076294277929e-05, |
|
"loss": 1.2093, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.7582417582417582, |
|
"grad_norm": 7.619632244110107, |
|
"learning_rate": 4.747956403269755e-05, |
|
"loss": 1.2445, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.9047619047619047, |
|
"grad_norm": 9.313189506530762, |
|
"learning_rate": 4.6798365122615805e-05, |
|
"loss": 1.1711, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.9926739926739927, |
|
"eval_accuracy": 0.5353535353535354, |
|
"eval_f1": 0.47656758341858724, |
|
"eval_loss": 1.0650511980056763, |
|
"eval_precision": 0.48653241546321574, |
|
"eval_recall": 0.5353535353535354, |
|
"eval_runtime": 10.5232, |
|
"eval_samples_per_second": 103.486, |
|
"eval_steps_per_second": 6.557, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 2.051282051282051, |
|
"grad_norm": 9.400361061096191, |
|
"learning_rate": 4.6117166212534065e-05, |
|
"loss": 1.1145, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.197802197802198, |
|
"grad_norm": 5.284915924072266, |
|
"learning_rate": 4.543596730245232e-05, |
|
"loss": 1.1809, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.3443223443223444, |
|
"grad_norm": 10.18017292022705, |
|
"learning_rate": 4.475476839237057e-05, |
|
"loss": 1.1668, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.490842490842491, |
|
"grad_norm": 7.79976224899292, |
|
"learning_rate": 4.407356948228883e-05, |
|
"loss": 1.1582, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.6373626373626373, |
|
"grad_norm": 8.457806587219238, |
|
"learning_rate": 4.339237057220708e-05, |
|
"loss": 1.1628, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.7838827838827838, |
|
"grad_norm": 6.220970630645752, |
|
"learning_rate": 4.271117166212534e-05, |
|
"loss": 1.2035, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.9304029304029307, |
|
"grad_norm": 7.159059524536133, |
|
"learning_rate": 4.20299727520436e-05, |
|
"loss": 1.1599, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.989010989010989, |
|
"eval_accuracy": 0.54178145087236, |
|
"eval_f1": 0.5077316365087595, |
|
"eval_loss": 1.0533095598220825, |
|
"eval_precision": 0.5274645952658832, |
|
"eval_recall": 0.54178145087236, |
|
"eval_runtime": 10.5408, |
|
"eval_samples_per_second": 103.313, |
|
"eval_steps_per_second": 6.546, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 5.542409896850586, |
|
"learning_rate": 4.1348773841961855e-05, |
|
"loss": 1.1692, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.2234432234432235, |
|
"grad_norm": 7.356290340423584, |
|
"learning_rate": 4.066757493188011e-05, |
|
"loss": 1.1176, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.36996336996337, |
|
"grad_norm": 6.849288463592529, |
|
"learning_rate": 3.998637602179837e-05, |
|
"loss": 1.1335, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.5164835164835164, |
|
"grad_norm": 11.968345642089844, |
|
"learning_rate": 3.930517711171662e-05, |
|
"loss": 1.14, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.663003663003663, |
|
"grad_norm": 8.681733131408691, |
|
"learning_rate": 3.862397820163488e-05, |
|
"loss": 1.1195, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.8095238095238093, |
|
"grad_norm": 8.258611679077148, |
|
"learning_rate": 3.794277929155314e-05, |
|
"loss": 1.1578, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.956043956043956, |
|
"grad_norm": 9.197736740112305, |
|
"learning_rate": 3.726158038147139e-05, |
|
"loss": 1.1595, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_f1": 0.5413509466694156, |
|
"eval_loss": 1.0423349142074585, |
|
"eval_precision": 0.5690587889084956, |
|
"eval_recall": 0.5454545454545454, |
|
"eval_runtime": 10.5435, |
|
"eval_samples_per_second": 103.286, |
|
"eval_steps_per_second": 6.544, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 4.102564102564102, |
|
"grad_norm": 7.987867832183838, |
|
"learning_rate": 3.6580381471389645e-05, |
|
"loss": 1.0988, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.249084249084249, |
|
"grad_norm": 6.383251667022705, |
|
"learning_rate": 3.5899182561307905e-05, |
|
"loss": 1.1338, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.395604395604396, |
|
"grad_norm": 7.749887943267822, |
|
"learning_rate": 3.521798365122616e-05, |
|
"loss": 1.1635, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.542124542124542, |
|
"grad_norm": 9.830282211303711, |
|
"learning_rate": 3.453678474114442e-05, |
|
"loss": 1.0912, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.688644688644689, |
|
"grad_norm": 9.785733222961426, |
|
"learning_rate": 3.385558583106267e-05, |
|
"loss": 1.1293, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.835164835164835, |
|
"grad_norm": 9.794586181640625, |
|
"learning_rate": 3.317438692098093e-05, |
|
"loss": 1.0867, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.981684981684982, |
|
"grad_norm": 5.60059928894043, |
|
"learning_rate": 3.249318801089918e-05, |
|
"loss": 1.096, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.996336996336996, |
|
"eval_accuracy": 0.5610651974288338, |
|
"eval_f1": 0.5462709659715219, |
|
"eval_loss": 1.0159707069396973, |
|
"eval_precision": 0.5419035604614381, |
|
"eval_recall": 0.5610651974288338, |
|
"eval_runtime": 10.5161, |
|
"eval_samples_per_second": 103.556, |
|
"eval_steps_per_second": 6.561, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 5.128205128205128, |
|
"grad_norm": 6.809742450714111, |
|
"learning_rate": 3.181198910081744e-05, |
|
"loss": 1.071, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.274725274725275, |
|
"grad_norm": 7.61679744720459, |
|
"learning_rate": 3.1130790190735695e-05, |
|
"loss": 1.1031, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.4212454212454215, |
|
"grad_norm": 7.127725601196289, |
|
"learning_rate": 3.0449591280653955e-05, |
|
"loss": 1.0971, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.5677655677655675, |
|
"grad_norm": 6.362829685211182, |
|
"learning_rate": 2.9768392370572208e-05, |
|
"loss": 1.1078, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 8.149834632873535, |
|
"learning_rate": 2.9087193460490464e-05, |
|
"loss": 1.0871, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.860805860805861, |
|
"grad_norm": 8.003039360046387, |
|
"learning_rate": 2.840599455040872e-05, |
|
"loss": 1.0592, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.992673992673993, |
|
"eval_accuracy": 0.5766758494031221, |
|
"eval_f1": 0.5414852771503386, |
|
"eval_loss": 0.9847236275672913, |
|
"eval_precision": 0.5484837093262834, |
|
"eval_recall": 0.5766758494031221, |
|
"eval_runtime": 10.5345, |
|
"eval_samples_per_second": 103.374, |
|
"eval_steps_per_second": 6.55, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 6.007326007326007, |
|
"grad_norm": 9.939899444580078, |
|
"learning_rate": 2.772479564032698e-05, |
|
"loss": 1.0573, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"grad_norm": 8.810173988342285, |
|
"learning_rate": 2.7043596730245236e-05, |
|
"loss": 1.038, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.3003663003663, |
|
"grad_norm": 9.228567123413086, |
|
"learning_rate": 2.6362397820163485e-05, |
|
"loss": 1.0441, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.446886446886447, |
|
"grad_norm": 10.357806205749512, |
|
"learning_rate": 2.5681198910081745e-05, |
|
"loss": 1.0313, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.593406593406593, |
|
"grad_norm": 6.874061584472656, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.0667, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.73992673992674, |
|
"grad_norm": 10.741703987121582, |
|
"learning_rate": 2.4318801089918257e-05, |
|
"loss": 1.0353, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.886446886446887, |
|
"grad_norm": 7.215301513671875, |
|
"learning_rate": 2.3637602179836514e-05, |
|
"loss": 1.0706, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.989010989010989, |
|
"eval_accuracy": 0.588613406795225, |
|
"eval_f1": 0.5835655659113839, |
|
"eval_loss": 0.9867706894874573, |
|
"eval_precision": 0.5861531495568983, |
|
"eval_recall": 0.588613406795225, |
|
"eval_runtime": 10.5429, |
|
"eval_samples_per_second": 103.292, |
|
"eval_steps_per_second": 6.545, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 7.032967032967033, |
|
"grad_norm": 9.452975273132324, |
|
"learning_rate": 2.295640326975477e-05, |
|
"loss": 1.1093, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.17948717948718, |
|
"grad_norm": 5.909883499145508, |
|
"learning_rate": 2.2275204359673023e-05, |
|
"loss": 1.0628, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.326007326007326, |
|
"grad_norm": 8.430510520935059, |
|
"learning_rate": 2.1594005449591282e-05, |
|
"loss": 1.0175, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.472527472527473, |
|
"grad_norm": 11.566703796386719, |
|
"learning_rate": 2.091280653950954e-05, |
|
"loss": 0.9975, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 7.619047619047619, |
|
"grad_norm": 8.561046600341797, |
|
"learning_rate": 2.023160762942779e-05, |
|
"loss": 1.057, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.7655677655677655, |
|
"grad_norm": 9.214874267578125, |
|
"learning_rate": 1.955040871934605e-05, |
|
"loss": 1.0085, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 7.912087912087912, |
|
"grad_norm": 7.050257682800293, |
|
"learning_rate": 1.8869209809264307e-05, |
|
"loss": 1.0404, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5867768595041323, |
|
"eval_f1": 0.5736657074074295, |
|
"eval_loss": 0.9758484363555908, |
|
"eval_precision": 0.5694509571644558, |
|
"eval_recall": 0.5867768595041323, |
|
"eval_runtime": 10.5054, |
|
"eval_samples_per_second": 103.661, |
|
"eval_steps_per_second": 6.568, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 8.058608058608058, |
|
"grad_norm": 9.406164169311523, |
|
"learning_rate": 1.818801089918256e-05, |
|
"loss": 1.0564, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.205128205128204, |
|
"grad_norm": 8.063167572021484, |
|
"learning_rate": 1.750681198910082e-05, |
|
"loss": 1.0381, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.351648351648352, |
|
"grad_norm": 7.877150535583496, |
|
"learning_rate": 1.6825613079019073e-05, |
|
"loss": 0.9996, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.498168498168498, |
|
"grad_norm": 10.49506664276123, |
|
"learning_rate": 1.614441416893733e-05, |
|
"loss": 0.9723, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 8.644688644688644, |
|
"grad_norm": 8.74528694152832, |
|
"learning_rate": 1.546321525885559e-05, |
|
"loss": 1.0267, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 8.791208791208792, |
|
"grad_norm": 7.0554962158203125, |
|
"learning_rate": 1.4782016348773841e-05, |
|
"loss": 1.0251, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.937728937728938, |
|
"grad_norm": 7.974668502807617, |
|
"learning_rate": 1.41008174386921e-05, |
|
"loss": 1.0059, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 8.996336996336996, |
|
"eval_accuracy": 0.5932047750229569, |
|
"eval_f1": 0.5674291939014156, |
|
"eval_loss": 0.9468401074409485, |
|
"eval_precision": 0.5709384462011384, |
|
"eval_recall": 0.5932047750229569, |
|
"eval_runtime": 10.4878, |
|
"eval_samples_per_second": 103.835, |
|
"eval_steps_per_second": 6.579, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 9.084249084249084, |
|
"grad_norm": 8.571954727172852, |
|
"learning_rate": 1.3419618528610356e-05, |
|
"loss": 0.9878, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 9.23076923076923, |
|
"grad_norm": 8.298359870910645, |
|
"learning_rate": 1.273841961852861e-05, |
|
"loss": 1.0056, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.377289377289378, |
|
"grad_norm": 7.087368965148926, |
|
"learning_rate": 1.2057220708446868e-05, |
|
"loss": 1.037, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.523809523809524, |
|
"grad_norm": 7.409104347229004, |
|
"learning_rate": 1.1376021798365123e-05, |
|
"loss": 0.9326, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.67032967032967, |
|
"grad_norm": 7.1452531814575195, |
|
"learning_rate": 1.0694822888283379e-05, |
|
"loss": 0.9723, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.816849816849818, |
|
"grad_norm": 8.340729713439941, |
|
"learning_rate": 1.0013623978201635e-05, |
|
"loss": 0.9914, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 9.963369963369964, |
|
"grad_norm": 10.698880195617676, |
|
"learning_rate": 9.332425068119891e-06, |
|
"loss": 0.965, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 9.992673992673993, |
|
"eval_accuracy": 0.5932047750229569, |
|
"eval_f1": 0.5804462637419235, |
|
"eval_loss": 0.9565483331680298, |
|
"eval_precision": 0.5857538351608745, |
|
"eval_recall": 0.5932047750229569, |
|
"eval_runtime": 10.4791, |
|
"eval_samples_per_second": 103.921, |
|
"eval_steps_per_second": 6.585, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 10.10989010989011, |
|
"grad_norm": 10.67618465423584, |
|
"learning_rate": 8.651226158038147e-06, |
|
"loss": 0.9732, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 10.256410256410255, |
|
"grad_norm": 9.102426528930664, |
|
"learning_rate": 7.970027247956404e-06, |
|
"loss": 0.9804, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.402930402930403, |
|
"grad_norm": 11.115556716918945, |
|
"learning_rate": 7.288828337874659e-06, |
|
"loss": 0.9855, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 10.54945054945055, |
|
"grad_norm": 11.00426197052002, |
|
"learning_rate": 6.607629427792916e-06, |
|
"loss": 0.9691, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 10.695970695970695, |
|
"grad_norm": 10.043339729309082, |
|
"learning_rate": 5.9264305177111724e-06, |
|
"loss": 0.9365, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 10.842490842490843, |
|
"grad_norm": 11.518232345581055, |
|
"learning_rate": 5.245231607629428e-06, |
|
"loss": 0.9659, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 10.989010989010989, |
|
"grad_norm": 9.314995765686035, |
|
"learning_rate": 4.564032697547684e-06, |
|
"loss": 0.9362, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 10.989010989010989, |
|
"eval_accuracy": 0.588613406795225, |
|
"eval_f1": 0.5778239696801104, |
|
"eval_loss": 0.9466218948364258, |
|
"eval_precision": 0.5767647827506791, |
|
"eval_recall": 0.588613406795225, |
|
"eval_runtime": 10.5258, |
|
"eval_samples_per_second": 103.46, |
|
"eval_steps_per_second": 6.555, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 11.135531135531135, |
|
"grad_norm": 10.667020797729492, |
|
"learning_rate": 3.88283378746594e-06, |
|
"loss": 0.9564, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 11.282051282051283, |
|
"grad_norm": 10.923125267028809, |
|
"learning_rate": 3.2016348773841965e-06, |
|
"loss": 0.9177, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"grad_norm": 9.893692016601562, |
|
"learning_rate": 2.5204359673024523e-06, |
|
"loss": 0.9732, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 11.575091575091575, |
|
"grad_norm": 8.784825325012207, |
|
"learning_rate": 1.8392370572207086e-06, |
|
"loss": 0.9558, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 11.72161172161172, |
|
"grad_norm": 8.990778923034668, |
|
"learning_rate": 1.1580381471389646e-06, |
|
"loss": 0.9656, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.868131868131869, |
|
"grad_norm": 8.444221496582031, |
|
"learning_rate": 4.768392370572207e-07, |
|
"loss": 0.9334, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 11.956043956043956, |
|
"eval_accuracy": 0.5858585858585859, |
|
"eval_f1": 0.5699572730591373, |
|
"eval_loss": 0.9441593885421753, |
|
"eval_precision": 0.5692451226403554, |
|
"eval_recall": 0.5858585858585859, |
|
"eval_runtime": 10.5926, |
|
"eval_samples_per_second": 102.808, |
|
"eval_steps_per_second": 6.514, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 11.956043956043956, |
|
"step": 816, |
|
"total_flos": 4.123334853511373e+18, |
|
"train_loss": 1.090085435147379, |
|
"train_runtime": 2115.1144, |
|
"train_samples_per_second": 24.697, |
|
"train_steps_per_second": 0.386 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 816, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.123334853511373e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|