{ "best_metric": 0.5932047750229569, "best_model_checkpoint": "convnextv2-base-22k-224-finetuned-tekno24/checkpoint-614", "epoch": 11.956043956043956, "eval_steps": 500, "global_step": 816, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14652014652014653, "grad_norm": 16.747648239135742, "learning_rate": 6.0975609756097564e-06, "loss": 1.4185, "step": 10 }, { "epoch": 0.29304029304029305, "grad_norm": 14.544577598571777, "learning_rate": 1.2195121951219513e-05, "loss": 1.4153, "step": 20 }, { "epoch": 0.43956043956043955, "grad_norm": 12.55716323852539, "learning_rate": 1.8292682926829268e-05, "loss": 1.3581, "step": 30 }, { "epoch": 0.5860805860805861, "grad_norm": 16.329694747924805, "learning_rate": 2.378048780487805e-05, "loss": 1.3421, "step": 40 }, { "epoch": 0.7326007326007326, "grad_norm": 9.924437522888184, "learning_rate": 2.9878048780487805e-05, "loss": 1.3174, "step": 50 }, { "epoch": 0.8791208791208791, "grad_norm": 8.243997573852539, "learning_rate": 3.597560975609756e-05, "loss": 1.2755, "step": 60 }, { "epoch": 0.9963369963369964, "eval_accuracy": 0.4903581267217631, "eval_f1": 0.39101576597192117, "eval_loss": 1.2008219957351685, "eval_precision": 0.45771669331368264, "eval_recall": 0.4903581267217631, "eval_runtime": 10.5546, "eval_samples_per_second": 103.178, "eval_steps_per_second": 6.537, "step": 68 }, { "epoch": 1.0256410256410255, "grad_norm": 13.558063507080078, "learning_rate": 4.207317073170732e-05, "loss": 1.2711, "step": 70 }, { "epoch": 1.1721611721611722, "grad_norm": 27.544034957885742, "learning_rate": 4.817073170731707e-05, "loss": 1.2731, "step": 80 }, { "epoch": 1.3186813186813187, "grad_norm": 12.061452865600586, "learning_rate": 4.952316076294278e-05, "loss": 1.2582, "step": 90 }, { "epoch": 1.4652014652014653, "grad_norm": 13.313647270202637, "learning_rate": 4.884196185286104e-05, "loss": 1.2729, "step": 100 }, { "epoch": 1.6117216117216118, "grad_norm": 8.85893440246582, "learning_rate": 4.816076294277929e-05, "loss": 1.2093, "step": 110 }, { "epoch": 1.7582417582417582, "grad_norm": 7.619632244110107, "learning_rate": 4.747956403269755e-05, "loss": 1.2445, "step": 120 }, { "epoch": 1.9047619047619047, "grad_norm": 9.313189506530762, "learning_rate": 4.6798365122615805e-05, "loss": 1.1711, "step": 130 }, { "epoch": 1.9926739926739927, "eval_accuracy": 0.5353535353535354, "eval_f1": 0.47656758341858724, "eval_loss": 1.0650511980056763, "eval_precision": 0.48653241546321574, "eval_recall": 0.5353535353535354, "eval_runtime": 10.5232, "eval_samples_per_second": 103.486, "eval_steps_per_second": 6.557, "step": 136 }, { "epoch": 2.051282051282051, "grad_norm": 9.400361061096191, "learning_rate": 4.6117166212534065e-05, "loss": 1.1145, "step": 140 }, { "epoch": 2.197802197802198, "grad_norm": 5.284915924072266, "learning_rate": 4.543596730245232e-05, "loss": 1.1809, "step": 150 }, { "epoch": 2.3443223443223444, "grad_norm": 10.18017292022705, "learning_rate": 4.475476839237057e-05, "loss": 1.1668, "step": 160 }, { "epoch": 2.490842490842491, "grad_norm": 7.79976224899292, "learning_rate": 4.407356948228883e-05, "loss": 1.1582, "step": 170 }, { "epoch": 2.6373626373626373, "grad_norm": 8.457806587219238, "learning_rate": 4.339237057220708e-05, "loss": 1.1628, "step": 180 }, { "epoch": 2.7838827838827838, "grad_norm": 6.220970630645752, "learning_rate": 4.271117166212534e-05, "loss": 1.2035, "step": 190 }, { "epoch": 2.9304029304029307, "grad_norm": 7.159059524536133, "learning_rate": 4.20299727520436e-05, "loss": 1.1599, "step": 200 }, { "epoch": 2.989010989010989, "eval_accuracy": 0.54178145087236, "eval_f1": 0.5077316365087595, "eval_loss": 1.0533095598220825, "eval_precision": 0.5274645952658832, "eval_recall": 0.54178145087236, "eval_runtime": 10.5408, "eval_samples_per_second": 103.313, "eval_steps_per_second": 6.546, "step": 204 }, { "epoch": 3.076923076923077, "grad_norm": 5.542409896850586, "learning_rate": 4.1348773841961855e-05, "loss": 1.1692, "step": 210 }, { "epoch": 3.2234432234432235, "grad_norm": 7.356290340423584, "learning_rate": 4.066757493188011e-05, "loss": 1.1176, "step": 220 }, { "epoch": 3.36996336996337, "grad_norm": 6.849288463592529, "learning_rate": 3.998637602179837e-05, "loss": 1.1335, "step": 230 }, { "epoch": 3.5164835164835164, "grad_norm": 11.968345642089844, "learning_rate": 3.930517711171662e-05, "loss": 1.14, "step": 240 }, { "epoch": 3.663003663003663, "grad_norm": 8.681733131408691, "learning_rate": 3.862397820163488e-05, "loss": 1.1195, "step": 250 }, { "epoch": 3.8095238095238093, "grad_norm": 8.258611679077148, "learning_rate": 3.794277929155314e-05, "loss": 1.1578, "step": 260 }, { "epoch": 3.956043956043956, "grad_norm": 9.197736740112305, "learning_rate": 3.726158038147139e-05, "loss": 1.1595, "step": 270 }, { "epoch": 4.0, "eval_accuracy": 0.5454545454545454, "eval_f1": 0.5413509466694156, "eval_loss": 1.0423349142074585, "eval_precision": 0.5690587889084956, "eval_recall": 0.5454545454545454, "eval_runtime": 10.5435, "eval_samples_per_second": 103.286, "eval_steps_per_second": 6.544, "step": 273 }, { "epoch": 4.102564102564102, "grad_norm": 7.987867832183838, "learning_rate": 3.6580381471389645e-05, "loss": 1.0988, "step": 280 }, { "epoch": 4.249084249084249, "grad_norm": 6.383251667022705, "learning_rate": 3.5899182561307905e-05, "loss": 1.1338, "step": 290 }, { "epoch": 4.395604395604396, "grad_norm": 7.749887943267822, "learning_rate": 3.521798365122616e-05, "loss": 1.1635, "step": 300 }, { "epoch": 4.542124542124542, "grad_norm": 9.830282211303711, "learning_rate": 3.453678474114442e-05, "loss": 1.0912, "step": 310 }, { "epoch": 4.688644688644689, "grad_norm": 9.785733222961426, "learning_rate": 3.385558583106267e-05, "loss": 1.1293, "step": 320 }, { "epoch": 4.835164835164835, "grad_norm": 9.794586181640625, "learning_rate": 3.317438692098093e-05, "loss": 1.0867, "step": 330 }, { "epoch": 4.981684981684982, "grad_norm": 5.60059928894043, "learning_rate": 3.249318801089918e-05, "loss": 1.096, "step": 340 }, { "epoch": 4.996336996336996, "eval_accuracy": 0.5610651974288338, "eval_f1": 0.5462709659715219, "eval_loss": 1.0159707069396973, "eval_precision": 0.5419035604614381, "eval_recall": 0.5610651974288338, "eval_runtime": 10.5161, "eval_samples_per_second": 103.556, "eval_steps_per_second": 6.561, "step": 341 }, { "epoch": 5.128205128205128, "grad_norm": 6.809742450714111, "learning_rate": 3.181198910081744e-05, "loss": 1.071, "step": 350 }, { "epoch": 5.274725274725275, "grad_norm": 7.61679744720459, "learning_rate": 3.1130790190735695e-05, "loss": 1.1031, "step": 360 }, { "epoch": 5.4212454212454215, "grad_norm": 7.127725601196289, "learning_rate": 3.0449591280653955e-05, "loss": 1.0971, "step": 370 }, { "epoch": 5.5677655677655675, "grad_norm": 6.362829685211182, "learning_rate": 2.9768392370572208e-05, "loss": 1.1078, "step": 380 }, { "epoch": 5.714285714285714, "grad_norm": 8.149834632873535, "learning_rate": 2.9087193460490464e-05, "loss": 1.0871, "step": 390 }, { "epoch": 5.860805860805861, "grad_norm": 8.003039360046387, "learning_rate": 2.840599455040872e-05, "loss": 1.0592, "step": 400 }, { "epoch": 5.992673992673993, "eval_accuracy": 0.5766758494031221, "eval_f1": 0.5414852771503386, "eval_loss": 0.9847236275672913, "eval_precision": 0.5484837093262834, "eval_recall": 0.5766758494031221, "eval_runtime": 10.5345, "eval_samples_per_second": 103.374, "eval_steps_per_second": 6.55, "step": 409 }, { "epoch": 6.007326007326007, "grad_norm": 9.939899444580078, "learning_rate": 2.772479564032698e-05, "loss": 1.0573, "step": 410 }, { "epoch": 6.153846153846154, "grad_norm": 8.810173988342285, "learning_rate": 2.7043596730245236e-05, "loss": 1.038, "step": 420 }, { "epoch": 6.3003663003663, "grad_norm": 9.228567123413086, "learning_rate": 2.6362397820163485e-05, "loss": 1.0441, "step": 430 }, { "epoch": 6.446886446886447, "grad_norm": 10.357806205749512, "learning_rate": 2.5681198910081745e-05, "loss": 1.0313, "step": 440 }, { "epoch": 6.593406593406593, "grad_norm": 6.874061584472656, "learning_rate": 2.5e-05, "loss": 1.0667, "step": 450 }, { "epoch": 6.73992673992674, "grad_norm": 10.741703987121582, "learning_rate": 2.4318801089918257e-05, "loss": 1.0353, "step": 460 }, { "epoch": 6.886446886446887, "grad_norm": 7.215301513671875, "learning_rate": 2.3637602179836514e-05, "loss": 1.0706, "step": 470 }, { "epoch": 6.989010989010989, "eval_accuracy": 0.588613406795225, "eval_f1": 0.5835655659113839, "eval_loss": 0.9867706894874573, "eval_precision": 0.5861531495568983, "eval_recall": 0.588613406795225, "eval_runtime": 10.5429, "eval_samples_per_second": 103.292, "eval_steps_per_second": 6.545, "step": 477 }, { "epoch": 7.032967032967033, "grad_norm": 9.452975273132324, "learning_rate": 2.295640326975477e-05, "loss": 1.1093, "step": 480 }, { "epoch": 7.17948717948718, "grad_norm": 5.909883499145508, "learning_rate": 2.2275204359673023e-05, "loss": 1.0628, "step": 490 }, { "epoch": 7.326007326007326, "grad_norm": 8.430510520935059, "learning_rate": 2.1594005449591282e-05, "loss": 1.0175, "step": 500 }, { "epoch": 7.472527472527473, "grad_norm": 11.566703796386719, "learning_rate": 2.091280653950954e-05, "loss": 0.9975, "step": 510 }, { "epoch": 7.619047619047619, "grad_norm": 8.561046600341797, "learning_rate": 2.023160762942779e-05, "loss": 1.057, "step": 520 }, { "epoch": 7.7655677655677655, "grad_norm": 9.214874267578125, "learning_rate": 1.955040871934605e-05, "loss": 1.0085, "step": 530 }, { "epoch": 7.912087912087912, "grad_norm": 7.050257682800293, "learning_rate": 1.8869209809264307e-05, "loss": 1.0404, "step": 540 }, { "epoch": 8.0, "eval_accuracy": 0.5867768595041323, "eval_f1": 0.5736657074074295, "eval_loss": 0.9758484363555908, "eval_precision": 0.5694509571644558, "eval_recall": 0.5867768595041323, "eval_runtime": 10.5054, "eval_samples_per_second": 103.661, "eval_steps_per_second": 6.568, "step": 546 }, { "epoch": 8.058608058608058, "grad_norm": 9.406164169311523, "learning_rate": 1.818801089918256e-05, "loss": 1.0564, "step": 550 }, { "epoch": 8.205128205128204, "grad_norm": 8.063167572021484, "learning_rate": 1.750681198910082e-05, "loss": 1.0381, "step": 560 }, { "epoch": 8.351648351648352, "grad_norm": 7.877150535583496, "learning_rate": 1.6825613079019073e-05, "loss": 0.9996, "step": 570 }, { "epoch": 8.498168498168498, "grad_norm": 10.49506664276123, "learning_rate": 1.614441416893733e-05, "loss": 0.9723, "step": 580 }, { "epoch": 8.644688644688644, "grad_norm": 8.74528694152832, "learning_rate": 1.546321525885559e-05, "loss": 1.0267, "step": 590 }, { "epoch": 8.791208791208792, "grad_norm": 7.0554962158203125, "learning_rate": 1.4782016348773841e-05, "loss": 1.0251, "step": 600 }, { "epoch": 8.937728937728938, "grad_norm": 7.974668502807617, "learning_rate": 1.41008174386921e-05, "loss": 1.0059, "step": 610 }, { "epoch": 8.996336996336996, "eval_accuracy": 0.5932047750229569, "eval_f1": 0.5674291939014156, "eval_loss": 0.9468401074409485, "eval_precision": 0.5709384462011384, "eval_recall": 0.5932047750229569, "eval_runtime": 10.4878, "eval_samples_per_second": 103.835, "eval_steps_per_second": 6.579, "step": 614 }, { "epoch": 9.084249084249084, "grad_norm": 8.571954727172852, "learning_rate": 1.3419618528610356e-05, "loss": 0.9878, "step": 620 }, { "epoch": 9.23076923076923, "grad_norm": 8.298359870910645, "learning_rate": 1.273841961852861e-05, "loss": 1.0056, "step": 630 }, { "epoch": 9.377289377289378, "grad_norm": 7.087368965148926, "learning_rate": 1.2057220708446868e-05, "loss": 1.037, "step": 640 }, { "epoch": 9.523809523809524, "grad_norm": 7.409104347229004, "learning_rate": 1.1376021798365123e-05, "loss": 0.9326, "step": 650 }, { "epoch": 9.67032967032967, "grad_norm": 7.1452531814575195, "learning_rate": 1.0694822888283379e-05, "loss": 0.9723, "step": 660 }, { "epoch": 9.816849816849818, "grad_norm": 8.340729713439941, "learning_rate": 1.0013623978201635e-05, "loss": 0.9914, "step": 670 }, { "epoch": 9.963369963369964, "grad_norm": 10.698880195617676, "learning_rate": 9.332425068119891e-06, "loss": 0.965, "step": 680 }, { "epoch": 9.992673992673993, "eval_accuracy": 0.5932047750229569, "eval_f1": 0.5804462637419235, "eval_loss": 0.9565483331680298, "eval_precision": 0.5857538351608745, "eval_recall": 0.5932047750229569, "eval_runtime": 10.4791, "eval_samples_per_second": 103.921, "eval_steps_per_second": 6.585, "step": 682 }, { "epoch": 10.10989010989011, "grad_norm": 10.67618465423584, "learning_rate": 8.651226158038147e-06, "loss": 0.9732, "step": 690 }, { "epoch": 10.256410256410255, "grad_norm": 9.102426528930664, "learning_rate": 7.970027247956404e-06, "loss": 0.9804, "step": 700 }, { "epoch": 10.402930402930403, "grad_norm": 11.115556716918945, "learning_rate": 7.288828337874659e-06, "loss": 0.9855, "step": 710 }, { "epoch": 10.54945054945055, "grad_norm": 11.00426197052002, "learning_rate": 6.607629427792916e-06, "loss": 0.9691, "step": 720 }, { "epoch": 10.695970695970695, "grad_norm": 10.043339729309082, "learning_rate": 5.9264305177111724e-06, "loss": 0.9365, "step": 730 }, { "epoch": 10.842490842490843, "grad_norm": 11.518232345581055, "learning_rate": 5.245231607629428e-06, "loss": 0.9659, "step": 740 }, { "epoch": 10.989010989010989, "grad_norm": 9.314995765686035, "learning_rate": 4.564032697547684e-06, "loss": 0.9362, "step": 750 }, { "epoch": 10.989010989010989, "eval_accuracy": 0.588613406795225, "eval_f1": 0.5778239696801104, "eval_loss": 0.9466218948364258, "eval_precision": 0.5767647827506791, "eval_recall": 0.588613406795225, "eval_runtime": 10.5258, "eval_samples_per_second": 103.46, "eval_steps_per_second": 6.555, "step": 750 }, { "epoch": 11.135531135531135, "grad_norm": 10.667020797729492, "learning_rate": 3.88283378746594e-06, "loss": 0.9564, "step": 760 }, { "epoch": 11.282051282051283, "grad_norm": 10.923125267028809, "learning_rate": 3.2016348773841965e-06, "loss": 0.9177, "step": 770 }, { "epoch": 11.428571428571429, "grad_norm": 9.893692016601562, "learning_rate": 2.5204359673024523e-06, "loss": 0.9732, "step": 780 }, { "epoch": 11.575091575091575, "grad_norm": 8.784825325012207, "learning_rate": 1.8392370572207086e-06, "loss": 0.9558, "step": 790 }, { "epoch": 11.72161172161172, "grad_norm": 8.990778923034668, "learning_rate": 1.1580381471389646e-06, "loss": 0.9656, "step": 800 }, { "epoch": 11.868131868131869, "grad_norm": 8.444221496582031, "learning_rate": 4.768392370572207e-07, "loss": 0.9334, "step": 810 }, { "epoch": 11.956043956043956, "eval_accuracy": 0.5858585858585859, "eval_f1": 0.5699572730591373, "eval_loss": 0.9441593885421753, "eval_precision": 0.5692451226403554, "eval_recall": 0.5858585858585859, "eval_runtime": 10.5926, "eval_samples_per_second": 102.808, "eval_steps_per_second": 6.514, "step": 816 }, { "epoch": 11.956043956043956, "step": 816, "total_flos": 4.123334853511373e+18, "train_loss": 1.090085435147379, "train_runtime": 2115.1144, "train_samples_per_second": 24.697, "train_steps_per_second": 0.386 } ], "logging_steps": 10, "max_steps": 816, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.123334853511373e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }