{ "best_metric": 0.6137931034482759, "best_model_checkpoint": "convnextv2-base-22k-224-finetuned-tekno24/checkpoint-860", "epoch": 11.961661341853034, "eval_steps": 500, "global_step": 936, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12779552715654952, "grad_norm": 11.641097068786621, "learning_rate": 5.319148936170213e-06, "loss": 1.4077, "step": 10 }, { "epoch": 0.25559105431309903, "grad_norm": 10.328614234924316, "learning_rate": 1.0638297872340426e-05, "loss": 1.3907, "step": 20 }, { "epoch": 0.38338658146964855, "grad_norm": 11.496760368347168, "learning_rate": 1.595744680851064e-05, "loss": 1.3597, "step": 30 }, { "epoch": 0.5111821086261981, "grad_norm": 9.975175857543945, "learning_rate": 2.1276595744680852e-05, "loss": 1.354, "step": 40 }, { "epoch": 0.6389776357827476, "grad_norm": 11.381126403808594, "learning_rate": 2.6595744680851064e-05, "loss": 1.3266, "step": 50 }, { "epoch": 0.7667731629392971, "grad_norm": 9.771955490112305, "learning_rate": 3.191489361702128e-05, "loss": 1.291, "step": 60 }, { "epoch": 0.8945686900958466, "grad_norm": 21.0851993560791, "learning_rate": 3.617021276595745e-05, "loss": 1.3179, "step": 70 }, { "epoch": 0.9968051118210862, "eval_accuracy": 0.4206896551724138, "eval_f1": 0.39792953648489043, "eval_loss": 1.2415151596069336, "eval_precision": 0.4642161976696202, "eval_recall": 0.4206896551724138, "eval_runtime": 4.2118, "eval_samples_per_second": 103.281, "eval_steps_per_second": 6.648, "step": 78 }, { "epoch": 1.0223642172523961, "grad_norm": 19.948223114013672, "learning_rate": 4.148936170212766e-05, "loss": 1.2668, "step": 80 }, { "epoch": 1.1501597444089458, "grad_norm": 11.563603401184082, "learning_rate": 4.680851063829788e-05, "loss": 1.2202, "step": 90 }, { "epoch": 1.2779552715654952, "grad_norm": 9.605425834655762, "learning_rate": 4.97624703087886e-05, "loss": 1.2626, "step": 100 }, { "epoch": 1.4057507987220448, "grad_norm": 10.846478462219238, "learning_rate": 4.9168646080760093e-05, "loss": 1.251, "step": 110 }, { "epoch": 1.5335463258785942, "grad_norm": 10.546998023986816, "learning_rate": 4.8574821852731594e-05, "loss": 1.2136, "step": 120 }, { "epoch": 1.6613418530351438, "grad_norm": 10.678705215454102, "learning_rate": 4.798099762470309e-05, "loss": 1.1764, "step": 130 }, { "epoch": 1.7891373801916934, "grad_norm": 10.283668518066406, "learning_rate": 4.738717339667459e-05, "loss": 1.2624, "step": 140 }, { "epoch": 1.9169329073482428, "grad_norm": 10.07646656036377, "learning_rate": 4.679334916864608e-05, "loss": 1.1998, "step": 150 }, { "epoch": 1.9936102236421727, "eval_accuracy": 0.5103448275862069, "eval_f1": 0.4525112700085999, "eval_loss": 1.0768730640411377, "eval_precision": 0.5309344450319118, "eval_recall": 0.5103448275862069, "eval_runtime": 4.2293, "eval_samples_per_second": 102.853, "eval_steps_per_second": 6.62, "step": 156 }, { "epoch": 2.0447284345047922, "grad_norm": 10.981221199035645, "learning_rate": 4.6199524940617575e-05, "loss": 1.1287, "step": 160 }, { "epoch": 2.1725239616613417, "grad_norm": 6.460533142089844, "learning_rate": 4.5605700712589075e-05, "loss": 1.1806, "step": 170 }, { "epoch": 2.3003194888178915, "grad_norm": 7.450798511505127, "learning_rate": 4.501187648456057e-05, "loss": 1.1834, "step": 180 }, { "epoch": 2.428115015974441, "grad_norm": 5.972067832946777, "learning_rate": 4.441805225653207e-05, "loss": 1.1639, "step": 190 }, { "epoch": 2.5559105431309903, "grad_norm": 10.565897941589355, "learning_rate": 4.382422802850357e-05, "loss": 1.143, "step": 200 }, { "epoch": 2.68370607028754, "grad_norm": 11.751137733459473, "learning_rate": 4.323040380047506e-05, "loss": 1.1911, "step": 210 }, { "epoch": 2.8115015974440896, "grad_norm": 8.518805503845215, "learning_rate": 4.263657957244656e-05, "loss": 1.169, "step": 220 }, { "epoch": 2.939297124600639, "grad_norm": 7.631802558898926, "learning_rate": 4.204275534441806e-05, "loss": 1.168, "step": 230 }, { "epoch": 2.9904153354632586, "eval_accuracy": 0.5494252873563218, "eval_f1": 0.5033023418313557, "eval_loss": 1.0573328733444214, "eval_precision": 0.5604674717576669, "eval_recall": 0.5494252873563218, "eval_runtime": 4.2015, "eval_samples_per_second": 103.535, "eval_steps_per_second": 6.664, "step": 234 }, { "epoch": 3.0670926517571884, "grad_norm": 7.9839911460876465, "learning_rate": 4.144893111638955e-05, "loss": 1.1172, "step": 240 }, { "epoch": 3.194888178913738, "grad_norm": 8.082262992858887, "learning_rate": 4.0855106888361044e-05, "loss": 1.1659, "step": 250 }, { "epoch": 3.3226837060702876, "grad_norm": 5.319189548492432, "learning_rate": 4.0261282660332545e-05, "loss": 1.1493, "step": 260 }, { "epoch": 3.450479233226837, "grad_norm": 8.54591178894043, "learning_rate": 3.966745843230404e-05, "loss": 1.1158, "step": 270 }, { "epoch": 3.5782747603833864, "grad_norm": 9.336274147033691, "learning_rate": 3.907363420427554e-05, "loss": 1.1638, "step": 280 }, { "epoch": 3.7060702875399363, "grad_norm": 8.625086784362793, "learning_rate": 3.847980997624703e-05, "loss": 1.0932, "step": 290 }, { "epoch": 3.8338658146964857, "grad_norm": 9.026288032531738, "learning_rate": 3.7885985748218526e-05, "loss": 1.1056, "step": 300 }, { "epoch": 3.961661341853035, "grad_norm": 10.40346622467041, "learning_rate": 3.7292161520190026e-05, "loss": 1.1107, "step": 310 }, { "epoch": 4.0, "eval_accuracy": 0.5540229885057472, "eval_f1": 0.5163004360032211, "eval_loss": 0.9923611879348755, "eval_precision": 0.525717492734745, "eval_recall": 0.5540229885057472, "eval_runtime": 4.1795, "eval_samples_per_second": 104.079, "eval_steps_per_second": 6.699, "step": 313 }, { "epoch": 4.0894568690095845, "grad_norm": 6.684942722320557, "learning_rate": 3.669833729216152e-05, "loss": 1.1252, "step": 320 }, { "epoch": 4.217252396166134, "grad_norm": 10.904282569885254, "learning_rate": 3.6104513064133013e-05, "loss": 1.1098, "step": 330 }, { "epoch": 4.345047923322683, "grad_norm": 9.764264106750488, "learning_rate": 3.5510688836104514e-05, "loss": 1.096, "step": 340 }, { "epoch": 4.472843450479234, "grad_norm": 8.857853889465332, "learning_rate": 3.4916864608076014e-05, "loss": 1.0528, "step": 350 }, { "epoch": 4.600638977635783, "grad_norm": 7.500421524047852, "learning_rate": 3.432304038004751e-05, "loss": 1.1455, "step": 360 }, { "epoch": 4.728434504792332, "grad_norm": 9.362208366394043, "learning_rate": 3.372921615201901e-05, "loss": 1.0993, "step": 370 }, { "epoch": 4.856230031948882, "grad_norm": 9.38844108581543, "learning_rate": 3.31353919239905e-05, "loss": 1.0855, "step": 380 }, { "epoch": 4.984025559105431, "grad_norm": 6.451258659362793, "learning_rate": 3.2541567695961995e-05, "loss": 1.1062, "step": 390 }, { "epoch": 4.996805111821086, "eval_accuracy": 0.5747126436781609, "eval_f1": 0.5507051314754062, "eval_loss": 1.0018237829208374, "eval_precision": 0.5659922189344513, "eval_recall": 0.5747126436781609, "eval_runtime": 4.1822, "eval_samples_per_second": 104.012, "eval_steps_per_second": 6.695, "step": 391 }, { "epoch": 5.111821086261981, "grad_norm": 8.044933319091797, "learning_rate": 3.1947743467933496e-05, "loss": 1.0556, "step": 400 }, { "epoch": 5.23961661341853, "grad_norm": 9.200007438659668, "learning_rate": 3.135391923990499e-05, "loss": 1.056, "step": 410 }, { "epoch": 5.36741214057508, "grad_norm": 9.929868698120117, "learning_rate": 3.076009501187649e-05, "loss": 1.0098, "step": 420 }, { "epoch": 5.49520766773163, "grad_norm": 7.43991231918335, "learning_rate": 3.0166270783847983e-05, "loss": 1.1396, "step": 430 }, { "epoch": 5.623003194888179, "grad_norm": 7.019424915313721, "learning_rate": 2.9572446555819477e-05, "loss": 1.0659, "step": 440 }, { "epoch": 5.7507987220447285, "grad_norm": 7.227243900299072, "learning_rate": 2.8978622327790977e-05, "loss": 1.0453, "step": 450 }, { "epoch": 5.878594249201278, "grad_norm": 9.71042537689209, "learning_rate": 2.838479809976247e-05, "loss": 1.0331, "step": 460 }, { "epoch": 5.993610223642173, "eval_accuracy": 0.593103448275862, "eval_f1": 0.5768496494171164, "eval_loss": 0.9901189208030701, "eval_precision": 0.6202225107325855, "eval_recall": 0.593103448275862, "eval_runtime": 4.1789, "eval_samples_per_second": 104.095, "eval_steps_per_second": 6.7, "step": 469 }, { "epoch": 6.006389776357827, "grad_norm": 6.03436279296875, "learning_rate": 2.7790973871733968e-05, "loss": 1.135, "step": 470 }, { "epoch": 6.134185303514377, "grad_norm": 7.317747592926025, "learning_rate": 2.7197149643705465e-05, "loss": 1.0316, "step": 480 }, { "epoch": 6.261980830670926, "grad_norm": 7.093214511871338, "learning_rate": 2.6603325415676962e-05, "loss": 1.0445, "step": 490 }, { "epoch": 6.389776357827476, "grad_norm": 8.44990348815918, "learning_rate": 2.6009501187648455e-05, "loss": 1.064, "step": 500 }, { "epoch": 6.517571884984026, "grad_norm": 8.586387634277344, "learning_rate": 2.5415676959619956e-05, "loss": 1.0428, "step": 510 }, { "epoch": 6.645367412140575, "grad_norm": 8.286543846130371, "learning_rate": 2.482185273159145e-05, "loss": 1.0424, "step": 520 }, { "epoch": 6.773162939297125, "grad_norm": 11.05722427368164, "learning_rate": 2.4228028503562946e-05, "loss": 1.0486, "step": 530 }, { "epoch": 6.900958466453674, "grad_norm": 11.715845108032227, "learning_rate": 2.3634204275534443e-05, "loss": 1.0409, "step": 540 }, { "epoch": 6.9904153354632586, "eval_accuracy": 0.5747126436781609, "eval_f1": 0.5723268617274374, "eval_loss": 0.9633908867835999, "eval_precision": 0.5722167544506434, "eval_recall": 0.5747126436781609, "eval_runtime": 4.2036, "eval_samples_per_second": 103.482, "eval_steps_per_second": 6.661, "step": 547 }, { "epoch": 7.0287539936102235, "grad_norm": 9.900256156921387, "learning_rate": 2.3040380047505937e-05, "loss": 1.021, "step": 550 }, { "epoch": 7.156549520766773, "grad_norm": 11.22358226776123, "learning_rate": 2.2446555819477437e-05, "loss": 1.0387, "step": 560 }, { "epoch": 7.284345047923322, "grad_norm": 6.466274738311768, "learning_rate": 2.1852731591448934e-05, "loss": 1.0166, "step": 570 }, { "epoch": 7.412140575079873, "grad_norm": 9.685991287231445, "learning_rate": 2.1258907363420428e-05, "loss": 1.0459, "step": 580 }, { "epoch": 7.539936102236422, "grad_norm": 9.211965560913086, "learning_rate": 2.0665083135391925e-05, "loss": 0.9852, "step": 590 }, { "epoch": 7.667731629392971, "grad_norm": 9.930057525634766, "learning_rate": 2.0071258907363422e-05, "loss": 1.0176, "step": 600 }, { "epoch": 7.795527156549521, "grad_norm": 12.529800415039062, "learning_rate": 1.947743467933492e-05, "loss": 1.0465, "step": 610 }, { "epoch": 7.92332268370607, "grad_norm": 9.018549919128418, "learning_rate": 1.8883610451306412e-05, "loss": 1.0176, "step": 620 }, { "epoch": 8.0, "eval_accuracy": 0.593103448275862, "eval_f1": 0.5833555266825051, "eval_loss": 0.9504066705703735, "eval_precision": 0.5813528993323179, "eval_recall": 0.593103448275862, "eval_runtime": 4.1774, "eval_samples_per_second": 104.131, "eval_steps_per_second": 6.703, "step": 626 }, { "epoch": 8.05111821086262, "grad_norm": 8.328828811645508, "learning_rate": 1.828978622327791e-05, "loss": 0.9516, "step": 630 }, { "epoch": 8.178913738019169, "grad_norm": 7.750000476837158, "learning_rate": 1.7695961995249406e-05, "loss": 0.972, "step": 640 }, { "epoch": 8.30670926517572, "grad_norm": 6.95557165145874, "learning_rate": 1.7102137767220903e-05, "loss": 0.9842, "step": 650 }, { "epoch": 8.434504792332268, "grad_norm": 8.893524169921875, "learning_rate": 1.65083135391924e-05, "loss": 1.02, "step": 660 }, { "epoch": 8.562300319488818, "grad_norm": 9.984440803527832, "learning_rate": 1.5914489311163897e-05, "loss": 0.97, "step": 670 }, { "epoch": 8.690095846645367, "grad_norm": 8.314949989318848, "learning_rate": 1.5320665083135394e-05, "loss": 1.0366, "step": 680 }, { "epoch": 8.817891373801917, "grad_norm": 9.685540199279785, "learning_rate": 1.4726840855106888e-05, "loss": 1.0284, "step": 690 }, { "epoch": 8.945686900958467, "grad_norm": 10.43076229095459, "learning_rate": 1.4133016627078385e-05, "loss": 0.995, "step": 700 }, { "epoch": 8.996805111821086, "eval_accuracy": 0.5908045977011495, "eval_f1": 0.5853582114263199, "eval_loss": 0.9584209322929382, "eval_precision": 0.5853113777126823, "eval_recall": 0.5908045977011495, "eval_runtime": 4.2105, "eval_samples_per_second": 103.314, "eval_steps_per_second": 6.65, "step": 704 }, { "epoch": 9.073482428115016, "grad_norm": 8.847938537597656, "learning_rate": 1.3539192399049882e-05, "loss": 0.9524, "step": 710 }, { "epoch": 9.201277955271566, "grad_norm": 8.750248908996582, "learning_rate": 1.2945368171021377e-05, "loss": 0.9846, "step": 720 }, { "epoch": 9.329073482428115, "grad_norm": 9.161256790161133, "learning_rate": 1.2351543942992874e-05, "loss": 0.9751, "step": 730 }, { "epoch": 9.456869009584665, "grad_norm": 9.544412612915039, "learning_rate": 1.1757719714964371e-05, "loss": 0.9417, "step": 740 }, { "epoch": 9.584664536741213, "grad_norm": 9.703606605529785, "learning_rate": 1.1163895486935868e-05, "loss": 0.9707, "step": 750 }, { "epoch": 9.712460063897764, "grad_norm": 9.677326202392578, "learning_rate": 1.0570071258907365e-05, "loss": 1.0009, "step": 760 }, { "epoch": 9.840255591054314, "grad_norm": 8.860432624816895, "learning_rate": 9.97624703087886e-06, "loss": 0.9817, "step": 770 }, { "epoch": 9.968051118210862, "grad_norm": 8.743229866027832, "learning_rate": 9.382422802850356e-06, "loss": 0.9937, "step": 780 }, { "epoch": 9.993610223642172, "eval_accuracy": 0.6022988505747127, "eval_f1": 0.5934331301305344, "eval_loss": 0.9338871240615845, "eval_precision": 0.5893665058889589, "eval_recall": 0.6022988505747127, "eval_runtime": 4.2268, "eval_samples_per_second": 102.915, "eval_steps_per_second": 6.624, "step": 782 }, { "epoch": 10.095846645367413, "grad_norm": 7.1755900382995605, "learning_rate": 8.788598574821852e-06, "loss": 0.9908, "step": 790 }, { "epoch": 10.223642172523961, "grad_norm": 8.917673110961914, "learning_rate": 8.19477434679335e-06, "loss": 0.9361, "step": 800 }, { "epoch": 10.351437699680512, "grad_norm": 15.518793106079102, "learning_rate": 7.6009501187648464e-06, "loss": 0.9495, "step": 810 }, { "epoch": 10.47923322683706, "grad_norm": 12.074665069580078, "learning_rate": 7.007125890736342e-06, "loss": 0.9689, "step": 820 }, { "epoch": 10.60702875399361, "grad_norm": 9.314558982849121, "learning_rate": 6.4133016627078396e-06, "loss": 0.983, "step": 830 }, { "epoch": 10.73482428115016, "grad_norm": 9.714004516601562, "learning_rate": 5.819477434679335e-06, "loss": 1.0, "step": 840 }, { "epoch": 10.86261980830671, "grad_norm": 9.66527271270752, "learning_rate": 5.225653206650832e-06, "loss": 0.9036, "step": 850 }, { "epoch": 10.99041533546326, "grad_norm": 11.544416427612305, "learning_rate": 4.631828978622328e-06, "loss": 0.9387, "step": 860 }, { "epoch": 10.99041533546326, "eval_accuracy": 0.6137931034482759, "eval_f1": 0.5996477650763593, "eval_loss": 0.9119637608528137, "eval_precision": 0.596855024118962, "eval_recall": 0.6137931034482759, "eval_runtime": 4.2091, "eval_samples_per_second": 103.347, "eval_steps_per_second": 6.652, "step": 860 }, { "epoch": 11.118210862619808, "grad_norm": 9.786779403686523, "learning_rate": 4.038004750593825e-06, "loss": 0.911, "step": 870 }, { "epoch": 11.246006389776358, "grad_norm": 10.40623664855957, "learning_rate": 3.4441805225653207e-06, "loss": 0.9683, "step": 880 }, { "epoch": 11.373801916932907, "grad_norm": 10.116272926330566, "learning_rate": 2.850356294536817e-06, "loss": 0.9074, "step": 890 }, { "epoch": 11.501597444089457, "grad_norm": 9.423429489135742, "learning_rate": 2.2565320665083133e-06, "loss": 0.908, "step": 900 }, { "epoch": 11.629392971246006, "grad_norm": 11.146402359008789, "learning_rate": 1.6627078384798101e-06, "loss": 0.9744, "step": 910 }, { "epoch": 11.757188498402556, "grad_norm": 11.125927925109863, "learning_rate": 1.0688836104513065e-06, "loss": 0.9541, "step": 920 }, { "epoch": 11.884984025559106, "grad_norm": 10.135693550109863, "learning_rate": 4.750593824228029e-07, "loss": 0.9324, "step": 930 }, { "epoch": 11.961661341853034, "eval_accuracy": 0.5954022988505747, "eval_f1": 0.5878627034099811, "eval_loss": 0.9134895205497742, "eval_precision": 0.5865253155328708, "eval_recall": 0.5954022988505747, "eval_runtime": 4.3073, "eval_samples_per_second": 100.992, "eval_steps_per_second": 6.501, "step": 936 }, { "epoch": 11.961661341853034, "step": 936, "total_flos": 4.743827088137626e+18, "train_loss": 1.079931161342523, "train_runtime": 2308.5403, "train_samples_per_second": 26.027, "train_steps_per_second": 0.405 } ], "logging_steps": 10, "max_steps": 936, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.743827088137626e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }