|
{ |
|
"best_metric": 0.6137931034482759, |
|
"best_model_checkpoint": "convnextv2-base-22k-224-finetuned-tekno24/checkpoint-860", |
|
"epoch": 11.961661341853034, |
|
"eval_steps": 500, |
|
"global_step": 936, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12779552715654952, |
|
"grad_norm": 11.641097068786621, |
|
"learning_rate": 5.319148936170213e-06, |
|
"loss": 1.4077, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.25559105431309903, |
|
"grad_norm": 10.328614234924316, |
|
"learning_rate": 1.0638297872340426e-05, |
|
"loss": 1.3907, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.38338658146964855, |
|
"grad_norm": 11.496760368347168, |
|
"learning_rate": 1.595744680851064e-05, |
|
"loss": 1.3597, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5111821086261981, |
|
"grad_norm": 9.975175857543945, |
|
"learning_rate": 2.1276595744680852e-05, |
|
"loss": 1.354, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6389776357827476, |
|
"grad_norm": 11.381126403808594, |
|
"learning_rate": 2.6595744680851064e-05, |
|
"loss": 1.3266, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7667731629392971, |
|
"grad_norm": 9.771955490112305, |
|
"learning_rate": 3.191489361702128e-05, |
|
"loss": 1.291, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8945686900958466, |
|
"grad_norm": 21.0851993560791, |
|
"learning_rate": 3.617021276595745e-05, |
|
"loss": 1.3179, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9968051118210862, |
|
"eval_accuracy": 0.4206896551724138, |
|
"eval_f1": 0.39792953648489043, |
|
"eval_loss": 1.2415151596069336, |
|
"eval_precision": 0.4642161976696202, |
|
"eval_recall": 0.4206896551724138, |
|
"eval_runtime": 4.2118, |
|
"eval_samples_per_second": 103.281, |
|
"eval_steps_per_second": 6.648, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.0223642172523961, |
|
"grad_norm": 19.948223114013672, |
|
"learning_rate": 4.148936170212766e-05, |
|
"loss": 1.2668, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.1501597444089458, |
|
"grad_norm": 11.563603401184082, |
|
"learning_rate": 4.680851063829788e-05, |
|
"loss": 1.2202, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.2779552715654952, |
|
"grad_norm": 9.605425834655762, |
|
"learning_rate": 4.97624703087886e-05, |
|
"loss": 1.2626, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.4057507987220448, |
|
"grad_norm": 10.846478462219238, |
|
"learning_rate": 4.9168646080760093e-05, |
|
"loss": 1.251, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.5335463258785942, |
|
"grad_norm": 10.546998023986816, |
|
"learning_rate": 4.8574821852731594e-05, |
|
"loss": 1.2136, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.6613418530351438, |
|
"grad_norm": 10.678705215454102, |
|
"learning_rate": 4.798099762470309e-05, |
|
"loss": 1.1764, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.7891373801916934, |
|
"grad_norm": 10.283668518066406, |
|
"learning_rate": 4.738717339667459e-05, |
|
"loss": 1.2624, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.9169329073482428, |
|
"grad_norm": 10.07646656036377, |
|
"learning_rate": 4.679334916864608e-05, |
|
"loss": 1.1998, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.9936102236421727, |
|
"eval_accuracy": 0.5103448275862069, |
|
"eval_f1": 0.4525112700085999, |
|
"eval_loss": 1.0768730640411377, |
|
"eval_precision": 0.5309344450319118, |
|
"eval_recall": 0.5103448275862069, |
|
"eval_runtime": 4.2293, |
|
"eval_samples_per_second": 102.853, |
|
"eval_steps_per_second": 6.62, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.0447284345047922, |
|
"grad_norm": 10.981221199035645, |
|
"learning_rate": 4.6199524940617575e-05, |
|
"loss": 1.1287, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.1725239616613417, |
|
"grad_norm": 6.460533142089844, |
|
"learning_rate": 4.5605700712589075e-05, |
|
"loss": 1.1806, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.3003194888178915, |
|
"grad_norm": 7.450798511505127, |
|
"learning_rate": 4.501187648456057e-05, |
|
"loss": 1.1834, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.428115015974441, |
|
"grad_norm": 5.972067832946777, |
|
"learning_rate": 4.441805225653207e-05, |
|
"loss": 1.1639, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.5559105431309903, |
|
"grad_norm": 10.565897941589355, |
|
"learning_rate": 4.382422802850357e-05, |
|
"loss": 1.143, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.68370607028754, |
|
"grad_norm": 11.751137733459473, |
|
"learning_rate": 4.323040380047506e-05, |
|
"loss": 1.1911, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.8115015974440896, |
|
"grad_norm": 8.518805503845215, |
|
"learning_rate": 4.263657957244656e-05, |
|
"loss": 1.169, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.939297124600639, |
|
"grad_norm": 7.631802558898926, |
|
"learning_rate": 4.204275534441806e-05, |
|
"loss": 1.168, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.9904153354632586, |
|
"eval_accuracy": 0.5494252873563218, |
|
"eval_f1": 0.5033023418313557, |
|
"eval_loss": 1.0573328733444214, |
|
"eval_precision": 0.5604674717576669, |
|
"eval_recall": 0.5494252873563218, |
|
"eval_runtime": 4.2015, |
|
"eval_samples_per_second": 103.535, |
|
"eval_steps_per_second": 6.664, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 3.0670926517571884, |
|
"grad_norm": 7.9839911460876465, |
|
"learning_rate": 4.144893111638955e-05, |
|
"loss": 1.1172, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.194888178913738, |
|
"grad_norm": 8.082262992858887, |
|
"learning_rate": 4.0855106888361044e-05, |
|
"loss": 1.1659, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.3226837060702876, |
|
"grad_norm": 5.319189548492432, |
|
"learning_rate": 4.0261282660332545e-05, |
|
"loss": 1.1493, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.450479233226837, |
|
"grad_norm": 8.54591178894043, |
|
"learning_rate": 3.966745843230404e-05, |
|
"loss": 1.1158, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.5782747603833864, |
|
"grad_norm": 9.336274147033691, |
|
"learning_rate": 3.907363420427554e-05, |
|
"loss": 1.1638, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.7060702875399363, |
|
"grad_norm": 8.625086784362793, |
|
"learning_rate": 3.847980997624703e-05, |
|
"loss": 1.0932, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.8338658146964857, |
|
"grad_norm": 9.026288032531738, |
|
"learning_rate": 3.7885985748218526e-05, |
|
"loss": 1.1056, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.961661341853035, |
|
"grad_norm": 10.40346622467041, |
|
"learning_rate": 3.7292161520190026e-05, |
|
"loss": 1.1107, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5540229885057472, |
|
"eval_f1": 0.5163004360032211, |
|
"eval_loss": 0.9923611879348755, |
|
"eval_precision": 0.525717492734745, |
|
"eval_recall": 0.5540229885057472, |
|
"eval_runtime": 4.1795, |
|
"eval_samples_per_second": 104.079, |
|
"eval_steps_per_second": 6.699, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 4.0894568690095845, |
|
"grad_norm": 6.684942722320557, |
|
"learning_rate": 3.669833729216152e-05, |
|
"loss": 1.1252, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.217252396166134, |
|
"grad_norm": 10.904282569885254, |
|
"learning_rate": 3.6104513064133013e-05, |
|
"loss": 1.1098, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.345047923322683, |
|
"grad_norm": 9.764264106750488, |
|
"learning_rate": 3.5510688836104514e-05, |
|
"loss": 1.096, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.472843450479234, |
|
"grad_norm": 8.857853889465332, |
|
"learning_rate": 3.4916864608076014e-05, |
|
"loss": 1.0528, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.600638977635783, |
|
"grad_norm": 7.500421524047852, |
|
"learning_rate": 3.432304038004751e-05, |
|
"loss": 1.1455, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.728434504792332, |
|
"grad_norm": 9.362208366394043, |
|
"learning_rate": 3.372921615201901e-05, |
|
"loss": 1.0993, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.856230031948882, |
|
"grad_norm": 9.38844108581543, |
|
"learning_rate": 3.31353919239905e-05, |
|
"loss": 1.0855, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.984025559105431, |
|
"grad_norm": 6.451258659362793, |
|
"learning_rate": 3.2541567695961995e-05, |
|
"loss": 1.1062, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.996805111821086, |
|
"eval_accuracy": 0.5747126436781609, |
|
"eval_f1": 0.5507051314754062, |
|
"eval_loss": 1.0018237829208374, |
|
"eval_precision": 0.5659922189344513, |
|
"eval_recall": 0.5747126436781609, |
|
"eval_runtime": 4.1822, |
|
"eval_samples_per_second": 104.012, |
|
"eval_steps_per_second": 6.695, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 5.111821086261981, |
|
"grad_norm": 8.044933319091797, |
|
"learning_rate": 3.1947743467933496e-05, |
|
"loss": 1.0556, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.23961661341853, |
|
"grad_norm": 9.200007438659668, |
|
"learning_rate": 3.135391923990499e-05, |
|
"loss": 1.056, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.36741214057508, |
|
"grad_norm": 9.929868698120117, |
|
"learning_rate": 3.076009501187649e-05, |
|
"loss": 1.0098, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.49520766773163, |
|
"grad_norm": 7.43991231918335, |
|
"learning_rate": 3.0166270783847983e-05, |
|
"loss": 1.1396, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 5.623003194888179, |
|
"grad_norm": 7.019424915313721, |
|
"learning_rate": 2.9572446555819477e-05, |
|
"loss": 1.0659, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 5.7507987220447285, |
|
"grad_norm": 7.227243900299072, |
|
"learning_rate": 2.8978622327790977e-05, |
|
"loss": 1.0453, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 5.878594249201278, |
|
"grad_norm": 9.71042537689209, |
|
"learning_rate": 2.838479809976247e-05, |
|
"loss": 1.0331, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.993610223642173, |
|
"eval_accuracy": 0.593103448275862, |
|
"eval_f1": 0.5768496494171164, |
|
"eval_loss": 0.9901189208030701, |
|
"eval_precision": 0.6202225107325855, |
|
"eval_recall": 0.593103448275862, |
|
"eval_runtime": 4.1789, |
|
"eval_samples_per_second": 104.095, |
|
"eval_steps_per_second": 6.7, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 6.006389776357827, |
|
"grad_norm": 6.03436279296875, |
|
"learning_rate": 2.7790973871733968e-05, |
|
"loss": 1.135, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.134185303514377, |
|
"grad_norm": 7.317747592926025, |
|
"learning_rate": 2.7197149643705465e-05, |
|
"loss": 1.0316, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.261980830670926, |
|
"grad_norm": 7.093214511871338, |
|
"learning_rate": 2.6603325415676962e-05, |
|
"loss": 1.0445, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.389776357827476, |
|
"grad_norm": 8.44990348815918, |
|
"learning_rate": 2.6009501187648455e-05, |
|
"loss": 1.064, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.517571884984026, |
|
"grad_norm": 8.586387634277344, |
|
"learning_rate": 2.5415676959619956e-05, |
|
"loss": 1.0428, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 6.645367412140575, |
|
"grad_norm": 8.286543846130371, |
|
"learning_rate": 2.482185273159145e-05, |
|
"loss": 1.0424, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 6.773162939297125, |
|
"grad_norm": 11.05722427368164, |
|
"learning_rate": 2.4228028503562946e-05, |
|
"loss": 1.0486, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.900958466453674, |
|
"grad_norm": 11.715845108032227, |
|
"learning_rate": 2.3634204275534443e-05, |
|
"loss": 1.0409, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 6.9904153354632586, |
|
"eval_accuracy": 0.5747126436781609, |
|
"eval_f1": 0.5723268617274374, |
|
"eval_loss": 0.9633908867835999, |
|
"eval_precision": 0.5722167544506434, |
|
"eval_recall": 0.5747126436781609, |
|
"eval_runtime": 4.2036, |
|
"eval_samples_per_second": 103.482, |
|
"eval_steps_per_second": 6.661, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 7.0287539936102235, |
|
"grad_norm": 9.900256156921387, |
|
"learning_rate": 2.3040380047505937e-05, |
|
"loss": 1.021, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 7.156549520766773, |
|
"grad_norm": 11.22358226776123, |
|
"learning_rate": 2.2446555819477437e-05, |
|
"loss": 1.0387, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 7.284345047923322, |
|
"grad_norm": 6.466274738311768, |
|
"learning_rate": 2.1852731591448934e-05, |
|
"loss": 1.0166, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 7.412140575079873, |
|
"grad_norm": 9.685991287231445, |
|
"learning_rate": 2.1258907363420428e-05, |
|
"loss": 1.0459, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 7.539936102236422, |
|
"grad_norm": 9.211965560913086, |
|
"learning_rate": 2.0665083135391925e-05, |
|
"loss": 0.9852, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 7.667731629392971, |
|
"grad_norm": 9.930057525634766, |
|
"learning_rate": 2.0071258907363422e-05, |
|
"loss": 1.0176, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 7.795527156549521, |
|
"grad_norm": 12.529800415039062, |
|
"learning_rate": 1.947743467933492e-05, |
|
"loss": 1.0465, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 7.92332268370607, |
|
"grad_norm": 9.018549919128418, |
|
"learning_rate": 1.8883610451306412e-05, |
|
"loss": 1.0176, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.593103448275862, |
|
"eval_f1": 0.5833555266825051, |
|
"eval_loss": 0.9504066705703735, |
|
"eval_precision": 0.5813528993323179, |
|
"eval_recall": 0.593103448275862, |
|
"eval_runtime": 4.1774, |
|
"eval_samples_per_second": 104.131, |
|
"eval_steps_per_second": 6.703, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 8.05111821086262, |
|
"grad_norm": 8.328828811645508, |
|
"learning_rate": 1.828978622327791e-05, |
|
"loss": 0.9516, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 8.178913738019169, |
|
"grad_norm": 7.750000476837158, |
|
"learning_rate": 1.7695961995249406e-05, |
|
"loss": 0.972, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 8.30670926517572, |
|
"grad_norm": 6.95557165145874, |
|
"learning_rate": 1.7102137767220903e-05, |
|
"loss": 0.9842, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 8.434504792332268, |
|
"grad_norm": 8.893524169921875, |
|
"learning_rate": 1.65083135391924e-05, |
|
"loss": 1.02, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 8.562300319488818, |
|
"grad_norm": 9.984440803527832, |
|
"learning_rate": 1.5914489311163897e-05, |
|
"loss": 0.97, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 8.690095846645367, |
|
"grad_norm": 8.314949989318848, |
|
"learning_rate": 1.5320665083135394e-05, |
|
"loss": 1.0366, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 8.817891373801917, |
|
"grad_norm": 9.685540199279785, |
|
"learning_rate": 1.4726840855106888e-05, |
|
"loss": 1.0284, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 8.945686900958467, |
|
"grad_norm": 10.43076229095459, |
|
"learning_rate": 1.4133016627078385e-05, |
|
"loss": 0.995, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 8.996805111821086, |
|
"eval_accuracy": 0.5908045977011495, |
|
"eval_f1": 0.5853582114263199, |
|
"eval_loss": 0.9584209322929382, |
|
"eval_precision": 0.5853113777126823, |
|
"eval_recall": 0.5908045977011495, |
|
"eval_runtime": 4.2105, |
|
"eval_samples_per_second": 103.314, |
|
"eval_steps_per_second": 6.65, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 9.073482428115016, |
|
"grad_norm": 8.847938537597656, |
|
"learning_rate": 1.3539192399049882e-05, |
|
"loss": 0.9524, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 9.201277955271566, |
|
"grad_norm": 8.750248908996582, |
|
"learning_rate": 1.2945368171021377e-05, |
|
"loss": 0.9846, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 9.329073482428115, |
|
"grad_norm": 9.161256790161133, |
|
"learning_rate": 1.2351543942992874e-05, |
|
"loss": 0.9751, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 9.456869009584665, |
|
"grad_norm": 9.544412612915039, |
|
"learning_rate": 1.1757719714964371e-05, |
|
"loss": 0.9417, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 9.584664536741213, |
|
"grad_norm": 9.703606605529785, |
|
"learning_rate": 1.1163895486935868e-05, |
|
"loss": 0.9707, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 9.712460063897764, |
|
"grad_norm": 9.677326202392578, |
|
"learning_rate": 1.0570071258907365e-05, |
|
"loss": 1.0009, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 9.840255591054314, |
|
"grad_norm": 8.860432624816895, |
|
"learning_rate": 9.97624703087886e-06, |
|
"loss": 0.9817, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 9.968051118210862, |
|
"grad_norm": 8.743229866027832, |
|
"learning_rate": 9.382422802850356e-06, |
|
"loss": 0.9937, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 9.993610223642172, |
|
"eval_accuracy": 0.6022988505747127, |
|
"eval_f1": 0.5934331301305344, |
|
"eval_loss": 0.9338871240615845, |
|
"eval_precision": 0.5893665058889589, |
|
"eval_recall": 0.6022988505747127, |
|
"eval_runtime": 4.2268, |
|
"eval_samples_per_second": 102.915, |
|
"eval_steps_per_second": 6.624, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 10.095846645367413, |
|
"grad_norm": 7.1755900382995605, |
|
"learning_rate": 8.788598574821852e-06, |
|
"loss": 0.9908, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 10.223642172523961, |
|
"grad_norm": 8.917673110961914, |
|
"learning_rate": 8.19477434679335e-06, |
|
"loss": 0.9361, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 10.351437699680512, |
|
"grad_norm": 15.518793106079102, |
|
"learning_rate": 7.6009501187648464e-06, |
|
"loss": 0.9495, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 10.47923322683706, |
|
"grad_norm": 12.074665069580078, |
|
"learning_rate": 7.007125890736342e-06, |
|
"loss": 0.9689, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 10.60702875399361, |
|
"grad_norm": 9.314558982849121, |
|
"learning_rate": 6.4133016627078396e-06, |
|
"loss": 0.983, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 10.73482428115016, |
|
"grad_norm": 9.714004516601562, |
|
"learning_rate": 5.819477434679335e-06, |
|
"loss": 1.0, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 10.86261980830671, |
|
"grad_norm": 9.66527271270752, |
|
"learning_rate": 5.225653206650832e-06, |
|
"loss": 0.9036, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 10.99041533546326, |
|
"grad_norm": 11.544416427612305, |
|
"learning_rate": 4.631828978622328e-06, |
|
"loss": 0.9387, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 10.99041533546326, |
|
"eval_accuracy": 0.6137931034482759, |
|
"eval_f1": 0.5996477650763593, |
|
"eval_loss": 0.9119637608528137, |
|
"eval_precision": 0.596855024118962, |
|
"eval_recall": 0.6137931034482759, |
|
"eval_runtime": 4.2091, |
|
"eval_samples_per_second": 103.347, |
|
"eval_steps_per_second": 6.652, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 11.118210862619808, |
|
"grad_norm": 9.786779403686523, |
|
"learning_rate": 4.038004750593825e-06, |
|
"loss": 0.911, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 11.246006389776358, |
|
"grad_norm": 10.40623664855957, |
|
"learning_rate": 3.4441805225653207e-06, |
|
"loss": 0.9683, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 11.373801916932907, |
|
"grad_norm": 10.116272926330566, |
|
"learning_rate": 2.850356294536817e-06, |
|
"loss": 0.9074, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 11.501597444089457, |
|
"grad_norm": 9.423429489135742, |
|
"learning_rate": 2.2565320665083133e-06, |
|
"loss": 0.908, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 11.629392971246006, |
|
"grad_norm": 11.146402359008789, |
|
"learning_rate": 1.6627078384798101e-06, |
|
"loss": 0.9744, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 11.757188498402556, |
|
"grad_norm": 11.125927925109863, |
|
"learning_rate": 1.0688836104513065e-06, |
|
"loss": 0.9541, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 11.884984025559106, |
|
"grad_norm": 10.135693550109863, |
|
"learning_rate": 4.750593824228029e-07, |
|
"loss": 0.9324, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 11.961661341853034, |
|
"eval_accuracy": 0.5954022988505747, |
|
"eval_f1": 0.5878627034099811, |
|
"eval_loss": 0.9134895205497742, |
|
"eval_precision": 0.5865253155328708, |
|
"eval_recall": 0.5954022988505747, |
|
"eval_runtime": 4.3073, |
|
"eval_samples_per_second": 100.992, |
|
"eval_steps_per_second": 6.501, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 11.961661341853034, |
|
"step": 936, |
|
"total_flos": 4.743827088137626e+18, |
|
"train_loss": 1.079931161342523, |
|
"train_runtime": 2308.5403, |
|
"train_samples_per_second": 26.027, |
|
"train_steps_per_second": 0.405 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 936, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.743827088137626e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|