{
"best_metric": 0.6137931034482759,
"best_model_checkpoint": "convnextv2-base-22k-224-finetuned-tekno24/checkpoint-860",
"epoch": 11.961661341853034,
"eval_steps": 500,
"global_step": 936,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12779552715654952,
"grad_norm": 11.641097068786621,
"learning_rate": 5.319148936170213e-06,
"loss": 1.4077,
"step": 10
},
{
"epoch": 0.25559105431309903,
"grad_norm": 10.328614234924316,
"learning_rate": 1.0638297872340426e-05,
"loss": 1.3907,
"step": 20
},
{
"epoch": 0.38338658146964855,
"grad_norm": 11.496760368347168,
"learning_rate": 1.595744680851064e-05,
"loss": 1.3597,
"step": 30
},
{
"epoch": 0.5111821086261981,
"grad_norm": 9.975175857543945,
"learning_rate": 2.1276595744680852e-05,
"loss": 1.354,
"step": 40
},
{
"epoch": 0.6389776357827476,
"grad_norm": 11.381126403808594,
"learning_rate": 2.6595744680851064e-05,
"loss": 1.3266,
"step": 50
},
{
"epoch": 0.7667731629392971,
"grad_norm": 9.771955490112305,
"learning_rate": 3.191489361702128e-05,
"loss": 1.291,
"step": 60
},
{
"epoch": 0.8945686900958466,
"grad_norm": 21.0851993560791,
"learning_rate": 3.617021276595745e-05,
"loss": 1.3179,
"step": 70
},
{
"epoch": 0.9968051118210862,
"eval_accuracy": 0.4206896551724138,
"eval_f1": 0.39792953648489043,
"eval_loss": 1.2415151596069336,
"eval_precision": 0.4642161976696202,
"eval_recall": 0.4206896551724138,
"eval_runtime": 4.2118,
"eval_samples_per_second": 103.281,
"eval_steps_per_second": 6.648,
"step": 78
},
{
"epoch": 1.0223642172523961,
"grad_norm": 19.948223114013672,
"learning_rate": 4.148936170212766e-05,
"loss": 1.2668,
"step": 80
},
{
"epoch": 1.1501597444089458,
"grad_norm": 11.563603401184082,
"learning_rate": 4.680851063829788e-05,
"loss": 1.2202,
"step": 90
},
{
"epoch": 1.2779552715654952,
"grad_norm": 9.605425834655762,
"learning_rate": 4.97624703087886e-05,
"loss": 1.2626,
"step": 100
},
{
"epoch": 1.4057507987220448,
"grad_norm": 10.846478462219238,
"learning_rate": 4.9168646080760093e-05,
"loss": 1.251,
"step": 110
},
{
"epoch": 1.5335463258785942,
"grad_norm": 10.546998023986816,
"learning_rate": 4.8574821852731594e-05,
"loss": 1.2136,
"step": 120
},
{
"epoch": 1.6613418530351438,
"grad_norm": 10.678705215454102,
"learning_rate": 4.798099762470309e-05,
"loss": 1.1764,
"step": 130
},
{
"epoch": 1.7891373801916934,
"grad_norm": 10.283668518066406,
"learning_rate": 4.738717339667459e-05,
"loss": 1.2624,
"step": 140
},
{
"epoch": 1.9169329073482428,
"grad_norm": 10.07646656036377,
"learning_rate": 4.679334916864608e-05,
"loss": 1.1998,
"step": 150
},
{
"epoch": 1.9936102236421727,
"eval_accuracy": 0.5103448275862069,
"eval_f1": 0.4525112700085999,
"eval_loss": 1.0768730640411377,
"eval_precision": 0.5309344450319118,
"eval_recall": 0.5103448275862069,
"eval_runtime": 4.2293,
"eval_samples_per_second": 102.853,
"eval_steps_per_second": 6.62,
"step": 156
},
{
"epoch": 2.0447284345047922,
"grad_norm": 10.981221199035645,
"learning_rate": 4.6199524940617575e-05,
"loss": 1.1287,
"step": 160
},
{
"epoch": 2.1725239616613417,
"grad_norm": 6.460533142089844,
"learning_rate": 4.5605700712589075e-05,
"loss": 1.1806,
"step": 170
},
{
"epoch": 2.3003194888178915,
"grad_norm": 7.450798511505127,
"learning_rate": 4.501187648456057e-05,
"loss": 1.1834,
"step": 180
},
{
"epoch": 2.428115015974441,
"grad_norm": 5.972067832946777,
"learning_rate": 4.441805225653207e-05,
"loss": 1.1639,
"step": 190
},
{
"epoch": 2.5559105431309903,
"grad_norm": 10.565897941589355,
"learning_rate": 4.382422802850357e-05,
"loss": 1.143,
"step": 200
},
{
"epoch": 2.68370607028754,
"grad_norm": 11.751137733459473,
"learning_rate": 4.323040380047506e-05,
"loss": 1.1911,
"step": 210
},
{
"epoch": 2.8115015974440896,
"grad_norm": 8.518805503845215,
"learning_rate": 4.263657957244656e-05,
"loss": 1.169,
"step": 220
},
{
"epoch": 2.939297124600639,
"grad_norm": 7.631802558898926,
"learning_rate": 4.204275534441806e-05,
"loss": 1.168,
"step": 230
},
{
"epoch": 2.9904153354632586,
"eval_accuracy": 0.5494252873563218,
"eval_f1": 0.5033023418313557,
"eval_loss": 1.0573328733444214,
"eval_precision": 0.5604674717576669,
"eval_recall": 0.5494252873563218,
"eval_runtime": 4.2015,
"eval_samples_per_second": 103.535,
"eval_steps_per_second": 6.664,
"step": 234
},
{
"epoch": 3.0670926517571884,
"grad_norm": 7.9839911460876465,
"learning_rate": 4.144893111638955e-05,
"loss": 1.1172,
"step": 240
},
{
"epoch": 3.194888178913738,
"grad_norm": 8.082262992858887,
"learning_rate": 4.0855106888361044e-05,
"loss": 1.1659,
"step": 250
},
{
"epoch": 3.3226837060702876,
"grad_norm": 5.319189548492432,
"learning_rate": 4.0261282660332545e-05,
"loss": 1.1493,
"step": 260
},
{
"epoch": 3.450479233226837,
"grad_norm": 8.54591178894043,
"learning_rate": 3.966745843230404e-05,
"loss": 1.1158,
"step": 270
},
{
"epoch": 3.5782747603833864,
"grad_norm": 9.336274147033691,
"learning_rate": 3.907363420427554e-05,
"loss": 1.1638,
"step": 280
},
{
"epoch": 3.7060702875399363,
"grad_norm": 8.625086784362793,
"learning_rate": 3.847980997624703e-05,
"loss": 1.0932,
"step": 290
},
{
"epoch": 3.8338658146964857,
"grad_norm": 9.026288032531738,
"learning_rate": 3.7885985748218526e-05,
"loss": 1.1056,
"step": 300
},
{
"epoch": 3.961661341853035,
"grad_norm": 10.40346622467041,
"learning_rate": 3.7292161520190026e-05,
"loss": 1.1107,
"step": 310
},
{
"epoch": 4.0,
"eval_accuracy": 0.5540229885057472,
"eval_f1": 0.5163004360032211,
"eval_loss": 0.9923611879348755,
"eval_precision": 0.525717492734745,
"eval_recall": 0.5540229885057472,
"eval_runtime": 4.1795,
"eval_samples_per_second": 104.079,
"eval_steps_per_second": 6.699,
"step": 313
},
{
"epoch": 4.0894568690095845,
"grad_norm": 6.684942722320557,
"learning_rate": 3.669833729216152e-05,
"loss": 1.1252,
"step": 320
},
{
"epoch": 4.217252396166134,
"grad_norm": 10.904282569885254,
"learning_rate": 3.6104513064133013e-05,
"loss": 1.1098,
"step": 330
},
{
"epoch": 4.345047923322683,
"grad_norm": 9.764264106750488,
"learning_rate": 3.5510688836104514e-05,
"loss": 1.096,
"step": 340
},
{
"epoch": 4.472843450479234,
"grad_norm": 8.857853889465332,
"learning_rate": 3.4916864608076014e-05,
"loss": 1.0528,
"step": 350
},
{
"epoch": 4.600638977635783,
"grad_norm": 7.500421524047852,
"learning_rate": 3.432304038004751e-05,
"loss": 1.1455,
"step": 360
},
{
"epoch": 4.728434504792332,
"grad_norm": 9.362208366394043,
"learning_rate": 3.372921615201901e-05,
"loss": 1.0993,
"step": 370
},
{
"epoch": 4.856230031948882,
"grad_norm": 9.38844108581543,
"learning_rate": 3.31353919239905e-05,
"loss": 1.0855,
"step": 380
},
{
"epoch": 4.984025559105431,
"grad_norm": 6.451258659362793,
"learning_rate": 3.2541567695961995e-05,
"loss": 1.1062,
"step": 390
},
{
"epoch": 4.996805111821086,
"eval_accuracy": 0.5747126436781609,
"eval_f1": 0.5507051314754062,
"eval_loss": 1.0018237829208374,
"eval_precision": 0.5659922189344513,
"eval_recall": 0.5747126436781609,
"eval_runtime": 4.1822,
"eval_samples_per_second": 104.012,
"eval_steps_per_second": 6.695,
"step": 391
},
{
"epoch": 5.111821086261981,
"grad_norm": 8.044933319091797,
"learning_rate": 3.1947743467933496e-05,
"loss": 1.0556,
"step": 400
},
{
"epoch": 5.23961661341853,
"grad_norm": 9.200007438659668,
"learning_rate": 3.135391923990499e-05,
"loss": 1.056,
"step": 410
},
{
"epoch": 5.36741214057508,
"grad_norm": 9.929868698120117,
"learning_rate": 3.076009501187649e-05,
"loss": 1.0098,
"step": 420
},
{
"epoch": 5.49520766773163,
"grad_norm": 7.43991231918335,
"learning_rate": 3.0166270783847983e-05,
"loss": 1.1396,
"step": 430
},
{
"epoch": 5.623003194888179,
"grad_norm": 7.019424915313721,
"learning_rate": 2.9572446555819477e-05,
"loss": 1.0659,
"step": 440
},
{
"epoch": 5.7507987220447285,
"grad_norm": 7.227243900299072,
"learning_rate": 2.8978622327790977e-05,
"loss": 1.0453,
"step": 450
},
{
"epoch": 5.878594249201278,
"grad_norm": 9.71042537689209,
"learning_rate": 2.838479809976247e-05,
"loss": 1.0331,
"step": 460
},
{
"epoch": 5.993610223642173,
"eval_accuracy": 0.593103448275862,
"eval_f1": 0.5768496494171164,
"eval_loss": 0.9901189208030701,
"eval_precision": 0.6202225107325855,
"eval_recall": 0.593103448275862,
"eval_runtime": 4.1789,
"eval_samples_per_second": 104.095,
"eval_steps_per_second": 6.7,
"step": 469
},
{
"epoch": 6.006389776357827,
"grad_norm": 6.03436279296875,
"learning_rate": 2.7790973871733968e-05,
"loss": 1.135,
"step": 470
},
{
"epoch": 6.134185303514377,
"grad_norm": 7.317747592926025,
"learning_rate": 2.7197149643705465e-05,
"loss": 1.0316,
"step": 480
},
{
"epoch": 6.261980830670926,
"grad_norm": 7.093214511871338,
"learning_rate": 2.6603325415676962e-05,
"loss": 1.0445,
"step": 490
},
{
"epoch": 6.389776357827476,
"grad_norm": 8.44990348815918,
"learning_rate": 2.6009501187648455e-05,
"loss": 1.064,
"step": 500
},
{
"epoch": 6.517571884984026,
"grad_norm": 8.586387634277344,
"learning_rate": 2.5415676959619956e-05,
"loss": 1.0428,
"step": 510
},
{
"epoch": 6.645367412140575,
"grad_norm": 8.286543846130371,
"learning_rate": 2.482185273159145e-05,
"loss": 1.0424,
"step": 520
},
{
"epoch": 6.773162939297125,
"grad_norm": 11.05722427368164,
"learning_rate": 2.4228028503562946e-05,
"loss": 1.0486,
"step": 530
},
{
"epoch": 6.900958466453674,
"grad_norm": 11.715845108032227,
"learning_rate": 2.3634204275534443e-05,
"loss": 1.0409,
"step": 540
},
{
"epoch": 6.9904153354632586,
"eval_accuracy": 0.5747126436781609,
"eval_f1": 0.5723268617274374,
"eval_loss": 0.9633908867835999,
"eval_precision": 0.5722167544506434,
"eval_recall": 0.5747126436781609,
"eval_runtime": 4.2036,
"eval_samples_per_second": 103.482,
"eval_steps_per_second": 6.661,
"step": 547
},
{
"epoch": 7.0287539936102235,
"grad_norm": 9.900256156921387,
"learning_rate": 2.3040380047505937e-05,
"loss": 1.021,
"step": 550
},
{
"epoch": 7.156549520766773,
"grad_norm": 11.22358226776123,
"learning_rate": 2.2446555819477437e-05,
"loss": 1.0387,
"step": 560
},
{
"epoch": 7.284345047923322,
"grad_norm": 6.466274738311768,
"learning_rate": 2.1852731591448934e-05,
"loss": 1.0166,
"step": 570
},
{
"epoch": 7.412140575079873,
"grad_norm": 9.685991287231445,
"learning_rate": 2.1258907363420428e-05,
"loss": 1.0459,
"step": 580
},
{
"epoch": 7.539936102236422,
"grad_norm": 9.211965560913086,
"learning_rate": 2.0665083135391925e-05,
"loss": 0.9852,
"step": 590
},
{
"epoch": 7.667731629392971,
"grad_norm": 9.930057525634766,
"learning_rate": 2.0071258907363422e-05,
"loss": 1.0176,
"step": 600
},
{
"epoch": 7.795527156549521,
"grad_norm": 12.529800415039062,
"learning_rate": 1.947743467933492e-05,
"loss": 1.0465,
"step": 610
},
{
"epoch": 7.92332268370607,
"grad_norm": 9.018549919128418,
"learning_rate": 1.8883610451306412e-05,
"loss": 1.0176,
"step": 620
},
{
"epoch": 8.0,
"eval_accuracy": 0.593103448275862,
"eval_f1": 0.5833555266825051,
"eval_loss": 0.9504066705703735,
"eval_precision": 0.5813528993323179,
"eval_recall": 0.593103448275862,
"eval_runtime": 4.1774,
"eval_samples_per_second": 104.131,
"eval_steps_per_second": 6.703,
"step": 626
},
{
"epoch": 8.05111821086262,
"grad_norm": 8.328828811645508,
"learning_rate": 1.828978622327791e-05,
"loss": 0.9516,
"step": 630
},
{
"epoch": 8.178913738019169,
"grad_norm": 7.750000476837158,
"learning_rate": 1.7695961995249406e-05,
"loss": 0.972,
"step": 640
},
{
"epoch": 8.30670926517572,
"grad_norm": 6.95557165145874,
"learning_rate": 1.7102137767220903e-05,
"loss": 0.9842,
"step": 650
},
{
"epoch": 8.434504792332268,
"grad_norm": 8.893524169921875,
"learning_rate": 1.65083135391924e-05,
"loss": 1.02,
"step": 660
},
{
"epoch": 8.562300319488818,
"grad_norm": 9.984440803527832,
"learning_rate": 1.5914489311163897e-05,
"loss": 0.97,
"step": 670
},
{
"epoch": 8.690095846645367,
"grad_norm": 8.314949989318848,
"learning_rate": 1.5320665083135394e-05,
"loss": 1.0366,
"step": 680
},
{
"epoch": 8.817891373801917,
"grad_norm": 9.685540199279785,
"learning_rate": 1.4726840855106888e-05,
"loss": 1.0284,
"step": 690
},
{
"epoch": 8.945686900958467,
"grad_norm": 10.43076229095459,
"learning_rate": 1.4133016627078385e-05,
"loss": 0.995,
"step": 700
},
{
"epoch": 8.996805111821086,
"eval_accuracy": 0.5908045977011495,
"eval_f1": 0.5853582114263199,
"eval_loss": 0.9584209322929382,
"eval_precision": 0.5853113777126823,
"eval_recall": 0.5908045977011495,
"eval_runtime": 4.2105,
"eval_samples_per_second": 103.314,
"eval_steps_per_second": 6.65,
"step": 704
},
{
"epoch": 9.073482428115016,
"grad_norm": 8.847938537597656,
"learning_rate": 1.3539192399049882e-05,
"loss": 0.9524,
"step": 710
},
{
"epoch": 9.201277955271566,
"grad_norm": 8.750248908996582,
"learning_rate": 1.2945368171021377e-05,
"loss": 0.9846,
"step": 720
},
{
"epoch": 9.329073482428115,
"grad_norm": 9.161256790161133,
"learning_rate": 1.2351543942992874e-05,
"loss": 0.9751,
"step": 730
},
{
"epoch": 9.456869009584665,
"grad_norm": 9.544412612915039,
"learning_rate": 1.1757719714964371e-05,
"loss": 0.9417,
"step": 740
},
{
"epoch": 9.584664536741213,
"grad_norm": 9.703606605529785,
"learning_rate": 1.1163895486935868e-05,
"loss": 0.9707,
"step": 750
},
{
"epoch": 9.712460063897764,
"grad_norm": 9.677326202392578,
"learning_rate": 1.0570071258907365e-05,
"loss": 1.0009,
"step": 760
},
{
"epoch": 9.840255591054314,
"grad_norm": 8.860432624816895,
"learning_rate": 9.97624703087886e-06,
"loss": 0.9817,
"step": 770
},
{
"epoch": 9.968051118210862,
"grad_norm": 8.743229866027832,
"learning_rate": 9.382422802850356e-06,
"loss": 0.9937,
"step": 780
},
{
"epoch": 9.993610223642172,
"eval_accuracy": 0.6022988505747127,
"eval_f1": 0.5934331301305344,
"eval_loss": 0.9338871240615845,
"eval_precision": 0.5893665058889589,
"eval_recall": 0.6022988505747127,
"eval_runtime": 4.2268,
"eval_samples_per_second": 102.915,
"eval_steps_per_second": 6.624,
"step": 782
},
{
"epoch": 10.095846645367413,
"grad_norm": 7.1755900382995605,
"learning_rate": 8.788598574821852e-06,
"loss": 0.9908,
"step": 790
},
{
"epoch": 10.223642172523961,
"grad_norm": 8.917673110961914,
"learning_rate": 8.19477434679335e-06,
"loss": 0.9361,
"step": 800
},
{
"epoch": 10.351437699680512,
"grad_norm": 15.518793106079102,
"learning_rate": 7.6009501187648464e-06,
"loss": 0.9495,
"step": 810
},
{
"epoch": 10.47923322683706,
"grad_norm": 12.074665069580078,
"learning_rate": 7.007125890736342e-06,
"loss": 0.9689,
"step": 820
},
{
"epoch": 10.60702875399361,
"grad_norm": 9.314558982849121,
"learning_rate": 6.4133016627078396e-06,
"loss": 0.983,
"step": 830
},
{
"epoch": 10.73482428115016,
"grad_norm": 9.714004516601562,
"learning_rate": 5.819477434679335e-06,
"loss": 1.0,
"step": 840
},
{
"epoch": 10.86261980830671,
"grad_norm": 9.66527271270752,
"learning_rate": 5.225653206650832e-06,
"loss": 0.9036,
"step": 850
},
{
"epoch": 10.99041533546326,
"grad_norm": 11.544416427612305,
"learning_rate": 4.631828978622328e-06,
"loss": 0.9387,
"step": 860
},
{
"epoch": 10.99041533546326,
"eval_accuracy": 0.6137931034482759,
"eval_f1": 0.5996477650763593,
"eval_loss": 0.9119637608528137,
"eval_precision": 0.596855024118962,
"eval_recall": 0.6137931034482759,
"eval_runtime": 4.2091,
"eval_samples_per_second": 103.347,
"eval_steps_per_second": 6.652,
"step": 860
},
{
"epoch": 11.118210862619808,
"grad_norm": 9.786779403686523,
"learning_rate": 4.038004750593825e-06,
"loss": 0.911,
"step": 870
},
{
"epoch": 11.246006389776358,
"grad_norm": 10.40623664855957,
"learning_rate": 3.4441805225653207e-06,
"loss": 0.9683,
"step": 880
},
{
"epoch": 11.373801916932907,
"grad_norm": 10.116272926330566,
"learning_rate": 2.850356294536817e-06,
"loss": 0.9074,
"step": 890
},
{
"epoch": 11.501597444089457,
"grad_norm": 9.423429489135742,
"learning_rate": 2.2565320665083133e-06,
"loss": 0.908,
"step": 900
},
{
"epoch": 11.629392971246006,
"grad_norm": 11.146402359008789,
"learning_rate": 1.6627078384798101e-06,
"loss": 0.9744,
"step": 910
},
{
"epoch": 11.757188498402556,
"grad_norm": 11.125927925109863,
"learning_rate": 1.0688836104513065e-06,
"loss": 0.9541,
"step": 920
},
{
"epoch": 11.884984025559106,
"grad_norm": 10.135693550109863,
"learning_rate": 4.750593824228029e-07,
"loss": 0.9324,
"step": 930
},
{
"epoch": 11.961661341853034,
"eval_accuracy": 0.5954022988505747,
"eval_f1": 0.5878627034099811,
"eval_loss": 0.9134895205497742,
"eval_precision": 0.5865253155328708,
"eval_recall": 0.5954022988505747,
"eval_runtime": 4.3073,
"eval_samples_per_second": 100.992,
"eval_steps_per_second": 6.501,
"step": 936
},
{
"epoch": 11.961661341853034,
"step": 936,
"total_flos": 4.743827088137626e+18,
"train_loss": 1.079931161342523,
"train_runtime": 2308.5403,
"train_samples_per_second": 26.027,
"train_steps_per_second": 0.405
}
],
"logging_steps": 10,
"max_steps": 936,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.743827088137626e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}