{ "best_metric": 0.9213530294983667, "best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-stroke-binary/checkpoint-3200", "epoch": 47.9907120743034, "eval_steps": 100, "global_step": 3840, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6191950464396285, "grad_norm": 5.044180870056152, "learning_rate": 2.604166666666667e-06, "loss": 0.744, "step": 50 }, { "epoch": 1.2476780185758514, "grad_norm": 1.894531011581421, "learning_rate": 5.208333333333334e-06, "loss": 0.7256, "step": 100 }, { "epoch": 1.2476780185758514, "eval_accuracy": 0.5685210312075983, "eval_f1": 0.48230573725672854, "eval_loss": 0.6912839412689209, "eval_precision": 0.47305459682975004, "eval_recall": 0.5685210312075983, "eval_runtime": 9.7307, "eval_samples_per_second": 227.22, "eval_steps_per_second": 28.467, "step": 100 }, { "epoch": 1.86687306501548, "grad_norm": 1.9874528646469116, "learning_rate": 7.8125e-06, "loss": 0.7033, "step": 150 }, { "epoch": 2.4953560371517027, "grad_norm": 2.1426079273223877, "learning_rate": 1.0416666666666668e-05, "loss": 0.6695, "step": 200 }, { "epoch": 2.4953560371517027, "eval_accuracy": 0.6209859791949344, "eval_f1": 0.5163511984739979, "eval_loss": 0.64801025390625, "eval_precision": 0.5986617059765201, "eval_recall": 0.6209859791949344, "eval_runtime": 9.7733, "eval_samples_per_second": 226.229, "eval_steps_per_second": 28.343, "step": 200 }, { "epoch": 3.123839009287926, "grad_norm": 1.4950852394104004, "learning_rate": 1.3020833333333334e-05, "loss": 0.6399, "step": 250 }, { "epoch": 3.7430340557275543, "grad_norm": 1.932099461555481, "learning_rate": 1.5625e-05, "loss": 0.5963, "step": 300 }, { "epoch": 3.7430340557275543, "eval_accuracy": 0.6725463591135233, "eval_f1": 0.6118497378760206, "eval_loss": 0.5882277488708496, "eval_precision": 0.6992514519414246, "eval_recall": 0.6725463591135233, "eval_runtime": 9.9265, "eval_samples_per_second": 222.737, "eval_steps_per_second": 27.905, "step": 300 }, { "epoch": 4.371517027863777, "grad_norm": 1.4714239835739136, "learning_rate": 1.8229166666666668e-05, "loss": 0.5616, "step": 350 }, { "epoch": 4.9907120743034055, "grad_norm": 1.6321817636489868, "learning_rate": 1.9998942319271076e-05, "loss": 0.518, "step": 400 }, { "epoch": 4.9907120743034055, "eval_accuracy": 0.7480777928539123, "eval_f1": 0.7167175429836762, "eval_loss": 0.49900302290916443, "eval_precision": 0.7891493639439078, "eval_recall": 0.7480777928539123, "eval_runtime": 9.9652, "eval_samples_per_second": 221.873, "eval_steps_per_second": 27.797, "step": 400 }, { "epoch": 5.6191950464396285, "grad_norm": 2.7871556282043457, "learning_rate": 1.998200798188685e-05, "loss": 0.4858, "step": 450 }, { "epoch": 6.247678018575852, "grad_norm": 3.0406832695007324, "learning_rate": 1.9944456171551024e-05, "loss": 0.4325, "step": 500 }, { "epoch": 6.247678018575852, "eval_accuracy": 0.8073270013568521, "eval_f1": 0.7957289617278764, "eval_loss": 0.40903061628341675, "eval_precision": 0.8231915996306657, "eval_recall": 0.8073270013568521, "eval_runtime": 10.0105, "eval_samples_per_second": 220.868, "eval_steps_per_second": 27.671, "step": 500 }, { "epoch": 6.86687306501548, "grad_norm": 1.8095026016235352, "learning_rate": 1.9886364450156485e-05, "loss": 0.3996, "step": 550 }, { "epoch": 7.495356037151703, "grad_norm": 2.2104787826538086, "learning_rate": 1.9807852804032306e-05, "loss": 0.3848, "step": 600 }, { "epoch": 7.495356037151703, "eval_accuracy": 0.8340117593848937, "eval_f1": 0.8257264682013935, "eval_loss": 0.37033286690711975, "eval_precision": 0.8481671958516468, "eval_recall": 0.8340117593848937, "eval_runtime": 10.0319, "eval_samples_per_second": 220.396, "eval_steps_per_second": 27.612, "step": 600 }, { "epoch": 8.123839009287925, "grad_norm": 1.679534912109375, "learning_rate": 1.970908339611638e-05, "loss": 0.3628, "step": 650 }, { "epoch": 8.743034055727554, "grad_norm": 1.1306818723678589, "learning_rate": 1.9590260231013774e-05, "loss": 0.3532, "step": 700 }, { "epoch": 8.743034055727554, "eval_accuracy": 0.8312980551786522, "eval_f1": 0.8200843868501977, "eval_loss": 0.39578977227211, "eval_precision": 0.8564151669725956, "eval_recall": 0.8312980551786522, "eval_runtime": 10.0177, "eval_samples_per_second": 220.71, "eval_steps_per_second": 27.651, "step": 700 }, { "epoch": 9.371517027863778, "grad_norm": 2.967256784439087, "learning_rate": 1.945162873363268e-05, "loss": 0.3368, "step": 750 }, { "epoch": 9.990712074303406, "grad_norm": 1.7120331525802612, "learning_rate": 1.9293475242268224e-05, "loss": 0.3297, "step": 800 }, { "epoch": 9.990712074303406, "eval_accuracy": 0.8611488014473089, "eval_f1": 0.8558119987362278, "eval_loss": 0.32570937275886536, "eval_precision": 0.871840635108417, "eval_recall": 0.8611488014473089, "eval_runtime": 10.0508, "eval_samples_per_second": 219.982, "eval_steps_per_second": 27.56, "step": 800 }, { "epoch": 10.619195046439629, "grad_norm": 1.371287226676941, "learning_rate": 1.9116126417181188e-05, "loss": 0.3143, "step": 850 }, { "epoch": 11.24767801857585, "grad_norm": 1.5851930379867554, "learning_rate": 1.8919948565893144e-05, "loss": 0.3281, "step": 900 }, { "epoch": 11.24767801857585, "eval_accuracy": 0.866576209859792, "eval_f1": 0.8611949189284244, "eval_loss": 0.3168693780899048, "eval_precision": 0.8790588339902896, "eval_recall": 0.866576209859792, "eval_runtime": 10.0434, "eval_samples_per_second": 220.144, "eval_steps_per_second": 27.58, "step": 900 }, { "epoch": 11.86687306501548, "grad_norm": 0.8977594375610352, "learning_rate": 1.8705346886591667e-05, "loss": 0.3036, "step": 950 }, { "epoch": 12.495356037151703, "grad_norm": 2.8011789321899414, "learning_rate": 1.847276463120828e-05, "loss": 0.2938, "step": 1000 }, { "epoch": 12.495356037151703, "eval_accuracy": 0.8864767073722297, "eval_f1": 0.8841053273932936, "eval_loss": 0.28136056661605835, "eval_precision": 0.8900209862709351, "eval_recall": 0.8864767073722297, "eval_runtime": 10.0799, "eval_samples_per_second": 219.348, "eval_steps_per_second": 27.481, "step": 1000 }, { "epoch": 13.123839009287925, "grad_norm": 2.865165948867798, "learning_rate": 1.822268218989775e-05, "loss": 0.294, "step": 1050 }, { "epoch": 13.743034055727554, "grad_norm": 1.8461377620697021, "learning_rate": 1.79556160988098e-05, "loss": 0.2866, "step": 1100 }, { "epoch": 13.743034055727554, "eval_accuracy": 0.8869289914066033, "eval_f1": 0.883705253652531, "eval_loss": 0.2827624976634979, "eval_precision": 0.8942761361900506, "eval_recall": 0.8869289914066033, "eval_runtime": 10.0748, "eval_samples_per_second": 219.459, "eval_steps_per_second": 27.494, "step": 1100 }, { "epoch": 14.371517027863778, "grad_norm": 1.1585817337036133, "learning_rate": 1.7672117973202526e-05, "loss": 0.2667, "step": 1150 }, { "epoch": 14.990712074303406, "grad_norm": 1.050703525543213, "learning_rate": 1.737277336810124e-05, "loss": 0.2884, "step": 1200 }, { "epoch": 14.990712074303406, "eval_accuracy": 0.8846675712347354, "eval_f1": 0.8810137319907275, "eval_loss": 0.29289892315864563, "eval_precision": 0.8936495972210173, "eval_recall": 0.8846675712347354, "eval_runtime": 10.0424, "eval_samples_per_second": 220.167, "eval_steps_per_second": 27.583, "step": 1200 }, { "epoch": 15.619195046439629, "grad_norm": 1.1661638021469116, "learning_rate": 1.705820056885596e-05, "loss": 0.2722, "step": 1250 }, { "epoch": 16.24767801857585, "grad_norm": 1.4716720581054688, "learning_rate": 1.6729049314095578e-05, "loss": 0.2808, "step": 1300 }, { "epoch": 16.24767801857585, "eval_accuracy": 0.9014020805065581, "eval_f1": 0.8998791257539092, "eval_loss": 0.24578717350959778, "eval_precision": 0.9034472507945439, "eval_recall": 0.9014020805065581, "eval_runtime": 10.1493, "eval_samples_per_second": 217.847, "eval_steps_per_second": 27.292, "step": 1300 }, { "epoch": 16.86687306501548, "grad_norm": 1.2002067565917969, "learning_rate": 1.6385999453716453e-05, "loss": 0.2843, "step": 1350 }, { "epoch": 17.4953560371517, "grad_norm": 2.676912307739258, "learning_rate": 1.6029759544677298e-05, "loss": 0.258, "step": 1400 }, { "epoch": 17.4953560371517, "eval_accuracy": 0.9090909090909091, "eval_f1": 0.908023053812085, "eval_loss": 0.235076442360878, "eval_precision": 0.9101607687708339, "eval_recall": 0.9090909090909091, "eval_runtime": 10.0503, "eval_samples_per_second": 219.993, "eval_steps_per_second": 27.561, "step": 1400 }, { "epoch": 18.123839009287927, "grad_norm": 1.2184885740280151, "learning_rate": 1.566106538750063e-05, "loss": 0.2639, "step": 1450 }, { "epoch": 18.743034055727556, "grad_norm": 1.8000015020370483, "learning_rate": 1.528067850650368e-05, "loss": 0.2744, "step": 1500 }, { "epoch": 18.743034055727556, "eval_accuracy": 0.9014020805065581, "eval_f1": 0.8993606111929588, "eval_loss": 0.25163090229034424, "eval_precision": 0.9056565314020832, "eval_recall": 0.9014020805065581, "eval_runtime": 10.0711, "eval_samples_per_second": 219.538, "eval_steps_per_second": 27.504, "step": 1500 }, { "epoch": 19.371517027863778, "grad_norm": 0.9816193580627441, "learning_rate": 1.4889384576897728e-05, "loss": 0.2576, "step": 1550 }, { "epoch": 19.990712074303406, "grad_norm": 1.5961647033691406, "learning_rate": 1.4487991802004625e-05, "loss": 0.261, "step": 1600 }, { "epoch": 19.990712074303406, "eval_accuracy": 0.9068294889190411, "eval_f1": 0.9050356054740915, "eval_loss": 0.2453160583972931, "eval_precision": 0.910682778753074, "eval_recall": 0.9068294889190411, "eval_runtime": 10.0772, "eval_samples_per_second": 219.406, "eval_steps_per_second": 27.488, "step": 1600 }, { "epoch": 20.61919504643963, "grad_norm": 1.3451308012008667, "learning_rate": 1.4077329243942368e-05, "loss": 0.2573, "step": 1650 }, { "epoch": 21.24767801857585, "grad_norm": 2.416846990585327, "learning_rate": 1.3658245111227571e-05, "loss": 0.2519, "step": 1700 }, { "epoch": 21.24767801857585, "eval_accuracy": 0.8986883763003166, "eval_f1": 0.8961275295230519, "eval_loss": 0.25643372535705566, "eval_precision": 0.9051038365282786, "eval_recall": 0.8986883763003166, "eval_runtime": 10.0812, "eval_samples_per_second": 219.319, "eval_steps_per_second": 27.477, "step": 1700 }, { "epoch": 21.86687306501548, "grad_norm": 1.3278274536132812, "learning_rate": 1.323160500683173e-05, "loss": 0.2546, "step": 1750 }, { "epoch": 22.4953560371517, "grad_norm": 2.4079225063323975, "learning_rate": 1.2798290140309924e-05, "loss": 0.2595, "step": 1800 }, { "epoch": 22.4953560371517, "eval_accuracy": 0.9095431931252826, "eval_f1": 0.9079295985892596, "eval_loss": 0.23176445066928864, "eval_precision": 0.9128858666371455, "eval_recall": 0.9095431931252826, "eval_runtime": 10.0549, "eval_samples_per_second": 219.893, "eval_steps_per_second": 27.549, "step": 1800 }, { "epoch": 23.123839009287927, "grad_norm": 1.916286587715149, "learning_rate": 1.2359195507694633e-05, "loss": 0.2383, "step": 1850 }, { "epoch": 23.743034055727556, "grad_norm": 3.8509888648986816, "learning_rate": 1.1915228042914144e-05, "loss": 0.2548, "step": 1900 }, { "epoch": 23.743034055727556, "eval_accuracy": 0.9136137494346449, "eval_f1": 0.912807074710142, "eval_loss": 0.21959343552589417, "eval_precision": 0.91415122785338, "eval_recall": 0.9136137494346449, "eval_runtime": 10.0827, "eval_samples_per_second": 219.287, "eval_steps_per_second": 27.473, "step": 1900 }, { "epoch": 24.371517027863778, "grad_norm": 1.1695117950439453, "learning_rate": 1.1467304744553618e-05, "loss": 0.2336, "step": 1950 }, { "epoch": 24.990712074303406, "grad_norm": 0.93059903383255, "learning_rate": 1.101635078182802e-05, "loss": 0.2327, "step": 2000 }, { "epoch": 24.990712074303406, "eval_accuracy": 0.9068294889190411, "eval_f1": 0.904968418219579, "eval_loss": 0.23764155805110931, "eval_precision": 0.9110196660671599, "eval_recall": 0.9068294889190411, "eval_runtime": 10.0796, "eval_samples_per_second": 219.354, "eval_steps_per_second": 27.481, "step": 2000 }, { "epoch": 25.61919504643963, "grad_norm": 1.0239897966384888, "learning_rate": 1.0563297583678877e-05, "loss": 0.2351, "step": 2050 }, { "epoch": 26.24767801857585, "grad_norm": 1.6756315231323242, "learning_rate": 1.0109080914941825e-05, "loss": 0.2563, "step": 2100 }, { "epoch": 26.24767801857585, "eval_accuracy": 0.9027589326096789, "eval_f1": 0.9005087315122816, "eval_loss": 0.2420978993177414, "eval_precision": 0.9082977649283199, "eval_recall": 0.9027589326096789, "eval_runtime": 10.0674, "eval_samples_per_second": 219.621, "eval_steps_per_second": 27.515, "step": 2100 }, { "epoch": 26.86687306501548, "grad_norm": 2.134220838546753, "learning_rate": 9.65463894355851e-06, "loss": 0.2373, "step": 2150 }, { "epoch": 27.4953560371517, "grad_norm": 0.8266007304191589, "learning_rate": 9.200910302824964e-06, "loss": 0.2348, "step": 2200 }, { "epoch": 27.4953560371517, "eval_accuracy": 0.9109000452284034, "eval_f1": 0.9095386566495041, "eval_loss": 0.22126996517181396, "eval_precision": 0.9132386550038264, "eval_recall": 0.9109000452284034, "eval_runtime": 10.0881, "eval_samples_per_second": 219.169, "eval_steps_per_second": 27.458, "step": 2200 }, { "epoch": 28.123839009287927, "grad_norm": 1.1402108669281006, "learning_rate": 8.74883215267881e-06, "loss": 0.2366, "step": 2250 }, { "epoch": 28.743034055727556, "grad_norm": 1.5788310766220093, "learning_rate": 8.299338244029646e-06, "loss": 0.2427, "step": 2300 }, { "epoch": 28.743034055727556, "eval_accuracy": 0.9077340569877883, "eval_f1": 0.9059575898869645, "eval_loss": 0.23078913986682892, "eval_precision": 0.9116313077973748, "eval_recall": 0.9077340569877883, "eval_runtime": 10.0773, "eval_samples_per_second": 219.404, "eval_steps_per_second": 27.488, "step": 2300 }, { "epoch": 29.371517027863778, "grad_norm": 1.0100857019424438, "learning_rate": 7.853356990130625e-06, "loss": 0.2462, "step": 2350 }, { "epoch": 29.990712074303406, "grad_norm": 1.6099952459335327, "learning_rate": 7.411809548974792e-06, "loss": 0.2166, "step": 2400 }, { "epoch": 29.990712074303406, "eval_accuracy": 0.9140660334690185, "eval_f1": 0.9127673040043155, "eval_loss": 0.21517515182495117, "eval_precision": 0.9164570638030407, "eval_recall": 0.9140660334690185, "eval_runtime": 10.0698, "eval_samples_per_second": 219.568, "eval_steps_per_second": 27.508, "step": 2400 }, { "epoch": 30.61919504643963, "grad_norm": 2.7743020057678223, "learning_rate": 6.975607920676901e-06, "loss": 0.2315, "step": 2450 }, { "epoch": 31.24767801857585, "grad_norm": 2.1687729358673096, "learning_rate": 6.545653063770458e-06, "loss": 0.2345, "step": 2500 }, { "epoch": 31.24767801857585, "eval_accuracy": 0.9068294889190411, "eval_f1": 0.9049003940630713, "eval_loss": 0.22831733524799347, "eval_precision": 0.9113708908998591, "eval_recall": 0.9068294889190411, "eval_runtime": 10.1159, "eval_samples_per_second": 218.567, "eval_steps_per_second": 27.383, "step": 2500 }, { "epoch": 31.86687306501548, "grad_norm": 2.7222249507904053, "learning_rate": 6.122833034310794e-06, "loss": 0.2283, "step": 2550 }, { "epoch": 32.4953560371517, "grad_norm": 1.5686038732528687, "learning_rate": 5.708021151627712e-06, "loss": 0.2355, "step": 2600 }, { "epoch": 32.4953560371517, "eval_accuracy": 0.9118046132971506, "eval_f1": 0.9103077963732922, "eval_loss": 0.21731863915920258, "eval_precision": 0.9148692757405781, "eval_recall": 0.9118046132971506, "eval_runtime": 10.0717, "eval_samples_per_second": 219.526, "eval_steps_per_second": 27.503, "step": 2600 }, { "epoch": 33.12383900928793, "grad_norm": 2.4550020694732666, "learning_rate": 5.302074194516291e-06, "loss": 0.2207, "step": 2650 }, { "epoch": 33.743034055727556, "grad_norm": 2.3107786178588867, "learning_rate": 4.9058306315915826e-06, "loss": 0.2291, "step": 2700 }, { "epoch": 33.743034055727556, "eval_accuracy": 0.9127091813658977, "eval_f1": 0.9112873759668951, "eval_loss": 0.214884415268898, "eval_precision": 0.9155228611707341, "eval_recall": 0.9127091813658977, "eval_runtime": 10.1219, "eval_samples_per_second": 218.437, "eval_steps_per_second": 27.366, "step": 2700 }, { "epoch": 34.371517027863774, "grad_norm": 1.5490918159484863, "learning_rate": 4.52010888946231e-06, "loss": 0.2344, "step": 2750 }, { "epoch": 34.9907120743034, "grad_norm": 1.1831104755401611, "learning_rate": 4.1457056623005954e-06, "loss": 0.2319, "step": 2800 }, { "epoch": 34.9907120743034, "eval_accuracy": 0.9140660334690185, "eval_f1": 0.912709884455563, "eval_loss": 0.2123376727104187, "eval_precision": 0.9167245062463679, "eval_recall": 0.9140660334690185, "eval_runtime": 10.0267, "eval_samples_per_second": 220.512, "eval_steps_per_second": 27.626, "step": 2800 }, { "epoch": 35.61919504643963, "grad_norm": 1.047865629196167, "learning_rate": 3.7833942662992286e-06, "loss": 0.2253, "step": 2850 }, { "epoch": 36.247678018575854, "grad_norm": 1.880428433418274, "learning_rate": 3.4339230424153225e-06, "loss": 0.222, "step": 2900 }, { "epoch": 36.247678018575854, "eval_accuracy": 0.9181365897783809, "eval_f1": 0.9171239767848, "eval_loss": 0.2052914798259735, "eval_precision": 0.9196609395815928, "eval_recall": 0.9181365897783809, "eval_runtime": 10.0315, "eval_samples_per_second": 220.405, "eval_steps_per_second": 27.613, "step": 2900 }, { "epoch": 36.86687306501548, "grad_norm": 1.5235450267791748, "learning_rate": 3.098013810699404e-06, "loss": 0.2256, "step": 2950 }, { "epoch": 37.4953560371517, "grad_norm": 1.710253357887268, "learning_rate": 2.776360379402445e-06, "loss": 0.2235, "step": 3000 }, { "epoch": 37.4953560371517, "eval_accuracy": 0.9140660334690185, "eval_f1": 0.9127386875836824, "eval_loss": 0.21209371089935303, "eval_precision": 0.9165890729592928, "eval_recall": 0.9140660334690185, "eval_runtime": 10.0492, "eval_samples_per_second": 220.018, "eval_steps_per_second": 27.564, "step": 3000 }, { "epoch": 38.12383900928793, "grad_norm": 1.8632524013519287, "learning_rate": 2.469627111940258e-06, "loss": 0.2207, "step": 3050 }, { "epoch": 38.743034055727556, "grad_norm": 1.2123332023620605, "learning_rate": 2.178447554675136e-06, "loss": 0.2221, "step": 3100 }, { "epoch": 38.743034055727556, "eval_accuracy": 0.9194934418815016, "eval_f1": 0.9188000621156364, "eval_loss": 0.20125794410705566, "eval_precision": 0.9199821991038324, "eval_recall": 0.9194934418815016, "eval_runtime": 10.0477, "eval_samples_per_second": 220.05, "eval_steps_per_second": 27.568, "step": 3100 }, { "epoch": 39.371517027863774, "grad_norm": 1.6572494506835938, "learning_rate": 1.903423128348959e-06, "loss": 0.2411, "step": 3150 }, { "epoch": 39.9907120743034, "grad_norm": 1.047083854675293, "learning_rate": 1.6451218858706374e-06, "loss": 0.2262, "step": 3200 }, { "epoch": 39.9907120743034, "eval_accuracy": 0.9222071460877431, "eval_f1": 0.9213530294983667, "eval_loss": 0.20286186039447784, "eval_precision": 0.9233953907433776, "eval_recall": 0.9222071460877431, "eval_runtime": 10.0854, "eval_samples_per_second": 219.227, "eval_steps_per_second": 27.465, "step": 3200 }, { "epoch": 40.61919504643963, "grad_norm": 1.171499490737915, "learning_rate": 1.4040773390235463e-06, "loss": 0.2213, "step": 3250 }, { "epoch": 41.247678018575854, "grad_norm": 2.6431760787963867, "learning_rate": 1.1807873565164507e-06, "loss": 0.2171, "step": 3300 }, { "epoch": 41.247678018575854, "eval_accuracy": 0.9181365897783809, "eval_f1": 0.9169933978820265, "eval_loss": 0.2075406163930893, "eval_precision": 0.9202292831462632, "eval_recall": 0.9181365897783809, "eval_runtime": 10.0343, "eval_samples_per_second": 220.344, "eval_steps_per_second": 27.605, "step": 3300 }, { "epoch": 41.86687306501548, "grad_norm": 1.0989552736282349, "learning_rate": 9.757131356538408e-07, "loss": 0.2162, "step": 3350 }, { "epoch": 42.4953560371517, "grad_norm": 1.126570463180542, "learning_rate": 7.892782497497642e-07, "loss": 0.2268, "step": 3400 }, { "epoch": 42.4953560371517, "eval_accuracy": 0.919041157847128, "eval_f1": 0.9179885980860357, "eval_loss": 0.2045469731092453, "eval_precision": 0.9208108708737649, "eval_recall": 0.919041157847128, "eval_runtime": 10.0534, "eval_samples_per_second": 219.925, "eval_steps_per_second": 27.553, "step": 3400 }, { "epoch": 43.12383900928793, "grad_norm": 1.0992859601974487, "learning_rate": 6.218677732526035e-07, "loss": 0.2284, "step": 3450 }, { "epoch": 43.743034055727556, "grad_norm": 0.8807310461997986, "learning_rate": 4.7382748638786336e-07, "loss": 0.2222, "step": 3500 }, { "epoch": 43.743034055727556, "eval_accuracy": 0.9203980099502488, "eval_f1": 0.9193757241191305, "eval_loss": 0.20502084493637085, "eval_precision": 0.9221532972644962, "eval_recall": 0.9203980099502488, "eval_runtime": 10.0674, "eval_samples_per_second": 219.619, "eval_steps_per_second": 27.514, "step": 3500 }, { "epoch": 44.371517027863774, "grad_norm": 1.0601508617401123, "learning_rate": 3.454631609617487e-07, "loss": 0.2232, "step": 3550 }, { "epoch": 44.9907120743034, "grad_norm": 1.5610824823379517, "learning_rate": 2.370399288006664e-07, "loss": 0.2169, "step": 3600 }, { "epoch": 44.9907120743034, "eval_accuracy": 0.9176843057440073, "eval_f1": 0.9165481260183203, "eval_loss": 0.20695888996124268, "eval_precision": 0.9197013630605291, "eval_recall": 0.9176843057440073, "eval_runtime": 10.0683, "eval_samples_per_second": 219.6, "eval_steps_per_second": 27.512, "step": 3600 }, { "epoch": 45.61919504643963, "grad_norm": 1.097316861152649, "learning_rate": 1.4878173413111485e-07, "loss": 0.2269, "step": 3650 }, { "epoch": 46.247678018575854, "grad_norm": 1.628519892692566, "learning_rate": 8.087087103106461e-08, "loss": 0.2245, "step": 3700 }, { "epoch": 46.247678018575854, "eval_accuracy": 0.9181365897783809, "eval_f1": 0.9170198637903132, "eval_loss": 0.20643840730190277, "eval_precision": 0.9201088948883855, "eval_recall": 0.9181365897783809, "eval_runtime": 10.0506, "eval_samples_per_second": 219.986, "eval_steps_per_second": 27.56, "step": 3700 }, { "epoch": 46.86687306501548, "grad_norm": 0.9928280115127563, "learning_rate": 3.3447606908196815e-08, "loss": 0.212, "step": 3750 }, { "epoch": 47.4953560371517, "grad_norm": 1.0919773578643799, "learning_rate": 6.609892782699634e-09, "loss": 0.2148, "step": 3800 }, { "epoch": 47.4953560371517, "eval_accuracy": 0.9181365897783809, "eval_f1": 0.9170198637903132, "eval_loss": 0.20658649504184723, "eval_precision": 0.9201088948883855, "eval_recall": 0.9181365897783809, "eval_runtime": 10.0329, "eval_samples_per_second": 220.375, "eval_steps_per_second": 27.609, "step": 3800 }, { "epoch": 47.9907120743034, "step": 3840, "total_flos": 2.738563601264935e+19, "train_loss": 0.3012936460475127, "train_runtime": 3086.985, "train_samples_per_second": 80.203, "train_steps_per_second": 1.244 } ], "logging_steps": 50, "max_steps": 3840, "num_input_tokens_seen": 0, "num_train_epochs": 48, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.738563601264935e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }