{ "best_metric": 0.9595276897831222, "best_model_checkpoint": "dino-vitb8-finetuned-stroke-binary/checkpoint-2900", "epoch": 17.905426356589146, "eval_steps": 100, "global_step": 2900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.31007751937984496, "grad_norm": 6.24950647354126, "learning_rate": 1.6896551724137933e-06, "loss": 0.8366, "step": 50 }, { "epoch": 0.6201550387596899, "grad_norm": 5.115139007568359, "learning_rate": 3.4137931034482764e-06, "loss": 0.7965, "step": 100 }, { "epoch": 0.6201550387596899, "eval_accuracy": 0.5382180009045681, "eval_f1": 0.5057986540639501, "eval_loss": 0.8312332630157471, "eval_precision": 0.496820169669259, "eval_recall": 0.5382180009045681, "eval_runtime": 32.0161, "eval_samples_per_second": 69.059, "eval_steps_per_second": 8.652, "step": 100 }, { "epoch": 0.9302325581395349, "grad_norm": 8.835868835449219, "learning_rate": 5.137931034482759e-06, "loss": 0.7713, "step": 150 }, { "epoch": 1.235658914728682, "grad_norm": 9.565229415893555, "learning_rate": 6.862068965517242e-06, "loss": 0.6839, "step": 200 }, { "epoch": 1.235658914728682, "eval_accuracy": 0.6246042514699232, "eval_f1": 0.5749818840100107, "eval_loss": 0.6796302199363708, "eval_precision": 0.5990946901610464, "eval_recall": 0.6246042514699232, "eval_runtime": 32.1198, "eval_samples_per_second": 68.836, "eval_steps_per_second": 8.624, "step": 200 }, { "epoch": 1.5457364341085271, "grad_norm": 8.520139694213867, "learning_rate": 8.586206896551726e-06, "loss": 0.6219, "step": 250 }, { "epoch": 1.8558139534883722, "grad_norm": 6.877398490905762, "learning_rate": 1.0310344827586208e-05, "loss": 0.5662, "step": 300 }, { "epoch": 1.8558139534883722, "eval_accuracy": 0.7317955676164631, "eval_f1": 0.7119348319857804, "eval_loss": 0.5344149470329285, "eval_precision": 0.7376504962044594, "eval_recall": 0.7317955676164631, "eval_runtime": 32.0121, "eval_samples_per_second": 69.068, "eval_steps_per_second": 8.653, "step": 300 }, { "epoch": 2.1612403100775195, "grad_norm": 6.749499797821045, "learning_rate": 1.203448275862069e-05, "loss": 0.4908, "step": 350 }, { "epoch": 2.471317829457364, "grad_norm": 11.308539390563965, "learning_rate": 1.3758620689655173e-05, "loss": 0.4408, "step": 400 }, { "epoch": 2.471317829457364, "eval_accuracy": 0.8123021257349615, "eval_f1": 0.8082421593111531, "eval_loss": 0.40815532207489014, "eval_precision": 0.8120443114553312, "eval_recall": 0.8123021257349615, "eval_runtime": 31.919, "eval_samples_per_second": 69.269, "eval_steps_per_second": 8.678, "step": 400 }, { "epoch": 2.781395348837209, "grad_norm": 3.188601493835449, "learning_rate": 1.548275862068966e-05, "loss": 0.3761, "step": 450 }, { "epoch": 3.0868217054263565, "grad_norm": 9.05437183380127, "learning_rate": 1.7206896551724138e-05, "loss": 0.3611, "step": 500 }, { "epoch": 3.0868217054263565, "eval_accuracy": 0.8602442333785617, "eval_f1": 0.859700206427454, "eval_loss": 0.333509624004364, "eval_precision": 0.8595584495556567, "eval_recall": 0.8602442333785617, "eval_runtime": 32.0233, "eval_samples_per_second": 69.044, "eval_steps_per_second": 8.65, "step": 500 }, { "epoch": 3.3968992248062015, "grad_norm": 2.100419759750366, "learning_rate": 1.893103448275862e-05, "loss": 0.3072, "step": 550 }, { "epoch": 3.7069767441860466, "grad_norm": 6.307981014251709, "learning_rate": 1.9999345217525786e-05, "loss": 0.3121, "step": 600 }, { "epoch": 3.7069767441860466, "eval_accuracy": 0.8860244233378561, "eval_f1": 0.8831883391472106, "eval_loss": 0.27464836835861206, "eval_precision": 0.8913973600585926, "eval_recall": 0.8860244233378561, "eval_runtime": 31.9786, "eval_samples_per_second": 69.14, "eval_steps_per_second": 8.662, "step": 600 }, { "epoch": 4.0124031007751935, "grad_norm": 4.161785125732422, "learning_rate": 1.999136563788985e-05, "loss": 0.2786, "step": 650 }, { "epoch": 4.322480620155039, "grad_norm": 5.515849590301514, "learning_rate": 1.997432546533359e-05, "loss": 0.2614, "step": 700 }, { "epoch": 4.322480620155039, "eval_accuracy": 0.9050203527815468, "eval_f1": 0.9039775360154476, "eval_loss": 0.2299066036939621, "eval_precision": 0.90576736213102, "eval_recall": 0.9050203527815468, "eval_runtime": 31.9694, "eval_samples_per_second": 69.16, "eval_steps_per_second": 8.665, "step": 700 }, { "epoch": 4.632558139534884, "grad_norm": 2.6630172729492188, "learning_rate": 1.9948240152606154e-05, "loss": 0.2622, "step": 750 }, { "epoch": 4.942635658914728, "grad_norm": 4.852052211761475, "learning_rate": 1.9913133354972336e-05, "loss": 0.242, "step": 800 }, { "epoch": 4.942635658914728, "eval_accuracy": 0.9176843057440073, "eval_f1": 0.9177728397549259, "eval_loss": 0.21027454733848572, "eval_precision": 0.9179004728816644, "eval_recall": 0.9176843057440073, "eval_runtime": 31.9231, "eval_samples_per_second": 69.26, "eval_steps_per_second": 8.677, "step": 800 }, { "epoch": 5.248062015503876, "grad_norm": 5.338088035583496, "learning_rate": 1.9869036908760982e-05, "loss": 0.2332, "step": 850 }, { "epoch": 5.558139534883721, "grad_norm": 1.9332038164138794, "learning_rate": 1.981599080249447e-05, "loss": 0.2239, "step": 900 }, { "epoch": 5.558139534883721, "eval_accuracy": 0.9081863410221619, "eval_f1": 0.908987571769516, "eval_loss": 0.22980067133903503, "eval_precision": 0.9136294473473457, "eval_recall": 0.9081863410221619, "eval_runtime": 31.9612, "eval_samples_per_second": 69.178, "eval_steps_per_second": 8.667, "step": 900 }, { "epoch": 5.868217054263566, "grad_norm": 4.6057281494140625, "learning_rate": 1.9754043140625415e-05, "loss": 0.2216, "step": 950 }, { "epoch": 6.173643410852713, "grad_norm": 3.815474271774292, "learning_rate": 1.9683250099913514e-05, "loss": 0.1979, "step": 1000 }, { "epoch": 6.173643410852713, "eval_accuracy": 0.9208502939846224, "eval_f1": 0.9197449979522355, "eval_loss": 0.20585399866104126, "eval_precision": 0.9230342997728979, "eval_recall": 0.9208502939846224, "eval_runtime": 32.0879, "eval_samples_per_second": 68.904, "eval_steps_per_second": 8.633, "step": 1000 }, { "epoch": 6.4837209302325585, "grad_norm": 3.7233753204345703, "learning_rate": 1.960367587848211e-05, "loss": 0.2074, "step": 1050 }, { "epoch": 6.793798449612403, "grad_norm": 3.686628580093384, "learning_rate": 1.9515392637600583e-05, "loss": 0.2082, "step": 1100 }, { "epoch": 6.793798449612403, "eval_accuracy": 0.9262777023971054, "eval_f1": 0.926117263289722, "eval_loss": 0.17793285846710205, "eval_precision": 0.9261011200078554, "eval_recall": 0.9262777023971054, "eval_runtime": 31.9195, "eval_samples_per_second": 69.268, "eval_steps_per_second": 8.678, "step": 1100 }, { "epoch": 7.09922480620155, "grad_norm": 3.4548275470733643, "learning_rate": 1.9418480436245487e-05, "loss": 0.1998, "step": 1150 }, { "epoch": 7.409302325581395, "grad_norm": 1.4032052755355835, "learning_rate": 1.931302715849967e-05, "loss": 0.1723, "step": 1200 }, { "epoch": 7.409302325581395, "eval_accuracy": 0.9308005427408412, "eval_f1": 0.9301945813993165, "eval_loss": 0.1693263202905655, "eval_precision": 0.9315485344818776, "eval_recall": 0.9308005427408412, "eval_runtime": 31.8606, "eval_samples_per_second": 69.396, "eval_steps_per_second": 8.694, "step": 1200 }, { "epoch": 7.7193798449612405, "grad_norm": 2.225787401199341, "learning_rate": 1.9199128433855277e-05, "loss": 0.1864, "step": 1250 }, { "epoch": 8.024806201550387, "grad_norm": 4.083457946777344, "learning_rate": 1.9076887550492877e-05, "loss": 0.1877, "step": 1300 }, { "epoch": 8.024806201550387, "eval_accuracy": 0.9380370872908186, "eval_f1": 0.9381736344846147, "eval_loss": 0.168083056807518, "eval_precision": 0.9384861550793825, "eval_recall": 0.9380370872908186, "eval_runtime": 32.0939, "eval_samples_per_second": 68.892, "eval_steps_per_second": 8.631, "step": 1300 }, { "epoch": 8.334883720930232, "grad_norm": 5.380185127258301, "learning_rate": 1.894641536161537e-05, "loss": 0.1662, "step": 1350 }, { "epoch": 8.644961240310078, "grad_norm": 2.990643262863159, "learning_rate": 1.8807830184921636e-05, "loss": 0.2, "step": 1400 }, { "epoch": 8.644961240310078, "eval_accuracy": 0.9402985074626866, "eval_f1": 0.9401756807643212, "eval_loss": 0.14824680984020233, "eval_precision": 0.9401845264674767, "eval_recall": 0.9402985074626866, "eval_runtime": 31.8892, "eval_samples_per_second": 69.334, "eval_steps_per_second": 8.686, "step": 1400 }, { "epoch": 8.955038759689922, "grad_norm": 3.6064834594726562, "learning_rate": 1.8661257695311006e-05, "loss": 0.1621, "step": 1450 }, { "epoch": 9.26046511627907, "grad_norm": 5.486879825592041, "learning_rate": 1.8506830810915983e-05, "loss": 0.1642, "step": 1500 }, { "epoch": 9.26046511627907, "eval_accuracy": 0.9330619629127092, "eval_f1": 0.9322023134638141, "eval_loss": 0.16374026238918304, "eval_precision": 0.9352048827585053, "eval_recall": 0.9330619629127092, "eval_runtime": 32.0395, "eval_samples_per_second": 69.009, "eval_steps_per_second": 8.646, "step": 1500 }, { "epoch": 9.570542635658915, "grad_norm": 2.025575637817383, "learning_rate": 1.8344689572566436e-05, "loss": 0.1586, "step": 1550 }, { "epoch": 9.88062015503876, "grad_norm": 3.0411391258239746, "learning_rate": 1.8174981016794653e-05, "loss": 0.1525, "step": 1600 }, { "epoch": 9.88062015503876, "eval_accuracy": 0.942107643600181, "eval_f1": 0.94173840230828, "eval_loss": 0.14938268065452576, "eval_precision": 0.9424881755892641, "eval_recall": 0.942107643600181, "eval_runtime": 31.8834, "eval_samples_per_second": 69.347, "eval_steps_per_second": 8.688, "step": 1600 }, { "epoch": 10.186046511627907, "grad_norm": 3.4836244583129883, "learning_rate": 1.799785904249642e-05, "loss": 0.1649, "step": 1650 }, { "epoch": 10.496124031007753, "grad_norm": 3.605983257293701, "learning_rate": 1.7813484271368947e-05, "loss": 0.158, "step": 1700 }, { "epoch": 10.496124031007753, "eval_accuracy": 0.9484396200814111, "eval_f1": 0.947995545399902, "eval_loss": 0.1402619332075119, "eval_precision": 0.9494961047542965, "eval_recall": 0.9484396200814111, "eval_runtime": 32.0182, "eval_samples_per_second": 69.055, "eval_steps_per_second": 8.651, "step": 1700 }, { "epoch": 10.806201550387597, "grad_norm": 4.386783123016357, "learning_rate": 1.7622023902252378e-05, "loss": 0.1669, "step": 1750 }, { "epoch": 11.111627906976745, "grad_norm": 3.84214186668396, "learning_rate": 1.742365155950676e-05, "loss": 0.1327, "step": 1800 }, { "epoch": 11.111627906976745, "eval_accuracy": 0.9497964721845319, "eval_f1": 0.9498129231725488, "eval_loss": 0.13292284309864044, "eval_precision": 0.9498330407980847, "eval_recall": 0.9497964721845319, "eval_runtime": 32.0593, "eval_samples_per_second": 68.966, "eval_steps_per_second": 8.64, "step": 1800 }, { "epoch": 11.421705426356588, "grad_norm": 2.3866167068481445, "learning_rate": 1.7218547135562155e-05, "loss": 0.1679, "step": 1850 }, { "epoch": 11.731782945736434, "grad_norm": 5.185602188110352, "learning_rate": 1.700689662778458e-05, "loss": 0.1465, "step": 1900 }, { "epoch": 11.731782945736434, "eval_accuracy": 0.9525101763907734, "eval_f1": 0.9523724534109135, "eval_loss": 0.12334821373224258, "eval_precision": 0.9524932751901988, "eval_recall": 0.9525101763907734, "eval_runtime": 31.92, "eval_samples_per_second": 69.267, "eval_steps_per_second": 8.678, "step": 1900 }, { "epoch": 12.037209302325582, "grad_norm": 9.162732124328613, "learning_rate": 1.678889196980572e-05, "loss": 0.1291, "step": 1950 }, { "epoch": 12.347286821705426, "grad_norm": 5.636198043823242, "learning_rate": 1.6564730857469392e-05, "loss": 0.1311, "step": 2000 }, { "epoch": 12.347286821705426, "eval_accuracy": 0.9520578923563998, "eval_f1": 0.9520149841974469, "eval_loss": 0.12798894941806793, "eval_precision": 0.9519983389426212, "eval_recall": 0.9520578923563998, "eval_runtime": 31.9866, "eval_samples_per_second": 69.123, "eval_steps_per_second": 8.66, "step": 2000 }, { "epoch": 12.657364341085271, "grad_norm": 2.2612485885620117, "learning_rate": 1.633461656955259e-05, "loss": 0.1619, "step": 2050 }, { "epoch": 12.967441860465117, "grad_norm": 2.522465229034424, "learning_rate": 1.609875778342372e-05, "loss": 0.129, "step": 2100 }, { "epoch": 12.967441860465117, "eval_accuracy": 0.9556761646313885, "eval_f1": 0.9555744118131845, "eval_loss": 0.11725445836782455, "eval_precision": 0.9556383737245242, "eval_recall": 0.9556761646313885, "eval_runtime": 31.8471, "eval_samples_per_second": 69.426, "eval_steps_per_second": 8.698, "step": 2100 }, { "epoch": 13.272868217054263, "grad_norm": 7.201235294342041, "learning_rate": 1.5857368385805087e-05, "loss": 0.1351, "step": 2150 }, { "epoch": 13.582945736434109, "grad_norm": 4.276857852935791, "learning_rate": 1.561565191643536e-05, "loss": 0.1425, "step": 2200 }, { "epoch": 13.582945736434109, "eval_accuracy": 0.9552238805970149, "eval_f1": 0.9552090145107874, "eval_loss": 0.11896734684705734, "eval_precision": 0.9551978476090869, "eval_recall": 0.9552238805970149, "eval_runtime": 31.9931, "eval_samples_per_second": 69.109, "eval_steps_per_second": 8.658, "step": 2200 }, { "epoch": 13.893023255813954, "grad_norm": 3.126692295074463, "learning_rate": 1.5363962356243304e-05, "loss": 0.1249, "step": 2250 }, { "epoch": 14.1984496124031, "grad_norm": 2.0163676738739014, "learning_rate": 1.5107408528131196e-05, "loss": 0.1256, "step": 2300 }, { "epoch": 14.1984496124031, "eval_accuracy": 0.9565807327001357, "eval_f1": 0.9563271746916711, "eval_loss": 0.12249390035867691, "eval_precision": 0.9570420286206239, "eval_recall": 0.9565807327001357, "eval_runtime": 32.0483, "eval_samples_per_second": 68.99, "eval_steps_per_second": 8.643, "step": 2300 }, { "epoch": 14.508527131782946, "grad_norm": 2.3576133251190186, "learning_rate": 1.4846223085960681e-05, "loss": 0.1331, "step": 2350 }, { "epoch": 14.81860465116279, "grad_norm": 3.005863666534424, "learning_rate": 1.45806428837369e-05, "loss": 0.1461, "step": 2400 }, { "epoch": 14.81860465116279, "eval_accuracy": 0.9588421528720036, "eval_f1": 0.9587720391887182, "eval_loss": 0.11707326769828796, "eval_precision": 0.9587952797993504, "eval_recall": 0.9588421528720036, "eval_runtime": 31.9123, "eval_samples_per_second": 69.284, "eval_steps_per_second": 8.68, "step": 2400 }, { "epoch": 15.124031007751938, "grad_norm": 2.8450655937194824, "learning_rate": 1.4310908760819285e-05, "loss": 0.1193, "step": 2450 }, { "epoch": 15.434108527131784, "grad_norm": 2.168471097946167, "learning_rate": 1.4037265323518223e-05, "loss": 0.133, "step": 2500 }, { "epoch": 15.434108527131784, "eval_accuracy": 0.9547715965626413, "eval_f1": 0.9545783302992185, "eval_loss": 0.11651919037103653, "eval_precision": 0.9549254114560721, "eval_recall": 0.9547715965626413, "eval_runtime": 31.9187, "eval_samples_per_second": 69.27, "eval_steps_per_second": 8.678, "step": 2500 }, { "epoch": 15.744186046511627, "grad_norm": 4.06069278717041, "learning_rate": 1.375996072327573e-05, "loss": 0.117, "step": 2550 }, { "epoch": 16.049612403100774, "grad_norm": 0.5793169736862183, "learning_rate": 1.3479246431631214e-05, "loss": 0.1258, "step": 2600 }, { "epoch": 16.049612403100774, "eval_accuracy": 0.9479873360470376, "eval_f1": 0.9473924145380525, "eval_loss": 0.1302204728126526, "eval_precision": 0.9500128100803557, "eval_recall": 0.9479873360470376, "eval_runtime": 32.0659, "eval_samples_per_second": 68.952, "eval_steps_per_second": 8.638, "step": 2600 }, { "epoch": 16.35968992248062, "grad_norm": 3.8317296504974365, "learning_rate": 1.3195377012176449e-05, "loss": 0.1479, "step": 2650 }, { "epoch": 16.669767441860465, "grad_norm": 3.2260377407073975, "learning_rate": 1.2908609889706538e-05, "loss": 0.115, "step": 2700 }, { "epoch": 16.669767441860465, "eval_accuracy": 0.9534147444595206, "eval_f1": 0.9536709713857593, "eval_loss": 0.1319713443517685, "eval_precision": 0.9552310659222302, "eval_recall": 0.9534147444595206, "eval_runtime": 31.9757, "eval_samples_per_second": 69.146, "eval_steps_per_second": 8.663, "step": 2700 }, { "epoch": 16.97984496124031, "grad_norm": 1.9483188390731812, "learning_rate": 1.2619205116776244e-05, "loss": 0.1269, "step": 2750 }, { "epoch": 17.28527131782946, "grad_norm": 4.348479270935059, "learning_rate": 1.2327425137873315e-05, "loss": 0.1134, "step": 2800 }, { "epoch": 17.28527131782946, "eval_accuracy": 0.9552238805970149, "eval_f1": 0.9548701351406266, "eval_loss": 0.117097869515419, "eval_precision": 0.9561949341655123, "eval_recall": 0.9552238805970149, "eval_runtime": 31.9852, "eval_samples_per_second": 69.126, "eval_steps_per_second": 8.66, "step": 2800 }, { "epoch": 17.5953488372093, "grad_norm": 5.097657203674316, "learning_rate": 1.2033534551422718e-05, "loss": 0.1069, "step": 2850 }, { "epoch": 17.905426356589146, "grad_norm": 2.5790622234344482, "learning_rate": 1.173779986983762e-05, "loss": 0.1069, "step": 2900 }, { "epoch": 17.905426356589146, "eval_accuracy": 0.9597467209407508, "eval_f1": 0.9595276897831222, "eval_loss": 0.11270730942487717, "eval_precision": 0.9601617456499858, "eval_recall": 0.9597467209407508, "eval_runtime": 32.0361, "eval_samples_per_second": 69.016, "eval_steps_per_second": 8.646, "step": 2900 }, { "epoch": 17.905426356589146, "step": 2900, "total_flos": 1.0232905043346113e+19, "train_loss": 0.2441097328580659, "train_runtime": 4508.9942, "train_samples_per_second": 41.182, "train_steps_per_second": 1.285 } ], "logging_steps": 50, "max_steps": 5796, "num_input_tokens_seen": 0, "num_train_epochs": 36, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0232905043346113e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }