|
{ |
|
"best_metric": 0.9595276897831222, |
|
"best_model_checkpoint": "dino-vitb8-finetuned-stroke-binary/checkpoint-2900", |
|
"epoch": 17.905426356589146, |
|
"eval_steps": 100, |
|
"global_step": 2900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.31007751937984496, |
|
"grad_norm": 6.24950647354126, |
|
"learning_rate": 1.6896551724137933e-06, |
|
"loss": 0.8366, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6201550387596899, |
|
"grad_norm": 5.115139007568359, |
|
"learning_rate": 3.4137931034482764e-06, |
|
"loss": 0.7965, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6201550387596899, |
|
"eval_accuracy": 0.5382180009045681, |
|
"eval_f1": 0.5057986540639501, |
|
"eval_loss": 0.8312332630157471, |
|
"eval_precision": 0.496820169669259, |
|
"eval_recall": 0.5382180009045681, |
|
"eval_runtime": 32.0161, |
|
"eval_samples_per_second": 69.059, |
|
"eval_steps_per_second": 8.652, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9302325581395349, |
|
"grad_norm": 8.835868835449219, |
|
"learning_rate": 5.137931034482759e-06, |
|
"loss": 0.7713, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.235658914728682, |
|
"grad_norm": 9.565229415893555, |
|
"learning_rate": 6.862068965517242e-06, |
|
"loss": 0.6839, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.235658914728682, |
|
"eval_accuracy": 0.6246042514699232, |
|
"eval_f1": 0.5749818840100107, |
|
"eval_loss": 0.6796302199363708, |
|
"eval_precision": 0.5990946901610464, |
|
"eval_recall": 0.6246042514699232, |
|
"eval_runtime": 32.1198, |
|
"eval_samples_per_second": 68.836, |
|
"eval_steps_per_second": 8.624, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.5457364341085271, |
|
"grad_norm": 8.520139694213867, |
|
"learning_rate": 8.586206896551726e-06, |
|
"loss": 0.6219, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.8558139534883722, |
|
"grad_norm": 6.877398490905762, |
|
"learning_rate": 1.0310344827586208e-05, |
|
"loss": 0.5662, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.8558139534883722, |
|
"eval_accuracy": 0.7317955676164631, |
|
"eval_f1": 0.7119348319857804, |
|
"eval_loss": 0.5344149470329285, |
|
"eval_precision": 0.7376504962044594, |
|
"eval_recall": 0.7317955676164631, |
|
"eval_runtime": 32.0121, |
|
"eval_samples_per_second": 69.068, |
|
"eval_steps_per_second": 8.653, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.1612403100775195, |
|
"grad_norm": 6.749499797821045, |
|
"learning_rate": 1.203448275862069e-05, |
|
"loss": 0.4908, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.471317829457364, |
|
"grad_norm": 11.308539390563965, |
|
"learning_rate": 1.3758620689655173e-05, |
|
"loss": 0.4408, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.471317829457364, |
|
"eval_accuracy": 0.8123021257349615, |
|
"eval_f1": 0.8082421593111531, |
|
"eval_loss": 0.40815532207489014, |
|
"eval_precision": 0.8120443114553312, |
|
"eval_recall": 0.8123021257349615, |
|
"eval_runtime": 31.919, |
|
"eval_samples_per_second": 69.269, |
|
"eval_steps_per_second": 8.678, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.781395348837209, |
|
"grad_norm": 3.188601493835449, |
|
"learning_rate": 1.548275862068966e-05, |
|
"loss": 0.3761, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.0868217054263565, |
|
"grad_norm": 9.05437183380127, |
|
"learning_rate": 1.7206896551724138e-05, |
|
"loss": 0.3611, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0868217054263565, |
|
"eval_accuracy": 0.8602442333785617, |
|
"eval_f1": 0.859700206427454, |
|
"eval_loss": 0.333509624004364, |
|
"eval_precision": 0.8595584495556567, |
|
"eval_recall": 0.8602442333785617, |
|
"eval_runtime": 32.0233, |
|
"eval_samples_per_second": 69.044, |
|
"eval_steps_per_second": 8.65, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.3968992248062015, |
|
"grad_norm": 2.100419759750366, |
|
"learning_rate": 1.893103448275862e-05, |
|
"loss": 0.3072, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.7069767441860466, |
|
"grad_norm": 6.307981014251709, |
|
"learning_rate": 1.9999345217525786e-05, |
|
"loss": 0.3121, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.7069767441860466, |
|
"eval_accuracy": 0.8860244233378561, |
|
"eval_f1": 0.8831883391472106, |
|
"eval_loss": 0.27464836835861206, |
|
"eval_precision": 0.8913973600585926, |
|
"eval_recall": 0.8860244233378561, |
|
"eval_runtime": 31.9786, |
|
"eval_samples_per_second": 69.14, |
|
"eval_steps_per_second": 8.662, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.0124031007751935, |
|
"grad_norm": 4.161785125732422, |
|
"learning_rate": 1.999136563788985e-05, |
|
"loss": 0.2786, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.322480620155039, |
|
"grad_norm": 5.515849590301514, |
|
"learning_rate": 1.997432546533359e-05, |
|
"loss": 0.2614, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.322480620155039, |
|
"eval_accuracy": 0.9050203527815468, |
|
"eval_f1": 0.9039775360154476, |
|
"eval_loss": 0.2299066036939621, |
|
"eval_precision": 0.90576736213102, |
|
"eval_recall": 0.9050203527815468, |
|
"eval_runtime": 31.9694, |
|
"eval_samples_per_second": 69.16, |
|
"eval_steps_per_second": 8.665, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.632558139534884, |
|
"grad_norm": 2.6630172729492188, |
|
"learning_rate": 1.9948240152606154e-05, |
|
"loss": 0.2622, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.942635658914728, |
|
"grad_norm": 4.852052211761475, |
|
"learning_rate": 1.9913133354972336e-05, |
|
"loss": 0.242, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.942635658914728, |
|
"eval_accuracy": 0.9176843057440073, |
|
"eval_f1": 0.9177728397549259, |
|
"eval_loss": 0.21027454733848572, |
|
"eval_precision": 0.9179004728816644, |
|
"eval_recall": 0.9176843057440073, |
|
"eval_runtime": 31.9231, |
|
"eval_samples_per_second": 69.26, |
|
"eval_steps_per_second": 8.677, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.248062015503876, |
|
"grad_norm": 5.338088035583496, |
|
"learning_rate": 1.9869036908760982e-05, |
|
"loss": 0.2332, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 5.558139534883721, |
|
"grad_norm": 1.9332038164138794, |
|
"learning_rate": 1.981599080249447e-05, |
|
"loss": 0.2239, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.558139534883721, |
|
"eval_accuracy": 0.9081863410221619, |
|
"eval_f1": 0.908987571769516, |
|
"eval_loss": 0.22980067133903503, |
|
"eval_precision": 0.9136294473473457, |
|
"eval_recall": 0.9081863410221619, |
|
"eval_runtime": 31.9612, |
|
"eval_samples_per_second": 69.178, |
|
"eval_steps_per_second": 8.667, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.868217054263566, |
|
"grad_norm": 4.6057281494140625, |
|
"learning_rate": 1.9754043140625415e-05, |
|
"loss": 0.2216, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 6.173643410852713, |
|
"grad_norm": 3.815474271774292, |
|
"learning_rate": 1.9683250099913514e-05, |
|
"loss": 0.1979, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.173643410852713, |
|
"eval_accuracy": 0.9208502939846224, |
|
"eval_f1": 0.9197449979522355, |
|
"eval_loss": 0.20585399866104126, |
|
"eval_precision": 0.9230342997728979, |
|
"eval_recall": 0.9208502939846224, |
|
"eval_runtime": 32.0879, |
|
"eval_samples_per_second": 68.904, |
|
"eval_steps_per_second": 8.633, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.4837209302325585, |
|
"grad_norm": 3.7233753204345703, |
|
"learning_rate": 1.960367587848211e-05, |
|
"loss": 0.2074, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 6.793798449612403, |
|
"grad_norm": 3.686628580093384, |
|
"learning_rate": 1.9515392637600583e-05, |
|
"loss": 0.2082, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.793798449612403, |
|
"eval_accuracy": 0.9262777023971054, |
|
"eval_f1": 0.926117263289722, |
|
"eval_loss": 0.17793285846710205, |
|
"eval_precision": 0.9261011200078554, |
|
"eval_recall": 0.9262777023971054, |
|
"eval_runtime": 31.9195, |
|
"eval_samples_per_second": 69.268, |
|
"eval_steps_per_second": 8.678, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.09922480620155, |
|
"grad_norm": 3.4548275470733643, |
|
"learning_rate": 1.9418480436245487e-05, |
|
"loss": 0.1998, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 7.409302325581395, |
|
"grad_norm": 1.4032052755355835, |
|
"learning_rate": 1.931302715849967e-05, |
|
"loss": 0.1723, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.409302325581395, |
|
"eval_accuracy": 0.9308005427408412, |
|
"eval_f1": 0.9301945813993165, |
|
"eval_loss": 0.1693263202905655, |
|
"eval_precision": 0.9315485344818776, |
|
"eval_recall": 0.9308005427408412, |
|
"eval_runtime": 31.8606, |
|
"eval_samples_per_second": 69.396, |
|
"eval_steps_per_second": 8.694, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.7193798449612405, |
|
"grad_norm": 2.225787401199341, |
|
"learning_rate": 1.9199128433855277e-05, |
|
"loss": 0.1864, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 8.024806201550387, |
|
"grad_norm": 4.083457946777344, |
|
"learning_rate": 1.9076887550492877e-05, |
|
"loss": 0.1877, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.024806201550387, |
|
"eval_accuracy": 0.9380370872908186, |
|
"eval_f1": 0.9381736344846147, |
|
"eval_loss": 0.168083056807518, |
|
"eval_precision": 0.9384861550793825, |
|
"eval_recall": 0.9380370872908186, |
|
"eval_runtime": 32.0939, |
|
"eval_samples_per_second": 68.892, |
|
"eval_steps_per_second": 8.631, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.334883720930232, |
|
"grad_norm": 5.380185127258301, |
|
"learning_rate": 1.894641536161537e-05, |
|
"loss": 0.1662, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 8.644961240310078, |
|
"grad_norm": 2.990643262863159, |
|
"learning_rate": 1.8807830184921636e-05, |
|
"loss": 0.2, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.644961240310078, |
|
"eval_accuracy": 0.9402985074626866, |
|
"eval_f1": 0.9401756807643212, |
|
"eval_loss": 0.14824680984020233, |
|
"eval_precision": 0.9401845264674767, |
|
"eval_recall": 0.9402985074626866, |
|
"eval_runtime": 31.8892, |
|
"eval_samples_per_second": 69.334, |
|
"eval_steps_per_second": 8.686, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.955038759689922, |
|
"grad_norm": 3.6064834594726562, |
|
"learning_rate": 1.8661257695311006e-05, |
|
"loss": 0.1621, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 9.26046511627907, |
|
"grad_norm": 5.486879825592041, |
|
"learning_rate": 1.8506830810915983e-05, |
|
"loss": 0.1642, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.26046511627907, |
|
"eval_accuracy": 0.9330619629127092, |
|
"eval_f1": 0.9322023134638141, |
|
"eval_loss": 0.16374026238918304, |
|
"eval_precision": 0.9352048827585053, |
|
"eval_recall": 0.9330619629127092, |
|
"eval_runtime": 32.0395, |
|
"eval_samples_per_second": 69.009, |
|
"eval_steps_per_second": 8.646, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.570542635658915, |
|
"grad_norm": 2.025575637817383, |
|
"learning_rate": 1.8344689572566436e-05, |
|
"loss": 0.1586, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 9.88062015503876, |
|
"grad_norm": 3.0411391258239746, |
|
"learning_rate": 1.8174981016794653e-05, |
|
"loss": 0.1525, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 9.88062015503876, |
|
"eval_accuracy": 0.942107643600181, |
|
"eval_f1": 0.94173840230828, |
|
"eval_loss": 0.14938268065452576, |
|
"eval_precision": 0.9424881755892641, |
|
"eval_recall": 0.942107643600181, |
|
"eval_runtime": 31.8834, |
|
"eval_samples_per_second": 69.347, |
|
"eval_steps_per_second": 8.688, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 10.186046511627907, |
|
"grad_norm": 3.4836244583129883, |
|
"learning_rate": 1.799785904249642e-05, |
|
"loss": 0.1649, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 10.496124031007753, |
|
"grad_norm": 3.605983257293701, |
|
"learning_rate": 1.7813484271368947e-05, |
|
"loss": 0.158, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 10.496124031007753, |
|
"eval_accuracy": 0.9484396200814111, |
|
"eval_f1": 0.947995545399902, |
|
"eval_loss": 0.1402619332075119, |
|
"eval_precision": 0.9494961047542965, |
|
"eval_recall": 0.9484396200814111, |
|
"eval_runtime": 32.0182, |
|
"eval_samples_per_second": 69.055, |
|
"eval_steps_per_second": 8.651, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 10.806201550387597, |
|
"grad_norm": 4.386783123016357, |
|
"learning_rate": 1.7622023902252378e-05, |
|
"loss": 0.1669, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 11.111627906976745, |
|
"grad_norm": 3.84214186668396, |
|
"learning_rate": 1.742365155950676e-05, |
|
"loss": 0.1327, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 11.111627906976745, |
|
"eval_accuracy": 0.9497964721845319, |
|
"eval_f1": 0.9498129231725488, |
|
"eval_loss": 0.13292284309864044, |
|
"eval_precision": 0.9498330407980847, |
|
"eval_recall": 0.9497964721845319, |
|
"eval_runtime": 32.0593, |
|
"eval_samples_per_second": 68.966, |
|
"eval_steps_per_second": 8.64, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 11.421705426356588, |
|
"grad_norm": 2.3866167068481445, |
|
"learning_rate": 1.7218547135562155e-05, |
|
"loss": 0.1679, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 11.731782945736434, |
|
"grad_norm": 5.185602188110352, |
|
"learning_rate": 1.700689662778458e-05, |
|
"loss": 0.1465, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 11.731782945736434, |
|
"eval_accuracy": 0.9525101763907734, |
|
"eval_f1": 0.9523724534109135, |
|
"eval_loss": 0.12334821373224258, |
|
"eval_precision": 0.9524932751901988, |
|
"eval_recall": 0.9525101763907734, |
|
"eval_runtime": 31.92, |
|
"eval_samples_per_second": 69.267, |
|
"eval_steps_per_second": 8.678, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 12.037209302325582, |
|
"grad_norm": 9.162732124328613, |
|
"learning_rate": 1.678889196980572e-05, |
|
"loss": 0.1291, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 12.347286821705426, |
|
"grad_norm": 5.636198043823242, |
|
"learning_rate": 1.6564730857469392e-05, |
|
"loss": 0.1311, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 12.347286821705426, |
|
"eval_accuracy": 0.9520578923563998, |
|
"eval_f1": 0.9520149841974469, |
|
"eval_loss": 0.12798894941806793, |
|
"eval_precision": 0.9519983389426212, |
|
"eval_recall": 0.9520578923563998, |
|
"eval_runtime": 31.9866, |
|
"eval_samples_per_second": 69.123, |
|
"eval_steps_per_second": 8.66, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 12.657364341085271, |
|
"grad_norm": 2.2612485885620117, |
|
"learning_rate": 1.633461656955259e-05, |
|
"loss": 0.1619, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 12.967441860465117, |
|
"grad_norm": 2.522465229034424, |
|
"learning_rate": 1.609875778342372e-05, |
|
"loss": 0.129, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 12.967441860465117, |
|
"eval_accuracy": 0.9556761646313885, |
|
"eval_f1": 0.9555744118131845, |
|
"eval_loss": 0.11725445836782455, |
|
"eval_precision": 0.9556383737245242, |
|
"eval_recall": 0.9556761646313885, |
|
"eval_runtime": 31.8471, |
|
"eval_samples_per_second": 69.426, |
|
"eval_steps_per_second": 8.698, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 13.272868217054263, |
|
"grad_norm": 7.201235294342041, |
|
"learning_rate": 1.5857368385805087e-05, |
|
"loss": 0.1351, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 13.582945736434109, |
|
"grad_norm": 4.276857852935791, |
|
"learning_rate": 1.561565191643536e-05, |
|
"loss": 0.1425, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 13.582945736434109, |
|
"eval_accuracy": 0.9552238805970149, |
|
"eval_f1": 0.9552090145107874, |
|
"eval_loss": 0.11896734684705734, |
|
"eval_precision": 0.9551978476090869, |
|
"eval_recall": 0.9552238805970149, |
|
"eval_runtime": 31.9931, |
|
"eval_samples_per_second": 69.109, |
|
"eval_steps_per_second": 8.658, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 13.893023255813954, |
|
"grad_norm": 3.126692295074463, |
|
"learning_rate": 1.5363962356243304e-05, |
|
"loss": 0.1249, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 14.1984496124031, |
|
"grad_norm": 2.0163676738739014, |
|
"learning_rate": 1.5107408528131196e-05, |
|
"loss": 0.1256, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 14.1984496124031, |
|
"eval_accuracy": 0.9565807327001357, |
|
"eval_f1": 0.9563271746916711, |
|
"eval_loss": 0.12249390035867691, |
|
"eval_precision": 0.9570420286206239, |
|
"eval_recall": 0.9565807327001357, |
|
"eval_runtime": 32.0483, |
|
"eval_samples_per_second": 68.99, |
|
"eval_steps_per_second": 8.643, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 14.508527131782946, |
|
"grad_norm": 2.3576133251190186, |
|
"learning_rate": 1.4846223085960681e-05, |
|
"loss": 0.1331, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 14.81860465116279, |
|
"grad_norm": 3.005863666534424, |
|
"learning_rate": 1.45806428837369e-05, |
|
"loss": 0.1461, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 14.81860465116279, |
|
"eval_accuracy": 0.9588421528720036, |
|
"eval_f1": 0.9587720391887182, |
|
"eval_loss": 0.11707326769828796, |
|
"eval_precision": 0.9587952797993504, |
|
"eval_recall": 0.9588421528720036, |
|
"eval_runtime": 31.9123, |
|
"eval_samples_per_second": 69.284, |
|
"eval_steps_per_second": 8.68, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 15.124031007751938, |
|
"grad_norm": 2.8450655937194824, |
|
"learning_rate": 1.4310908760819285e-05, |
|
"loss": 0.1193, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 15.434108527131784, |
|
"grad_norm": 2.168471097946167, |
|
"learning_rate": 1.4037265323518223e-05, |
|
"loss": 0.133, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 15.434108527131784, |
|
"eval_accuracy": 0.9547715965626413, |
|
"eval_f1": 0.9545783302992185, |
|
"eval_loss": 0.11651919037103653, |
|
"eval_precision": 0.9549254114560721, |
|
"eval_recall": 0.9547715965626413, |
|
"eval_runtime": 31.9187, |
|
"eval_samples_per_second": 69.27, |
|
"eval_steps_per_second": 8.678, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 15.744186046511627, |
|
"grad_norm": 4.06069278717041, |
|
"learning_rate": 1.375996072327573e-05, |
|
"loss": 0.117, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 16.049612403100774, |
|
"grad_norm": 0.5793169736862183, |
|
"learning_rate": 1.3479246431631214e-05, |
|
"loss": 0.1258, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 16.049612403100774, |
|
"eval_accuracy": 0.9479873360470376, |
|
"eval_f1": 0.9473924145380525, |
|
"eval_loss": 0.1302204728126526, |
|
"eval_precision": 0.9500128100803557, |
|
"eval_recall": 0.9479873360470376, |
|
"eval_runtime": 32.0659, |
|
"eval_samples_per_second": 68.952, |
|
"eval_steps_per_second": 8.638, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 16.35968992248062, |
|
"grad_norm": 3.8317296504974365, |
|
"learning_rate": 1.3195377012176449e-05, |
|
"loss": 0.1479, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 16.669767441860465, |
|
"grad_norm": 3.2260377407073975, |
|
"learning_rate": 1.2908609889706538e-05, |
|
"loss": 0.115, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 16.669767441860465, |
|
"eval_accuracy": 0.9534147444595206, |
|
"eval_f1": 0.9536709713857593, |
|
"eval_loss": 0.1319713443517685, |
|
"eval_precision": 0.9552310659222302, |
|
"eval_recall": 0.9534147444595206, |
|
"eval_runtime": 31.9757, |
|
"eval_samples_per_second": 69.146, |
|
"eval_steps_per_second": 8.663, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 16.97984496124031, |
|
"grad_norm": 1.9483188390731812, |
|
"learning_rate": 1.2619205116776244e-05, |
|
"loss": 0.1269, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 17.28527131782946, |
|
"grad_norm": 4.348479270935059, |
|
"learning_rate": 1.2327425137873315e-05, |
|
"loss": 0.1134, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 17.28527131782946, |
|
"eval_accuracy": 0.9552238805970149, |
|
"eval_f1": 0.9548701351406266, |
|
"eval_loss": 0.117097869515419, |
|
"eval_precision": 0.9561949341655123, |
|
"eval_recall": 0.9552238805970149, |
|
"eval_runtime": 31.9852, |
|
"eval_samples_per_second": 69.126, |
|
"eval_steps_per_second": 8.66, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 17.5953488372093, |
|
"grad_norm": 5.097657203674316, |
|
"learning_rate": 1.2033534551422718e-05, |
|
"loss": 0.1069, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 17.905426356589146, |
|
"grad_norm": 2.5790622234344482, |
|
"learning_rate": 1.173779986983762e-05, |
|
"loss": 0.1069, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 17.905426356589146, |
|
"eval_accuracy": 0.9597467209407508, |
|
"eval_f1": 0.9595276897831222, |
|
"eval_loss": 0.11270730942487717, |
|
"eval_precision": 0.9601617456499858, |
|
"eval_recall": 0.9597467209407508, |
|
"eval_runtime": 32.0361, |
|
"eval_samples_per_second": 69.016, |
|
"eval_steps_per_second": 8.646, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 17.905426356589146, |
|
"step": 2900, |
|
"total_flos": 1.0232905043346113e+19, |
|
"train_loss": 0.2441097328580659, |
|
"train_runtime": 4508.9942, |
|
"train_samples_per_second": 41.182, |
|
"train_steps_per_second": 1.285 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 5796, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 36, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0232905043346113e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|