|
{ |
|
"best_metric": 0.07892899960279465, |
|
"best_model_checkpoint": "./modernbert_ner_model_20250225/checkpoint-8900", |
|
"epoch": 3.834554071520896, |
|
"eval_steps": 100, |
|
"global_step": 8900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.043084877208099955, |
|
"eval_accuracy": 0.935206015281409, |
|
"eval_f1": 0.5963316214233308, |
|
"eval_loss": 0.17664727568626404, |
|
"eval_precision": 0.6165983917463207, |
|
"eval_recall": 0.5773547378889047, |
|
"eval_runtime": 5.1315, |
|
"eval_samples_per_second": 584.63, |
|
"eval_steps_per_second": 2.923, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08616975441619991, |
|
"eval_accuracy": 0.9478526985671145, |
|
"eval_f1": 0.6602559467862049, |
|
"eval_loss": 0.1430799961090088, |
|
"eval_precision": 0.6722614840989399, |
|
"eval_recall": 0.6486716863190795, |
|
"eval_runtime": 5.1042, |
|
"eval_samples_per_second": 587.755, |
|
"eval_steps_per_second": 2.939, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12925463162429987, |
|
"eval_accuracy": 0.954013903244766, |
|
"eval_f1": 0.6945600584154802, |
|
"eval_loss": 0.12783046066761017, |
|
"eval_precision": 0.7145432692307693, |
|
"eval_recall": 0.6756641568404603, |
|
"eval_runtime": 5.1393, |
|
"eval_samples_per_second": 583.739, |
|
"eval_steps_per_second": 2.919, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17233950883239982, |
|
"eval_accuracy": 0.9566587624896131, |
|
"eval_f1": 0.710146001630475, |
|
"eval_loss": 0.12156905978918076, |
|
"eval_precision": 0.7423303377750232, |
|
"eval_recall": 0.6806364540417673, |
|
"eval_runtime": 5.1439, |
|
"eval_samples_per_second": 583.215, |
|
"eval_steps_per_second": 2.916, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2154243860404998, |
|
"grad_norm": 0.1655338853597641, |
|
"learning_rate": 0.00029463593278759154, |
|
"loss": 0.2008, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2154243860404998, |
|
"eval_accuracy": 0.959161751889909, |
|
"eval_f1": 0.7199349545420948, |
|
"eval_loss": 0.112856425344944, |
|
"eval_precision": 0.7503852080123267, |
|
"eval_recall": 0.6918596391532889, |
|
"eval_runtime": 5.1102, |
|
"eval_samples_per_second": 587.066, |
|
"eval_steps_per_second": 2.935, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.25850926324859974, |
|
"eval_accuracy": 0.9605500496544456, |
|
"eval_f1": 0.7329311359623308, |
|
"eval_loss": 0.10928678512573242, |
|
"eval_precision": 0.7601098733404548, |
|
"eval_recall": 0.7076289245631482, |
|
"eval_runtime": 5.1042, |
|
"eval_samples_per_second": 587.749, |
|
"eval_steps_per_second": 2.939, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3015941404566997, |
|
"eval_accuracy": 0.9617863440141059, |
|
"eval_f1": 0.7338403041825095, |
|
"eval_loss": 0.10559003055095673, |
|
"eval_precision": 0.7560644869670031, |
|
"eval_recall": 0.7128853530331013, |
|
"eval_runtime": 5.1458, |
|
"eval_samples_per_second": 583.005, |
|
"eval_steps_per_second": 2.915, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.34467901766479964, |
|
"eval_accuracy": 0.9620498165825582, |
|
"eval_f1": 0.7305187678349309, |
|
"eval_loss": 0.10398514568805695, |
|
"eval_precision": 0.7531683765841883, |
|
"eval_recall": 0.7091916465407018, |
|
"eval_runtime": 5.1291, |
|
"eval_samples_per_second": 584.896, |
|
"eval_steps_per_second": 2.924, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.38776389487289964, |
|
"eval_accuracy": 0.9628402342879148, |
|
"eval_f1": 0.7411920140927774, |
|
"eval_loss": 0.101581871509552, |
|
"eval_precision": 0.7667425968109339, |
|
"eval_recall": 0.717289387697116, |
|
"eval_runtime": 5.1388, |
|
"eval_samples_per_second": 583.791, |
|
"eval_steps_per_second": 2.919, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4308487720809996, |
|
"grad_norm": 0.1194114089012146, |
|
"learning_rate": 0.00028925032313657903, |
|
"loss": 0.1049, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4308487720809996, |
|
"eval_accuracy": 0.9633671794248191, |
|
"eval_f1": 0.7432511737089201, |
|
"eval_loss": 0.10185921937227249, |
|
"eval_precision": 0.7683907174275747, |
|
"eval_recall": 0.719704503480608, |
|
"eval_runtime": 5.1836, |
|
"eval_samples_per_second": 578.743, |
|
"eval_steps_per_second": 2.894, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.47393364928909953, |
|
"eval_accuracy": 0.9641271964492004, |
|
"eval_f1": 0.7452248016456068, |
|
"eval_loss": 0.09846850484609604, |
|
"eval_precision": 0.7716415639738323, |
|
"eval_recall": 0.7205568972865464, |
|
"eval_runtime": 5.145, |
|
"eval_samples_per_second": 583.095, |
|
"eval_steps_per_second": 2.915, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5170185264971995, |
|
"eval_accuracy": 0.9632151760199429, |
|
"eval_f1": 0.7427463372594082, |
|
"eval_loss": 0.09953544288873672, |
|
"eval_precision": 0.7510530137981118, |
|
"eval_recall": 0.7346213950845291, |
|
"eval_runtime": 5.1262, |
|
"eval_samples_per_second": 585.225, |
|
"eval_steps_per_second": 2.926, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5601034037052994, |
|
"eval_accuracy": 0.9641575971301757, |
|
"eval_f1": 0.7527304499781564, |
|
"eval_loss": 0.09697850793600082, |
|
"eval_precision": 0.7720687079910381, |
|
"eval_recall": 0.7343372638158829, |
|
"eval_runtime": 5.1395, |
|
"eval_samples_per_second": 583.715, |
|
"eval_steps_per_second": 2.919, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6031882809133994, |
|
"eval_accuracy": 0.9648466792322815, |
|
"eval_f1": 0.7408794666267137, |
|
"eval_loss": 0.09701889753341675, |
|
"eval_precision": 0.783676703645008, |
|
"eval_recall": 0.7025145617275181, |
|
"eval_runtime": 5.1321, |
|
"eval_samples_per_second": 584.561, |
|
"eval_steps_per_second": 2.923, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6462731581214993, |
|
"grad_norm": 0.11641442030668259, |
|
"learning_rate": 0.0002838647134855665, |
|
"loss": 0.0955, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6462731581214993, |
|
"eval_accuracy": 0.9641069293285502, |
|
"eval_f1": 0.7526039742926792, |
|
"eval_loss": 0.0987541675567627, |
|
"eval_precision": 0.7839335180055401, |
|
"eval_recall": 0.7236823412416536, |
|
"eval_runtime": 5.1118, |
|
"eval_samples_per_second": 586.873, |
|
"eval_steps_per_second": 2.934, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6893580353295993, |
|
"eval_accuracy": 0.9651101518007337, |
|
"eval_f1": 0.7509688531649203, |
|
"eval_loss": 0.09490419179201126, |
|
"eval_precision": 0.7588107324147934, |
|
"eval_recall": 0.7432873987782356, |
|
"eval_runtime": 5.0796, |
|
"eval_samples_per_second": 590.593, |
|
"eval_steps_per_second": 2.953, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.7324429125376992, |
|
"eval_accuracy": 0.9662653776777933, |
|
"eval_f1": 0.7569454545454545, |
|
"eval_loss": 0.09300602227449417, |
|
"eval_precision": 0.7754433020414245, |
|
"eval_recall": 0.7393095610171899, |
|
"eval_runtime": 5.1475, |
|
"eval_samples_per_second": 582.81, |
|
"eval_steps_per_second": 2.914, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7755277897457993, |
|
"eval_accuracy": 0.9657992339028394, |
|
"eval_f1": 0.7599940775836542, |
|
"eval_loss": 0.09315136820077896, |
|
"eval_precision": 0.793476580615242, |
|
"eval_recall": 0.7292229009802529, |
|
"eval_runtime": 5.8376, |
|
"eval_samples_per_second": 513.907, |
|
"eval_steps_per_second": 2.57, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.8186126669538992, |
|
"eval_accuracy": 0.9664883160049452, |
|
"eval_f1": 0.754410779423807, |
|
"eval_loss": 0.09128749370574951, |
|
"eval_precision": 0.792461682827651, |
|
"eval_recall": 0.7198465691149311, |
|
"eval_runtime": 5.152, |
|
"eval_samples_per_second": 582.301, |
|
"eval_steps_per_second": 2.912, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8616975441619992, |
|
"grad_norm": 0.10880027711391449, |
|
"learning_rate": 0.00027847910383455406, |
|
"loss": 0.0906, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8616975441619992, |
|
"eval_accuracy": 0.9675118055977787, |
|
"eval_f1": 0.7641537107656204, |
|
"eval_loss": 0.0904640182852745, |
|
"eval_precision": 0.7897529179930272, |
|
"eval_recall": 0.7401619548231283, |
|
"eval_runtime": 5.1464, |
|
"eval_samples_per_second": 582.93, |
|
"eval_steps_per_second": 2.915, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9047824213700991, |
|
"eval_accuracy": 0.966711254332097, |
|
"eval_f1": 0.7613536478171697, |
|
"eval_loss": 0.09170496463775635, |
|
"eval_precision": 0.7858763042492061, |
|
"eval_recall": 0.7383151015769286, |
|
"eval_runtime": 5.0869, |
|
"eval_samples_per_second": 589.746, |
|
"eval_steps_per_second": 2.949, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"eval_accuracy": 0.9671064631847753, |
|
"eval_f1": 0.754022988505747, |
|
"eval_loss": 0.09009002894163132, |
|
"eval_precision": 0.7887061743717034, |
|
"eval_recall": 0.722261684898423, |
|
"eval_runtime": 5.1309, |
|
"eval_samples_per_second": 584.697, |
|
"eval_steps_per_second": 2.923, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.990952175786299, |
|
"eval_accuracy": 0.9677651446059058, |
|
"eval_f1": 0.7611605527819985, |
|
"eval_loss": 0.08897808939218521, |
|
"eval_precision": 0.775582424063698, |
|
"eval_recall": 0.7472652365392811, |
|
"eval_runtime": 5.1531, |
|
"eval_samples_per_second": 582.168, |
|
"eval_steps_per_second": 2.911, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.034037052994399, |
|
"eval_accuracy": 0.9675624733994042, |
|
"eval_f1": 0.7690079652425779, |
|
"eval_loss": 0.08915847539901733, |
|
"eval_precision": 0.7842268498006203, |
|
"eval_recall": 0.754368518255434, |
|
"eval_runtime": 5.145, |
|
"eval_samples_per_second": 583.086, |
|
"eval_steps_per_second": 2.915, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.077121930202499, |
|
"grad_norm": 0.09866651147603989, |
|
"learning_rate": 0.00027309349418354155, |
|
"loss": 0.0842, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.077121930202499, |
|
"eval_accuracy": 0.9678664802091567, |
|
"eval_f1": 0.7673179396092362, |
|
"eval_loss": 0.08764609694480896, |
|
"eval_precision": 0.8008651320871312, |
|
"eval_recall": 0.7364682483307288, |
|
"eval_runtime": 5.0632, |
|
"eval_samples_per_second": 592.507, |
|
"eval_steps_per_second": 2.963, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.1202068074105989, |
|
"eval_accuracy": 0.9675320727184289, |
|
"eval_f1": 0.7698676720632808, |
|
"eval_loss": 0.08784696459770203, |
|
"eval_precision": 0.8025585696670777, |
|
"eval_recall": 0.7397357579201591, |
|
"eval_runtime": 5.1556, |
|
"eval_samples_per_second": 581.896, |
|
"eval_steps_per_second": 2.909, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.163291684618699, |
|
"eval_accuracy": 0.9681603534585841, |
|
"eval_f1": 0.7680871421211453, |
|
"eval_loss": 0.08904842287302017, |
|
"eval_precision": 0.7968845448992059, |
|
"eval_recall": 0.7412984798977127, |
|
"eval_runtime": 5.109, |
|
"eval_samples_per_second": 587.204, |
|
"eval_steps_per_second": 2.936, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.2063765618267988, |
|
"eval_accuracy": 0.9686467643541882, |
|
"eval_f1": 0.7770339106389171, |
|
"eval_loss": 0.08730504661798477, |
|
"eval_precision": 0.7965090258093391, |
|
"eval_recall": 0.7584884216508027, |
|
"eval_runtime": 5.1308, |
|
"eval_samples_per_second": 584.709, |
|
"eval_steps_per_second": 2.924, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.2494614390348988, |
|
"eval_accuracy": 0.9690521067671916, |
|
"eval_f1": 0.7750579374275782, |
|
"eval_loss": 0.08751235157251358, |
|
"eval_precision": 0.7905155857586054, |
|
"eval_recall": 0.7601932092626793, |
|
"eval_runtime": 5.1442, |
|
"eval_samples_per_second": 583.186, |
|
"eval_steps_per_second": 2.916, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.2925463162429986, |
|
"grad_norm": 0.1591762900352478, |
|
"learning_rate": 0.0002677078845325291, |
|
"loss": 0.0794, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2925463162429986, |
|
"eval_accuracy": 0.9680691514156584, |
|
"eval_f1": 0.7753859857482184, |
|
"eval_loss": 0.08817364275455475, |
|
"eval_precision": 0.8119073527125757, |
|
"eval_recall": 0.742008808069328, |
|
"eval_runtime": 5.1488, |
|
"eval_samples_per_second": 582.661, |
|
"eval_steps_per_second": 2.913, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.3356311934510987, |
|
"eval_accuracy": 0.9690014389655661, |
|
"eval_f1": 0.7762973868237025, |
|
"eval_loss": 0.08583438396453857, |
|
"eval_precision": 0.8055300947143293, |
|
"eval_recall": 0.7491120897854809, |
|
"eval_runtime": 5.1482, |
|
"eval_samples_per_second": 582.727, |
|
"eval_steps_per_second": 2.914, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.3787160706591985, |
|
"eval_accuracy": 0.9687075657161387, |
|
"eval_f1": 0.7790055248618786, |
|
"eval_loss": 0.08625612407922745, |
|
"eval_precision": 0.7976775346136669, |
|
"eval_recall": 0.7611876687029407, |
|
"eval_runtime": 5.1662, |
|
"eval_samples_per_second": 580.698, |
|
"eval_steps_per_second": 2.903, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.4218009478672986, |
|
"eval_accuracy": 0.968626497233538, |
|
"eval_f1": 0.7790014684287813, |
|
"eval_loss": 0.08638014644384384, |
|
"eval_precision": 0.8061084941498252, |
|
"eval_recall": 0.7536581900838187, |
|
"eval_runtime": 5.1072, |
|
"eval_samples_per_second": 587.401, |
|
"eval_steps_per_second": 2.937, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.4648858250753984, |
|
"eval_accuracy": 0.9683528911047607, |
|
"eval_f1": 0.7756664980853986, |
|
"eval_loss": 0.08518864214420319, |
|
"eval_precision": 0.7891796530432226, |
|
"eval_recall": 0.7626083250461714, |
|
"eval_runtime": 5.1521, |
|
"eval_samples_per_second": 582.282, |
|
"eval_steps_per_second": 2.911, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.5079707022834985, |
|
"grad_norm": 0.10778328031301498, |
|
"learning_rate": 0.0002623222748815166, |
|
"loss": 0.0786, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.5079707022834985, |
|
"eval_accuracy": 0.9685150280699621, |
|
"eval_f1": 0.7766948234784491, |
|
"eval_loss": 0.08616286516189575, |
|
"eval_precision": 0.7896359365825014, |
|
"eval_recall": 0.764171047023725, |
|
"eval_runtime": 5.1115, |
|
"eval_samples_per_second": 586.911, |
|
"eval_steps_per_second": 2.935, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.5510555794915986, |
|
"eval_accuracy": 0.9693054457753187, |
|
"eval_f1": 0.7803809940880228, |
|
"eval_loss": 0.08442429453134537, |
|
"eval_precision": 0.8024617232062444, |
|
"eval_recall": 0.7594828810910641, |
|
"eval_runtime": 5.1816, |
|
"eval_samples_per_second": 578.971, |
|
"eval_steps_per_second": 2.895, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.5941404566996984, |
|
"eval_accuracy": 0.9684846273889869, |
|
"eval_f1": 0.7758786533481317, |
|
"eval_loss": 0.08674349635839462, |
|
"eval_precision": 0.8096046942557134, |
|
"eval_recall": 0.7448501207557892, |
|
"eval_runtime": 5.8233, |
|
"eval_samples_per_second": 515.172, |
|
"eval_steps_per_second": 2.576, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.6372253339077982, |
|
"eval_accuracy": 0.9685048945096371, |
|
"eval_f1": 0.7758557232241442, |
|
"eval_loss": 0.08749110996723175, |
|
"eval_precision": 0.8050717995722578, |
|
"eval_recall": 0.7486858928825117, |
|
"eval_runtime": 5.1543, |
|
"eval_samples_per_second": 582.041, |
|
"eval_steps_per_second": 2.91, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.6803102111158983, |
|
"eval_accuracy": 0.9685048945096371, |
|
"eval_f1": 0.7757390417940877, |
|
"eval_loss": 0.08649948239326477, |
|
"eval_precision": 0.7956684092606423, |
|
"eval_recall": 0.756783634038926, |
|
"eval_runtime": 5.1578, |
|
"eval_samples_per_second": 581.646, |
|
"eval_steps_per_second": 2.908, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.7233950883239983, |
|
"grad_norm": 0.09820359200239182, |
|
"learning_rate": 0.00025693666523050407, |
|
"loss": 0.0774, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.7233950883239983, |
|
"eval_accuracy": 0.9692547779736933, |
|
"eval_f1": 0.7781892107747042, |
|
"eval_loss": 0.08616424351930618, |
|
"eval_precision": 0.7958122958122958, |
|
"eval_recall": 0.7613297343372638, |
|
"eval_runtime": 5.1939, |
|
"eval_samples_per_second": 577.605, |
|
"eval_steps_per_second": 2.888, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.7664799655320982, |
|
"eval_accuracy": 0.9701465312823008, |
|
"eval_f1": 0.7791577864334533, |
|
"eval_loss": 0.08393336832523346, |
|
"eval_precision": 0.807095006090134, |
|
"eval_recall": 0.7530899275465265, |
|
"eval_runtime": 5.1775, |
|
"eval_samples_per_second": 579.433, |
|
"eval_steps_per_second": 2.897, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.8095648427401982, |
|
"eval_accuracy": 0.9697310553089723, |
|
"eval_f1": 0.7807633808937495, |
|
"eval_loss": 0.08360669761896133, |
|
"eval_precision": 0.7963947990543735, |
|
"eval_recall": 0.7657337690012785, |
|
"eval_runtime": 5.1736, |
|
"eval_samples_per_second": 579.871, |
|
"eval_steps_per_second": 2.899, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.8526497199482983, |
|
"eval_accuracy": 0.9697614559899476, |
|
"eval_f1": 0.7807478924994596, |
|
"eval_loss": 0.08350168168544769, |
|
"eval_precision": 0.7921052631578948, |
|
"eval_recall": 0.7697116067623242, |
|
"eval_runtime": 5.1598, |
|
"eval_samples_per_second": 581.416, |
|
"eval_steps_per_second": 2.907, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"eval_accuracy": 0.970166798402951, |
|
"eval_f1": 0.7817303469477382, |
|
"eval_loss": 0.08246050775051117, |
|
"eval_precision": 0.8062811414766722, |
|
"eval_recall": 0.7586304872851257, |
|
"eval_runtime": 5.1425, |
|
"eval_samples_per_second": 583.372, |
|
"eval_steps_per_second": 2.917, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.938819474364498, |
|
"grad_norm": 0.11857543140649796, |
|
"learning_rate": 0.00025155105557949156, |
|
"loss": 0.0771, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.938819474364498, |
|
"eval_accuracy": 0.9691331752497923, |
|
"eval_f1": 0.7826024131204393, |
|
"eval_loss": 0.0859459713101387, |
|
"eval_precision": 0.7962364010585122, |
|
"eval_recall": 0.7694274754936781, |
|
"eval_runtime": 5.128, |
|
"eval_samples_per_second": 585.027, |
|
"eval_steps_per_second": 2.925, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.981904351572598, |
|
"eval_accuracy": 0.9700755963600252, |
|
"eval_f1": 0.7879845806967779, |
|
"eval_loss": 0.0816790908575058, |
|
"eval_precision": 0.8073025335320417, |
|
"eval_recall": 0.7695695411280011, |
|
"eval_runtime": 5.1709, |
|
"eval_samples_per_second": 580.174, |
|
"eval_steps_per_second": 2.901, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.024989228780698, |
|
"eval_accuracy": 0.9698019902312478, |
|
"eval_f1": 0.7841042154566745, |
|
"eval_loss": 0.08529265224933624, |
|
"eval_precision": 0.8086037735849056, |
|
"eval_recall": 0.7610456030686177, |
|
"eval_runtime": 5.1834, |
|
"eval_samples_per_second": 578.77, |
|
"eval_steps_per_second": 2.894, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.068074105988798, |
|
"eval_accuracy": 0.9703796031697777, |
|
"eval_f1": 0.7924996351962643, |
|
"eval_loss": 0.0835157036781311, |
|
"eval_precision": 0.8146092695365231, |
|
"eval_recall": 0.7715584600085239, |
|
"eval_runtime": 5.1951, |
|
"eval_samples_per_second": 577.468, |
|
"eval_steps_per_second": 2.887, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.1111589831968978, |
|
"eval_accuracy": 0.9694473156198699, |
|
"eval_f1": 0.7853357531760434, |
|
"eval_loss": 0.08416793495416641, |
|
"eval_precision": 0.8029988123515439, |
|
"eval_recall": 0.7684330160534166, |
|
"eval_runtime": 5.1858, |
|
"eval_samples_per_second": 578.5, |
|
"eval_steps_per_second": 2.892, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.154243860404998, |
|
"grad_norm": 0.08804752677679062, |
|
"learning_rate": 0.0002461654459284791, |
|
"loss": 0.0681, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.154243860404998, |
|
"eval_accuracy": 0.9703390689284773, |
|
"eval_f1": 0.7868997565833149, |
|
"eval_loss": 0.08355652540922165, |
|
"eval_precision": 0.8183491868671372, |
|
"eval_recall": 0.7577780934791873, |
|
"eval_runtime": 5.1276, |
|
"eval_samples_per_second": 585.071, |
|
"eval_steps_per_second": 2.925, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.197328737613098, |
|
"eval_accuracy": 0.9699134593948238, |
|
"eval_f1": 0.7855120853596573, |
|
"eval_loss": 0.08640210330486298, |
|
"eval_precision": 0.803057287028792, |
|
"eval_recall": 0.7687171473220628, |
|
"eval_runtime": 5.1532, |
|
"eval_samples_per_second": 582.162, |
|
"eval_steps_per_second": 2.911, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.2404136148211977, |
|
"eval_accuracy": 0.9702073326442512, |
|
"eval_f1": 0.789276448544249, |
|
"eval_loss": 0.08586513251066208, |
|
"eval_precision": 0.8009360830773731, |
|
"eval_recall": 0.7779514135530615, |
|
"eval_runtime": 5.1699, |
|
"eval_samples_per_second": 580.277, |
|
"eval_steps_per_second": 2.901, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.2834984920292976, |
|
"eval_accuracy": 0.9699539936361241, |
|
"eval_f1": 0.7828817644051704, |
|
"eval_loss": 0.08344285935163498, |
|
"eval_precision": 0.8055305079651337, |
|
"eval_recall": 0.7614717999715869, |
|
"eval_runtime": 5.1913, |
|
"eval_samples_per_second": 577.886, |
|
"eval_steps_per_second": 2.889, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.326583369237398, |
|
"eval_accuracy": 0.9698323909122231, |
|
"eval_f1": 0.7898440333696048, |
|
"eval_loss": 0.08490483462810516, |
|
"eval_precision": 0.8069967388081827, |
|
"eval_recall": 0.7734053132547237, |
|
"eval_runtime": 5.153, |
|
"eval_samples_per_second": 582.188, |
|
"eval_steps_per_second": 2.911, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.3696682464454977, |
|
"grad_norm": 0.10004045069217682, |
|
"learning_rate": 0.00024077983627746657, |
|
"loss": 0.0674, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.3696682464454977, |
|
"eval_accuracy": 0.9706329421779047, |
|
"eval_f1": 0.7900129888872853, |
|
"eval_loss": 0.08235176652669907, |
|
"eval_precision": 0.8027570024930342, |
|
"eval_recall": 0.7776672822844154, |
|
"eval_runtime": 5.8382, |
|
"eval_samples_per_second": 513.861, |
|
"eval_steps_per_second": 2.569, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.4127531236535975, |
|
"eval_accuracy": 0.9701465312823008, |
|
"eval_f1": 0.7867615038467122, |
|
"eval_loss": 0.08183197677135468, |
|
"eval_precision": 0.8042736311025375, |
|
"eval_recall": 0.7699957380309703, |
|
"eval_runtime": 5.1409, |
|
"eval_samples_per_second": 583.555, |
|
"eval_steps_per_second": 2.918, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.4558380008616973, |
|
"eval_accuracy": 0.9702073326442512, |
|
"eval_f1": 0.7872696817420435, |
|
"eval_loss": 0.08382030576467514, |
|
"eval_precision": 0.8076808129109384, |
|
"eval_recall": 0.7678647535161245, |
|
"eval_runtime": 5.1657, |
|
"eval_samples_per_second": 580.751, |
|
"eval_steps_per_second": 2.904, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.4989228780697976, |
|
"eval_accuracy": 0.9702275997649014, |
|
"eval_f1": 0.7875814600571136, |
|
"eval_loss": 0.08378946781158447, |
|
"eval_precision": 0.8126322151707465, |
|
"eval_recall": 0.7640289813894019, |
|
"eval_runtime": 5.133, |
|
"eval_samples_per_second": 584.453, |
|
"eval_steps_per_second": 2.922, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.5420077552778975, |
|
"eval_accuracy": 0.9705620072556292, |
|
"eval_f1": 0.7894349617422014, |
|
"eval_loss": 0.08313048630952835, |
|
"eval_precision": 0.8187089882496567, |
|
"eval_recall": 0.7621821281432022, |
|
"eval_runtime": 5.1336, |
|
"eval_samples_per_second": 584.386, |
|
"eval_steps_per_second": 2.922, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.5850926324859973, |
|
"grad_norm": 0.08104603737592697, |
|
"learning_rate": 0.00023539422662645408, |
|
"loss": 0.0674, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.5850926324859973, |
|
"eval_accuracy": 0.9705721408159543, |
|
"eval_f1": 0.7914712778429074, |
|
"eval_loss": 0.08241896331310272, |
|
"eval_precision": 0.8172189438644273, |
|
"eval_recall": 0.7672964909788322, |
|
"eval_runtime": 5.1433, |
|
"eval_samples_per_second": 583.282, |
|
"eval_steps_per_second": 2.916, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.6281775096940976, |
|
"eval_accuracy": 0.9694777163008451, |
|
"eval_f1": 0.7920289855072463, |
|
"eval_loss": 0.0877351462841034, |
|
"eval_precision": 0.8083123798254696, |
|
"eval_recall": 0.7763886915755079, |
|
"eval_runtime": 5.1298, |
|
"eval_samples_per_second": 584.819, |
|
"eval_steps_per_second": 2.924, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.6712623869021974, |
|
"eval_accuracy": 0.970187065523601, |
|
"eval_f1": 0.7875146541617819, |
|
"eval_loss": 0.08446252346038818, |
|
"eval_precision": 0.8131336056892117, |
|
"eval_recall": 0.7634607188521096, |
|
"eval_runtime": 5.1724, |
|
"eval_samples_per_second": 580.006, |
|
"eval_steps_per_second": 2.9, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.7143472641102973, |
|
"eval_accuracy": 0.9709268154273323, |
|
"eval_f1": 0.79413046636641, |
|
"eval_loss": 0.0830024853348732, |
|
"eval_precision": 0.8125464545859967, |
|
"eval_recall": 0.7765307572098309, |
|
"eval_runtime": 5.1622, |
|
"eval_samples_per_second": 581.144, |
|
"eval_steps_per_second": 2.906, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.757432141318397, |
|
"eval_accuracy": 0.9705518736953042, |
|
"eval_f1": 0.7958092167924391, |
|
"eval_loss": 0.08372634649276733, |
|
"eval_precision": 0.8216338880484115, |
|
"eval_recall": 0.7715584600085239, |
|
"eval_runtime": 5.1748, |
|
"eval_samples_per_second": 579.728, |
|
"eval_steps_per_second": 2.899, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.800517018526497, |
|
"grad_norm": 0.07959684729576111, |
|
"learning_rate": 0.00023000861697544157, |
|
"loss": 0.0667, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.800517018526497, |
|
"eval_accuracy": 0.9703188018078271, |
|
"eval_f1": 0.7892570933506605, |
|
"eval_loss": 0.08360672742128372, |
|
"eval_precision": 0.8024075161479741, |
|
"eval_recall": 0.7765307572098309, |
|
"eval_runtime": 5.1231, |
|
"eval_samples_per_second": 585.583, |
|
"eval_steps_per_second": 2.928, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.843601895734597, |
|
"eval_accuracy": 0.9709369489876574, |
|
"eval_f1": 0.7909653916211292, |
|
"eval_loss": 0.08242646604776382, |
|
"eval_precision": 0.8118456476218965, |
|
"eval_recall": 0.7711322631055547, |
|
"eval_runtime": 5.1572, |
|
"eval_samples_per_second": 581.707, |
|
"eval_steps_per_second": 2.909, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.886686772942697, |
|
"eval_accuracy": 0.9714638941245617, |
|
"eval_f1": 0.7976650857351331, |
|
"eval_loss": 0.08036693930625916, |
|
"eval_precision": 0.81998199819982, |
|
"eval_recall": 0.7765307572098309, |
|
"eval_runtime": 5.1014, |
|
"eval_samples_per_second": 588.074, |
|
"eval_steps_per_second": 2.94, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.929771650150797, |
|
"eval_accuracy": 0.9718692365375651, |
|
"eval_f1": 0.793916460486101, |
|
"eval_loss": 0.08084654808044434, |
|
"eval_precision": 0.8138147098314188, |
|
"eval_recall": 0.7749680352322773, |
|
"eval_runtime": 5.1757, |
|
"eval_samples_per_second": 579.63, |
|
"eval_steps_per_second": 2.898, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.972856527358897, |
|
"eval_accuracy": 0.9711700208751343, |
|
"eval_f1": 0.792, |
|
"eval_loss": 0.08285341411828995, |
|
"eval_precision": 0.81135449262405, |
|
"eval_recall": 0.7735473788890468, |
|
"eval_runtime": 5.1616, |
|
"eval_samples_per_second": 581.214, |
|
"eval_steps_per_second": 2.906, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.015941404566997, |
|
"grad_norm": 0.07623889297246933, |
|
"learning_rate": 0.00022462300732442912, |
|
"loss": 0.066, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.015941404566997, |
|
"eval_accuracy": 0.9713220242800106, |
|
"eval_f1": 0.7958886135005102, |
|
"eval_loss": 0.0849044919013977, |
|
"eval_precision": 0.8173379248390478, |
|
"eval_recall": 0.7755362977695696, |
|
"eval_runtime": 5.1766, |
|
"eval_samples_per_second": 579.536, |
|
"eval_steps_per_second": 2.898, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.059026281775097, |
|
"eval_accuracy": 0.9710990859528587, |
|
"eval_f1": 0.7920576041893957, |
|
"eval_loss": 0.08765333890914917, |
|
"eval_precision": 0.8114754098360656, |
|
"eval_recall": 0.7735473788890468, |
|
"eval_runtime": 5.1302, |
|
"eval_samples_per_second": 584.777, |
|
"eval_steps_per_second": 2.924, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 3.102111158983197, |
|
"eval_accuracy": 0.970774812022456, |
|
"eval_f1": 0.7927130207577296, |
|
"eval_loss": 0.08476532250642776, |
|
"eval_precision": 0.8103576198248998, |
|
"eval_recall": 0.7758204290382157, |
|
"eval_runtime": 5.1437, |
|
"eval_samples_per_second": 583.242, |
|
"eval_steps_per_second": 2.916, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.145196036191297, |
|
"eval_accuracy": 0.9712206886767597, |
|
"eval_f1": 0.7949370771804759, |
|
"eval_loss": 0.08828677237033844, |
|
"eval_precision": 0.8145497912939773, |
|
"eval_recall": 0.7762466259411849, |
|
"eval_runtime": 5.1438, |
|
"eval_samples_per_second": 583.229, |
|
"eval_steps_per_second": 2.916, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 3.188280913399397, |
|
"eval_accuracy": 0.9700249285583997, |
|
"eval_f1": 0.7877159866414986, |
|
"eval_loss": 0.08564765751361847, |
|
"eval_precision": 0.8054936896807721, |
|
"eval_recall": 0.7707060662025856, |
|
"eval_runtime": 5.8414, |
|
"eval_samples_per_second": 513.575, |
|
"eval_steps_per_second": 2.568, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.2313657906074966, |
|
"grad_norm": 0.14740775525569916, |
|
"learning_rate": 0.00021923739767341663, |
|
"loss": 0.0556, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.2313657906074966, |
|
"eval_accuracy": 0.9712409557974099, |
|
"eval_f1": 0.7959982601130926, |
|
"eval_loss": 0.0849863737821579, |
|
"eval_precision": 0.8127313101406366, |
|
"eval_recall": 0.7799403324335843, |
|
"eval_runtime": 5.1395, |
|
"eval_samples_per_second": 583.711, |
|
"eval_steps_per_second": 2.919, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.274450667815597, |
|
"eval_accuracy": 0.9710686852718834, |
|
"eval_f1": 0.7949167397020157, |
|
"eval_loss": 0.08467870950698853, |
|
"eval_precision": 0.8179768525477228, |
|
"eval_recall": 0.7731211819860776, |
|
"eval_runtime": 5.1447, |
|
"eval_samples_per_second": 583.126, |
|
"eval_steps_per_second": 2.916, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.3175355450236967, |
|
"eval_accuracy": 0.971615897529438, |
|
"eval_f1": 0.7966792892513836, |
|
"eval_loss": 0.08322973549365997, |
|
"eval_precision": 0.8172717764828926, |
|
"eval_recall": 0.7770990197471231, |
|
"eval_runtime": 5.1113, |
|
"eval_samples_per_second": 586.93, |
|
"eval_steps_per_second": 2.935, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 3.3606204222317966, |
|
"eval_accuracy": 0.9713321578403357, |
|
"eval_f1": 0.7940792337831956, |
|
"eval_loss": 0.08206350356340408, |
|
"eval_precision": 0.811508230757823, |
|
"eval_recall": 0.7773831510157693, |
|
"eval_runtime": 5.1313, |
|
"eval_samples_per_second": 584.642, |
|
"eval_steps_per_second": 2.923, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.4037052994398964, |
|
"eval_accuracy": 0.9709977503496078, |
|
"eval_f1": 0.7957571324067301, |
|
"eval_loss": 0.08641249686479568, |
|
"eval_precision": 0.8202382747700196, |
|
"eval_recall": 0.7726949850831084, |
|
"eval_runtime": 5.1503, |
|
"eval_samples_per_second": 582.489, |
|
"eval_steps_per_second": 2.912, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 3.4467901766479967, |
|
"grad_norm": 0.10462938249111176, |
|
"learning_rate": 0.00021385178802240412, |
|
"loss": 0.0571, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.4467901766479967, |
|
"eval_accuracy": 0.9705924079366044, |
|
"eval_f1": 0.7928046989720997, |
|
"eval_loss": 0.08726098388433456, |
|
"eval_precision": 0.8203920376842425, |
|
"eval_recall": 0.7670123597101861, |
|
"eval_runtime": 5.1929, |
|
"eval_samples_per_second": 577.716, |
|
"eval_steps_per_second": 2.889, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.4898750538560965, |
|
"eval_accuracy": 0.971018017470258, |
|
"eval_f1": 0.792296511627907, |
|
"eval_loss": 0.08289187401533127, |
|
"eval_precision": 0.8110400238059813, |
|
"eval_recall": 0.7743997726949851, |
|
"eval_runtime": 5.1659, |
|
"eval_samples_per_second": 580.728, |
|
"eval_steps_per_second": 2.904, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 3.5329599310641964, |
|
"eval_accuracy": 0.9715246954865122, |
|
"eval_f1": 0.7980417945345607, |
|
"eval_loss": 0.0846642255783081, |
|
"eval_precision": 0.8215736422446216, |
|
"eval_recall": 0.7758204290382157, |
|
"eval_runtime": 5.165, |
|
"eval_samples_per_second": 580.835, |
|
"eval_steps_per_second": 2.904, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 3.5760448082722966, |
|
"eval_accuracy": 0.9707849455827811, |
|
"eval_f1": 0.7915995902239135, |
|
"eval_loss": 0.08357686549425125, |
|
"eval_precision": 0.8162064282480761, |
|
"eval_recall": 0.7684330160534166, |
|
"eval_runtime": 5.1568, |
|
"eval_samples_per_second": 581.751, |
|
"eval_steps_per_second": 2.909, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 3.6191296854803965, |
|
"eval_accuracy": 0.9708457469447316, |
|
"eval_f1": 0.7932402645113886, |
|
"eval_loss": 0.08390816301107407, |
|
"eval_precision": 0.8214883579363872, |
|
"eval_recall": 0.766870294075863, |
|
"eval_runtime": 5.1482, |
|
"eval_samples_per_second": 582.733, |
|
"eval_steps_per_second": 2.914, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 3.6622145626884963, |
|
"grad_norm": 0.11067754775285721, |
|
"learning_rate": 0.00020846617837139164, |
|
"loss": 0.0576, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.6622145626884963, |
|
"eval_accuracy": 0.9711902879957844, |
|
"eval_f1": 0.7922267679719464, |
|
"eval_loss": 0.0809941291809082, |
|
"eval_precision": 0.8154609715746729, |
|
"eval_recall": 0.7702798692996164, |
|
"eval_runtime": 5.1845, |
|
"eval_samples_per_second": 578.644, |
|
"eval_steps_per_second": 2.893, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.705299439896596, |
|
"eval_accuracy": 0.9711598873148092, |
|
"eval_f1": 0.7963543565439302, |
|
"eval_loss": 0.08366210758686066, |
|
"eval_precision": 0.8180047932893948, |
|
"eval_recall": 0.7758204290382157, |
|
"eval_runtime": 5.1558, |
|
"eval_samples_per_second": 581.865, |
|
"eval_steps_per_second": 2.909, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 3.748384317104696, |
|
"eval_accuracy": 0.9712814900387102, |
|
"eval_f1": 0.7941414587473319, |
|
"eval_loss": 0.08393159508705139, |
|
"eval_precision": 0.8239156994502138, |
|
"eval_recall": 0.7664440971728939, |
|
"eval_runtime": 5.1323, |
|
"eval_samples_per_second": 584.53, |
|
"eval_steps_per_second": 2.923, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 3.7914691943127963, |
|
"eval_accuracy": 0.9712916235990353, |
|
"eval_f1": 0.7929502585390722, |
|
"eval_loss": 0.0845068171620369, |
|
"eval_precision": 0.8135086670651525, |
|
"eval_recall": 0.7734053132547237, |
|
"eval_runtime": 5.1296, |
|
"eval_samples_per_second": 584.843, |
|
"eval_steps_per_second": 2.924, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 3.834554071520896, |
|
"eval_accuracy": 0.9722441782695932, |
|
"eval_f1": 0.7972508591065292, |
|
"eval_loss": 0.07892899960279465, |
|
"eval_precision": 0.8213317264236216, |
|
"eval_recall": 0.7745418383293081, |
|
"eval_runtime": 5.1954, |
|
"eval_samples_per_second": 577.434, |
|
"eval_steps_per_second": 2.887, |
|
"step": 8900 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 27852, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.129811768410176e+17, |
|
"train_batch_size": 200, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|