{ "best_metric": 0.07892899960279465, "best_model_checkpoint": "./modernbert_ner_model_20250225/checkpoint-8900", "epoch": 3.834554071520896, "eval_steps": 100, "global_step": 8900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.043084877208099955, "eval_accuracy": 0.935206015281409, "eval_f1": 0.5963316214233308, "eval_loss": 0.17664727568626404, "eval_precision": 0.6165983917463207, "eval_recall": 0.5773547378889047, "eval_runtime": 5.1315, "eval_samples_per_second": 584.63, "eval_steps_per_second": 2.923, "step": 100 }, { "epoch": 0.08616975441619991, "eval_accuracy": 0.9478526985671145, "eval_f1": 0.6602559467862049, "eval_loss": 0.1430799961090088, "eval_precision": 0.6722614840989399, "eval_recall": 0.6486716863190795, "eval_runtime": 5.1042, "eval_samples_per_second": 587.755, "eval_steps_per_second": 2.939, "step": 200 }, { "epoch": 0.12925463162429987, "eval_accuracy": 0.954013903244766, "eval_f1": 0.6945600584154802, "eval_loss": 0.12783046066761017, "eval_precision": 0.7145432692307693, "eval_recall": 0.6756641568404603, "eval_runtime": 5.1393, "eval_samples_per_second": 583.739, "eval_steps_per_second": 2.919, "step": 300 }, { "epoch": 0.17233950883239982, "eval_accuracy": 0.9566587624896131, "eval_f1": 0.710146001630475, "eval_loss": 0.12156905978918076, "eval_precision": 0.7423303377750232, "eval_recall": 0.6806364540417673, "eval_runtime": 5.1439, "eval_samples_per_second": 583.215, "eval_steps_per_second": 2.916, "step": 400 }, { "epoch": 0.2154243860404998, "grad_norm": 0.1655338853597641, "learning_rate": 0.00029463593278759154, "loss": 0.2008, "step": 500 }, { "epoch": 0.2154243860404998, "eval_accuracy": 0.959161751889909, "eval_f1": 0.7199349545420948, "eval_loss": 0.112856425344944, "eval_precision": 0.7503852080123267, "eval_recall": 0.6918596391532889, "eval_runtime": 5.1102, "eval_samples_per_second": 587.066, "eval_steps_per_second": 2.935, "step": 500 }, { "epoch": 0.25850926324859974, "eval_accuracy": 0.9605500496544456, "eval_f1": 0.7329311359623308, "eval_loss": 0.10928678512573242, "eval_precision": 0.7601098733404548, "eval_recall": 0.7076289245631482, "eval_runtime": 5.1042, "eval_samples_per_second": 587.749, "eval_steps_per_second": 2.939, "step": 600 }, { "epoch": 0.3015941404566997, "eval_accuracy": 0.9617863440141059, "eval_f1": 0.7338403041825095, "eval_loss": 0.10559003055095673, "eval_precision": 0.7560644869670031, "eval_recall": 0.7128853530331013, "eval_runtime": 5.1458, "eval_samples_per_second": 583.005, "eval_steps_per_second": 2.915, "step": 700 }, { "epoch": 0.34467901766479964, "eval_accuracy": 0.9620498165825582, "eval_f1": 0.7305187678349309, "eval_loss": 0.10398514568805695, "eval_precision": 0.7531683765841883, "eval_recall": 0.7091916465407018, "eval_runtime": 5.1291, "eval_samples_per_second": 584.896, "eval_steps_per_second": 2.924, "step": 800 }, { "epoch": 0.38776389487289964, "eval_accuracy": 0.9628402342879148, "eval_f1": 0.7411920140927774, "eval_loss": 0.101581871509552, "eval_precision": 0.7667425968109339, "eval_recall": 0.717289387697116, "eval_runtime": 5.1388, "eval_samples_per_second": 583.791, "eval_steps_per_second": 2.919, "step": 900 }, { "epoch": 0.4308487720809996, "grad_norm": 0.1194114089012146, "learning_rate": 0.00028925032313657903, "loss": 0.1049, "step": 1000 }, { "epoch": 0.4308487720809996, "eval_accuracy": 0.9633671794248191, "eval_f1": 0.7432511737089201, "eval_loss": 0.10185921937227249, "eval_precision": 0.7683907174275747, "eval_recall": 0.719704503480608, "eval_runtime": 5.1836, "eval_samples_per_second": 578.743, "eval_steps_per_second": 2.894, "step": 1000 }, { "epoch": 0.47393364928909953, "eval_accuracy": 0.9641271964492004, "eval_f1": 0.7452248016456068, "eval_loss": 0.09846850484609604, "eval_precision": 0.7716415639738323, "eval_recall": 0.7205568972865464, "eval_runtime": 5.145, "eval_samples_per_second": 583.095, "eval_steps_per_second": 2.915, "step": 1100 }, { "epoch": 0.5170185264971995, "eval_accuracy": 0.9632151760199429, "eval_f1": 0.7427463372594082, "eval_loss": 0.09953544288873672, "eval_precision": 0.7510530137981118, "eval_recall": 0.7346213950845291, "eval_runtime": 5.1262, "eval_samples_per_second": 585.225, "eval_steps_per_second": 2.926, "step": 1200 }, { "epoch": 0.5601034037052994, "eval_accuracy": 0.9641575971301757, "eval_f1": 0.7527304499781564, "eval_loss": 0.09697850793600082, "eval_precision": 0.7720687079910381, "eval_recall": 0.7343372638158829, "eval_runtime": 5.1395, "eval_samples_per_second": 583.715, "eval_steps_per_second": 2.919, "step": 1300 }, { "epoch": 0.6031882809133994, "eval_accuracy": 0.9648466792322815, "eval_f1": 0.7408794666267137, "eval_loss": 0.09701889753341675, "eval_precision": 0.783676703645008, "eval_recall": 0.7025145617275181, "eval_runtime": 5.1321, "eval_samples_per_second": 584.561, "eval_steps_per_second": 2.923, "step": 1400 }, { "epoch": 0.6462731581214993, "grad_norm": 0.11641442030668259, "learning_rate": 0.0002838647134855665, "loss": 0.0955, "step": 1500 }, { "epoch": 0.6462731581214993, "eval_accuracy": 0.9641069293285502, "eval_f1": 0.7526039742926792, "eval_loss": 0.0987541675567627, "eval_precision": 0.7839335180055401, "eval_recall": 0.7236823412416536, "eval_runtime": 5.1118, "eval_samples_per_second": 586.873, "eval_steps_per_second": 2.934, "step": 1500 }, { "epoch": 0.6893580353295993, "eval_accuracy": 0.9651101518007337, "eval_f1": 0.7509688531649203, "eval_loss": 0.09490419179201126, "eval_precision": 0.7588107324147934, "eval_recall": 0.7432873987782356, "eval_runtime": 5.0796, "eval_samples_per_second": 590.593, "eval_steps_per_second": 2.953, "step": 1600 }, { "epoch": 0.7324429125376992, "eval_accuracy": 0.9662653776777933, "eval_f1": 0.7569454545454545, "eval_loss": 0.09300602227449417, "eval_precision": 0.7754433020414245, "eval_recall": 0.7393095610171899, "eval_runtime": 5.1475, "eval_samples_per_second": 582.81, "eval_steps_per_second": 2.914, "step": 1700 }, { "epoch": 0.7755277897457993, "eval_accuracy": 0.9657992339028394, "eval_f1": 0.7599940775836542, "eval_loss": 0.09315136820077896, "eval_precision": 0.793476580615242, "eval_recall": 0.7292229009802529, "eval_runtime": 5.8376, "eval_samples_per_second": 513.907, "eval_steps_per_second": 2.57, "step": 1800 }, { "epoch": 0.8186126669538992, "eval_accuracy": 0.9664883160049452, "eval_f1": 0.754410779423807, "eval_loss": 0.09128749370574951, "eval_precision": 0.792461682827651, "eval_recall": 0.7198465691149311, "eval_runtime": 5.152, "eval_samples_per_second": 582.301, "eval_steps_per_second": 2.912, "step": 1900 }, { "epoch": 0.8616975441619992, "grad_norm": 0.10880027711391449, "learning_rate": 0.00027847910383455406, "loss": 0.0906, "step": 2000 }, { "epoch": 0.8616975441619992, "eval_accuracy": 0.9675118055977787, "eval_f1": 0.7641537107656204, "eval_loss": 0.0904640182852745, "eval_precision": 0.7897529179930272, "eval_recall": 0.7401619548231283, "eval_runtime": 5.1464, "eval_samples_per_second": 582.93, "eval_steps_per_second": 2.915, "step": 2000 }, { "epoch": 0.9047824213700991, "eval_accuracy": 0.966711254332097, "eval_f1": 0.7613536478171697, "eval_loss": 0.09170496463775635, "eval_precision": 0.7858763042492061, "eval_recall": 0.7383151015769286, "eval_runtime": 5.0869, "eval_samples_per_second": 589.746, "eval_steps_per_second": 2.949, "step": 2100 }, { "epoch": 0.9478672985781991, "eval_accuracy": 0.9671064631847753, "eval_f1": 0.754022988505747, "eval_loss": 0.09009002894163132, "eval_precision": 0.7887061743717034, "eval_recall": 0.722261684898423, "eval_runtime": 5.1309, "eval_samples_per_second": 584.697, "eval_steps_per_second": 2.923, "step": 2200 }, { "epoch": 0.990952175786299, "eval_accuracy": 0.9677651446059058, "eval_f1": 0.7611605527819985, "eval_loss": 0.08897808939218521, "eval_precision": 0.775582424063698, "eval_recall": 0.7472652365392811, "eval_runtime": 5.1531, "eval_samples_per_second": 582.168, "eval_steps_per_second": 2.911, "step": 2300 }, { "epoch": 1.034037052994399, "eval_accuracy": 0.9675624733994042, "eval_f1": 0.7690079652425779, "eval_loss": 0.08915847539901733, "eval_precision": 0.7842268498006203, "eval_recall": 0.754368518255434, "eval_runtime": 5.145, "eval_samples_per_second": 583.086, "eval_steps_per_second": 2.915, "step": 2400 }, { "epoch": 1.077121930202499, "grad_norm": 0.09866651147603989, "learning_rate": 0.00027309349418354155, "loss": 0.0842, "step": 2500 }, { "epoch": 1.077121930202499, "eval_accuracy": 0.9678664802091567, "eval_f1": 0.7673179396092362, "eval_loss": 0.08764609694480896, "eval_precision": 0.8008651320871312, "eval_recall": 0.7364682483307288, "eval_runtime": 5.0632, "eval_samples_per_second": 592.507, "eval_steps_per_second": 2.963, "step": 2500 }, { "epoch": 1.1202068074105989, "eval_accuracy": 0.9675320727184289, "eval_f1": 0.7698676720632808, "eval_loss": 0.08784696459770203, "eval_precision": 0.8025585696670777, "eval_recall": 0.7397357579201591, "eval_runtime": 5.1556, "eval_samples_per_second": 581.896, "eval_steps_per_second": 2.909, "step": 2600 }, { "epoch": 1.163291684618699, "eval_accuracy": 0.9681603534585841, "eval_f1": 0.7680871421211453, "eval_loss": 0.08904842287302017, "eval_precision": 0.7968845448992059, "eval_recall": 0.7412984798977127, "eval_runtime": 5.109, "eval_samples_per_second": 587.204, "eval_steps_per_second": 2.936, "step": 2700 }, { "epoch": 1.2063765618267988, "eval_accuracy": 0.9686467643541882, "eval_f1": 0.7770339106389171, "eval_loss": 0.08730504661798477, "eval_precision": 0.7965090258093391, "eval_recall": 0.7584884216508027, "eval_runtime": 5.1308, "eval_samples_per_second": 584.709, "eval_steps_per_second": 2.924, "step": 2800 }, { "epoch": 1.2494614390348988, "eval_accuracy": 0.9690521067671916, "eval_f1": 0.7750579374275782, "eval_loss": 0.08751235157251358, "eval_precision": 0.7905155857586054, "eval_recall": 0.7601932092626793, "eval_runtime": 5.1442, "eval_samples_per_second": 583.186, "eval_steps_per_second": 2.916, "step": 2900 }, { "epoch": 1.2925463162429986, "grad_norm": 0.1591762900352478, "learning_rate": 0.0002677078845325291, "loss": 0.0794, "step": 3000 }, { "epoch": 1.2925463162429986, "eval_accuracy": 0.9680691514156584, "eval_f1": 0.7753859857482184, "eval_loss": 0.08817364275455475, "eval_precision": 0.8119073527125757, "eval_recall": 0.742008808069328, "eval_runtime": 5.1488, "eval_samples_per_second": 582.661, "eval_steps_per_second": 2.913, "step": 3000 }, { "epoch": 1.3356311934510987, "eval_accuracy": 0.9690014389655661, "eval_f1": 0.7762973868237025, "eval_loss": 0.08583438396453857, "eval_precision": 0.8055300947143293, "eval_recall": 0.7491120897854809, "eval_runtime": 5.1482, "eval_samples_per_second": 582.727, "eval_steps_per_second": 2.914, "step": 3100 }, { "epoch": 1.3787160706591985, "eval_accuracy": 0.9687075657161387, "eval_f1": 0.7790055248618786, "eval_loss": 0.08625612407922745, "eval_precision": 0.7976775346136669, "eval_recall": 0.7611876687029407, "eval_runtime": 5.1662, "eval_samples_per_second": 580.698, "eval_steps_per_second": 2.903, "step": 3200 }, { "epoch": 1.4218009478672986, "eval_accuracy": 0.968626497233538, "eval_f1": 0.7790014684287813, "eval_loss": 0.08638014644384384, "eval_precision": 0.8061084941498252, "eval_recall": 0.7536581900838187, "eval_runtime": 5.1072, "eval_samples_per_second": 587.401, "eval_steps_per_second": 2.937, "step": 3300 }, { "epoch": 1.4648858250753984, "eval_accuracy": 0.9683528911047607, "eval_f1": 0.7756664980853986, "eval_loss": 0.08518864214420319, "eval_precision": 0.7891796530432226, "eval_recall": 0.7626083250461714, "eval_runtime": 5.1521, "eval_samples_per_second": 582.282, "eval_steps_per_second": 2.911, "step": 3400 }, { "epoch": 1.5079707022834985, "grad_norm": 0.10778328031301498, "learning_rate": 0.0002623222748815166, "loss": 0.0786, "step": 3500 }, { "epoch": 1.5079707022834985, "eval_accuracy": 0.9685150280699621, "eval_f1": 0.7766948234784491, "eval_loss": 0.08616286516189575, "eval_precision": 0.7896359365825014, "eval_recall": 0.764171047023725, "eval_runtime": 5.1115, "eval_samples_per_second": 586.911, "eval_steps_per_second": 2.935, "step": 3500 }, { "epoch": 1.5510555794915986, "eval_accuracy": 0.9693054457753187, "eval_f1": 0.7803809940880228, "eval_loss": 0.08442429453134537, "eval_precision": 0.8024617232062444, "eval_recall": 0.7594828810910641, "eval_runtime": 5.1816, "eval_samples_per_second": 578.971, "eval_steps_per_second": 2.895, "step": 3600 }, { "epoch": 1.5941404566996984, "eval_accuracy": 0.9684846273889869, "eval_f1": 0.7758786533481317, "eval_loss": 0.08674349635839462, "eval_precision": 0.8096046942557134, "eval_recall": 0.7448501207557892, "eval_runtime": 5.8233, "eval_samples_per_second": 515.172, "eval_steps_per_second": 2.576, "step": 3700 }, { "epoch": 1.6372253339077982, "eval_accuracy": 0.9685048945096371, "eval_f1": 0.7758557232241442, "eval_loss": 0.08749110996723175, "eval_precision": 0.8050717995722578, "eval_recall": 0.7486858928825117, "eval_runtime": 5.1543, "eval_samples_per_second": 582.041, "eval_steps_per_second": 2.91, "step": 3800 }, { "epoch": 1.6803102111158983, "eval_accuracy": 0.9685048945096371, "eval_f1": 0.7757390417940877, "eval_loss": 0.08649948239326477, "eval_precision": 0.7956684092606423, "eval_recall": 0.756783634038926, "eval_runtime": 5.1578, "eval_samples_per_second": 581.646, "eval_steps_per_second": 2.908, "step": 3900 }, { "epoch": 1.7233950883239983, "grad_norm": 0.09820359200239182, "learning_rate": 0.00025693666523050407, "loss": 0.0774, "step": 4000 }, { "epoch": 1.7233950883239983, "eval_accuracy": 0.9692547779736933, "eval_f1": 0.7781892107747042, "eval_loss": 0.08616424351930618, "eval_precision": 0.7958122958122958, "eval_recall": 0.7613297343372638, "eval_runtime": 5.1939, "eval_samples_per_second": 577.605, "eval_steps_per_second": 2.888, "step": 4000 }, { "epoch": 1.7664799655320982, "eval_accuracy": 0.9701465312823008, "eval_f1": 0.7791577864334533, "eval_loss": 0.08393336832523346, "eval_precision": 0.807095006090134, "eval_recall": 0.7530899275465265, "eval_runtime": 5.1775, "eval_samples_per_second": 579.433, "eval_steps_per_second": 2.897, "step": 4100 }, { "epoch": 1.8095648427401982, "eval_accuracy": 0.9697310553089723, "eval_f1": 0.7807633808937495, "eval_loss": 0.08360669761896133, "eval_precision": 0.7963947990543735, "eval_recall": 0.7657337690012785, "eval_runtime": 5.1736, "eval_samples_per_second": 579.871, "eval_steps_per_second": 2.899, "step": 4200 }, { "epoch": 1.8526497199482983, "eval_accuracy": 0.9697614559899476, "eval_f1": 0.7807478924994596, "eval_loss": 0.08350168168544769, "eval_precision": 0.7921052631578948, "eval_recall": 0.7697116067623242, "eval_runtime": 5.1598, "eval_samples_per_second": 581.416, "eval_steps_per_second": 2.907, "step": 4300 }, { "epoch": 1.8957345971563981, "eval_accuracy": 0.970166798402951, "eval_f1": 0.7817303469477382, "eval_loss": 0.08246050775051117, "eval_precision": 0.8062811414766722, "eval_recall": 0.7586304872851257, "eval_runtime": 5.1425, "eval_samples_per_second": 583.372, "eval_steps_per_second": 2.917, "step": 4400 }, { "epoch": 1.938819474364498, "grad_norm": 0.11857543140649796, "learning_rate": 0.00025155105557949156, "loss": 0.0771, "step": 4500 }, { "epoch": 1.938819474364498, "eval_accuracy": 0.9691331752497923, "eval_f1": 0.7826024131204393, "eval_loss": 0.0859459713101387, "eval_precision": 0.7962364010585122, "eval_recall": 0.7694274754936781, "eval_runtime": 5.128, "eval_samples_per_second": 585.027, "eval_steps_per_second": 2.925, "step": 4500 }, { "epoch": 1.981904351572598, "eval_accuracy": 0.9700755963600252, "eval_f1": 0.7879845806967779, "eval_loss": 0.0816790908575058, "eval_precision": 0.8073025335320417, "eval_recall": 0.7695695411280011, "eval_runtime": 5.1709, "eval_samples_per_second": 580.174, "eval_steps_per_second": 2.901, "step": 4600 }, { "epoch": 2.024989228780698, "eval_accuracy": 0.9698019902312478, "eval_f1": 0.7841042154566745, "eval_loss": 0.08529265224933624, "eval_precision": 0.8086037735849056, "eval_recall": 0.7610456030686177, "eval_runtime": 5.1834, "eval_samples_per_second": 578.77, "eval_steps_per_second": 2.894, "step": 4700 }, { "epoch": 2.068074105988798, "eval_accuracy": 0.9703796031697777, "eval_f1": 0.7924996351962643, "eval_loss": 0.0835157036781311, "eval_precision": 0.8146092695365231, "eval_recall": 0.7715584600085239, "eval_runtime": 5.1951, "eval_samples_per_second": 577.468, "eval_steps_per_second": 2.887, "step": 4800 }, { "epoch": 2.1111589831968978, "eval_accuracy": 0.9694473156198699, "eval_f1": 0.7853357531760434, "eval_loss": 0.08416793495416641, "eval_precision": 0.8029988123515439, "eval_recall": 0.7684330160534166, "eval_runtime": 5.1858, "eval_samples_per_second": 578.5, "eval_steps_per_second": 2.892, "step": 4900 }, { "epoch": 2.154243860404998, "grad_norm": 0.08804752677679062, "learning_rate": 0.0002461654459284791, "loss": 0.0681, "step": 5000 }, { "epoch": 2.154243860404998, "eval_accuracy": 0.9703390689284773, "eval_f1": 0.7868997565833149, "eval_loss": 0.08355652540922165, "eval_precision": 0.8183491868671372, "eval_recall": 0.7577780934791873, "eval_runtime": 5.1276, "eval_samples_per_second": 585.071, "eval_steps_per_second": 2.925, "step": 5000 }, { "epoch": 2.197328737613098, "eval_accuracy": 0.9699134593948238, "eval_f1": 0.7855120853596573, "eval_loss": 0.08640210330486298, "eval_precision": 0.803057287028792, "eval_recall": 0.7687171473220628, "eval_runtime": 5.1532, "eval_samples_per_second": 582.162, "eval_steps_per_second": 2.911, "step": 5100 }, { "epoch": 2.2404136148211977, "eval_accuracy": 0.9702073326442512, "eval_f1": 0.789276448544249, "eval_loss": 0.08586513251066208, "eval_precision": 0.8009360830773731, "eval_recall": 0.7779514135530615, "eval_runtime": 5.1699, "eval_samples_per_second": 580.277, "eval_steps_per_second": 2.901, "step": 5200 }, { "epoch": 2.2834984920292976, "eval_accuracy": 0.9699539936361241, "eval_f1": 0.7828817644051704, "eval_loss": 0.08344285935163498, "eval_precision": 0.8055305079651337, "eval_recall": 0.7614717999715869, "eval_runtime": 5.1913, "eval_samples_per_second": 577.886, "eval_steps_per_second": 2.889, "step": 5300 }, { "epoch": 2.326583369237398, "eval_accuracy": 0.9698323909122231, "eval_f1": 0.7898440333696048, "eval_loss": 0.08490483462810516, "eval_precision": 0.8069967388081827, "eval_recall": 0.7734053132547237, "eval_runtime": 5.153, "eval_samples_per_second": 582.188, "eval_steps_per_second": 2.911, "step": 5400 }, { "epoch": 2.3696682464454977, "grad_norm": 0.10004045069217682, "learning_rate": 0.00024077983627746657, "loss": 0.0674, "step": 5500 }, { "epoch": 2.3696682464454977, "eval_accuracy": 0.9706329421779047, "eval_f1": 0.7900129888872853, "eval_loss": 0.08235176652669907, "eval_precision": 0.8027570024930342, "eval_recall": 0.7776672822844154, "eval_runtime": 5.8382, "eval_samples_per_second": 513.861, "eval_steps_per_second": 2.569, "step": 5500 }, { "epoch": 2.4127531236535975, "eval_accuracy": 0.9701465312823008, "eval_f1": 0.7867615038467122, "eval_loss": 0.08183197677135468, "eval_precision": 0.8042736311025375, "eval_recall": 0.7699957380309703, "eval_runtime": 5.1409, "eval_samples_per_second": 583.555, "eval_steps_per_second": 2.918, "step": 5600 }, { "epoch": 2.4558380008616973, "eval_accuracy": 0.9702073326442512, "eval_f1": 0.7872696817420435, "eval_loss": 0.08382030576467514, "eval_precision": 0.8076808129109384, "eval_recall": 0.7678647535161245, "eval_runtime": 5.1657, "eval_samples_per_second": 580.751, "eval_steps_per_second": 2.904, "step": 5700 }, { "epoch": 2.4989228780697976, "eval_accuracy": 0.9702275997649014, "eval_f1": 0.7875814600571136, "eval_loss": 0.08378946781158447, "eval_precision": 0.8126322151707465, "eval_recall": 0.7640289813894019, "eval_runtime": 5.133, "eval_samples_per_second": 584.453, "eval_steps_per_second": 2.922, "step": 5800 }, { "epoch": 2.5420077552778975, "eval_accuracy": 0.9705620072556292, "eval_f1": 0.7894349617422014, "eval_loss": 0.08313048630952835, "eval_precision": 0.8187089882496567, "eval_recall": 0.7621821281432022, "eval_runtime": 5.1336, "eval_samples_per_second": 584.386, "eval_steps_per_second": 2.922, "step": 5900 }, { "epoch": 2.5850926324859973, "grad_norm": 0.08104603737592697, "learning_rate": 0.00023539422662645408, "loss": 0.0674, "step": 6000 }, { "epoch": 2.5850926324859973, "eval_accuracy": 0.9705721408159543, "eval_f1": 0.7914712778429074, "eval_loss": 0.08241896331310272, "eval_precision": 0.8172189438644273, "eval_recall": 0.7672964909788322, "eval_runtime": 5.1433, "eval_samples_per_second": 583.282, "eval_steps_per_second": 2.916, "step": 6000 }, { "epoch": 2.6281775096940976, "eval_accuracy": 0.9694777163008451, "eval_f1": 0.7920289855072463, "eval_loss": 0.0877351462841034, "eval_precision": 0.8083123798254696, "eval_recall": 0.7763886915755079, "eval_runtime": 5.1298, "eval_samples_per_second": 584.819, "eval_steps_per_second": 2.924, "step": 6100 }, { "epoch": 2.6712623869021974, "eval_accuracy": 0.970187065523601, "eval_f1": 0.7875146541617819, "eval_loss": 0.08446252346038818, "eval_precision": 0.8131336056892117, "eval_recall": 0.7634607188521096, "eval_runtime": 5.1724, "eval_samples_per_second": 580.006, "eval_steps_per_second": 2.9, "step": 6200 }, { "epoch": 2.7143472641102973, "eval_accuracy": 0.9709268154273323, "eval_f1": 0.79413046636641, "eval_loss": 0.0830024853348732, "eval_precision": 0.8125464545859967, "eval_recall": 0.7765307572098309, "eval_runtime": 5.1622, "eval_samples_per_second": 581.144, "eval_steps_per_second": 2.906, "step": 6300 }, { "epoch": 2.757432141318397, "eval_accuracy": 0.9705518736953042, "eval_f1": 0.7958092167924391, "eval_loss": 0.08372634649276733, "eval_precision": 0.8216338880484115, "eval_recall": 0.7715584600085239, "eval_runtime": 5.1748, "eval_samples_per_second": 579.728, "eval_steps_per_second": 2.899, "step": 6400 }, { "epoch": 2.800517018526497, "grad_norm": 0.07959684729576111, "learning_rate": 0.00023000861697544157, "loss": 0.0667, "step": 6500 }, { "epoch": 2.800517018526497, "eval_accuracy": 0.9703188018078271, "eval_f1": 0.7892570933506605, "eval_loss": 0.08360672742128372, "eval_precision": 0.8024075161479741, "eval_recall": 0.7765307572098309, "eval_runtime": 5.1231, "eval_samples_per_second": 585.583, "eval_steps_per_second": 2.928, "step": 6500 }, { "epoch": 2.843601895734597, "eval_accuracy": 0.9709369489876574, "eval_f1": 0.7909653916211292, "eval_loss": 0.08242646604776382, "eval_precision": 0.8118456476218965, "eval_recall": 0.7711322631055547, "eval_runtime": 5.1572, "eval_samples_per_second": 581.707, "eval_steps_per_second": 2.909, "step": 6600 }, { "epoch": 2.886686772942697, "eval_accuracy": 0.9714638941245617, "eval_f1": 0.7976650857351331, "eval_loss": 0.08036693930625916, "eval_precision": 0.81998199819982, "eval_recall": 0.7765307572098309, "eval_runtime": 5.1014, "eval_samples_per_second": 588.074, "eval_steps_per_second": 2.94, "step": 6700 }, { "epoch": 2.929771650150797, "eval_accuracy": 0.9718692365375651, "eval_f1": 0.793916460486101, "eval_loss": 0.08084654808044434, "eval_precision": 0.8138147098314188, "eval_recall": 0.7749680352322773, "eval_runtime": 5.1757, "eval_samples_per_second": 579.63, "eval_steps_per_second": 2.898, "step": 6800 }, { "epoch": 2.972856527358897, "eval_accuracy": 0.9711700208751343, "eval_f1": 0.792, "eval_loss": 0.08285341411828995, "eval_precision": 0.81135449262405, "eval_recall": 0.7735473788890468, "eval_runtime": 5.1616, "eval_samples_per_second": 581.214, "eval_steps_per_second": 2.906, "step": 6900 }, { "epoch": 3.015941404566997, "grad_norm": 0.07623889297246933, "learning_rate": 0.00022462300732442912, "loss": 0.066, "step": 7000 }, { "epoch": 3.015941404566997, "eval_accuracy": 0.9713220242800106, "eval_f1": 0.7958886135005102, "eval_loss": 0.0849044919013977, "eval_precision": 0.8173379248390478, "eval_recall": 0.7755362977695696, "eval_runtime": 5.1766, "eval_samples_per_second": 579.536, "eval_steps_per_second": 2.898, "step": 7000 }, { "epoch": 3.059026281775097, "eval_accuracy": 0.9710990859528587, "eval_f1": 0.7920576041893957, "eval_loss": 0.08765333890914917, "eval_precision": 0.8114754098360656, "eval_recall": 0.7735473788890468, "eval_runtime": 5.1302, "eval_samples_per_second": 584.777, "eval_steps_per_second": 2.924, "step": 7100 }, { "epoch": 3.102111158983197, "eval_accuracy": 0.970774812022456, "eval_f1": 0.7927130207577296, "eval_loss": 0.08476532250642776, "eval_precision": 0.8103576198248998, "eval_recall": 0.7758204290382157, "eval_runtime": 5.1437, "eval_samples_per_second": 583.242, "eval_steps_per_second": 2.916, "step": 7200 }, { "epoch": 3.145196036191297, "eval_accuracy": 0.9712206886767597, "eval_f1": 0.7949370771804759, "eval_loss": 0.08828677237033844, "eval_precision": 0.8145497912939773, "eval_recall": 0.7762466259411849, "eval_runtime": 5.1438, "eval_samples_per_second": 583.229, "eval_steps_per_second": 2.916, "step": 7300 }, { "epoch": 3.188280913399397, "eval_accuracy": 0.9700249285583997, "eval_f1": 0.7877159866414986, "eval_loss": 0.08564765751361847, "eval_precision": 0.8054936896807721, "eval_recall": 0.7707060662025856, "eval_runtime": 5.8414, "eval_samples_per_second": 513.575, "eval_steps_per_second": 2.568, "step": 7400 }, { "epoch": 3.2313657906074966, "grad_norm": 0.14740775525569916, "learning_rate": 0.00021923739767341663, "loss": 0.0556, "step": 7500 }, { "epoch": 3.2313657906074966, "eval_accuracy": 0.9712409557974099, "eval_f1": 0.7959982601130926, "eval_loss": 0.0849863737821579, "eval_precision": 0.8127313101406366, "eval_recall": 0.7799403324335843, "eval_runtime": 5.1395, "eval_samples_per_second": 583.711, "eval_steps_per_second": 2.919, "step": 7500 }, { "epoch": 3.274450667815597, "eval_accuracy": 0.9710686852718834, "eval_f1": 0.7949167397020157, "eval_loss": 0.08467870950698853, "eval_precision": 0.8179768525477228, "eval_recall": 0.7731211819860776, "eval_runtime": 5.1447, "eval_samples_per_second": 583.126, "eval_steps_per_second": 2.916, "step": 7600 }, { "epoch": 3.3175355450236967, "eval_accuracy": 0.971615897529438, "eval_f1": 0.7966792892513836, "eval_loss": 0.08322973549365997, "eval_precision": 0.8172717764828926, "eval_recall": 0.7770990197471231, "eval_runtime": 5.1113, "eval_samples_per_second": 586.93, "eval_steps_per_second": 2.935, "step": 7700 }, { "epoch": 3.3606204222317966, "eval_accuracy": 0.9713321578403357, "eval_f1": 0.7940792337831956, "eval_loss": 0.08206350356340408, "eval_precision": 0.811508230757823, "eval_recall": 0.7773831510157693, "eval_runtime": 5.1313, "eval_samples_per_second": 584.642, "eval_steps_per_second": 2.923, "step": 7800 }, { "epoch": 3.4037052994398964, "eval_accuracy": 0.9709977503496078, "eval_f1": 0.7957571324067301, "eval_loss": 0.08641249686479568, "eval_precision": 0.8202382747700196, "eval_recall": 0.7726949850831084, "eval_runtime": 5.1503, "eval_samples_per_second": 582.489, "eval_steps_per_second": 2.912, "step": 7900 }, { "epoch": 3.4467901766479967, "grad_norm": 0.10462938249111176, "learning_rate": 0.00021385178802240412, "loss": 0.0571, "step": 8000 }, { "epoch": 3.4467901766479967, "eval_accuracy": 0.9705924079366044, "eval_f1": 0.7928046989720997, "eval_loss": 0.08726098388433456, "eval_precision": 0.8203920376842425, "eval_recall": 0.7670123597101861, "eval_runtime": 5.1929, "eval_samples_per_second": 577.716, "eval_steps_per_second": 2.889, "step": 8000 }, { "epoch": 3.4898750538560965, "eval_accuracy": 0.971018017470258, "eval_f1": 0.792296511627907, "eval_loss": 0.08289187401533127, "eval_precision": 0.8110400238059813, "eval_recall": 0.7743997726949851, "eval_runtime": 5.1659, "eval_samples_per_second": 580.728, "eval_steps_per_second": 2.904, "step": 8100 }, { "epoch": 3.5329599310641964, "eval_accuracy": 0.9715246954865122, "eval_f1": 0.7980417945345607, "eval_loss": 0.0846642255783081, "eval_precision": 0.8215736422446216, "eval_recall": 0.7758204290382157, "eval_runtime": 5.165, "eval_samples_per_second": 580.835, "eval_steps_per_second": 2.904, "step": 8200 }, { "epoch": 3.5760448082722966, "eval_accuracy": 0.9707849455827811, "eval_f1": 0.7915995902239135, "eval_loss": 0.08357686549425125, "eval_precision": 0.8162064282480761, "eval_recall": 0.7684330160534166, "eval_runtime": 5.1568, "eval_samples_per_second": 581.751, "eval_steps_per_second": 2.909, "step": 8300 }, { "epoch": 3.6191296854803965, "eval_accuracy": 0.9708457469447316, "eval_f1": 0.7932402645113886, "eval_loss": 0.08390816301107407, "eval_precision": 0.8214883579363872, "eval_recall": 0.766870294075863, "eval_runtime": 5.1482, "eval_samples_per_second": 582.733, "eval_steps_per_second": 2.914, "step": 8400 }, { "epoch": 3.6622145626884963, "grad_norm": 0.11067754775285721, "learning_rate": 0.00020846617837139164, "loss": 0.0576, "step": 8500 }, { "epoch": 3.6622145626884963, "eval_accuracy": 0.9711902879957844, "eval_f1": 0.7922267679719464, "eval_loss": 0.0809941291809082, "eval_precision": 0.8154609715746729, "eval_recall": 0.7702798692996164, "eval_runtime": 5.1845, "eval_samples_per_second": 578.644, "eval_steps_per_second": 2.893, "step": 8500 }, { "epoch": 3.705299439896596, "eval_accuracy": 0.9711598873148092, "eval_f1": 0.7963543565439302, "eval_loss": 0.08366210758686066, "eval_precision": 0.8180047932893948, "eval_recall": 0.7758204290382157, "eval_runtime": 5.1558, "eval_samples_per_second": 581.865, "eval_steps_per_second": 2.909, "step": 8600 }, { "epoch": 3.748384317104696, "eval_accuracy": 0.9712814900387102, "eval_f1": 0.7941414587473319, "eval_loss": 0.08393159508705139, "eval_precision": 0.8239156994502138, "eval_recall": 0.7664440971728939, "eval_runtime": 5.1323, "eval_samples_per_second": 584.53, "eval_steps_per_second": 2.923, "step": 8700 }, { "epoch": 3.7914691943127963, "eval_accuracy": 0.9712916235990353, "eval_f1": 0.7929502585390722, "eval_loss": 0.0845068171620369, "eval_precision": 0.8135086670651525, "eval_recall": 0.7734053132547237, "eval_runtime": 5.1296, "eval_samples_per_second": 584.843, "eval_steps_per_second": 2.924, "step": 8800 }, { "epoch": 3.834554071520896, "eval_accuracy": 0.9722441782695932, "eval_f1": 0.7972508591065292, "eval_loss": 0.07892899960279465, "eval_precision": 0.8213317264236216, "eval_recall": 0.7745418383293081, "eval_runtime": 5.1954, "eval_samples_per_second": 577.434, "eval_steps_per_second": 2.887, "step": 8900 } ], "logging_steps": 500, "max_steps": 27852, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.129811768410176e+17, "train_batch_size": 200, "trial_name": null, "trial_params": null }