Token Classification
Transformers
ONNX
Safetensors
modernbert
PII
MikeDoes's picture
#1 original upload
16db7f2 verified
{
"best_metric": 0.07892899960279465,
"best_model_checkpoint": "./modernbert_ner_model_20250225/checkpoint-8900",
"epoch": 3.834554071520896,
"eval_steps": 100,
"global_step": 8900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.043084877208099955,
"eval_accuracy": 0.935206015281409,
"eval_f1": 0.5963316214233308,
"eval_loss": 0.17664727568626404,
"eval_precision": 0.6165983917463207,
"eval_recall": 0.5773547378889047,
"eval_runtime": 5.1315,
"eval_samples_per_second": 584.63,
"eval_steps_per_second": 2.923,
"step": 100
},
{
"epoch": 0.08616975441619991,
"eval_accuracy": 0.9478526985671145,
"eval_f1": 0.6602559467862049,
"eval_loss": 0.1430799961090088,
"eval_precision": 0.6722614840989399,
"eval_recall": 0.6486716863190795,
"eval_runtime": 5.1042,
"eval_samples_per_second": 587.755,
"eval_steps_per_second": 2.939,
"step": 200
},
{
"epoch": 0.12925463162429987,
"eval_accuracy": 0.954013903244766,
"eval_f1": 0.6945600584154802,
"eval_loss": 0.12783046066761017,
"eval_precision": 0.7145432692307693,
"eval_recall": 0.6756641568404603,
"eval_runtime": 5.1393,
"eval_samples_per_second": 583.739,
"eval_steps_per_second": 2.919,
"step": 300
},
{
"epoch": 0.17233950883239982,
"eval_accuracy": 0.9566587624896131,
"eval_f1": 0.710146001630475,
"eval_loss": 0.12156905978918076,
"eval_precision": 0.7423303377750232,
"eval_recall": 0.6806364540417673,
"eval_runtime": 5.1439,
"eval_samples_per_second": 583.215,
"eval_steps_per_second": 2.916,
"step": 400
},
{
"epoch": 0.2154243860404998,
"grad_norm": 0.1655338853597641,
"learning_rate": 0.00029463593278759154,
"loss": 0.2008,
"step": 500
},
{
"epoch": 0.2154243860404998,
"eval_accuracy": 0.959161751889909,
"eval_f1": 0.7199349545420948,
"eval_loss": 0.112856425344944,
"eval_precision": 0.7503852080123267,
"eval_recall": 0.6918596391532889,
"eval_runtime": 5.1102,
"eval_samples_per_second": 587.066,
"eval_steps_per_second": 2.935,
"step": 500
},
{
"epoch": 0.25850926324859974,
"eval_accuracy": 0.9605500496544456,
"eval_f1": 0.7329311359623308,
"eval_loss": 0.10928678512573242,
"eval_precision": 0.7601098733404548,
"eval_recall": 0.7076289245631482,
"eval_runtime": 5.1042,
"eval_samples_per_second": 587.749,
"eval_steps_per_second": 2.939,
"step": 600
},
{
"epoch": 0.3015941404566997,
"eval_accuracy": 0.9617863440141059,
"eval_f1": 0.7338403041825095,
"eval_loss": 0.10559003055095673,
"eval_precision": 0.7560644869670031,
"eval_recall": 0.7128853530331013,
"eval_runtime": 5.1458,
"eval_samples_per_second": 583.005,
"eval_steps_per_second": 2.915,
"step": 700
},
{
"epoch": 0.34467901766479964,
"eval_accuracy": 0.9620498165825582,
"eval_f1": 0.7305187678349309,
"eval_loss": 0.10398514568805695,
"eval_precision": 0.7531683765841883,
"eval_recall": 0.7091916465407018,
"eval_runtime": 5.1291,
"eval_samples_per_second": 584.896,
"eval_steps_per_second": 2.924,
"step": 800
},
{
"epoch": 0.38776389487289964,
"eval_accuracy": 0.9628402342879148,
"eval_f1": 0.7411920140927774,
"eval_loss": 0.101581871509552,
"eval_precision": 0.7667425968109339,
"eval_recall": 0.717289387697116,
"eval_runtime": 5.1388,
"eval_samples_per_second": 583.791,
"eval_steps_per_second": 2.919,
"step": 900
},
{
"epoch": 0.4308487720809996,
"grad_norm": 0.1194114089012146,
"learning_rate": 0.00028925032313657903,
"loss": 0.1049,
"step": 1000
},
{
"epoch": 0.4308487720809996,
"eval_accuracy": 0.9633671794248191,
"eval_f1": 0.7432511737089201,
"eval_loss": 0.10185921937227249,
"eval_precision": 0.7683907174275747,
"eval_recall": 0.719704503480608,
"eval_runtime": 5.1836,
"eval_samples_per_second": 578.743,
"eval_steps_per_second": 2.894,
"step": 1000
},
{
"epoch": 0.47393364928909953,
"eval_accuracy": 0.9641271964492004,
"eval_f1": 0.7452248016456068,
"eval_loss": 0.09846850484609604,
"eval_precision": 0.7716415639738323,
"eval_recall": 0.7205568972865464,
"eval_runtime": 5.145,
"eval_samples_per_second": 583.095,
"eval_steps_per_second": 2.915,
"step": 1100
},
{
"epoch": 0.5170185264971995,
"eval_accuracy": 0.9632151760199429,
"eval_f1": 0.7427463372594082,
"eval_loss": 0.09953544288873672,
"eval_precision": 0.7510530137981118,
"eval_recall": 0.7346213950845291,
"eval_runtime": 5.1262,
"eval_samples_per_second": 585.225,
"eval_steps_per_second": 2.926,
"step": 1200
},
{
"epoch": 0.5601034037052994,
"eval_accuracy": 0.9641575971301757,
"eval_f1": 0.7527304499781564,
"eval_loss": 0.09697850793600082,
"eval_precision": 0.7720687079910381,
"eval_recall": 0.7343372638158829,
"eval_runtime": 5.1395,
"eval_samples_per_second": 583.715,
"eval_steps_per_second": 2.919,
"step": 1300
},
{
"epoch": 0.6031882809133994,
"eval_accuracy": 0.9648466792322815,
"eval_f1": 0.7408794666267137,
"eval_loss": 0.09701889753341675,
"eval_precision": 0.783676703645008,
"eval_recall": 0.7025145617275181,
"eval_runtime": 5.1321,
"eval_samples_per_second": 584.561,
"eval_steps_per_second": 2.923,
"step": 1400
},
{
"epoch": 0.6462731581214993,
"grad_norm": 0.11641442030668259,
"learning_rate": 0.0002838647134855665,
"loss": 0.0955,
"step": 1500
},
{
"epoch": 0.6462731581214993,
"eval_accuracy": 0.9641069293285502,
"eval_f1": 0.7526039742926792,
"eval_loss": 0.0987541675567627,
"eval_precision": 0.7839335180055401,
"eval_recall": 0.7236823412416536,
"eval_runtime": 5.1118,
"eval_samples_per_second": 586.873,
"eval_steps_per_second": 2.934,
"step": 1500
},
{
"epoch": 0.6893580353295993,
"eval_accuracy": 0.9651101518007337,
"eval_f1": 0.7509688531649203,
"eval_loss": 0.09490419179201126,
"eval_precision": 0.7588107324147934,
"eval_recall": 0.7432873987782356,
"eval_runtime": 5.0796,
"eval_samples_per_second": 590.593,
"eval_steps_per_second": 2.953,
"step": 1600
},
{
"epoch": 0.7324429125376992,
"eval_accuracy": 0.9662653776777933,
"eval_f1": 0.7569454545454545,
"eval_loss": 0.09300602227449417,
"eval_precision": 0.7754433020414245,
"eval_recall": 0.7393095610171899,
"eval_runtime": 5.1475,
"eval_samples_per_second": 582.81,
"eval_steps_per_second": 2.914,
"step": 1700
},
{
"epoch": 0.7755277897457993,
"eval_accuracy": 0.9657992339028394,
"eval_f1": 0.7599940775836542,
"eval_loss": 0.09315136820077896,
"eval_precision": 0.793476580615242,
"eval_recall": 0.7292229009802529,
"eval_runtime": 5.8376,
"eval_samples_per_second": 513.907,
"eval_steps_per_second": 2.57,
"step": 1800
},
{
"epoch": 0.8186126669538992,
"eval_accuracy": 0.9664883160049452,
"eval_f1": 0.754410779423807,
"eval_loss": 0.09128749370574951,
"eval_precision": 0.792461682827651,
"eval_recall": 0.7198465691149311,
"eval_runtime": 5.152,
"eval_samples_per_second": 582.301,
"eval_steps_per_second": 2.912,
"step": 1900
},
{
"epoch": 0.8616975441619992,
"grad_norm": 0.10880027711391449,
"learning_rate": 0.00027847910383455406,
"loss": 0.0906,
"step": 2000
},
{
"epoch": 0.8616975441619992,
"eval_accuracy": 0.9675118055977787,
"eval_f1": 0.7641537107656204,
"eval_loss": 0.0904640182852745,
"eval_precision": 0.7897529179930272,
"eval_recall": 0.7401619548231283,
"eval_runtime": 5.1464,
"eval_samples_per_second": 582.93,
"eval_steps_per_second": 2.915,
"step": 2000
},
{
"epoch": 0.9047824213700991,
"eval_accuracy": 0.966711254332097,
"eval_f1": 0.7613536478171697,
"eval_loss": 0.09170496463775635,
"eval_precision": 0.7858763042492061,
"eval_recall": 0.7383151015769286,
"eval_runtime": 5.0869,
"eval_samples_per_second": 589.746,
"eval_steps_per_second": 2.949,
"step": 2100
},
{
"epoch": 0.9478672985781991,
"eval_accuracy": 0.9671064631847753,
"eval_f1": 0.754022988505747,
"eval_loss": 0.09009002894163132,
"eval_precision": 0.7887061743717034,
"eval_recall": 0.722261684898423,
"eval_runtime": 5.1309,
"eval_samples_per_second": 584.697,
"eval_steps_per_second": 2.923,
"step": 2200
},
{
"epoch": 0.990952175786299,
"eval_accuracy": 0.9677651446059058,
"eval_f1": 0.7611605527819985,
"eval_loss": 0.08897808939218521,
"eval_precision": 0.775582424063698,
"eval_recall": 0.7472652365392811,
"eval_runtime": 5.1531,
"eval_samples_per_second": 582.168,
"eval_steps_per_second": 2.911,
"step": 2300
},
{
"epoch": 1.034037052994399,
"eval_accuracy": 0.9675624733994042,
"eval_f1": 0.7690079652425779,
"eval_loss": 0.08915847539901733,
"eval_precision": 0.7842268498006203,
"eval_recall": 0.754368518255434,
"eval_runtime": 5.145,
"eval_samples_per_second": 583.086,
"eval_steps_per_second": 2.915,
"step": 2400
},
{
"epoch": 1.077121930202499,
"grad_norm": 0.09866651147603989,
"learning_rate": 0.00027309349418354155,
"loss": 0.0842,
"step": 2500
},
{
"epoch": 1.077121930202499,
"eval_accuracy": 0.9678664802091567,
"eval_f1": 0.7673179396092362,
"eval_loss": 0.08764609694480896,
"eval_precision": 0.8008651320871312,
"eval_recall": 0.7364682483307288,
"eval_runtime": 5.0632,
"eval_samples_per_second": 592.507,
"eval_steps_per_second": 2.963,
"step": 2500
},
{
"epoch": 1.1202068074105989,
"eval_accuracy": 0.9675320727184289,
"eval_f1": 0.7698676720632808,
"eval_loss": 0.08784696459770203,
"eval_precision": 0.8025585696670777,
"eval_recall": 0.7397357579201591,
"eval_runtime": 5.1556,
"eval_samples_per_second": 581.896,
"eval_steps_per_second": 2.909,
"step": 2600
},
{
"epoch": 1.163291684618699,
"eval_accuracy": 0.9681603534585841,
"eval_f1": 0.7680871421211453,
"eval_loss": 0.08904842287302017,
"eval_precision": 0.7968845448992059,
"eval_recall": 0.7412984798977127,
"eval_runtime": 5.109,
"eval_samples_per_second": 587.204,
"eval_steps_per_second": 2.936,
"step": 2700
},
{
"epoch": 1.2063765618267988,
"eval_accuracy": 0.9686467643541882,
"eval_f1": 0.7770339106389171,
"eval_loss": 0.08730504661798477,
"eval_precision": 0.7965090258093391,
"eval_recall": 0.7584884216508027,
"eval_runtime": 5.1308,
"eval_samples_per_second": 584.709,
"eval_steps_per_second": 2.924,
"step": 2800
},
{
"epoch": 1.2494614390348988,
"eval_accuracy": 0.9690521067671916,
"eval_f1": 0.7750579374275782,
"eval_loss": 0.08751235157251358,
"eval_precision": 0.7905155857586054,
"eval_recall": 0.7601932092626793,
"eval_runtime": 5.1442,
"eval_samples_per_second": 583.186,
"eval_steps_per_second": 2.916,
"step": 2900
},
{
"epoch": 1.2925463162429986,
"grad_norm": 0.1591762900352478,
"learning_rate": 0.0002677078845325291,
"loss": 0.0794,
"step": 3000
},
{
"epoch": 1.2925463162429986,
"eval_accuracy": 0.9680691514156584,
"eval_f1": 0.7753859857482184,
"eval_loss": 0.08817364275455475,
"eval_precision": 0.8119073527125757,
"eval_recall": 0.742008808069328,
"eval_runtime": 5.1488,
"eval_samples_per_second": 582.661,
"eval_steps_per_second": 2.913,
"step": 3000
},
{
"epoch": 1.3356311934510987,
"eval_accuracy": 0.9690014389655661,
"eval_f1": 0.7762973868237025,
"eval_loss": 0.08583438396453857,
"eval_precision": 0.8055300947143293,
"eval_recall": 0.7491120897854809,
"eval_runtime": 5.1482,
"eval_samples_per_second": 582.727,
"eval_steps_per_second": 2.914,
"step": 3100
},
{
"epoch": 1.3787160706591985,
"eval_accuracy": 0.9687075657161387,
"eval_f1": 0.7790055248618786,
"eval_loss": 0.08625612407922745,
"eval_precision": 0.7976775346136669,
"eval_recall": 0.7611876687029407,
"eval_runtime": 5.1662,
"eval_samples_per_second": 580.698,
"eval_steps_per_second": 2.903,
"step": 3200
},
{
"epoch": 1.4218009478672986,
"eval_accuracy": 0.968626497233538,
"eval_f1": 0.7790014684287813,
"eval_loss": 0.08638014644384384,
"eval_precision": 0.8061084941498252,
"eval_recall": 0.7536581900838187,
"eval_runtime": 5.1072,
"eval_samples_per_second": 587.401,
"eval_steps_per_second": 2.937,
"step": 3300
},
{
"epoch": 1.4648858250753984,
"eval_accuracy": 0.9683528911047607,
"eval_f1": 0.7756664980853986,
"eval_loss": 0.08518864214420319,
"eval_precision": 0.7891796530432226,
"eval_recall": 0.7626083250461714,
"eval_runtime": 5.1521,
"eval_samples_per_second": 582.282,
"eval_steps_per_second": 2.911,
"step": 3400
},
{
"epoch": 1.5079707022834985,
"grad_norm": 0.10778328031301498,
"learning_rate": 0.0002623222748815166,
"loss": 0.0786,
"step": 3500
},
{
"epoch": 1.5079707022834985,
"eval_accuracy": 0.9685150280699621,
"eval_f1": 0.7766948234784491,
"eval_loss": 0.08616286516189575,
"eval_precision": 0.7896359365825014,
"eval_recall": 0.764171047023725,
"eval_runtime": 5.1115,
"eval_samples_per_second": 586.911,
"eval_steps_per_second": 2.935,
"step": 3500
},
{
"epoch": 1.5510555794915986,
"eval_accuracy": 0.9693054457753187,
"eval_f1": 0.7803809940880228,
"eval_loss": 0.08442429453134537,
"eval_precision": 0.8024617232062444,
"eval_recall": 0.7594828810910641,
"eval_runtime": 5.1816,
"eval_samples_per_second": 578.971,
"eval_steps_per_second": 2.895,
"step": 3600
},
{
"epoch": 1.5941404566996984,
"eval_accuracy": 0.9684846273889869,
"eval_f1": 0.7758786533481317,
"eval_loss": 0.08674349635839462,
"eval_precision": 0.8096046942557134,
"eval_recall": 0.7448501207557892,
"eval_runtime": 5.8233,
"eval_samples_per_second": 515.172,
"eval_steps_per_second": 2.576,
"step": 3700
},
{
"epoch": 1.6372253339077982,
"eval_accuracy": 0.9685048945096371,
"eval_f1": 0.7758557232241442,
"eval_loss": 0.08749110996723175,
"eval_precision": 0.8050717995722578,
"eval_recall": 0.7486858928825117,
"eval_runtime": 5.1543,
"eval_samples_per_second": 582.041,
"eval_steps_per_second": 2.91,
"step": 3800
},
{
"epoch": 1.6803102111158983,
"eval_accuracy": 0.9685048945096371,
"eval_f1": 0.7757390417940877,
"eval_loss": 0.08649948239326477,
"eval_precision": 0.7956684092606423,
"eval_recall": 0.756783634038926,
"eval_runtime": 5.1578,
"eval_samples_per_second": 581.646,
"eval_steps_per_second": 2.908,
"step": 3900
},
{
"epoch": 1.7233950883239983,
"grad_norm": 0.09820359200239182,
"learning_rate": 0.00025693666523050407,
"loss": 0.0774,
"step": 4000
},
{
"epoch": 1.7233950883239983,
"eval_accuracy": 0.9692547779736933,
"eval_f1": 0.7781892107747042,
"eval_loss": 0.08616424351930618,
"eval_precision": 0.7958122958122958,
"eval_recall": 0.7613297343372638,
"eval_runtime": 5.1939,
"eval_samples_per_second": 577.605,
"eval_steps_per_second": 2.888,
"step": 4000
},
{
"epoch": 1.7664799655320982,
"eval_accuracy": 0.9701465312823008,
"eval_f1": 0.7791577864334533,
"eval_loss": 0.08393336832523346,
"eval_precision": 0.807095006090134,
"eval_recall": 0.7530899275465265,
"eval_runtime": 5.1775,
"eval_samples_per_second": 579.433,
"eval_steps_per_second": 2.897,
"step": 4100
},
{
"epoch": 1.8095648427401982,
"eval_accuracy": 0.9697310553089723,
"eval_f1": 0.7807633808937495,
"eval_loss": 0.08360669761896133,
"eval_precision": 0.7963947990543735,
"eval_recall": 0.7657337690012785,
"eval_runtime": 5.1736,
"eval_samples_per_second": 579.871,
"eval_steps_per_second": 2.899,
"step": 4200
},
{
"epoch": 1.8526497199482983,
"eval_accuracy": 0.9697614559899476,
"eval_f1": 0.7807478924994596,
"eval_loss": 0.08350168168544769,
"eval_precision": 0.7921052631578948,
"eval_recall": 0.7697116067623242,
"eval_runtime": 5.1598,
"eval_samples_per_second": 581.416,
"eval_steps_per_second": 2.907,
"step": 4300
},
{
"epoch": 1.8957345971563981,
"eval_accuracy": 0.970166798402951,
"eval_f1": 0.7817303469477382,
"eval_loss": 0.08246050775051117,
"eval_precision": 0.8062811414766722,
"eval_recall": 0.7586304872851257,
"eval_runtime": 5.1425,
"eval_samples_per_second": 583.372,
"eval_steps_per_second": 2.917,
"step": 4400
},
{
"epoch": 1.938819474364498,
"grad_norm": 0.11857543140649796,
"learning_rate": 0.00025155105557949156,
"loss": 0.0771,
"step": 4500
},
{
"epoch": 1.938819474364498,
"eval_accuracy": 0.9691331752497923,
"eval_f1": 0.7826024131204393,
"eval_loss": 0.0859459713101387,
"eval_precision": 0.7962364010585122,
"eval_recall": 0.7694274754936781,
"eval_runtime": 5.128,
"eval_samples_per_second": 585.027,
"eval_steps_per_second": 2.925,
"step": 4500
},
{
"epoch": 1.981904351572598,
"eval_accuracy": 0.9700755963600252,
"eval_f1": 0.7879845806967779,
"eval_loss": 0.0816790908575058,
"eval_precision": 0.8073025335320417,
"eval_recall": 0.7695695411280011,
"eval_runtime": 5.1709,
"eval_samples_per_second": 580.174,
"eval_steps_per_second": 2.901,
"step": 4600
},
{
"epoch": 2.024989228780698,
"eval_accuracy": 0.9698019902312478,
"eval_f1": 0.7841042154566745,
"eval_loss": 0.08529265224933624,
"eval_precision": 0.8086037735849056,
"eval_recall": 0.7610456030686177,
"eval_runtime": 5.1834,
"eval_samples_per_second": 578.77,
"eval_steps_per_second": 2.894,
"step": 4700
},
{
"epoch": 2.068074105988798,
"eval_accuracy": 0.9703796031697777,
"eval_f1": 0.7924996351962643,
"eval_loss": 0.0835157036781311,
"eval_precision": 0.8146092695365231,
"eval_recall": 0.7715584600085239,
"eval_runtime": 5.1951,
"eval_samples_per_second": 577.468,
"eval_steps_per_second": 2.887,
"step": 4800
},
{
"epoch": 2.1111589831968978,
"eval_accuracy": 0.9694473156198699,
"eval_f1": 0.7853357531760434,
"eval_loss": 0.08416793495416641,
"eval_precision": 0.8029988123515439,
"eval_recall": 0.7684330160534166,
"eval_runtime": 5.1858,
"eval_samples_per_second": 578.5,
"eval_steps_per_second": 2.892,
"step": 4900
},
{
"epoch": 2.154243860404998,
"grad_norm": 0.08804752677679062,
"learning_rate": 0.0002461654459284791,
"loss": 0.0681,
"step": 5000
},
{
"epoch": 2.154243860404998,
"eval_accuracy": 0.9703390689284773,
"eval_f1": 0.7868997565833149,
"eval_loss": 0.08355652540922165,
"eval_precision": 0.8183491868671372,
"eval_recall": 0.7577780934791873,
"eval_runtime": 5.1276,
"eval_samples_per_second": 585.071,
"eval_steps_per_second": 2.925,
"step": 5000
},
{
"epoch": 2.197328737613098,
"eval_accuracy": 0.9699134593948238,
"eval_f1": 0.7855120853596573,
"eval_loss": 0.08640210330486298,
"eval_precision": 0.803057287028792,
"eval_recall": 0.7687171473220628,
"eval_runtime": 5.1532,
"eval_samples_per_second": 582.162,
"eval_steps_per_second": 2.911,
"step": 5100
},
{
"epoch": 2.2404136148211977,
"eval_accuracy": 0.9702073326442512,
"eval_f1": 0.789276448544249,
"eval_loss": 0.08586513251066208,
"eval_precision": 0.8009360830773731,
"eval_recall": 0.7779514135530615,
"eval_runtime": 5.1699,
"eval_samples_per_second": 580.277,
"eval_steps_per_second": 2.901,
"step": 5200
},
{
"epoch": 2.2834984920292976,
"eval_accuracy": 0.9699539936361241,
"eval_f1": 0.7828817644051704,
"eval_loss": 0.08344285935163498,
"eval_precision": 0.8055305079651337,
"eval_recall": 0.7614717999715869,
"eval_runtime": 5.1913,
"eval_samples_per_second": 577.886,
"eval_steps_per_second": 2.889,
"step": 5300
},
{
"epoch": 2.326583369237398,
"eval_accuracy": 0.9698323909122231,
"eval_f1": 0.7898440333696048,
"eval_loss": 0.08490483462810516,
"eval_precision": 0.8069967388081827,
"eval_recall": 0.7734053132547237,
"eval_runtime": 5.153,
"eval_samples_per_second": 582.188,
"eval_steps_per_second": 2.911,
"step": 5400
},
{
"epoch": 2.3696682464454977,
"grad_norm": 0.10004045069217682,
"learning_rate": 0.00024077983627746657,
"loss": 0.0674,
"step": 5500
},
{
"epoch": 2.3696682464454977,
"eval_accuracy": 0.9706329421779047,
"eval_f1": 0.7900129888872853,
"eval_loss": 0.08235176652669907,
"eval_precision": 0.8027570024930342,
"eval_recall": 0.7776672822844154,
"eval_runtime": 5.8382,
"eval_samples_per_second": 513.861,
"eval_steps_per_second": 2.569,
"step": 5500
},
{
"epoch": 2.4127531236535975,
"eval_accuracy": 0.9701465312823008,
"eval_f1": 0.7867615038467122,
"eval_loss": 0.08183197677135468,
"eval_precision": 0.8042736311025375,
"eval_recall": 0.7699957380309703,
"eval_runtime": 5.1409,
"eval_samples_per_second": 583.555,
"eval_steps_per_second": 2.918,
"step": 5600
},
{
"epoch": 2.4558380008616973,
"eval_accuracy": 0.9702073326442512,
"eval_f1": 0.7872696817420435,
"eval_loss": 0.08382030576467514,
"eval_precision": 0.8076808129109384,
"eval_recall": 0.7678647535161245,
"eval_runtime": 5.1657,
"eval_samples_per_second": 580.751,
"eval_steps_per_second": 2.904,
"step": 5700
},
{
"epoch": 2.4989228780697976,
"eval_accuracy": 0.9702275997649014,
"eval_f1": 0.7875814600571136,
"eval_loss": 0.08378946781158447,
"eval_precision": 0.8126322151707465,
"eval_recall": 0.7640289813894019,
"eval_runtime": 5.133,
"eval_samples_per_second": 584.453,
"eval_steps_per_second": 2.922,
"step": 5800
},
{
"epoch": 2.5420077552778975,
"eval_accuracy": 0.9705620072556292,
"eval_f1": 0.7894349617422014,
"eval_loss": 0.08313048630952835,
"eval_precision": 0.8187089882496567,
"eval_recall": 0.7621821281432022,
"eval_runtime": 5.1336,
"eval_samples_per_second": 584.386,
"eval_steps_per_second": 2.922,
"step": 5900
},
{
"epoch": 2.5850926324859973,
"grad_norm": 0.08104603737592697,
"learning_rate": 0.00023539422662645408,
"loss": 0.0674,
"step": 6000
},
{
"epoch": 2.5850926324859973,
"eval_accuracy": 0.9705721408159543,
"eval_f1": 0.7914712778429074,
"eval_loss": 0.08241896331310272,
"eval_precision": 0.8172189438644273,
"eval_recall": 0.7672964909788322,
"eval_runtime": 5.1433,
"eval_samples_per_second": 583.282,
"eval_steps_per_second": 2.916,
"step": 6000
},
{
"epoch": 2.6281775096940976,
"eval_accuracy": 0.9694777163008451,
"eval_f1": 0.7920289855072463,
"eval_loss": 0.0877351462841034,
"eval_precision": 0.8083123798254696,
"eval_recall": 0.7763886915755079,
"eval_runtime": 5.1298,
"eval_samples_per_second": 584.819,
"eval_steps_per_second": 2.924,
"step": 6100
},
{
"epoch": 2.6712623869021974,
"eval_accuracy": 0.970187065523601,
"eval_f1": 0.7875146541617819,
"eval_loss": 0.08446252346038818,
"eval_precision": 0.8131336056892117,
"eval_recall": 0.7634607188521096,
"eval_runtime": 5.1724,
"eval_samples_per_second": 580.006,
"eval_steps_per_second": 2.9,
"step": 6200
},
{
"epoch": 2.7143472641102973,
"eval_accuracy": 0.9709268154273323,
"eval_f1": 0.79413046636641,
"eval_loss": 0.0830024853348732,
"eval_precision": 0.8125464545859967,
"eval_recall": 0.7765307572098309,
"eval_runtime": 5.1622,
"eval_samples_per_second": 581.144,
"eval_steps_per_second": 2.906,
"step": 6300
},
{
"epoch": 2.757432141318397,
"eval_accuracy": 0.9705518736953042,
"eval_f1": 0.7958092167924391,
"eval_loss": 0.08372634649276733,
"eval_precision": 0.8216338880484115,
"eval_recall": 0.7715584600085239,
"eval_runtime": 5.1748,
"eval_samples_per_second": 579.728,
"eval_steps_per_second": 2.899,
"step": 6400
},
{
"epoch": 2.800517018526497,
"grad_norm": 0.07959684729576111,
"learning_rate": 0.00023000861697544157,
"loss": 0.0667,
"step": 6500
},
{
"epoch": 2.800517018526497,
"eval_accuracy": 0.9703188018078271,
"eval_f1": 0.7892570933506605,
"eval_loss": 0.08360672742128372,
"eval_precision": 0.8024075161479741,
"eval_recall": 0.7765307572098309,
"eval_runtime": 5.1231,
"eval_samples_per_second": 585.583,
"eval_steps_per_second": 2.928,
"step": 6500
},
{
"epoch": 2.843601895734597,
"eval_accuracy": 0.9709369489876574,
"eval_f1": 0.7909653916211292,
"eval_loss": 0.08242646604776382,
"eval_precision": 0.8118456476218965,
"eval_recall": 0.7711322631055547,
"eval_runtime": 5.1572,
"eval_samples_per_second": 581.707,
"eval_steps_per_second": 2.909,
"step": 6600
},
{
"epoch": 2.886686772942697,
"eval_accuracy": 0.9714638941245617,
"eval_f1": 0.7976650857351331,
"eval_loss": 0.08036693930625916,
"eval_precision": 0.81998199819982,
"eval_recall": 0.7765307572098309,
"eval_runtime": 5.1014,
"eval_samples_per_second": 588.074,
"eval_steps_per_second": 2.94,
"step": 6700
},
{
"epoch": 2.929771650150797,
"eval_accuracy": 0.9718692365375651,
"eval_f1": 0.793916460486101,
"eval_loss": 0.08084654808044434,
"eval_precision": 0.8138147098314188,
"eval_recall": 0.7749680352322773,
"eval_runtime": 5.1757,
"eval_samples_per_second": 579.63,
"eval_steps_per_second": 2.898,
"step": 6800
},
{
"epoch": 2.972856527358897,
"eval_accuracy": 0.9711700208751343,
"eval_f1": 0.792,
"eval_loss": 0.08285341411828995,
"eval_precision": 0.81135449262405,
"eval_recall": 0.7735473788890468,
"eval_runtime": 5.1616,
"eval_samples_per_second": 581.214,
"eval_steps_per_second": 2.906,
"step": 6900
},
{
"epoch": 3.015941404566997,
"grad_norm": 0.07623889297246933,
"learning_rate": 0.00022462300732442912,
"loss": 0.066,
"step": 7000
},
{
"epoch": 3.015941404566997,
"eval_accuracy": 0.9713220242800106,
"eval_f1": 0.7958886135005102,
"eval_loss": 0.0849044919013977,
"eval_precision": 0.8173379248390478,
"eval_recall": 0.7755362977695696,
"eval_runtime": 5.1766,
"eval_samples_per_second": 579.536,
"eval_steps_per_second": 2.898,
"step": 7000
},
{
"epoch": 3.059026281775097,
"eval_accuracy": 0.9710990859528587,
"eval_f1": 0.7920576041893957,
"eval_loss": 0.08765333890914917,
"eval_precision": 0.8114754098360656,
"eval_recall": 0.7735473788890468,
"eval_runtime": 5.1302,
"eval_samples_per_second": 584.777,
"eval_steps_per_second": 2.924,
"step": 7100
},
{
"epoch": 3.102111158983197,
"eval_accuracy": 0.970774812022456,
"eval_f1": 0.7927130207577296,
"eval_loss": 0.08476532250642776,
"eval_precision": 0.8103576198248998,
"eval_recall": 0.7758204290382157,
"eval_runtime": 5.1437,
"eval_samples_per_second": 583.242,
"eval_steps_per_second": 2.916,
"step": 7200
},
{
"epoch": 3.145196036191297,
"eval_accuracy": 0.9712206886767597,
"eval_f1": 0.7949370771804759,
"eval_loss": 0.08828677237033844,
"eval_precision": 0.8145497912939773,
"eval_recall": 0.7762466259411849,
"eval_runtime": 5.1438,
"eval_samples_per_second": 583.229,
"eval_steps_per_second": 2.916,
"step": 7300
},
{
"epoch": 3.188280913399397,
"eval_accuracy": 0.9700249285583997,
"eval_f1": 0.7877159866414986,
"eval_loss": 0.08564765751361847,
"eval_precision": 0.8054936896807721,
"eval_recall": 0.7707060662025856,
"eval_runtime": 5.8414,
"eval_samples_per_second": 513.575,
"eval_steps_per_second": 2.568,
"step": 7400
},
{
"epoch": 3.2313657906074966,
"grad_norm": 0.14740775525569916,
"learning_rate": 0.00021923739767341663,
"loss": 0.0556,
"step": 7500
},
{
"epoch": 3.2313657906074966,
"eval_accuracy": 0.9712409557974099,
"eval_f1": 0.7959982601130926,
"eval_loss": 0.0849863737821579,
"eval_precision": 0.8127313101406366,
"eval_recall": 0.7799403324335843,
"eval_runtime": 5.1395,
"eval_samples_per_second": 583.711,
"eval_steps_per_second": 2.919,
"step": 7500
},
{
"epoch": 3.274450667815597,
"eval_accuracy": 0.9710686852718834,
"eval_f1": 0.7949167397020157,
"eval_loss": 0.08467870950698853,
"eval_precision": 0.8179768525477228,
"eval_recall": 0.7731211819860776,
"eval_runtime": 5.1447,
"eval_samples_per_second": 583.126,
"eval_steps_per_second": 2.916,
"step": 7600
},
{
"epoch": 3.3175355450236967,
"eval_accuracy": 0.971615897529438,
"eval_f1": 0.7966792892513836,
"eval_loss": 0.08322973549365997,
"eval_precision": 0.8172717764828926,
"eval_recall": 0.7770990197471231,
"eval_runtime": 5.1113,
"eval_samples_per_second": 586.93,
"eval_steps_per_second": 2.935,
"step": 7700
},
{
"epoch": 3.3606204222317966,
"eval_accuracy": 0.9713321578403357,
"eval_f1": 0.7940792337831956,
"eval_loss": 0.08206350356340408,
"eval_precision": 0.811508230757823,
"eval_recall": 0.7773831510157693,
"eval_runtime": 5.1313,
"eval_samples_per_second": 584.642,
"eval_steps_per_second": 2.923,
"step": 7800
},
{
"epoch": 3.4037052994398964,
"eval_accuracy": 0.9709977503496078,
"eval_f1": 0.7957571324067301,
"eval_loss": 0.08641249686479568,
"eval_precision": 0.8202382747700196,
"eval_recall": 0.7726949850831084,
"eval_runtime": 5.1503,
"eval_samples_per_second": 582.489,
"eval_steps_per_second": 2.912,
"step": 7900
},
{
"epoch": 3.4467901766479967,
"grad_norm": 0.10462938249111176,
"learning_rate": 0.00021385178802240412,
"loss": 0.0571,
"step": 8000
},
{
"epoch": 3.4467901766479967,
"eval_accuracy": 0.9705924079366044,
"eval_f1": 0.7928046989720997,
"eval_loss": 0.08726098388433456,
"eval_precision": 0.8203920376842425,
"eval_recall": 0.7670123597101861,
"eval_runtime": 5.1929,
"eval_samples_per_second": 577.716,
"eval_steps_per_second": 2.889,
"step": 8000
},
{
"epoch": 3.4898750538560965,
"eval_accuracy": 0.971018017470258,
"eval_f1": 0.792296511627907,
"eval_loss": 0.08289187401533127,
"eval_precision": 0.8110400238059813,
"eval_recall": 0.7743997726949851,
"eval_runtime": 5.1659,
"eval_samples_per_second": 580.728,
"eval_steps_per_second": 2.904,
"step": 8100
},
{
"epoch": 3.5329599310641964,
"eval_accuracy": 0.9715246954865122,
"eval_f1": 0.7980417945345607,
"eval_loss": 0.0846642255783081,
"eval_precision": 0.8215736422446216,
"eval_recall": 0.7758204290382157,
"eval_runtime": 5.165,
"eval_samples_per_second": 580.835,
"eval_steps_per_second": 2.904,
"step": 8200
},
{
"epoch": 3.5760448082722966,
"eval_accuracy": 0.9707849455827811,
"eval_f1": 0.7915995902239135,
"eval_loss": 0.08357686549425125,
"eval_precision": 0.8162064282480761,
"eval_recall": 0.7684330160534166,
"eval_runtime": 5.1568,
"eval_samples_per_second": 581.751,
"eval_steps_per_second": 2.909,
"step": 8300
},
{
"epoch": 3.6191296854803965,
"eval_accuracy": 0.9708457469447316,
"eval_f1": 0.7932402645113886,
"eval_loss": 0.08390816301107407,
"eval_precision": 0.8214883579363872,
"eval_recall": 0.766870294075863,
"eval_runtime": 5.1482,
"eval_samples_per_second": 582.733,
"eval_steps_per_second": 2.914,
"step": 8400
},
{
"epoch": 3.6622145626884963,
"grad_norm": 0.11067754775285721,
"learning_rate": 0.00020846617837139164,
"loss": 0.0576,
"step": 8500
},
{
"epoch": 3.6622145626884963,
"eval_accuracy": 0.9711902879957844,
"eval_f1": 0.7922267679719464,
"eval_loss": 0.0809941291809082,
"eval_precision": 0.8154609715746729,
"eval_recall": 0.7702798692996164,
"eval_runtime": 5.1845,
"eval_samples_per_second": 578.644,
"eval_steps_per_second": 2.893,
"step": 8500
},
{
"epoch": 3.705299439896596,
"eval_accuracy": 0.9711598873148092,
"eval_f1": 0.7963543565439302,
"eval_loss": 0.08366210758686066,
"eval_precision": 0.8180047932893948,
"eval_recall": 0.7758204290382157,
"eval_runtime": 5.1558,
"eval_samples_per_second": 581.865,
"eval_steps_per_second": 2.909,
"step": 8600
},
{
"epoch": 3.748384317104696,
"eval_accuracy": 0.9712814900387102,
"eval_f1": 0.7941414587473319,
"eval_loss": 0.08393159508705139,
"eval_precision": 0.8239156994502138,
"eval_recall": 0.7664440971728939,
"eval_runtime": 5.1323,
"eval_samples_per_second": 584.53,
"eval_steps_per_second": 2.923,
"step": 8700
},
{
"epoch": 3.7914691943127963,
"eval_accuracy": 0.9712916235990353,
"eval_f1": 0.7929502585390722,
"eval_loss": 0.0845068171620369,
"eval_precision": 0.8135086670651525,
"eval_recall": 0.7734053132547237,
"eval_runtime": 5.1296,
"eval_samples_per_second": 584.843,
"eval_steps_per_second": 2.924,
"step": 8800
},
{
"epoch": 3.834554071520896,
"eval_accuracy": 0.9722441782695932,
"eval_f1": 0.7972508591065292,
"eval_loss": 0.07892899960279465,
"eval_precision": 0.8213317264236216,
"eval_recall": 0.7745418383293081,
"eval_runtime": 5.1954,
"eval_samples_per_second": 577.434,
"eval_steps_per_second": 2.887,
"step": 8900
}
],
"logging_steps": 500,
"max_steps": 27852,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.129811768410176e+17,
"train_batch_size": 200,
"trial_name": null,
"trial_params": null
}