diff --git "a/checkpoint-3240/trainer_state.json" "b/checkpoint-3240/trainer_state.json" --- "a/checkpoint-3240/trainer_state.json" +++ "b/checkpoint-3240/trainer_state.json" @@ -11,1842 +11,1842 @@ { "epoch": 1.0, "eval_Claim": { - "f1-score": 0.4888068880688806, - "precision": 0.6228840125391849, - "recall": 0.40222672064777326, - "support": 4940.0 + "f1-score": 0.41092129347162903, + "precision": 0.5640703517587939, + "recall": 0.32317658349328215, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.7325278370054489, - "precision": 0.7604525332021643, - "recall": 0.706581352833638, - "support": 2188.0 + "f1-score": 0.6523102655237298, + "precision": 0.5929304446978335, + "recall": 0.724907063197026, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.8634848138844484, - "precision": 0.827613377692173, - "recall": 0.902606702950444, - "support": 10473.0 + "f1-score": 0.8834432924913603, + "precision": 0.8545380875202593, + "recall": 0.9143724257533059, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8792819197655248, - "precision": 0.8543458914268763, - "recall": 0.9057173407132524, - "support": 15899.0 + "f1-score": 0.8664166767469054, + "precision": 0.8441231929604023, + "recall": 0.8899196554294707, + "support": 12073.0 }, - "eval_accuracy": 0.8174925373134329, - "eval_loss": 0.49806877970695496, + "eval_accuracy": 0.7997031029363844, + "eval_loss": 0.5104541182518005, "eval_macro avg": { - "f1-score": 0.7410253646810757, - "precision": 0.7663239537150996, - "recall": 0.7292830292862769, - "support": 33500.0 - }, - "eval_runtime": 4.9194, - "eval_samples_per_second": 16.465, - "eval_steps_per_second": 2.236, + "f1-score": 0.7032728820584061, + "precision": 0.7139155192343223, + "recall": 0.7130939319682712, + "support": 27619.0 + }, + "eval_runtime": 4.7759, + "eval_samples_per_second": 16.751, + "eval_steps_per_second": 2.094, "eval_weighted avg": { - "f1-score": 0.8071778099281224, - "precision": 0.80572410140167, - "recall": 0.8174925373134329, - "support": 33500.0 + "f1-score": 0.7866826459135919, + "precision": 0.7857670171690952, + "recall": 0.7997031029363844, + "support": 27619.0 }, "step": 81 }, { "epoch": 2.0, "eval_Claim": { - "f1-score": 0.5993816254416962, - "precision": 0.6593780369290574, - "recall": 0.5493927125506073, - "support": 4940.0 + "f1-score": 0.5200312174817897, + "precision": 0.5678977272727272, + "recall": 0.47960652591170827, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8017817371937639, - "precision": 0.7197382769901853, - "recall": 0.9049360146252285, - "support": 2188.0 + "f1-score": 0.7526302911671151, + "precision": 0.7948320413436692, + "recall": 0.7146840148698885, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9058997050147494, - "precision": 0.9337184554575859, - "recall": 0.8796906330564308, - "support": 10473.0 + "f1-score": 0.9116096466629278, + "precision": 0.944560669456067, + "recall": 0.8808801213960546, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8854737486606459, - "precision": 0.862579028987236, - "recall": 0.9096169570413234, - "support": 15899.0 + "f1-score": 0.8738735224125151, + "precision": 0.8259587020648967, + "recall": 0.9276898865236478, + "support": 12073.0 }, - "eval_accuracy": 0.8468358208955223, - "eval_loss": 0.4562840759754181, + "eval_accuracy": 0.8278359100619139, + "eval_loss": 0.44692954421043396, "eval_macro avg": { - "f1-score": 0.7981342040777137, - "precision": 0.7938534495910161, - "recall": 0.8109090793183975, - "support": 33500.0 - }, - "eval_runtime": 4.904, - "eval_samples_per_second": 16.517, - "eval_steps_per_second": 2.243, + "f1-score": 0.7645361694310869, + "precision": 0.7833122850343401, + "recall": 0.7507151371753248, + "support": 27619.0 + }, + "eval_runtime": 4.7439, + "eval_samples_per_second": 16.864, + "eval_steps_per_second": 2.108, "eval_weighted avg": { - "f1-score": 0.8442053257085675, - "precision": 0.8455251408465503, - "recall": 0.8468358208955223, - "support": 33500.0 + "f1-score": 0.8236335905447046, + "precision": 0.8242076985653165, + "recall": 0.8278359100619139, + "support": 27619.0 }, "step": 162 }, { "epoch": 3.0, "eval_Claim": { - "f1-score": 0.6235462150581514, - "precision": 0.6285479226655697, - "recall": 0.6186234817813765, - "support": 4940.0 + "f1-score": 0.6218357913273168, + "precision": 0.5744204961366409, + "recall": 0.6777831094049904, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8338848444738584, - "precision": 0.8059701492537313, - "recall": 0.8638025594149908, - "support": 2188.0 + "f1-score": 0.7799868909766223, + "precision": 0.7360824742268042, + "recall": 0.8294609665427509, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9066250826953974, - "precision": 0.8974646833193002, - "recall": 0.9159744103886184, - "support": 10473.0 + "f1-score": 0.9093637121422617, + "precision": 0.9330596419204014, + "recall": 0.8868415347929763, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8827730692315017, - "precision": 0.8911176621379133, - "recall": 0.8745833071262343, - "support": 15899.0 + "f1-score": 0.876420695504665, + "precision": 0.89797514556357, + "recall": 0.855876749772219, + "support": 12073.0 }, - "eval_accuracy": 0.8490746268656716, - "eval_loss": 0.43820467591285706, + "eval_accuracy": 0.8372859263550454, + "eval_loss": 0.40934404730796814, "eval_macro avg": { - "f1-score": 0.8117073028647273, - "precision": 0.8057751043441286, - "recall": 0.818245939677805, - "support": 33500.0 - }, - "eval_runtime": 4.9179, - "eval_samples_per_second": 16.47, - "eval_steps_per_second": 2.237, + "f1-score": 0.7969017724877165, + "precision": 0.7853844394618541, + "recall": 0.8124905901282342, + "support": 27619.0 + }, + "eval_runtime": 4.7941, + "eval_samples_per_second": 16.687, + "eval_steps_per_second": 2.086, "eval_weighted avg": { - "f1-score": 0.8488105033097496, - "precision": 0.8488213959184715, - "recall": 0.8490746268656716, - "support": 33500.0 + "f1-score": 0.8414917278933443, + "precision": 0.8482528803063183, + "recall": 0.8372859263550454, + "support": 27619.0 }, "step": 243 }, { "epoch": 4.0, "eval_Claim": { - "f1-score": 0.6415287144900879, - "precision": 0.6477507222451506, - "recall": 0.6354251012145749, - "support": 4940.0 + "f1-score": 0.608644284729743, + "precision": 0.516112873601603, + "recall": 0.7416026871401151, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8005126014523707, - "precision": 0.7514033680834001, - "recall": 0.856489945155393, - "support": 2188.0 + "f1-score": 0.788337924701561, + "precision": 0.7790381125226861, + "recall": 0.7978624535315985, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9065169636319258, - "precision": 0.9273871354374751, - "recall": 0.8865654540246348, - "support": 10473.0 + "f1-score": 0.9068679925629612, + "precision": 0.9442684500762643, + "recall": 0.8723173639713853, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8932505382719131, - "precision": 0.8863636363636364, - "recall": 0.9002452984464432, - "support": 15899.0 + "f1-score": 0.8528029247910863, + "precision": 0.8985600293497202, + "recall": 0.8114801623457302, + "support": 12073.0 }, - "eval_accuracy": 0.8540597014925373, - "eval_loss": 0.4749625325202942, + "eval_accuracy": 0.8201962417176581, + "eval_loss": 0.5160673260688782, "eval_macro avg": { - "f1-score": 0.8104522044615743, - "precision": 0.8032262155324155, - "recall": 0.8196814497102615, - "support": 33500.0 - }, - "eval_runtime": 4.8972, - "eval_samples_per_second": 16.54, - "eval_steps_per_second": 2.246, + "f1-score": 0.789163281696338, + "precision": 0.7844948663875684, + "recall": 0.8058156667472073, + "support": 27619.0 + }, + "eval_runtime": 4.7669, + "eval_samples_per_second": 16.782, + "eval_steps_per_second": 2.098, "eval_weighted avg": { - "f1-score": 0.8542213698406307, - "precision": 0.8551874645146165, - "recall": 0.8540597014925373, - "support": 33500.0 + "f1-score": 0.8289940404467937, + "precision": 0.8468005514342695, + "recall": 0.8201962417176581, + "support": 27619.0 }, "step": 324 }, { "epoch": 5.0, "eval_Claim": { - "f1-score": 0.6099687080911935, - "precision": 0.6808882235528942, - "recall": 0.5524291497975709, - "support": 4940.0 + "f1-score": 0.5923049770561243, + "precision": 0.5811590856615101, + "recall": 0.6038867562380038, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8315018315018317, - "precision": 0.8330275229357799, - "recall": 0.829981718464351, - "support": 2188.0 + "f1-score": 0.7829807463750891, + "precision": 0.8014598540145985, + "recall": 0.7653345724907064, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9103454839630722, - "precision": 0.9074091642159188, - "recall": 0.9133008689009835, - "support": 10473.0 + "f1-score": 0.9075350077795066, + "precision": 0.9311288483466362, + "recall": 0.8851073054411446, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8913988368533824, - "precision": 0.8682249120505635, - "recall": 0.915843763758727, - "support": 15899.0 + "f1-score": 0.8770785784153896, + "precision": 0.8633555323758325, + "recall": 0.891244926695933, + "support": 12073.0 }, - "eval_accuracy": 0.8558507462686568, - "eval_loss": 0.5162495374679565, + "eval_accuracy": 0.8360186827908324, + "eval_loss": 0.5153732299804688, "eval_macro avg": { - "f1-score": 0.8108037151023699, - "precision": 0.8223874556887891, - "recall": 0.8028888752304081, - "support": 33500.0 - }, - "eval_runtime": 4.9413, - "eval_samples_per_second": 16.392, - "eval_steps_per_second": 2.226, + "f1-score": 0.7899748274065275, + "precision": 0.7942758300996444, + "recall": 0.7863933902164468, + "support": 27619.0 + }, + "eval_runtime": 4.7506, + "eval_samples_per_second": 16.84, + "eval_steps_per_second": 2.105, "eval_weighted avg": { - "f1-score": 0.8519095458499607, - "precision": 0.8505509283002989, - "recall": 0.8558507462686568, - "support": 33500.0 + "f1-score": 0.8369451960444566, + "precision": 0.8385857117236715, + "recall": 0.8360186827908324, + "support": 27619.0 }, "step": 405 }, { "epoch": 6.0, "eval_Claim": { - "f1-score": 0.646279703934554, - "precision": 0.6227477477477478, - "recall": 0.6716599190283401, - "support": 4940.0 + "f1-score": 0.5533534584591154, + "precision": 0.6108374384236454, + "recall": 0.5057581573896354, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8414067684140676, - "precision": 0.8152593227603944, - "recall": 0.8692870201096892, - "support": 2188.0 + "f1-score": 0.7910518800571156, + "precision": 0.8107317073170732, + "recall": 0.7723048327137546, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9112506693929215, - "precision": 0.9295788637266588, - "recall": 0.8936312422419556, - "support": 10473.0 + "f1-score": 0.909817664129661, + "precision": 0.919331636605068, + "recall": 0.9004985909386516, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8861383012314494, - "precision": 0.8897343224906474, - "recall": 0.8825712308950249, - "support": 15899.0 + "f1-score": 0.8747714081259442, + "precision": 0.8410671966975002, + "recall": 0.9112896546011762, + "support": 12073.0 }, - "eval_accuracy": 0.8540597014925373, - "eval_loss": 0.552895724773407, + "eval_accuracy": 0.8356566132010572, + "eval_loss": 0.5417770147323608, "eval_macro avg": { - "f1-score": 0.8212688607432481, - "precision": 0.8143300641813621, - "recall": 0.8292873530687525, - "support": 33500.0 - }, - "eval_runtime": 4.9336, - "eval_samples_per_second": 16.418, - "eval_steps_per_second": 2.23, + "f1-score": 0.782248602692959, + "precision": 0.7954919947608217, + "recall": 0.7724628089108043, + "support": 27619.0 + }, + "eval_runtime": 4.7432, + "eval_samples_per_second": 16.866, + "eval_steps_per_second": 2.108, "eval_weighted avg": { - "f1-score": 0.8556973390614198, - "precision": 0.8579560210496036, - "recall": 0.8540597014925373, - "support": 33500.0 + "f1-score": 0.8314498656832355, + "precision": 0.8301032992701898, + "recall": 0.8356566132010572, + "support": 27619.0 }, "step": 486 }, { "epoch": 6.17, - "grad_norm": 9.790245056152344, + "grad_norm": 6.106111526489258, "learning_rate": 1.7530864197530865e-05, - "loss": 0.3328, + "loss": 0.345, "step": 500 }, { "epoch": 7.0, "eval_Claim": { - "f1-score": 0.6202600808654791, - "precision": 0.6739491807171694, - "recall": 0.5744939271255061, - "support": 4940.0 + "f1-score": 0.5860923076923077, + "precision": 0.6017184735911044, + "recall": 0.571257197696737, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8670041417395307, - "precision": 0.8730305838739574, - "recall": 0.8610603290676416, - "support": 2188.0 + "f1-score": 0.8099526066350711, + "precision": 0.8264023210831721, + "recall": 0.7941449814126395, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9109892729439809, - "precision": 0.9097314797181489, - "recall": 0.9122505490308412, - "support": 10473.0 + "f1-score": 0.9184632841627491, + "precision": 0.9157418381639909, + "recall": 0.9212009538261435, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8872356123954747, - "precision": 0.8677611401767996, - "recall": 0.9076042518397384, - "support": 15899.0 + "f1-score": 0.8751742803247764, + "precision": 0.8666450093397222, + "recall": 0.8838731052762362, + "support": 12073.0 }, - "eval_accuracy": 0.8568955223880597, - "eval_loss": 0.5894299149513245, + "eval_accuracy": 0.84217386581701, + "eval_loss": 0.5831862688064575, "eval_macro avg": { - "f1-score": 0.8213722769861164, - "precision": 0.8311180961215189, - "recall": 0.8138522642659318, - "support": 33500.0 - }, - "eval_runtime": 4.9037, - "eval_samples_per_second": 16.518, - "eval_steps_per_second": 2.243, + "f1-score": 0.7974206197037261, + "precision": 0.8026269105444974, + "recall": 0.792619059552939, + "support": 27619.0 + }, + "eval_runtime": 4.7225, + "eval_samples_per_second": 16.94, + "eval_steps_per_second": 2.118, "eval_weighted avg": { - "f1-score": 0.8539713289140156, - "precision": 0.8526463291050193, - "recall": 0.8568955223880597, - "support": 33500.0 + "f1-score": 0.8409273360363069, + "precision": 0.8399297508801243, + "recall": 0.84217386581701, + "support": 27619.0 }, "step": 567 }, { "epoch": 8.0, "eval_Claim": { - "f1-score": 0.6441500279902966, - "precision": 0.5974385600553825, - "recall": 0.6987854251012146, - "support": 4940.0 + "f1-score": 0.6089751514402275, + "precision": 0.6281560826319816, + "recall": 0.5909309021113244, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.834510811389424, - "precision": 0.7849375755134917, - "recall": 0.8907678244972578, - "support": 2188.0 + "f1-score": 0.8056783802653014, + "precision": 0.806993006993007, + "recall": 0.804368029739777, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9059161060546101, - "precision": 0.9398542543364466, - "recall": 0.874343550081161, - "support": 10473.0 + "f1-score": 0.9201371876531113, + "precision": 0.9243136825987094, + "recall": 0.9159982657706481, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8801401497053671, - "precision": 0.8915849251419721, - "recall": 0.8689854707843261, - "support": 15899.0 + "f1-score": 0.8842053669893396, + "precision": 0.8721998388396455, + "recall": 0.8965460117617825, + "support": 12073.0 }, - "eval_accuracy": 0.8469850746268657, - "eval_loss": 0.5878348350524902, + "eval_accuracy": 0.8497411202433107, + "eval_loss": 0.5915889739990234, "eval_macro avg": { - "f1-score": 0.8161792737849245, - "precision": 0.8034538287618231, - "recall": 0.8332205676159898, - "support": 33500.0 - }, - "eval_runtime": 4.9086, - "eval_samples_per_second": 16.502, - "eval_steps_per_second": 2.241, + "f1-score": 0.8047490215869949, + "precision": 0.8079156527658359, + "recall": 0.801960802345883, + "support": 27619.0 + }, + "eval_runtime": 4.7226, + "eval_samples_per_second": 16.94, + "eval_steps_per_second": 2.117, "eval_weighted avg": { - "f1-score": 0.8504184600736624, - "precision": 0.8563340964894008, - "recall": 0.8469850746268657, - "support": 33500.0 + "f1-score": 0.8485544514458778, + "precision": 0.8476986926907486, + "recall": 0.8497411202433107, + "support": 27619.0 }, "step": 648 }, { "epoch": 9.0, "eval_Claim": { - "f1-score": 0.6323094193285029, - "precision": 0.6375797489195307, - "recall": 0.6271255060728745, - "support": 4940.0 + "f1-score": 0.6137689614935823, + "precision": 0.5974557019536574, + "recall": 0.6309980806142035, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8525914292124747, - "precision": 0.8373732921992068, - "recall": 0.8683729433272395, - "support": 2188.0 + "f1-score": 0.8104347826086956, + "precision": 0.761437908496732, + "recall": 0.8661710037174721, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9054132151801194, - "precision": 0.9134194927606647, - "recall": 0.8975460708488494, - "support": 10473.0 + "f1-score": 0.9178336980306346, + "precision": 0.9265518003092555, + "recall": 0.9092781270323, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8888055034396498, - "precision": 0.883775884584292, - "recall": 0.8938926976539405, - "support": 15899.0 + "f1-score": 0.8752312090129477, + "precision": 0.8886043533930857, + "recall": 0.8622546177420691, + "support": 12073.0 }, - "eval_accuracy": 0.8540298507462687, - "eval_loss": 0.7043101191520691, + "eval_accuracy": 0.8433686954632681, + "eval_loss": 0.6680669784545898, "eval_macro avg": { - "f1-score": 0.8197798917901866, - "precision": 0.8180371046159236, - "recall": 0.821734304475726, - "support": 33500.0 - }, - "eval_runtime": 4.935, - "eval_samples_per_second": 16.414, - "eval_steps_per_second": 2.229, + "f1-score": 0.804317162786465, + "precision": 0.7935124410381827, + "recall": 0.8171754572765112, + "support": 27619.0 + }, + "eval_runtime": 4.7195, + "eval_samples_per_second": 16.951, + "eval_steps_per_second": 2.119, "eval_weighted avg": { - "f1-score": 0.853808653145316, - "precision": 0.8537078167069386, - "recall": 0.8540298507462687, - "support": 33500.0 + "f1-score": 0.8449561811840803, + "precision": 0.8474346288061019, + "recall": 0.8433686954632681, + "support": 27619.0 }, "step": 729 }, { "epoch": 10.0, "eval_Claim": { - "f1-score": 0.6347110795166041, - "precision": 0.6368453230079478, - "recall": 0.6325910931174089, - "support": 4940.0 + "f1-score": 0.5938368860055606, + "precision": 0.5741487455197133, + "recall": 0.6149232245681382, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8442090055381651, - "precision": 0.8921119592875318, - "recall": 0.8011882998171846, - "support": 2188.0 + "f1-score": 0.8202093764223942, + "precision": 0.8037466547725245, + "recall": 0.837360594795539, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9106677408653618, - "precision": 0.9040270982310877, - "recall": 0.9174066647569942, - "support": 10473.0 + "f1-score": 0.9145026408450704, + "precision": 0.9286033519553073, + "recall": 0.9008237589421201, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8841656478259506, - "precision": 0.881375, - "recall": 0.8869740235234921, - "support": 15899.0 + "f1-score": 0.8716092527874855, + "precision": 0.8756164841594918, + "recall": 0.8676385322620724, + "support": 12073.0 }, - "eval_accuracy": 0.8533731343283583, - "eval_loss": 0.7164433002471924, + "eval_accuracy": 0.8382273072884608, + "eval_loss": 0.7243372201919556, "eval_macro avg": { - "f1-score": 0.8184383684365204, - "precision": 0.8285898451316418, - "recall": 0.8095400203037699, - "support": 33500.0 - }, - "eval_runtime": 4.9093, - "eval_samples_per_second": 16.499, - "eval_steps_per_second": 2.241, + "f1-score": 0.8000395390151277, + "precision": 0.7955288091017592, + "recall": 0.8051865276419675, + "support": 27619.0 + }, + "eval_runtime": 4.7291, + "eval_samples_per_second": 16.917, + "eval_steps_per_second": 2.115, "eval_weighted avg": { - "f1-score": 0.8530559678148433, - "precision": 0.8530989190255094, - "recall": 0.8533731343283583, - "support": 33500.0 + "f1-score": 0.840013852502701, + "precision": 0.8422219164630815, + "recall": 0.8382273072884608, + "support": 27619.0 }, "step": 810 }, { "epoch": 11.0, "eval_Claim": { - "f1-score": 0.645658668208538, - "precision": 0.6161486113665624, - "recall": 0.6781376518218624, - "support": 4940.0 + "f1-score": 0.6018801410105759, + "precision": 0.5898203592814372, + "recall": 0.614443378119002, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8331177231565331, - "precision": 0.7885714285714286, - "recall": 0.8829981718464351, - "support": 2188.0 + "f1-score": 0.7984663311766117, + "precision": 0.8243443839683325, + "recall": 0.7741635687732342, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9059711059612445, - "precision": 0.9366843393148451, - "recall": 0.8772080588179128, - "support": 10473.0 + "f1-score": 0.9056203605514316, + "precision": 0.9334944195144402, + "recall": 0.8793626707132018, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8902977542265961, - "precision": 0.8929452704840241, - "recall": 0.8876658909365369, - "support": 15899.0 + "f1-score": 0.878886273236464, + "precision": 0.8616792678074016, + "recall": 0.8967945001242442, + "support": 12073.0 }, - "eval_accuracy": 0.8531940298507462, - "eval_loss": 0.7852362394332886, + "eval_accuracy": 0.838806618632101, + "eval_loss": 0.7492838501930237, "eval_macro avg": { - "f1-score": 0.818761312888228, - "precision": 0.8085874124342151, - "recall": 0.8315024433556868, - "support": 33500.0 - }, - "eval_runtime": 4.89, - "eval_samples_per_second": 16.565, - "eval_steps_per_second": 2.25, + "f1-score": 0.7962132764937708, + "precision": 0.8023346076429029, + "recall": 0.7911910294324205, + "support": 27619.0 + }, + "eval_runtime": 4.7534, + "eval_samples_per_second": 16.83, + "eval_steps_per_second": 2.104, "eval_weighted avg": { - "f1-score": 0.8553879040715653, - "precision": 0.8589850855801485, - "recall": 0.8531940298507462, - "support": 33500.0 + "f1-score": 0.8397473983726959, + "precision": 0.8417333606018996, + "recall": 0.838806618632101, + "support": 27619.0 }, "step": 891 }, { "epoch": 12.0, "eval_Claim": { - "f1-score": 0.6329215627196947, - "precision": 0.6280645804265498, - "recall": 0.6378542510121458, - "support": 4940.0 + "f1-score": 0.6351054481546573, + "precision": 0.5856969205834684, + "recall": 0.6936180422264875, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8528368794326241, - "precision": 0.8278829604130808, - "recall": 0.8793418647166362, - "support": 2188.0 + "f1-score": 0.8096330275229358, + "precision": 0.7993659420289855, + "recall": 0.8201672862453532, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9126755145220234, - "precision": 0.9193064031773709, - "recall": 0.9061395970591044, - "support": 10473.0 + "f1-score": 0.9158836689038031, + "precision": 0.9461520684076727, + "recall": 0.8874918707999133, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8876634630534112, - "precision": 0.8894291487749432, - "recall": 0.88590477388515, - "support": 15899.0 + "f1-score": 0.8832342847576798, + "precision": 0.8926486760849336, + "recall": 0.8740164002319225, + "support": 12073.0 }, - "eval_accuracy": 0.8552238805970149, - "eval_loss": 0.8324545621871948, + "eval_accuracy": 0.8470980122379521, + "eval_loss": 0.7472469806671143, "eval_macro avg": { - "f1-score": 0.8215243549319383, - "precision": 0.8161707731979861, - "recall": 0.827310121668259, - "support": 33500.0 - }, - "eval_runtime": 4.8939, - "eval_samples_per_second": 16.551, - "eval_steps_per_second": 2.248, + "f1-score": 0.810964107334769, + "precision": 0.8059659017762651, + "recall": 0.8188233998759191, + "support": 27619.0 + }, + "eval_runtime": 4.7526, + "eval_samples_per_second": 16.833, + "eval_steps_per_second": 2.104, "eval_weighted avg": { - "f1-score": 0.8556433335734092, - "precision": 0.8562082669116539, - "recall": 0.8552238805970149, - "support": 33500.0 + "f1-score": 0.8509605717920246, + "precision": 0.8569306173916821, + "recall": 0.8470980122379521, + "support": 27619.0 }, "step": 972 }, { "epoch": 12.35, - "grad_norm": 2.6090729236602783, + "grad_norm": 3.1761698722839355, "learning_rate": 1.506172839506173e-05, - "loss": 0.0589, + "loss": 0.0604, "step": 1000 }, { "epoch": 13.0, "eval_Claim": { - "f1-score": 0.6431152147731835, - "precision": 0.6377388535031847, - "recall": 0.648582995951417, - "support": 4940.0 + "f1-score": 0.6302578018995931, + "precision": 0.5960222412318221, + "recall": 0.6686660268714012, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8326953388876379, - "precision": 0.8206835330670218, - "recall": 0.8450639853747715, - "support": 2188.0 + "f1-score": 0.8086405259450574, + "precision": 0.8172757475083057, + "recall": 0.800185873605948, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.904438383337466, - "precision": 0.9408790755262072, - "recall": 0.8707151723479424, - "support": 10473.0 + "f1-score": 0.9224348987313599, + "precision": 0.9477475417333638, + "recall": 0.8984391935833514, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.892013567684243, - "precision": 0.8749621922448733, - "recall": 0.9097427511164224, - "support": 15899.0 + "f1-score": 0.8844928196002152, + "precision": 0.8838709677419355, + "recall": 0.8851155470885447, + "support": 12073.0 }, - "eval_accuracy": 0.8548059701492537, - "eval_loss": 0.8543176651000977, + "eval_accuracy": 0.8502842246279735, + "eval_loss": 0.812764048576355, "eval_macro avg": { - "f1-score": 0.8180656261706326, - "precision": 0.8185659135853218, - "recall": 0.8185262261976383, - "support": 33500.0 - }, - "eval_runtime": 4.9338, - "eval_samples_per_second": 16.417, - "eval_steps_per_second": 2.23, + "f1-score": 0.8114565115440564, + "precision": 0.8112291245538568, + "recall": 0.8131016602873112, + "support": 27619.0 + }, + "eval_runtime": 4.7412, + "eval_samples_per_second": 16.873, + "eval_steps_per_second": 2.109, "eval_weighted avg": { - "f1-score": 0.8553204019036041, - "precision": 0.8570428644520474, - "recall": 0.8548059701492537, - "support": 33500.0 + "f1-score": 0.8528902247322565, + "precision": 0.8565802934093382, + "recall": 0.8502842246279735, + "support": 27619.0 }, "step": 1053 }, { "epoch": 14.0, "eval_Claim": { - "f1-score": 0.6541843971631205, - "precision": 0.6138420585625555, - "recall": 0.7002024291497976, - "support": 4940.0 + "f1-score": 0.6177896267571498, + "precision": 0.6241429970617042, + "recall": 0.6115642994241842, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8336528221512247, - "precision": 0.7806142800159553, - "recall": 0.8944241316270567, - "support": 2188.0 + "f1-score": 0.7891914191419142, + "precision": 0.7095697329376854, + "recall": 0.8889405204460966, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9079845651528643, - "precision": 0.9421004003695719, - "recall": 0.8762532225723288, - "support": 10473.0 + "f1-score": 0.915085817524842, + "precision": 0.9549846806504831, + "recall": 0.8783871667027965, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.889389516436096, - "precision": 0.8974194787731319, - "recall": 0.8815019812566828, - "support": 15899.0 + "f1-score": 0.8833210513387375, + "precision": 0.8733101270946328, + "recall": 0.8935641514122422, + "support": 12073.0 }, - "eval_accuracy": 0.8539701492537314, - "eval_loss": 0.9095103144645691, + "eval_accuracy": 0.8455773199608965, + "eval_loss": 0.8994765281677246, "eval_macro avg": { - "f1-score": 0.8213028252258264, - "precision": 0.8084940544303038, - "recall": 0.8380954411514664, - "support": 33500.0 - }, - "eval_runtime": 4.8917, - "eval_samples_per_second": 16.559, - "eval_steps_per_second": 2.249, + "f1-score": 0.8013469786906608, + "precision": 0.7905018844361265, + "recall": 0.8181140344963299, + "support": 27619.0 + }, + "eval_runtime": 4.7538, + "eval_samples_per_second": 16.829, + "eval_steps_per_second": 2.104, "eval_weighted avg": { - "f1-score": 0.8568784946124219, - "precision": 0.8619418985092084, - "recall": 0.8539701492537314, - "support": 33500.0 + "f1-score": 0.8465260836240982, + "precision": 0.850232952139105, + "recall": 0.8455773199608965, + "support": 27619.0 }, "step": 1134 }, { "epoch": 15.0, "eval_Claim": { - "f1-score": 0.631984585741811, - "precision": 0.6332046332046332, - "recall": 0.6307692307692307, - "support": 4940.0 + "f1-score": 0.6071797224452345, + "precision": 0.5816304108987036, + "recall": 0.6350767754318618, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8264781491002571, - "precision": 0.7778225806451613, - "recall": 0.8816270566727605, - "support": 2188.0 + "f1-score": 0.8077178975382568, + "precision": 0.7725922783198982, + "recall": 0.8461895910780669, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.8970963775654971, - "precision": 0.8886910896655111, - "recall": 0.9056621789363124, - "support": 10473.0 + "f1-score": 0.9198298062926884, + "precision": 0.9512505789717461, + "recall": 0.8904183828311294, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8838946528332003, - "precision": 0.8974458706080644, - "recall": 0.8707465878357129, - "support": 15899.0 + "f1-score": 0.8720390922643698, + "precision": 0.8719668737060041, + "recall": 0.8721113227863828, + "support": 12073.0 }, - "eval_accuracy": 0.8469850746268657, - "eval_loss": 0.9576827883720398, + "eval_accuracy": 0.8404359317860893, + "eval_loss": 0.9128327369689941, "eval_macro avg": { - "f1-score": 0.8098634413101913, - "precision": 0.7992910435308425, - "recall": 0.8222012635535042, - "support": 33500.0 - }, - "eval_runtime": 4.8883, - "eval_samples_per_second": 16.57, - "eval_steps_per_second": 2.25, + "f1-score": 0.8016916296351374, + "precision": 0.794360035474088, + "recall": 0.8109490180318603, + "support": 27619.0 + }, + "eval_runtime": 4.7243, + "eval_samples_per_second": 16.934, + "eval_steps_per_second": 2.117, "eval_weighted avg": { - "f1-score": 0.8471244624308779, - "precision": 0.8479301603984184, - "recall": 0.8469850746268657, - "support": 33500.0 + "f1-score": 0.8430215341764057, + "precision": 0.8468933720777774, + "recall": 0.8404359317860893, + "support": 27619.0 }, "step": 1215 }, { "epoch": 16.0, "eval_Claim": { - "f1-score": 0.6332942805410586, - "precision": 0.6682400539447066, - "recall": 0.6018218623481781, - "support": 4940.0 + "f1-score": 0.6309481002037637, + "precision": 0.6304191616766467, + "recall": 0.6314779270633397, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8478211270819075, - "precision": 0.8464692482915718, - "recall": 0.8491773308957953, - "support": 2188.0 + "f1-score": 0.8244803695150116, + "precision": 0.8195592286501377, + "recall": 0.8294609665427509, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9089773764349872, - "precision": 0.9145563502803016, - "recall": 0.9034660555714695, - "support": 10473.0 + "f1-score": 0.9188405797101449, + "precision": 0.9101446193109315, + "recall": 0.9277043138955127, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8924681415656145, - "precision": 0.8759539672925499, - "recall": 0.9096169570413234, - "support": 15899.0 + "f1-score": 0.8854815124294965, + "precision": 0.8933569381217332, + "recall": 0.8777437256688478, + "support": 12073.0 }, - "eval_accuracy": 0.8583582089552239, - "eval_loss": 0.9394893646240234, + "eval_accuracy": 0.8535066439769724, + "eval_loss": 0.8802785873413086, "eval_macro avg": { - "f1-score": 0.8206402314058919, - "precision": 0.8263049049522826, - "recall": 0.8160205514641916, - "support": 33500.0 - }, - "eval_runtime": 4.896, - "eval_samples_per_second": 16.544, - "eval_steps_per_second": 2.247, + "f1-score": 0.8149376404646043, + "precision": 0.8133699869398623, + "recall": 0.8165967332926127, + "support": 27619.0 + }, + "eval_runtime": 4.7225, + "eval_samples_per_second": 16.94, + "eval_steps_per_second": 2.118, "eval_weighted avg": { - "f1-score": 0.8564948483009961, - "precision": 0.8554663093796615, - "recall": 0.8583582089552239, - "support": 33500.0 + "f1-score": 0.8534601153123827, + "precision": 0.8535345630772185, + "recall": 0.8535066439769724, + "support": 27619.0 }, "step": 1296 }, { "epoch": 17.0, "eval_Claim": { - "f1-score": 0.6431561322729175, - "precision": 0.6657279029462738, - "recall": 0.6220647773279352, - "support": 4940.0 + "f1-score": 0.6351891003053795, + "precision": 0.6221813161527842, + "recall": 0.6487523992322457, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8454073572556984, - "precision": 0.8350423539901917, - "recall": 0.8560329067641682, - "support": 2188.0 + "f1-score": 0.8157058556513844, + "precision": 0.7972493345164152, + "recall": 0.8350371747211895, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.909496556407023, - "precision": 0.9241991128634796, - "recall": 0.8952544638594481, - "support": 10473.0 + "f1-score": 0.9181362943609842, + "precision": 0.9221517603323858, + "recall": 0.914155647084327, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8953850902917116, - "precision": 0.8791828322017459, - "recall": 0.9121957355808541, - "support": 15899.0 + "f1-score": 0.8847406664996242, + "precision": 0.8921923692411353, + "recall": 0.8774124078522323, + "support": 12073.0 }, - "eval_accuracy": 0.8604477611940299, - "eval_loss": 0.9581068754196167, + "eval_accuracy": 0.8518773308229842, + "eval_loss": 0.8617361783981323, "eval_macro avg": { - "f1-score": 0.8233612840568376, - "precision": 0.8260380505004228, - "recall": 0.8213869708831014, - "support": 33500.0 - }, - "eval_runtime": 4.8973, - "eval_samples_per_second": 16.54, - "eval_steps_per_second": 2.246, + "f1-score": 0.813442979204343, + "precision": 0.8084436950606801, + "recall": 0.8188394072224987, + "support": 27619.0 + }, + "eval_runtime": 4.7584, + "eval_samples_per_second": 16.812, + "eval_steps_per_second": 2.102, "eval_weighted avg": { - "f1-score": 0.859338136623951, - "precision": 0.8588965274411914, - "recall": 0.8604477611940299, - "support": 33500.0 + "f1-score": 0.8528573333523664, + "precision": 0.8540549226358285, + "recall": 0.8518773308229842, + "support": 27619.0 }, "step": 1377 }, { "epoch": 18.0, "eval_Claim": { - "f1-score": 0.6493941483597675, - "precision": 0.632508155824218, - "recall": 0.6672064777327935, - "support": 4940.0 + "f1-score": 0.5979982262764474, + "precision": 0.6335570469798658, + "recall": 0.5662188099808061, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8654810761513908, - "precision": 0.8635122838944495, - "recall": 0.8674588665447898, - "support": 2188.0 + "f1-score": 0.8218923933209649, + "precision": 0.8203703703703704, + "recall": 0.8234200743494424, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9125304729400293, - "precision": 0.9323503038756601, - "recall": 0.8935357586173971, - "support": 10473.0 + "f1-score": 0.9185111222055806, + "precision": 0.9406885581183957, + "recall": 0.8973553002384566, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8928739085531874, - "precision": 0.8885635978572318, - "recall": 0.8972262406440656, - "support": 15899.0 + "f1-score": 0.8841078141246101, + "precision": 0.8547127503286167, + "recall": 0.9155967862171789, + "support": 12073.0 }, - "eval_accuracy": 0.8602089552238806, - "eval_loss": 0.9848275184631348, + "eval_accuracy": 0.8495962924074008, + "eval_loss": 0.9965270757675171, "eval_macro avg": { - "f1-score": 0.8300699015010937, - "precision": 0.8292335853628898, - "recall": 0.8313568358847614, - "support": 33500.0 - }, - "eval_runtime": 4.9301, - "eval_samples_per_second": 16.43, - "eval_steps_per_second": 2.231, + "f1-score": 0.8056273889819007, + "precision": 0.8123321814493122, + "recall": 0.800647742696471, + "support": 27619.0 + }, + "eval_runtime": 4.735, + "eval_samples_per_second": 16.895, + "eval_steps_per_second": 2.112, "eval_weighted avg": { - "f1-score": 0.8613257791852104, - "precision": 0.8628576878135704, - "recall": 0.8602089552238806, - "support": 33500.0 + "f1-score": 0.8475754477331567, + "precision": 0.8473819646173586, + "recall": 0.8495962924074008, + "support": 27619.0 }, "step": 1458 }, { "epoch": 18.52, - "grad_norm": 1.7008241415023804, + "grad_norm": 15.414011001586914, "learning_rate": 1.2592592592592593e-05, - "loss": 0.0216, + "loss": 0.0201, "step": 1500 }, { "epoch": 19.0, "eval_Claim": { - "f1-score": 0.6404391582799633, - "precision": 0.6432509699816213, - "recall": 0.6376518218623481, - "support": 4940.0 + "f1-score": 0.6417134539405281, + "precision": 0.5662385321100918, + "recall": 0.7404030710172744, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8286799195890104, - "precision": 0.8103975535168195, - "recall": 0.8478062157221207, - "support": 2188.0 + "f1-score": 0.8129032258064516, + "precision": 0.8062157221206582, + "recall": 0.8197026022304833, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9006844397645144, - "precision": 0.9029750479846449, - "recall": 0.898405423469875, - "support": 10473.0 + "f1-score": 0.9158699808795411, + "precision": 0.9517297802711547, + "recall": 0.8826143507478864, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8906363035888403, - "precision": 0.8907763936076507, - "recall": 0.8904962576262658, - "support": 15899.0 + "f1-score": 0.8692654694016513, + "precision": 0.8939168490153173, + "recall": 0.8459372152737513, + "support": 12073.0 }, - "eval_accuracy": 0.8528955223880597, - "eval_loss": 1.0161080360412598, + "eval_accuracy": 0.8402186900322242, + "eval_loss": 0.9760411381721497, "eval_macro avg": { - "f1-score": 0.8151099553055821, - "precision": 0.8118499912726841, - "recall": 0.8185899296701524, - "support": 33500.0 - }, - "eval_runtime": 4.8924, - "eval_samples_per_second": 16.556, - "eval_steps_per_second": 2.248, + "f1-score": 0.809938032507043, + "precision": 0.8045252208793054, + "recall": 0.8221643098173489, + "support": 27619.0 + }, + "eval_runtime": 4.7957, + "eval_samples_per_second": 16.682, + "eval_steps_per_second": 2.085, "eval_weighted avg": { - "f1-score": 0.8528362935634777, - "precision": 0.8528394387556787, - "recall": 0.8528955223880597, - "support": 33500.0 + "f1-score": 0.8461018818074655, + "precision": 0.8569454182549012, + "recall": 0.8402186900322242, + "support": 27619.0 }, "step": 1539 }, { "epoch": 20.0, "eval_Claim": { - "f1-score": 0.650995688770273, - "precision": 0.6603498542274052, - "recall": 0.6419028340080971, - "support": 4940.0 + "f1-score": 0.651902203705734, + "precision": 0.6002422774076317, + "recall": 0.7132917466410749, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8586199145875477, - "precision": 0.8447589562140646, - "recall": 0.8729433272394881, - "support": 2188.0 + "f1-score": 0.8352472463088821, + "precision": 0.8425531914893617, + "recall": 0.8280669144981413, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9104241552839684, - "precision": 0.913972286374134, - "recall": 0.9069034660555715, - "support": 10473.0 + "f1-score": 0.9225629791894853, + "precision": 0.9323666150099623, + "recall": 0.9129633644049425, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8906836964688204, - "precision": 0.8866313493300093, - "recall": 0.894773256179634, - "support": 15899.0 + "f1-score": 0.8775752437473505, + "precision": 0.8987583572110793, + "recall": 0.8573676799469891, + "support": 12073.0 }, - "eval_accuracy": 0.8598507462686568, - "eval_loss": 0.9872655272483826, + "eval_accuracy": 0.8519135377819617, + "eval_loss": 0.9999569058418274, "eval_macro avg": { - "f1-score": 0.8276808637776524, - "precision": 0.8264281115364033, - "recall": 0.8291307208706977, - "support": 33500.0 - }, - "eval_runtime": 4.9351, - "eval_samples_per_second": 16.413, - "eval_steps_per_second": 2.229, + "f1-score": 0.821821918237863, + "precision": 0.8184801102795087, + "recall": 0.8279224263727869, + "support": 27619.0 + }, + "eval_runtime": 4.7617, + "eval_samples_per_second": 16.801, + "eval_steps_per_second": 2.1, "eval_weighted avg": { - "f1-score": 0.8594158610175964, - "precision": 0.8590759538589217, - "recall": 0.8598507462686568, - "support": 33500.0 + "f1-score": 0.8552486484979966, + "precision": 0.8605564400235849, + "recall": 0.8519135377819617, + "support": 27619.0 }, "step": 1620 }, { "epoch": 21.0, "eval_Claim": { - "f1-score": 0.6439562566534404, - "precision": 0.6169108103096607, - "recall": 0.6734817813765183, - "support": 4940.0 + "f1-score": 0.6376654371881102, + "precision": 0.5819801980198019, + "recall": 0.7051343570057581, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8524001794526693, - "precision": 0.8370044052863436, - "recall": 0.8683729433272395, - "support": 2188.0 + "f1-score": 0.792661361626879, + "precision": 0.7559021922428331, + "recall": 0.83317843866171, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9078541717641301, - "precision": 0.9311383256374222, - "recall": 0.8857061014036093, - "support": 10473.0 + "f1-score": 0.9065665187208377, + "precision": 0.9542405366554864, + "recall": 0.8634294385432474, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8851262038144394, - "precision": 0.8857952755905512, - "recall": 0.8844581420215107, - "support": 15899.0 + "f1-score": 0.8798595435164284, + "precision": 0.8881762174023125, + "recall": 0.8716971755156133, + "support": 12073.0 }, - "eval_accuracy": 0.8526865671641791, - "eval_loss": 1.0280390977859497, + "eval_accuracy": 0.8407980013758645, + "eval_loss": 1.0823462009429932, "eval_macro avg": { - "f1-score": 0.8223342029211698, - "precision": 0.8177122042059943, - "recall": 0.8280047420322194, - "support": 33500.0 - }, - "eval_runtime": 4.9316, - "eval_samples_per_second": 16.425, - "eval_steps_per_second": 2.231, + "f1-score": 0.8041882152630638, + "precision": 0.7950747860801084, + "recall": 0.8183598524315823, + "support": 27619.0 + }, + "eval_runtime": 4.8337, + "eval_samples_per_second": 16.55, + "eval_steps_per_second": 2.069, "eval_weighted avg": { - "f1-score": 0.8545305598758789, - "precision": 0.8571336063495862, - "recall": 0.8526865671641791, - "support": 33500.0 + "f1-score": 0.8454369732073347, + "precision": 0.8537300281304444, + "recall": 0.8407980013758645, + "support": 27619.0 }, "step": 1701 }, { "epoch": 22.0, "eval_Claim": { - "f1-score": 0.6535234899328859, - "precision": 0.6779808529155787, - "recall": 0.6307692307692307, - "support": 4940.0 + "f1-score": 0.6413618587531631, + "precision": 0.6159964648696421, + "recall": 0.6689059500959693, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8511305126483099, - "precision": 0.8341377797279509, - "recall": 0.8688299817184644, - "support": 2188.0 + "f1-score": 0.8162887552059233, + "precision": 0.8129032258064516, + "recall": 0.8197026022304833, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9092228079508632, - "precision": 0.9212073696589572, - "recall": 0.8975460708488494, - "support": 10473.0 + "f1-score": 0.9215077605321508, + "precision": 0.9430451554345359, + "recall": 0.9009321482766096, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8968440594059406, - "precision": 0.882589367273613, - "recall": 0.9115667652053588, - "support": 15899.0 + "f1-score": 0.8851211645025227, + "precision": 0.8838054339747295, + "recall": 0.886440818355007, + "support": 12073.0 }, - "eval_accuracy": 0.8629850746268657, - "eval_loss": 1.0620397329330444, + "eval_accuracy": 0.8532531952641298, + "eval_loss": 0.9629964828491211, "eval_macro avg": { - "f1-score": 0.8276802174844999, - "precision": 0.828978842394025, - "recall": 0.8271780121354758, - "support": 33500.0 - }, - "eval_runtime": 4.8953, - "eval_samples_per_second": 16.546, - "eval_steps_per_second": 2.247, + "f1-score": 0.8160698847484399, + "precision": 0.8139375700213398, + "recall": 0.8189953797395173, + "support": 27619.0 + }, + "eval_runtime": 4.7476, + "eval_samples_per_second": 16.851, + "eval_steps_per_second": 2.106, "eval_weighted avg": { - "f1-score": 0.8618475752270865, - "precision": 0.8613257315871389, - "recall": 0.8629850746268657, - "support": 33500.0 + "f1-score": 0.8551268346498754, + "precision": 0.8576544630844016, + "recall": 0.8532531952641298, + "support": 27619.0 }, "step": 1782 }, { "epoch": 23.0, "eval_Claim": { - "f1-score": 0.6436781609195402, - "precision": 0.5858518764530056, - "recall": 0.71417004048583, - "support": 4940.0 + "f1-score": 0.6246913580246913, + "precision": 0.6434384537131231, + "recall": 0.6070057581573897, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8615044247787611, - "precision": 0.8349056603773585, - "recall": 0.8898537477148081, - "support": 2188.0 + "f1-score": 0.8306414397784957, + "precision": 0.8249312557286893, + "recall": 0.8364312267657993, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9092565073582283, - "precision": 0.9129765113592607, - "recall": 0.905566695311754, - "support": 10473.0 + "f1-score": 0.9210203854294309, + "precision": 0.9472960586617782, + "recall": 0.8961630175590722, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.873210033597547, - "precision": 0.9069657135113159, - "recall": 0.841876847600478, - "support": 15899.0 + "f1-score": 0.8880482897384305, + "precision": 0.8635830007043908, + "recall": 0.9139401971341009, + "support": 12073.0 }, - "eval_accuracy": 0.846089552238806, - "eval_loss": 1.189612627029419, + "eval_accuracy": 0.8556428545566458, + "eval_loss": 1.0519344806671143, "eval_macro avg": { - "f1-score": 0.8219122816635192, - "precision": 0.8101749404252352, - "recall": 0.8378668327782175, - "support": 33500.0 - }, - "eval_runtime": 4.9275, - "eval_samples_per_second": 16.438, - "eval_steps_per_second": 2.232, + "f1-score": 0.8161003682427621, + "precision": 0.8198121922019954, + "recall": 0.8133850499040906, + "support": 27619.0 + }, + "eval_runtime": 4.7514, + "eval_samples_per_second": 16.837, + "eval_steps_per_second": 2.105, "eval_weighted avg": { - "f1-score": 0.8498672096145845, - "precision": 0.8567860518556853, - "recall": 0.846089552238806, - "support": 33500.0 + "f1-score": 0.8548461217507599, + "precision": 0.8553131736167897, + "recall": 0.8556428545566458, + "support": 27619.0 }, "step": 1863 }, { "epoch": 24.0, "eval_Claim": { - "f1-score": 0.6632010396880935, - "precision": 0.6551451708473237, - "recall": 0.6714574898785425, - "support": 4940.0 + "f1-score": 0.6429456142326315, + "precision": 0.605771270952684, + "recall": 0.6849808061420346, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.84557309540151, - "precision": 0.8465414567109483, - "recall": 0.8446069469835467, - "support": 2188.0 + "f1-score": 0.8262760357061112, + "precision": 0.8141632837167343, + "recall": 0.8387546468401487, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.911568570600174, - "precision": 0.922805987672439, - "recall": 0.9006015468347178, - "support": 10473.0 + "f1-score": 0.9164846244328684, + "precision": 0.9483018430508867, + "recall": 0.8867331454584869, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8970938243768007, - "precision": 0.8933449759870268, - "recall": 0.9008742688219384, - "support": 15899.0 + "f1-score": 0.8827014708928941, + "precision": 0.8831039628585641, + "recall": 0.8822993456473122, + "support": 12073.0 }, - "eval_accuracy": 0.8632835820895522, - "eval_loss": 1.108552098274231, + "eval_accuracy": 0.8506100872587712, + "eval_loss": 1.0345954895019531, "eval_macro avg": { - "f1-score": 0.8293591325166445, - "precision": 0.8294593978044343, - "recall": 0.8293850631296864, - "support": 33500.0 - }, - "eval_runtime": 4.9242, - "eval_samples_per_second": 16.449, - "eval_steps_per_second": 2.234, + "f1-score": 0.8171019363161263, + "precision": 0.8128350901447173, + "recall": 0.8231919860219956, + "support": 27619.0 + }, + "eval_runtime": 4.7433, + "eval_samples_per_second": 16.866, + "eval_steps_per_second": 2.108, "eval_weighted avg": { - "f1-score": 0.8637635648495541, - "precision": 0.864372798011359, - "recall": 0.8632835820895522, - "support": 33500.0 + "f1-score": 0.8534083548306859, + "precision": 0.8576588939305595, + "recall": 0.8506100872587712, + "support": 27619.0 }, "step": 1944 }, { "epoch": 24.69, - "grad_norm": 0.41564273834228516, + "grad_norm": 0.018422244116663933, "learning_rate": 1.0123456790123458e-05, - "loss": 0.0102, + "loss": 0.0091, "step": 2000 }, { "epoch": 25.0, "eval_Claim": { - "f1-score": 0.6543478260869565, - "precision": 0.6391891891891892, - "recall": 0.6702429149797571, - "support": 4940.0 + "f1-score": 0.6397129186602871, + "precision": 0.637881679389313, + "recall": 0.6415547024952015, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8417964342134958, - "precision": 0.8314757021845742, - "recall": 0.8523765996343693, - "support": 2188.0 + "f1-score": 0.8255892255892255, + "precision": 0.798523664785063, + "recall": 0.8545539033457249, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9077508785630614, - "precision": 0.9285072391412881, - "recall": 0.8879022247684523, - "support": 10473.0 + "f1-score": 0.9217740138176956, + "precision": 0.9484063288236643, + "recall": 0.8965965748970302, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8887081130127343, - "precision": 0.884198729921554, - "recall": 0.8932637272784452, - "support": 15899.0 + "f1-score": 0.8884167517875383, + "precision": 0.8766328011611031, + "recall": 0.9005218255611696, + "support": 12073.0 }, - "eval_accuracy": 0.8560298507462687, - "eval_loss": 1.0723340511322021, + "eval_accuracy": 0.8565480285310837, + "eval_loss": 1.0353806018829346, "eval_macro avg": { - "f1-score": 0.8231508129690621, - "precision": 0.8208427151091513, - "recall": 0.8259463666652559, - "support": 33500.0 - }, - "eval_runtime": 4.8953, - "eval_samples_per_second": 16.546, - "eval_steps_per_second": 2.247, + "f1-score": 0.8188732274636866, + "precision": 0.8153611185397858, + "recall": 0.8233067515747815, + "support": 27619.0 + }, + "eval_runtime": 4.7632, + "eval_samples_per_second": 16.795, + "eval_steps_per_second": 2.099, "eval_weighted avg": { - "f1-score": 0.8570380328032566, - "precision": 0.8584774732395206, - "recall": 0.8560298507462687, - "support": 33500.0 + "f1-score": 0.8571321899510156, + "precision": 0.8584922830101466, + "recall": 0.8565480285310837, + "support": 27619.0 }, "step": 2025 }, { "epoch": 26.0, "eval_Claim": { - "f1-score": 0.6502824298880191, - "precision": 0.636963696369637, - "recall": 0.66417004048583, - "support": 4940.0 + "f1-score": 0.639289678135405, + "precision": 0.6577010910936311, + "recall": 0.6218809980806143, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8458110516934045, - "precision": 0.8252173913043478, - "recall": 0.8674588665447898, - "support": 2188.0 + "f1-score": 0.8237179487179487, + "precision": 0.8118231046931408, + "recall": 0.8359665427509294, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.912881895844394, - "precision": 0.9229943392543432, - "recall": 0.9029886374486775, - "support": 10473.0 + "f1-score": 0.9226105170323007, + "precision": 0.9386496186630776, + "recall": 0.9071103403425103, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8878304207936407, - "precision": 0.8905271151047269, - "recall": 0.8851500094345557, - "support": 15899.0 + "f1-score": 0.8938624639506073, + "precision": 0.8770125936553483, + "recall": 0.9113724840553301, + "support": 12073.0 }, - "eval_accuracy": 0.8569850746268657, - "eval_loss": 1.126774787902832, + "eval_accuracy": 0.8603859661827004, + "eval_loss": 1.0636707544326782, "eval_macro avg": { - "f1-score": 0.8242014495548646, - "precision": 0.8189256355082637, - "recall": 0.8299418884784633, - "support": 33500.0 - }, - "eval_runtime": 4.9541, - "eval_samples_per_second": 16.35, - "eval_steps_per_second": 2.22, + "f1-score": 0.8198701519590654, + "precision": 0.8212966020262995, + "recall": 0.8190825913073461, + "support": 27619.0 + }, + "eval_runtime": 4.7618, + "eval_samples_per_second": 16.8, + "eval_steps_per_second": 2.1, "eval_weighted avg": { - "f1-score": 0.8578882907501019, - "precision": 0.8590204964268868, - "recall": 0.8569850746268657, - "support": 33500.0 + "f1-score": 0.8595824092662689, + "precision": 0.8594263331027361, + "recall": 0.8603859661827004, + "support": 27619.0 }, "step": 2106 }, { "epoch": 27.0, "eval_Claim": { - "f1-score": 0.6497180730042537, - "precision": 0.6353259818146644, - "recall": 0.6647773279352227, - "support": 4940.0 + "f1-score": 0.643669985775249, + "precision": 0.6361293345829429, + "recall": 0.6513915547024952, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8471862491170237, - "precision": 0.8737251092763477, - "recall": 0.8222120658135283, - "support": 2188.0 + "f1-score": 0.835773822933093, + "precision": 0.8111062527328378, + "recall": 0.8619888475836431, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9101418812401472, - "precision": 0.9107074569789675, - "recall": 0.9095770075432064, - "support": 10473.0 + "f1-score": 0.9197544642857143, + "precision": 0.9478951000690131, + "recall": 0.893236505527856, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.889659739522563, - "precision": 0.8921072603086264, - "recall": 0.8872256116736902, - "support": 15899.0 + "f1-score": 0.892443644397169, + "precision": 0.8817299919159256, + "recall": 0.9034208564565559, + "support": 12073.0 }, - "eval_accuracy": 0.8571641791044776, - "eval_loss": 1.101758360862732, + "eval_accuracy": 0.8587566530287121, + "eval_loss": 1.0722382068634033, "eval_macro avg": { - "f1-score": 0.824176485720997, - "precision": 0.8279664520946515, - "recall": 0.8209480032414119, - "support": 33500.0 - }, - "eval_runtime": 4.9026, - "eval_samples_per_second": 16.522, - "eval_steps_per_second": 2.244, + "f1-score": 0.8229104793478063, + "precision": 0.8192151698251797, + "recall": 0.8275094410676376, + "support": 27619.0 + }, + "eval_runtime": 4.7416, + "eval_samples_per_second": 16.872, + "eval_steps_per_second": 2.109, "eval_weighted avg": { - "f1-score": 0.8579064750628762, - "precision": 0.8588559229208557, - "recall": 0.8571641791044776, - "support": 33500.0 + "f1-score": 0.8596085511412532, + "precision": 0.8612655819566406, + "recall": 0.8587566530287121, + "support": 27619.0 }, "step": 2187 }, { "epoch": 28.0, "eval_Claim": { - "f1-score": 0.6493561775034585, - "precision": 0.6845411711913844, - "recall": 0.6176113360323887, - "support": 4940.0 + "f1-score": 0.6362290227048372, + "precision": 0.654979674796748, + "recall": 0.6185220729366603, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.846973475402403, - "precision": 0.8403058929374719, - "recall": 0.8537477148080439, - "support": 2188.0 + "f1-score": 0.831056793673616, + "precision": 0.8579910935180604, + "recall": 0.8057620817843866, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9125746196803388, - "precision": 0.9202835226721041, - "recall": 0.9049937935644037, - "support": 10473.0 + "f1-score": 0.9264762322495751, + "precision": 0.9374237212914679, + "recall": 0.9157814871016692, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8957433682911783, - "precision": 0.8788814236426367, - "recall": 0.9132649852191962, - "support": 15899.0 + "f1-score": 0.8929698244478602, + "precision": 0.8726381532136928, + "recall": 0.9142715149507165, + "support": 12073.0 }, - "eval_accuracy": 0.8631940298507462, - "eval_loss": 1.0291390419006348, + "eval_accuracy": 0.8616894167058908, + "eval_loss": 1.0252758264541626, "eval_macro avg": { - "f1-score": 0.8261619102193447, - "precision": 0.8310030026108993, - "recall": 0.8224044574060082, - "support": 33500.0 - }, - "eval_runtime": 4.895, - "eval_samples_per_second": 16.548, - "eval_steps_per_second": 2.247, + "f1-score": 0.8216829682689721, + "precision": 0.8307581607049922, + "recall": 0.8135842891933581, + "support": 27619.0 + }, + "eval_runtime": 4.7454, + "eval_samples_per_second": 16.858, + "eval_steps_per_second": 2.107, "eval_weighted avg": { - "f1-score": 0.8614870234454082, - "precision": 0.8606473960260256, - "recall": 0.8631940298507462, - "support": 33500.0 + "f1-score": 0.8605934753580137, + "precision": 0.8602912413261778, + "recall": 0.8616894167058908, + "support": 27619.0 }, "step": 2268 }, { "epoch": 29.0, "eval_Claim": { - "f1-score": 0.6583255135022417, - "precision": 0.6787787572565039, - "recall": 0.6390688259109312, - "support": 4940.0 + "f1-score": 0.6346723545443773, + "precision": 0.6270194333879654, + "recall": 0.6425143953934741, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8515981735159819, - "precision": 0.8508211678832117, - "recall": 0.8523765996343693, - "support": 2188.0 + "f1-score": 0.8310436249698723, + "precision": 0.8632949424136205, + "recall": 0.8011152416356877, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9123429557878584, - "precision": 0.916385704652731, - "recall": 0.9083357204239473, - "support": 10473.0 + "f1-score": 0.9246601728028178, + "precision": 0.9391839016210173, + "recall": 0.9105787990461739, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8947940947940948, - "precision": 0.8844310641435242, - "recall": 0.9054028555255047, - "support": 15899.0 + "f1-score": 0.8919482004983863, + "precision": 0.8799774302756731, + "recall": 0.9042491509980949, + "support": 12073.0 }, - "eval_accuracy": 0.8635820895522388, - "eval_loss": 1.1136515140533447, + "eval_accuracy": 0.8588290669466672, + "eval_loss": 1.0103356838226318, "eval_macro avg": { - "f1-score": 0.8292651844000442, - "precision": 0.8326041734839926, - "recall": 0.8262960003736881, - "support": 33500.0 - }, - "eval_runtime": 4.9179, - "eval_samples_per_second": 16.47, - "eval_steps_per_second": 2.237, + "f1-score": 0.8205810882038633, + "precision": 0.8273689269245691, + "recall": 0.8146143967683577, + "support": 27619.0 + }, + "eval_runtime": 4.7642, + "eval_samples_per_second": 16.792, + "eval_steps_per_second": 2.099, "eval_weighted avg": { - "f1-score": 0.8625887740134804, - "precision": 0.8618997238752699, - "recall": 0.8635820895522388, - "support": 33500.0 + "f1-score": 0.8593043062229604, + "precision": 0.8602811798583894, + "recall": 0.8588290669466672, + "support": 27619.0 }, "step": 2349 }, { "epoch": 30.0, "eval_Claim": { - "f1-score": 0.6518860985604706, - "precision": 0.677736508630107, - "recall": 0.6279352226720648, - "support": 4940.0 + "f1-score": 0.6266573621772505, + "precision": 0.6081264108352145, + "recall": 0.6463531669865643, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8529746979712788, - "precision": 0.8508412914961346, - "recall": 0.8551188299817185, - "support": 2188.0 + "f1-score": 0.8181200453001133, + "precision": 0.7980556783031374, + "recall": 0.8392193308550185, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.911866692352148, - "precision": 0.919930035953746, - "recall": 0.9039434736942614, - "support": 10473.0 + "f1-score": 0.9268776883202824, + "precision": 0.9434216434665469, + "recall": 0.9109039670496423, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8956451812445873, - "precision": 0.8810929227773383, - "recall": 0.9106862066796654, - "support": 15899.0 + "f1-score": 0.8838985513262215, + "precision": 0.8859211183225162, + "recall": 0.8818851983765427, + "support": 12073.0 }, - "eval_accuracy": 0.8632537313432835, - "eval_loss": 1.123184084892273, + "eval_accuracy": 0.852710090879467, + "eval_loss": 1.0344375371932983, "eval_macro avg": { - "f1-score": 0.8280931675321211, - "precision": 0.8324001897143315, - "recall": 0.8244209332569276, - "support": 33500.0 - }, - "eval_runtime": 4.9146, - "eval_samples_per_second": 16.482, - "eval_steps_per_second": 2.238, + "f1-score": 0.8138884117809669, + "precision": 0.8088812127318536, + "recall": 0.819590415816942, + "support": 27619.0 + }, + "eval_runtime": 4.7499, + "eval_samples_per_second": 16.843, + "eval_steps_per_second": 2.105, "eval_weighted avg": { - "f1-score": 0.8619841364675112, - "precision": 0.8612711266927389, - "recall": 0.8632537313432835, - "support": 33500.0 + "f1-score": 0.8543098224282204, + "precision": 0.8563604925666979, + "recall": 0.852710090879467, + "support": 27619.0 }, "step": 2430 }, { "epoch": 30.86, - "grad_norm": 0.36268848180770874, + "grad_norm": 0.13328897953033447, "learning_rate": 7.654320987654322e-06, - "loss": 0.0075, + "loss": 0.0054, "step": 2500 }, { "epoch": 31.0, "eval_Claim": { - "f1-score": 0.6331647476595287, - "precision": 0.6758557316793017, - "recall": 0.5955465587044534, - "support": 4940.0 + "f1-score": 0.6430961409770243, + "precision": 0.6008753647353063, + "recall": 0.6916986564299424, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8522257916475449, - "precision": 0.8557603686635945, - "recall": 0.8487202925045704, - "support": 2188.0 + "f1-score": 0.8156321839080459, + "precision": 0.8070973612374887, + "recall": 0.8243494423791822, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9158597986057321, - "precision": 0.928901109692625, - "recall": 0.9031796046977943, - "support": 10473.0 + "f1-score": 0.922759926491062, + "precision": 0.948917649753751, + "recall": 0.8980056362453934, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8934022573639618, - "precision": 0.8695962843872812, - "recall": 0.9185483363733569, - "support": 15899.0 + "f1-score": 0.884039223868141, + "precision": 0.8907669021190716, + "recall": 0.8774124078522323, + "support": 12073.0 }, - "eval_accuracy": 0.8615522388059701, - "eval_loss": 1.120849370956421, + "eval_accuracy": 0.8521307795358268, + "eval_loss": 1.0802738666534424, "eval_macro avg": { - "f1-score": 0.8236631488191918, - "precision": 0.8325283736057006, - "recall": 0.8164986980700437, - "support": 33500.0 - }, - "eval_runtime": 4.9277, - "eval_samples_per_second": 16.438, - "eval_steps_per_second": 2.232, + "f1-score": 0.8163818688110683, + "precision": 0.8119143194614044, + "recall": 0.8228665357266876, + "support": 27619.0 + }, + "eval_runtime": 4.7458, + "eval_samples_per_second": 16.857, + "eval_steps_per_second": 2.107, "eval_weighted avg": { - "f1-score": 0.8593583894385183, - "precision": 0.858663392490028, - "recall": 0.8615522388059701, - "support": 33500.0 + "f1-score": 0.85528273315214, + "precision": 0.8599248737286467, + "recall": 0.8521307795358268, + "support": 27619.0 }, "step": 2511 }, { "epoch": 32.0, "eval_Claim": { - "f1-score": 0.6540556600970584, - "precision": 0.6402947450067868, - "recall": 0.6684210526315789, - "support": 4940.0 + "f1-score": 0.6231684408265498, + "precision": 0.6518208016767094, + "recall": 0.5969289827255279, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8491992882562277, - "precision": 0.8271230502599654, - "recall": 0.8724862888482633, - "support": 2188.0 + "f1-score": 0.802954491303312, + "precision": 0.823960880195599, + "recall": 0.7829925650557621, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9160624787114983, - "precision": 0.934014685453463, - "recall": 0.8987873579681085, - "support": 10473.0 + "f1-score": 0.9214892082339233, + "precision": 0.9439581675571217, + "recall": 0.9000650336006937, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8915745856353591, - "precision": 0.8899542520523908, - "recall": 0.8932008302408957, - "support": 15899.0 + "f1-score": 0.8923421084168898, + "precision": 0.8618055555555556, + "recall": 0.925122173444877, + "support": 12073.0 }, - "eval_accuracy": 0.8604477611940299, - "eval_loss": 1.1111246347427368, + "eval_accuracy": 0.856149751982331, + "eval_loss": 1.0980205535888672, "eval_macro avg": { - "f1-score": 0.8277230031750359, - "precision": 0.8228466831931516, - "recall": 0.8332238824222116, - "support": 33500.0 - }, - "eval_runtime": 4.919, - "eval_samples_per_second": 16.467, - "eval_steps_per_second": 2.236, + "f1-score": 0.8099885621951687, + "precision": 0.8203863512462465, + "recall": 0.8012771887067152, + "support": 27619.0 + }, + "eval_runtime": 4.7314, + "eval_samples_per_second": 16.908, + "eval_steps_per_second": 2.114, "eval_weighted avg": { - "f1-score": 0.8614373038849609, - "precision": 0.8628095441324599, - "recall": 0.8604477611940299, - "support": 33500.0 + "f1-score": 0.8544925535585312, + "precision": 0.8546105739408988, + "recall": 0.856149751982331, + "support": 27619.0 }, "step": 2592 }, { "epoch": 33.0, "eval_Claim": { - "f1-score": 0.6564330438295295, - "precision": 0.6549778315195486, - "recall": 0.6578947368421053, - "support": 4940.0 + "f1-score": 0.645293315143247, + "precision": 0.6132238547968885, + "recall": 0.6809021113243762, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8466696468484577, - "precision": 0.8285214348206474, - "recall": 0.8656307129798904, - "support": 2188.0 + "f1-score": 0.8171494785631519, + "precision": 0.8150716597318539, + "recall": 0.8192379182156134, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9148585819449826, - "precision": 0.9282555282555283, - "recall": 0.9018428339539769, - "support": 10473.0 + "f1-score": 0.9240913120567376, + "precision": 0.9452505100884153, + "recall": 0.9038586603078257, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8948586439829872, - "precision": 0.8899048329912297, - "recall": 0.899867916221146, - "support": 15899.0 + "f1-score": 0.8876614477345405, + "precision": 0.8901382642012328, + "recall": 0.8851983765426986, + "support": 12073.0 }, - "eval_accuracy": 0.8625671641791045, - "eval_loss": 1.145164132118225, + "eval_accuracy": 0.8554618197617582, + "eval_loss": 1.103171467781067, "eval_macro avg": { - "f1-score": 0.8282049791514892, - "precision": 0.8254149068967386, - "recall": 0.8313090499992797, - "support": 33500.0 - }, - "eval_runtime": 5.0162, - "eval_samples_per_second": 16.148, - "eval_steps_per_second": 2.193, + "f1-score": 0.8185488883744192, + "precision": 0.8159210722045976, + "recall": 0.8222992665976285, + "support": 27619.0 + }, + "eval_runtime": 4.7195, + "eval_samples_per_second": 16.951, + "eval_steps_per_second": 2.119, "eval_weighted avg": { - "f1-score": 0.862804893528884, - "precision": 0.8632421634102047, - "recall": 0.8625671641791045, - "support": 33500.0 + "f1-score": 0.8577606473413056, + "precision": 0.8609099427319448, + "recall": 0.8554618197617582, + "support": 27619.0 }, "step": 2673 }, { "epoch": 34.0, "eval_Claim": { - "f1-score": 0.6356152512998267, - "precision": 0.6835973904939422, - "recall": 0.5939271255060729, - "support": 4940.0 + "f1-score": 0.6230995586071605, + "precision": 0.6371614844533601, + "recall": 0.6096449136276392, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8559207564160288, - "precision": 0.8433895297249334, - "recall": 0.8688299817184644, - "support": 2188.0 + "f1-score": 0.8227470763586332, + "precision": 0.812132186509733, + "recall": 0.8336431226765799, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9127349443805141, - "precision": 0.9166907444861794, - "recall": 0.9088131385467393, - "support": 10473.0 + "f1-score": 0.9239675800849093, + "precision": 0.9402985074626866, + "recall": 0.9081942336874052, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8947336002463814, - "precision": 0.876591635990586, - "recall": 0.9136423674444933, - "support": 15899.0 + "f1-score": 0.8910673608851285, + "precision": 0.875469586763648, + "recall": 0.9072310113476352, + "support": 12073.0 }, - "eval_accuracy": 0.8620597014925373, - "eval_loss": 1.1328134536743164, + "eval_accuracy": 0.8569100981208588, + "eval_loss": 1.1183395385742188, "eval_macro avg": { - "f1-score": 0.8247511380856878, - "precision": 0.8300673251739102, - "recall": 0.8213031533039424, - "support": 33500.0 - }, - "eval_runtime": 4.886, - "eval_samples_per_second": 16.578, - "eval_steps_per_second": 2.251, + "f1-score": 0.8152203939839578, + "precision": 0.8162654412973569, + "recall": 0.8146783203348149, + "support": 27619.0 + }, + "eval_runtime": 4.7358, + "eval_samples_per_second": 16.893, + "eval_steps_per_second": 2.112, "eval_weighted avg": { - "f1-score": 0.8596160160977241, - "precision": 0.858499701125263, - "recall": 0.8620597014925373, - "support": 33500.0 + "f1-score": 0.8562950074379215, + "precision": 0.856227085825657, + "recall": 0.8569100981208588, + "support": 27619.0 }, "step": 2754 }, { "epoch": 35.0, "eval_Claim": { - "f1-score": 0.6474583247581807, - "precision": 0.6584344914190038, - "recall": 0.6368421052631579, - "support": 4940.0 + "f1-score": 0.6287781538102462, + "precision": 0.6121336059986366, + "recall": 0.6463531669865643, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8435185185185186, - "precision": 0.8545966228893058, - "recall": 0.8327239488117002, - "support": 2188.0 + "f1-score": 0.8306414397784957, + "precision": 0.8249312557286893, + "recall": 0.8364312267657993, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9133458015633241, - "precision": 0.9174373795761079, - "recall": 0.9092905566695312, - "support": 10473.0 + "f1-score": 0.9218662169758292, + "precision": 0.9574964969640355, + "recall": 0.8887925428137872, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8930206484163319, - "precision": 0.8844540407156076, - "recall": 0.9017548273476319, - "support": 15899.0 + "f1-score": 0.8870238337746996, + "precision": 0.8728351507376524, + "recall": 0.9016814379193241, + "support": 12073.0 }, - "eval_accuracy": 0.8605373134328358, - "eval_loss": 1.16542649269104, + "eval_accuracy": 0.853760092689815, + "eval_loss": 1.126227617263794, "eval_macro avg": { - "f1-score": 0.8243358233140887, - "precision": 0.8287306336500063, - "recall": 0.8201528595230052, - "support": 33500.0 - }, - "eval_runtime": 4.925, - "eval_samples_per_second": 16.447, - "eval_steps_per_second": 2.233, + "f1-score": 0.8170774110848177, + "precision": 0.8168491273572535, + "recall": 0.8183145936213687, + "support": 27619.0 + }, + "eval_runtime": 4.7324, + "eval_samples_per_second": 16.905, + "eval_steps_per_second": 2.113, "eval_weighted avg": { - "f1-score": 0.8599304033363548, - "precision": 0.8594859781531254, - "recall": 0.8605373134328358, - "support": 33500.0 + "f1-score": 0.8552975917471803, + "precision": 0.8580407106693336, + "recall": 0.853760092689815, + "support": 27619.0 }, "step": 2835 }, { "epoch": 36.0, "eval_Claim": { - "f1-score": 0.63220675944334, - "precision": 0.654320987654321, - "recall": 0.6115384615384616, - "support": 4940.0 + "f1-score": 0.6262910798122067, + "precision": 0.6130514705882353, + "recall": 0.6401151631477927, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8513604677310547, - "precision": 0.8379814077025233, - "recall": 0.8651736745886655, - "support": 2188.0 + "f1-score": 0.8252405459834415, + "precision": 0.7958567112645663, + "recall": 0.8568773234200744, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9143658430443401, - "precision": 0.9262343260188087, - "recall": 0.9027976701995608, - "support": 10473.0 + "f1-score": 0.9250767085076709, + "precision": 0.9530980572479595, + "recall": 0.8986559722523304, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8918458920006188, - "precision": 0.8778021442495126, - "recall": 0.9063463110887477, - "support": 15899.0 + "f1-score": 0.8858740338760073, + "precision": 0.87943841319076, + "recall": 0.8924045390540877, + "support": 12073.0 }, - "eval_accuracy": 0.8590746268656716, - "eval_loss": 1.1980067491531372, + "eval_accuracy": 0.8536514718128825, + "eval_loss": 1.135550856590271, "eval_macro avg": { - "f1-score": 0.8224447405548384, - "precision": 0.8240847164062914, - "recall": 0.8214640293538589, - "support": 33500.0 - }, - "eval_runtime": 4.9337, - "eval_samples_per_second": 16.418, - "eval_steps_per_second": 2.23, + "f1-score": 0.8156205920448316, + "precision": 0.8103611630728802, + "recall": 0.8220132494685712, + "support": 27619.0 + }, + "eval_runtime": 4.7408, + "eval_samples_per_second": 16.875, + "eval_steps_per_second": 2.109, "eval_weighted avg": { - "f1-score": 0.857954907646772, - "precision": 0.8573873846830882, - "recall": 0.8590746268656716, - "support": 33500.0 + "f1-score": 0.8550712842351443, + "precision": 0.8573309971640839, + "recall": 0.8536514718128825, + "support": 27619.0 }, "step": 2916 }, { "epoch": 37.0, "eval_Claim": { - "f1-score": 0.6368211365066052, - "precision": 0.6605045672031318, - "recall": 0.6147773279352227, - "support": 4940.0 + "f1-score": 0.6159482232262792, + "precision": 0.6272071623974136, + "recall": 0.6050863723608445, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8573378839590444, - "precision": 0.8536474852741278, - "recall": 0.8610603290676416, - "support": 2188.0 + "f1-score": 0.81899518238128, + "precision": 0.8087902129587675, + "recall": 0.8294609665427509, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9157544485701886, - "precision": 0.9250779423226813, - "recall": 0.9066170151818963, - "support": 10473.0 + "f1-score": 0.9242474363215348, + "precision": 0.9405296229802513, + "recall": 0.9085194016908736, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8922981750695949, - "precision": 0.8778528391455176, - "recall": 0.9072268696144412, - "support": 15899.0 + "f1-score": 0.889622026718801, + "precision": 0.8751502524240724, + "recall": 0.9045804688147105, + "support": 12073.0 }, - "eval_accuracy": 0.8608955223880597, - "eval_loss": 1.1901870965957642, + "eval_accuracy": 0.8548463014591404, + "eval_loss": 1.1102262735366821, "eval_macro avg": { - "f1-score": 0.8255529110263582, - "precision": 0.8292707084863646, - "recall": 0.8224203854498005, - "support": 33500.0 - }, - "eval_runtime": 4.927, - "eval_samples_per_second": 16.44, - "eval_steps_per_second": 2.233, + "f1-score": 0.8122032171619737, + "precision": 0.8129193126901262, + "recall": 0.8119118023522949, + "support": 27619.0 + }, + "eval_runtime": 4.7479, + "eval_samples_per_second": 16.85, + "eval_steps_per_second": 2.106, "eval_weighted avg": { - "f1-score": 0.859674529246331, - "precision": 0.8589849802771131, - "recall": 0.8608955223880597, - "support": 33500.0 + "f1-score": 0.8543851480129697, + "precision": 0.8544020887900109, + "recall": 0.8548463014591404, + "support": 27619.0 }, "step": 2997 }, { "epoch": 37.04, - "grad_norm": 0.8262068033218384, + "grad_norm": 1.1952095031738281, "learning_rate": 5.185185185185185e-06, - "loss": 0.0042, + "loss": 0.0039, "step": 3000 }, { "epoch": 38.0, "eval_Claim": { - "f1-score": 0.6409235668789809, - "precision": 0.6303837118245889, - "recall": 0.6518218623481782, - "support": 4940.0 + "f1-score": 0.6296296296296297, + "precision": 0.6231203007518797, + "recall": 0.6362763915547025, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8523321956769055, - "precision": 0.8486633439057544, - "recall": 0.8560329067641682, - "support": 2188.0 + "f1-score": 0.8261065943992774, + "precision": 0.8036028119507909, + "recall": 0.849907063197026, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9147144240077445, - "precision": 0.9275547266123491, - "recall": 0.9022247684522104, - "support": 10473.0 + "f1-score": 0.9291572631694831, + "precision": 0.9439659993289341, + "recall": 0.9148059830912638, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8904285669498698, - "precision": 0.8876734591823978, - "recall": 0.8932008302408957, - "support": 15899.0 + "f1-score": 0.8880630909616417, + "precision": 0.885394368516384, + "recall": 0.8907479499710097, + "support": 12073.0 }, - "eval_accuracy": 0.858, - "eval_loss": 1.198099136352539, + "eval_accuracy": 0.857199753792679, + "eval_loss": 1.096726655960083, "eval_macro avg": { - "f1-score": 0.8245996883783752, - "precision": 0.8235688103812725, - "recall": 0.8258200919513632, - "support": 33500.0 - }, - "eval_runtime": 4.9138, - "eval_samples_per_second": 16.484, - "eval_steps_per_second": 2.239, + "f1-score": 0.8182391445400079, + "precision": 0.8140208701369972, + "recall": 0.8229343469535004, + "support": 27619.0 + }, + "eval_runtime": 4.7721, + "eval_samples_per_second": 16.764, + "eval_steps_per_second": 2.096, "eval_weighted avg": { - "f1-score": 0.8587400959132037, - "precision": 0.8596528929024279, - "recall": 0.858, - "support": 33500.0 + "f1-score": 0.8579625726718959, + "precision": 0.8590070308758098, + "recall": 0.857199753792679, + "support": 27619.0 }, "step": 3078 }, { "epoch": 39.0, "eval_Claim": { - "f1-score": 0.6499377851513894, - "precision": 0.6662414965986394, - "recall": 0.6344129554655871, - "support": 4940.0 + "f1-score": 0.6354609929078013, + "precision": 0.6262814538676608, + "recall": 0.6449136276391555, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8481362172112287, - "precision": 0.8540315106580166, - "recall": 0.8423217550274223, - "support": 2188.0 + "f1-score": 0.8238434163701068, + "precision": 0.7901023890784983, + "recall": 0.8605947955390335, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9112606807007495, - "precision": 0.9111302023673158, - "recall": 0.9113911964098157, - "support": 10473.0 + "f1-score": 0.9278488794500642, + "precision": 0.9577708549671167, + "recall": 0.8997398655972252, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8928604847010387, - "precision": 0.8855958420987502, - "recall": 0.9002452984464432, - "support": 15899.0 + "f1-score": 0.8900733937430809, + "precision": 0.8812926274764534, + "recall": 0.8990308953863994, + "support": 12073.0 }, - "eval_accuracy": 0.8607462686567164, - "eval_loss": 1.214090347290039, + "eval_accuracy": 0.8579238929722293, + "eval_loss": 1.1243098974227905, "eval_macro avg": { - "f1-score": 0.8255487919411015, - "precision": 0.8292497629306805, - "recall": 0.822092801337317, - "support": 33500.0 - }, - "eval_runtime": 4.9164, - "eval_samples_per_second": 16.475, - "eval_steps_per_second": 2.237, + "f1-score": 0.8193066706177633, + "precision": 0.8138618313474322, + "recall": 0.8260697960404534, + "support": 27619.0 + }, + "eval_runtime": 4.7248, + "eval_samples_per_second": 16.932, + "eval_steps_per_second": 2.117, "eval_weighted avg": { - "f1-score": 0.8598697509596058, - "precision": 0.8591704131772522, - "recall": 0.8607462686567164, - "support": 33500.0 + "f1-score": 0.8591079436234004, + "precision": 0.8612506332766275, + "recall": 0.8579238929722293, + "support": 27619.0 }, "step": 3159 }, { "epoch": 40.0, "eval_Claim": { - "f1-score": 0.6468762929251138, - "precision": 0.6613790186125211, - "recall": 0.632995951417004, - "support": 4940.0 + "f1-score": 0.6420784040825794, + "precision": 0.6214638527166592, + "recall": 0.6641074856046065, + "support": 4168.0 }, "eval_MajorClaim": { - "f1-score": 0.8509220173294822, - "precision": 0.8279290964115866, - "recall": 0.8752285191956124, - "support": 2188.0 + "f1-score": 0.8344163118978265, + "precision": 0.8057118130679359, + "recall": 0.8652416356877324, + "support": 2152.0 }, "eval_O": { - "f1-score": 0.9132591629340654, - "precision": 0.921379980563654, - "recall": 0.9052802444380789, - "support": 10473.0 + "f1-score": 0.9288822784112818, + "precision": 0.9444382211269183, + "recall": 0.9138304790808585, + "support": 9226.0 }, "eval_Premise": { - "f1-score": 0.8917924410627417, - "precision": 0.8843465891520812, - "recall": 0.8993647399207497, - "support": 15899.0 + "f1-score": 0.8881666666666668, + "precision": 0.8936027500628826, + "recall": 0.8827963223722356, + "support": 12073.0 }, - "eval_accuracy": 0.8603582089552239, - "eval_loss": 1.218752145767212, + "eval_accuracy": 0.8587928599876896, + "eval_loss": 1.112131118774414, "eval_macro avg": { - "f1-score": 0.8257124785628508, - "precision": 0.8237586711849608, - "recall": 0.8282173637428613, - "support": 33500.0 - }, - "eval_runtime": 5.1955, - "eval_samples_per_second": 15.59, - "eval_steps_per_second": 2.117, + "f1-score": 0.8233859152645886, + "precision": 0.816304159243599, + "recall": 0.8314939806863582, + "support": 27619.0 + }, + "eval_runtime": 4.7649, + "eval_samples_per_second": 16.789, + "eval_steps_per_second": 2.099, "eval_weighted avg": { - "f1-score": 0.8597181341740886, - "precision": 0.8593600051422834, - "recall": 0.8603582089552239, - "support": 33500.0 + "f1-score": 0.8604421144396781, + "precision": 0.8626672287002196, + "recall": 0.8587928599876896, + "support": 27619.0 }, "step": 3240 } @@ -1856,7 +1856,7 @@ "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, - "total_flos": 5733687354696000.0, + "total_flos": 5751549309072000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null