{ "best_metric": null, "best_model_checkpoint": null, "epoch": 16.0, "eval_steps": 500, "global_step": 1296, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Claim": { "f1-score": 0.4888068880688806, "precision": 0.6228840125391849, "recall": 0.40222672064777326, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.7325278370054489, "precision": 0.7604525332021643, "recall": 0.706581352833638, "support": 2188.0 }, "eval_O": { "f1-score": 0.8634848138844484, "precision": 0.827613377692173, "recall": 0.902606702950444, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8792819197655248, "precision": 0.8543458914268763, "recall": 0.9057173407132524, "support": 15899.0 }, "eval_accuracy": 0.8174925373134329, "eval_loss": 0.49806877970695496, "eval_macro avg": { "f1-score": 0.7410253646810757, "precision": 0.7663239537150996, "recall": 0.7292830292862769, "support": 33500.0 }, "eval_runtime": 4.9194, "eval_samples_per_second": 16.465, "eval_steps_per_second": 2.236, "eval_weighted avg": { "f1-score": 0.8071778099281224, "precision": 0.80572410140167, "recall": 0.8174925373134329, "support": 33500.0 }, "step": 81 }, { "epoch": 2.0, "eval_Claim": { "f1-score": 0.5993816254416962, "precision": 0.6593780369290574, "recall": 0.5493927125506073, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8017817371937639, "precision": 0.7197382769901853, "recall": 0.9049360146252285, "support": 2188.0 }, "eval_O": { "f1-score": 0.9058997050147494, "precision": 0.9337184554575859, "recall": 0.8796906330564308, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8854737486606459, "precision": 0.862579028987236, "recall": 0.9096169570413234, "support": 15899.0 }, "eval_accuracy": 0.8468358208955223, "eval_loss": 0.4562840759754181, "eval_macro avg": { "f1-score": 0.7981342040777137, "precision": 0.7938534495910161, "recall": 0.8109090793183975, "support": 33500.0 }, "eval_runtime": 4.904, "eval_samples_per_second": 16.517, "eval_steps_per_second": 2.243, "eval_weighted avg": { "f1-score": 0.8442053257085675, "precision": 0.8455251408465503, "recall": 0.8468358208955223, "support": 33500.0 }, "step": 162 }, { "epoch": 3.0, "eval_Claim": { "f1-score": 0.6235462150581514, "precision": 0.6285479226655697, "recall": 0.6186234817813765, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8338848444738584, "precision": 0.8059701492537313, "recall": 0.8638025594149908, "support": 2188.0 }, "eval_O": { "f1-score": 0.9066250826953974, "precision": 0.8974646833193002, "recall": 0.9159744103886184, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8827730692315017, "precision": 0.8911176621379133, "recall": 0.8745833071262343, "support": 15899.0 }, "eval_accuracy": 0.8490746268656716, "eval_loss": 0.43820467591285706, "eval_macro avg": { "f1-score": 0.8117073028647273, "precision": 0.8057751043441286, "recall": 0.818245939677805, "support": 33500.0 }, "eval_runtime": 4.9179, "eval_samples_per_second": 16.47, "eval_steps_per_second": 2.237, "eval_weighted avg": { "f1-score": 0.8488105033097496, "precision": 0.8488213959184715, "recall": 0.8490746268656716, "support": 33500.0 }, "step": 243 }, { "epoch": 4.0, "eval_Claim": { "f1-score": 0.6415287144900879, "precision": 0.6477507222451506, "recall": 0.6354251012145749, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8005126014523707, "precision": 0.7514033680834001, "recall": 0.856489945155393, "support": 2188.0 }, "eval_O": { "f1-score": 0.9065169636319258, "precision": 0.9273871354374751, "recall": 0.8865654540246348, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8932505382719131, "precision": 0.8863636363636364, "recall": 0.9002452984464432, "support": 15899.0 }, "eval_accuracy": 0.8540597014925373, "eval_loss": 0.4749625325202942, "eval_macro avg": { "f1-score": 0.8104522044615743, "precision": 0.8032262155324155, "recall": 0.8196814497102615, "support": 33500.0 }, "eval_runtime": 4.8972, "eval_samples_per_second": 16.54, "eval_steps_per_second": 2.246, "eval_weighted avg": { "f1-score": 0.8542213698406307, "precision": 0.8551874645146165, "recall": 0.8540597014925373, "support": 33500.0 }, "step": 324 }, { "epoch": 5.0, "eval_Claim": { "f1-score": 0.6099687080911935, "precision": 0.6808882235528942, "recall": 0.5524291497975709, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8315018315018317, "precision": 0.8330275229357799, "recall": 0.829981718464351, "support": 2188.0 }, "eval_O": { "f1-score": 0.9103454839630722, "precision": 0.9074091642159188, "recall": 0.9133008689009835, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8913988368533824, "precision": 0.8682249120505635, "recall": 0.915843763758727, "support": 15899.0 }, "eval_accuracy": 0.8558507462686568, "eval_loss": 0.5162495374679565, "eval_macro avg": { "f1-score": 0.8108037151023699, "precision": 0.8223874556887891, "recall": 0.8028888752304081, "support": 33500.0 }, "eval_runtime": 4.9413, "eval_samples_per_second": 16.392, "eval_steps_per_second": 2.226, "eval_weighted avg": { "f1-score": 0.8519095458499607, "precision": 0.8505509283002989, "recall": 0.8558507462686568, "support": 33500.0 }, "step": 405 }, { "epoch": 6.0, "eval_Claim": { "f1-score": 0.646279703934554, "precision": 0.6227477477477478, "recall": 0.6716599190283401, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8414067684140676, "precision": 0.8152593227603944, "recall": 0.8692870201096892, "support": 2188.0 }, "eval_O": { "f1-score": 0.9112506693929215, "precision": 0.9295788637266588, "recall": 0.8936312422419556, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8861383012314494, "precision": 0.8897343224906474, "recall": 0.8825712308950249, "support": 15899.0 }, "eval_accuracy": 0.8540597014925373, "eval_loss": 0.552895724773407, "eval_macro avg": { "f1-score": 0.8212688607432481, "precision": 0.8143300641813621, "recall": 0.8292873530687525, "support": 33500.0 }, "eval_runtime": 4.9336, "eval_samples_per_second": 16.418, "eval_steps_per_second": 2.23, "eval_weighted avg": { "f1-score": 0.8556973390614198, "precision": 0.8579560210496036, "recall": 0.8540597014925373, "support": 33500.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 9.790245056152344, "learning_rate": 1.7530864197530865e-05, "loss": 0.3328, "step": 500 }, { "epoch": 7.0, "eval_Claim": { "f1-score": 0.6202600808654791, "precision": 0.6739491807171694, "recall": 0.5744939271255061, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8670041417395307, "precision": 0.8730305838739574, "recall": 0.8610603290676416, "support": 2188.0 }, "eval_O": { "f1-score": 0.9109892729439809, "precision": 0.9097314797181489, "recall": 0.9122505490308412, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8872356123954747, "precision": 0.8677611401767996, "recall": 0.9076042518397384, "support": 15899.0 }, "eval_accuracy": 0.8568955223880597, "eval_loss": 0.5894299149513245, "eval_macro avg": { "f1-score": 0.8213722769861164, "precision": 0.8311180961215189, "recall": 0.8138522642659318, "support": 33500.0 }, "eval_runtime": 4.9037, "eval_samples_per_second": 16.518, "eval_steps_per_second": 2.243, "eval_weighted avg": { "f1-score": 0.8539713289140156, "precision": 0.8526463291050193, "recall": 0.8568955223880597, "support": 33500.0 }, "step": 567 }, { "epoch": 8.0, "eval_Claim": { "f1-score": 0.6441500279902966, "precision": 0.5974385600553825, "recall": 0.6987854251012146, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.834510811389424, "precision": 0.7849375755134917, "recall": 0.8907678244972578, "support": 2188.0 }, "eval_O": { "f1-score": 0.9059161060546101, "precision": 0.9398542543364466, "recall": 0.874343550081161, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8801401497053671, "precision": 0.8915849251419721, "recall": 0.8689854707843261, "support": 15899.0 }, "eval_accuracy": 0.8469850746268657, "eval_loss": 0.5878348350524902, "eval_macro avg": { "f1-score": 0.8161792737849245, "precision": 0.8034538287618231, "recall": 0.8332205676159898, "support": 33500.0 }, "eval_runtime": 4.9086, "eval_samples_per_second": 16.502, "eval_steps_per_second": 2.241, "eval_weighted avg": { "f1-score": 0.8504184600736624, "precision": 0.8563340964894008, "recall": 0.8469850746268657, "support": 33500.0 }, "step": 648 }, { "epoch": 9.0, "eval_Claim": { "f1-score": 0.6323094193285029, "precision": 0.6375797489195307, "recall": 0.6271255060728745, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8525914292124747, "precision": 0.8373732921992068, "recall": 0.8683729433272395, "support": 2188.0 }, "eval_O": { "f1-score": 0.9054132151801194, "precision": 0.9134194927606647, "recall": 0.8975460708488494, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8888055034396498, "precision": 0.883775884584292, "recall": 0.8938926976539405, "support": 15899.0 }, "eval_accuracy": 0.8540298507462687, "eval_loss": 0.7043101191520691, "eval_macro avg": { "f1-score": 0.8197798917901866, "precision": 0.8180371046159236, "recall": 0.821734304475726, "support": 33500.0 }, "eval_runtime": 4.935, "eval_samples_per_second": 16.414, "eval_steps_per_second": 2.229, "eval_weighted avg": { "f1-score": 0.853808653145316, "precision": 0.8537078167069386, "recall": 0.8540298507462687, "support": 33500.0 }, "step": 729 }, { "epoch": 10.0, "eval_Claim": { "f1-score": 0.6347110795166041, "precision": 0.6368453230079478, "recall": 0.6325910931174089, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8442090055381651, "precision": 0.8921119592875318, "recall": 0.8011882998171846, "support": 2188.0 }, "eval_O": { "f1-score": 0.9106677408653618, "precision": 0.9040270982310877, "recall": 0.9174066647569942, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8841656478259506, "precision": 0.881375, "recall": 0.8869740235234921, "support": 15899.0 }, "eval_accuracy": 0.8533731343283583, "eval_loss": 0.7164433002471924, "eval_macro avg": { "f1-score": 0.8184383684365204, "precision": 0.8285898451316418, "recall": 0.8095400203037699, "support": 33500.0 }, "eval_runtime": 4.9093, "eval_samples_per_second": 16.499, "eval_steps_per_second": 2.241, "eval_weighted avg": { "f1-score": 0.8530559678148433, "precision": 0.8530989190255094, "recall": 0.8533731343283583, "support": 33500.0 }, "step": 810 }, { "epoch": 11.0, "eval_Claim": { "f1-score": 0.645658668208538, "precision": 0.6161486113665624, "recall": 0.6781376518218624, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8331177231565331, "precision": 0.7885714285714286, "recall": 0.8829981718464351, "support": 2188.0 }, "eval_O": { "f1-score": 0.9059711059612445, "precision": 0.9366843393148451, "recall": 0.8772080588179128, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8902977542265961, "precision": 0.8929452704840241, "recall": 0.8876658909365369, "support": 15899.0 }, "eval_accuracy": 0.8531940298507462, "eval_loss": 0.7852362394332886, "eval_macro avg": { "f1-score": 0.818761312888228, "precision": 0.8085874124342151, "recall": 0.8315024433556868, "support": 33500.0 }, "eval_runtime": 4.89, "eval_samples_per_second": 16.565, "eval_steps_per_second": 2.25, "eval_weighted avg": { "f1-score": 0.8553879040715653, "precision": 0.8589850855801485, "recall": 0.8531940298507462, "support": 33500.0 }, "step": 891 }, { "epoch": 12.0, "eval_Claim": { "f1-score": 0.6329215627196947, "precision": 0.6280645804265498, "recall": 0.6378542510121458, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8528368794326241, "precision": 0.8278829604130808, "recall": 0.8793418647166362, "support": 2188.0 }, "eval_O": { "f1-score": 0.9126755145220234, "precision": 0.9193064031773709, "recall": 0.9061395970591044, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8876634630534112, "precision": 0.8894291487749432, "recall": 0.88590477388515, "support": 15899.0 }, "eval_accuracy": 0.8552238805970149, "eval_loss": 0.8324545621871948, "eval_macro avg": { "f1-score": 0.8215243549319383, "precision": 0.8161707731979861, "recall": 0.827310121668259, "support": 33500.0 }, "eval_runtime": 4.8939, "eval_samples_per_second": 16.551, "eval_steps_per_second": 2.248, "eval_weighted avg": { "f1-score": 0.8556433335734092, "precision": 0.8562082669116539, "recall": 0.8552238805970149, "support": 33500.0 }, "step": 972 }, { "epoch": 12.35, "grad_norm": 2.6090729236602783, "learning_rate": 1.506172839506173e-05, "loss": 0.0589, "step": 1000 }, { "epoch": 13.0, "eval_Claim": { "f1-score": 0.6431152147731835, "precision": 0.6377388535031847, "recall": 0.648582995951417, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8326953388876379, "precision": 0.8206835330670218, "recall": 0.8450639853747715, "support": 2188.0 }, "eval_O": { "f1-score": 0.904438383337466, "precision": 0.9408790755262072, "recall": 0.8707151723479424, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.892013567684243, "precision": 0.8749621922448733, "recall": 0.9097427511164224, "support": 15899.0 }, "eval_accuracy": 0.8548059701492537, "eval_loss": 0.8543176651000977, "eval_macro avg": { "f1-score": 0.8180656261706326, "precision": 0.8185659135853218, "recall": 0.8185262261976383, "support": 33500.0 }, "eval_runtime": 4.9338, "eval_samples_per_second": 16.417, "eval_steps_per_second": 2.23, "eval_weighted avg": { "f1-score": 0.8553204019036041, "precision": 0.8570428644520474, "recall": 0.8548059701492537, "support": 33500.0 }, "step": 1053 }, { "epoch": 14.0, "eval_Claim": { "f1-score": 0.6541843971631205, "precision": 0.6138420585625555, "recall": 0.7002024291497976, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8336528221512247, "precision": 0.7806142800159553, "recall": 0.8944241316270567, "support": 2188.0 }, "eval_O": { "f1-score": 0.9079845651528643, "precision": 0.9421004003695719, "recall": 0.8762532225723288, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.889389516436096, "precision": 0.8974194787731319, "recall": 0.8815019812566828, "support": 15899.0 }, "eval_accuracy": 0.8539701492537314, "eval_loss": 0.9095103144645691, "eval_macro avg": { "f1-score": 0.8213028252258264, "precision": 0.8084940544303038, "recall": 0.8380954411514664, "support": 33500.0 }, "eval_runtime": 4.8917, "eval_samples_per_second": 16.559, "eval_steps_per_second": 2.249, "eval_weighted avg": { "f1-score": 0.8568784946124219, "precision": 0.8619418985092084, "recall": 0.8539701492537314, "support": 33500.0 }, "step": 1134 }, { "epoch": 15.0, "eval_Claim": { "f1-score": 0.631984585741811, "precision": 0.6332046332046332, "recall": 0.6307692307692307, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8264781491002571, "precision": 0.7778225806451613, "recall": 0.8816270566727605, "support": 2188.0 }, "eval_O": { "f1-score": 0.8970963775654971, "precision": 0.8886910896655111, "recall": 0.9056621789363124, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8838946528332003, "precision": 0.8974458706080644, "recall": 0.8707465878357129, "support": 15899.0 }, "eval_accuracy": 0.8469850746268657, "eval_loss": 0.9576827883720398, "eval_macro avg": { "f1-score": 0.8098634413101913, "precision": 0.7992910435308425, "recall": 0.8222012635535042, "support": 33500.0 }, "eval_runtime": 4.8883, "eval_samples_per_second": 16.57, "eval_steps_per_second": 2.25, "eval_weighted avg": { "f1-score": 0.8471244624308779, "precision": 0.8479301603984184, "recall": 0.8469850746268657, "support": 33500.0 }, "step": 1215 }, { "epoch": 16.0, "eval_Claim": { "f1-score": 0.6332942805410586, "precision": 0.6682400539447066, "recall": 0.6018218623481781, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8478211270819075, "precision": 0.8464692482915718, "recall": 0.8491773308957953, "support": 2188.0 }, "eval_O": { "f1-score": 0.9089773764349872, "precision": 0.9145563502803016, "recall": 0.9034660555714695, "support": 10473.0 }, "eval_Premise": { "f1-score": 0.8924681415656145, "precision": 0.8759539672925499, "recall": 0.9096169570413234, "support": 15899.0 }, "eval_accuracy": 0.8583582089552239, "eval_loss": 0.9394893646240234, "eval_macro avg": { "f1-score": 0.8206402314058919, "precision": 0.8263049049522826, "recall": 0.8160205514641916, "support": 33500.0 }, "eval_runtime": 4.896, "eval_samples_per_second": 16.544, "eval_steps_per_second": 2.247, "eval_weighted avg": { "f1-score": 0.8564948483009961, "precision": 0.8554663093796615, "recall": 0.8583582089552239, "support": 33500.0 }, "step": 1296 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 2293474941878400.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }