| { | |
| "best_metric": 0.2252955436706543, | |
| "best_model_checkpoint": "longformer-spans/checkpoint-162", | |
| "epoch": 17.0, | |
| "eval_steps": 500, | |
| "global_step": 1377, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_B": { | |
| "f1-score": 0.816, | |
| "precision": 0.7461594732991953, | |
| "recall": 0.9002647837599294, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9321376763813793, | |
| "precision": 0.9024103768767235, | |
| "recall": 0.9638902525500463, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8560784528570645, | |
| "precision": 0.931782945736434, | |
| "recall": 0.7917511147142278, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9035249198881844, | |
| "eval_loss": 0.2619660496711731, | |
| "eval_macro avg": { | |
| "f1-score": 0.8680720430794812, | |
| "precision": 0.860117598637451, | |
| "recall": 0.8853020503414012, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.5032, | |
| "eval_samples_per_second": 14.537, | |
| "eval_steps_per_second": 1.817, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9020655278480035, | |
| "precision": 0.9062562975065145, | |
| "recall": 0.9035249198881844, | |
| "support": 29334.0 | |
| }, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_B": { | |
| "f1-score": 0.8512256973795435, | |
| "precision": 0.8167072181670721, | |
| "recall": 0.8887908208296558, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9416781438711729, | |
| "precision": 0.9152551099212274, | |
| "recall": 0.9696721758577429, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8778173190984578, | |
| "precision": 0.9380041484212952, | |
| "recall": 0.8248885285772193, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9178427763005387, | |
| "eval_loss": 0.2252955436706543, | |
| "eval_macro avg": { | |
| "f1-score": 0.8902403867830581, | |
| "precision": 0.8899888255031981, | |
| "recall": 0.8944505084215394, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.4972, | |
| "eval_samples_per_second": 14.553, | |
| "eval_steps_per_second": 1.819, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9167016237671239, | |
| "precision": 0.9191015935430046, | |
| "recall": 0.9178427763005387, | |
| "support": 29334.0 | |
| }, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_B": { | |
| "f1-score": 0.8531120331950207, | |
| "precision": 0.8050117462803446, | |
| "recall": 0.9073256840247131, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9469915571230095, | |
| "precision": 0.9280963603037444, | |
| "recall": 0.9666721213112965, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8903876792352629, | |
| "precision": 0.9353938852934612, | |
| "recall": 0.8495135792460479, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9249676143724006, | |
| "eval_loss": 0.22786012291908264, | |
| "eval_macro avg": { | |
| "f1-score": 0.8968304231844311, | |
| "precision": 0.8895006639591835, | |
| "recall": 0.9078371281940192, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.5036, | |
| "eval_samples_per_second": 14.536, | |
| "eval_steps_per_second": 1.817, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9243239165827936, | |
| "precision": 0.9257972230878861, | |
| "recall": 0.9249676143724006, | |
| "support": 29334.0 | |
| }, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_B": { | |
| "f1-score": 0.8567807351077312, | |
| "precision": 0.8217179902755267, | |
| "recall": 0.8949691085613416, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9472597903427299, | |
| "precision": 0.9432635621180161, | |
| "recall": 0.9512900234549719, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8979927100980543, | |
| "precision": 0.9099989595255437, | |
| "recall": 0.8862991487636805, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9272516533715143, | |
| "eval_loss": 0.23897655308246613, | |
| "eval_macro avg": { | |
| "f1-score": 0.9006777451828384, | |
| "precision": 0.8916601706396955, | |
| "recall": 0.910852760259998, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.4904, | |
| "eval_samples_per_second": 14.571, | |
| "eval_steps_per_second": 1.821, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9271915992526735, | |
| "precision": 0.9273787107073643, | |
| "recall": 0.9272516533715143, | |
| "support": 29334.0 | |
| }, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_B": { | |
| "f1-score": 0.8624407072013798, | |
| "precision": 0.8431703204047217, | |
| "recall": 0.8826125330979699, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.948190701170407, | |
| "precision": 0.9335059992600032, | |
| "recall": 0.9633447880870561, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8955333298423835, | |
| "precision": 0.9265359193845487, | |
| "recall": 0.8665383056343737, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9276607349832958, | |
| "eval_loss": 0.25390708446502686, | |
| "eval_macro avg": { | |
| "f1-score": 0.9020549127380568, | |
| "precision": 0.9010707463497579, | |
| "recall": 0.9041652089397999, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.4919, | |
| "eval_samples_per_second": 14.567, | |
| "eval_steps_per_second": 1.821, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9271646670996412, | |
| "precision": 0.9276721180179627, | |
| "recall": 0.9276607349832958, | |
| "support": 29334.0 | |
| }, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_B": { | |
| "f1-score": 0.8601036269430052, | |
| "precision": 0.841927303465765, | |
| "recall": 0.8790820829655781, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9474285714285714, | |
| "precision": 0.9452679589509693, | |
| "recall": 0.9495990836197021, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8983777165595348, | |
| "precision": 0.9045613314156564, | |
| "recall": 0.8922780705310093, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9275925547146656, | |
| "eval_loss": 0.29299432039260864, | |
| "eval_macro avg": { | |
| "f1-score": 0.9019699716437038, | |
| "precision": 0.8972521979441302, | |
| "recall": 0.9069864123720964, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.4971, | |
| "eval_samples_per_second": 14.553, | |
| "eval_steps_per_second": 1.819, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9275549436263691, | |
| "precision": 0.9275827485063247, | |
| "recall": 0.9275925547146656, | |
| "support": 29334.0 | |
| }, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "grad_norm": 2.853372573852539, | |
| "learning_rate": 1.3827160493827162e-05, | |
| "loss": 0.1621, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_B": { | |
| "f1-score": 0.8665526090675792, | |
| "precision": 0.8406639004149378, | |
| "recall": 0.8940864960282436, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9492722371967655, | |
| "precision": 0.9382959450098577, | |
| "recall": 0.9605083728795069, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8984919396775871, | |
| "precision": 0.9227729117709891, | |
| "recall": 0.8754560194568302, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9293311515647371, | |
| "eval_loss": 0.3148973286151886, | |
| "eval_macro avg": { | |
| "f1-score": 0.9047722619806439, | |
| "precision": 0.9005775857319281, | |
| "recall": 0.9100169627881934, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.4821, | |
| "eval_samples_per_second": 14.593, | |
| "eval_steps_per_second": 1.824, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9289946986889036, | |
| "precision": 0.9293030221719495, | |
| "recall": 0.9293311515647371, | |
| "support": 29334.0 | |
| }, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_B": { | |
| "f1-score": 0.8624629707998307, | |
| "precision": 0.8284552845528456, | |
| "recall": 0.8993821712268314, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9467755410030451, | |
| "precision": 0.9356556940449557, | |
| "recall": 0.9581628756886489, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8934263985831857, | |
| "precision": 0.9191854233654877, | |
| "recall": 0.8690717470612079, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9259221381332242, | |
| "eval_loss": 0.34766319394111633, | |
| "eval_macro avg": { | |
| "f1-score": 0.9008883034620205, | |
| "precision": 0.8944321339877629, | |
| "recall": 0.9088722646588961, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.5202, | |
| "eval_samples_per_second": 14.492, | |
| "eval_steps_per_second": 1.812, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9255723133682386, | |
| "precision": 0.9259745494680297, | |
| "recall": 0.9259221381332242, | |
| "support": 29334.0 | |
| }, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_B": { | |
| "f1-score": 0.8654012079378774, | |
| "precision": 0.8464135021097047, | |
| "recall": 0.8852603706972639, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9474813007694164, | |
| "precision": 0.9316216786166175, | |
| "recall": 0.9638902525500463, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8933802299333298, | |
| "precision": 0.9268053588933667, | |
| "recall": 0.8622821240372922, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9266721210881571, | |
| "eval_loss": 0.3807723820209503, | |
| "eval_macro avg": { | |
| "f1-score": 0.9020875795468745, | |
| "precision": 0.9016135132065629, | |
| "recall": 0.9038109157615342, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.532, | |
| "eval_samples_per_second": 14.461, | |
| "eval_steps_per_second": 1.808, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9261113508073029, | |
| "precision": 0.9267103706800465, | |
| "recall": 0.9266721210881571, | |
| "support": 29334.0 | |
| }, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_B": { | |
| "f1-score": 0.8593548387096774, | |
| "precision": 0.8380872483221476, | |
| "recall": 0.881729920564872, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9447995351539802, | |
| "precision": 0.9158687080751703, | |
| "recall": 0.9756177385043364, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8826362209837131, | |
| "precision": 0.9469406710786021, | |
| "recall": 0.8265099310903932, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9218313220154087, | |
| "eval_loss": 0.46634167432785034, | |
| "eval_macro avg": { | |
| "f1-score": 0.8955968649491236, | |
| "precision": 0.9002988758253068, | |
| "recall": 0.8946191967198672, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.5106, | |
| "eval_samples_per_second": 14.517, | |
| "eval_steps_per_second": 1.815, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9205874800198834, | |
| "precision": 0.9233171207368492, | |
| "recall": 0.9218313220154087, | |
| "support": 29334.0 | |
| }, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_B": { | |
| "f1-score": 0.8585028126352229, | |
| "precision": 0.8421052631578947, | |
| "recall": 0.8755516328331863, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9467715680954965, | |
| "precision": 0.941814648890808, | |
| "recall": 0.9517809414716631, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8955223880597014, | |
| "precision": 0.906636203136359, | |
| "recall": 0.8846777462505067, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9262630394763756, | |
| "eval_loss": 0.39979249238967896, | |
| "eval_macro avg": { | |
| "f1-score": 0.9002655895968069, | |
| "precision": 0.8968520383950206, | |
| "recall": 0.9040034401851186, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.5098, | |
| "eval_samples_per_second": 14.52, | |
| "eval_steps_per_second": 1.815, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9261219666592889, | |
| "precision": 0.9261293813943774, | |
| "recall": 0.9262630394763756, | |
| "support": 29334.0 | |
| }, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_B": { | |
| "f1-score": 0.8661758336942399, | |
| "precision": 0.8503401360544217, | |
| "recall": 0.8826125330979699, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9483851819874267, | |
| "precision": 0.9383342231713828, | |
| "recall": 0.9586537937053401, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8972844112769486, | |
| "precision": 0.9182223165040305, | |
| "recall": 0.8772800972841508, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9283425376695984, | |
| "eval_loss": 0.4524174630641937, | |
| "eval_macro avg": { | |
| "f1-score": 0.9039484756528716, | |
| "precision": 0.9022988919099451, | |
| "recall": 0.906182141362487, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.5194, | |
| "eval_samples_per_second": 14.494, | |
| "eval_steps_per_second": 1.812, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9280195449455239, | |
| "precision": 0.9281698543264606, | |
| "recall": 0.9283425376695984, | |
| "support": 29334.0 | |
| }, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 12.35, | |
| "grad_norm": 1.163489818572998, | |
| "learning_rate": 7.654320987654322e-06, | |
| "loss": 0.0212, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_B": { | |
| "f1-score": 0.8624407072013798, | |
| "precision": 0.8431703204047217, | |
| "recall": 0.8826125330979699, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.94720379658092, | |
| "precision": 0.9365968111768783, | |
| "recall": 0.9580537827960508, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8942532315838654, | |
| "precision": 0.9167642362959021, | |
| "recall": 0.8728212403729225, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9264675802822663, | |
| "eval_loss": 0.45370540022850037, | |
| "eval_macro avg": { | |
| "f1-score": 0.9012992451220551, | |
| "precision": 0.8988437892925006, | |
| "recall": 0.9044958520889811, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.5225, | |
| "eval_samples_per_second": 14.486, | |
| "eval_steps_per_second": 1.811, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9261172500595471, | |
| "precision": 0.926316588126141, | |
| "recall": 0.9264675802822663, | |
| "support": 29334.0 | |
| }, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_B": { | |
| "f1-score": 0.8689595124074879, | |
| "precision": 0.8573883161512027, | |
| "recall": 0.880847308031774, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9480582004921365, | |
| "precision": 0.9300970873786408, | |
| "recall": 0.9667266677575956, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8934309645472265, | |
| "precision": 0.9303346132748217, | |
| "recall": 0.8593433319821646, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9272857435058294, | |
| "eval_loss": 0.49018412828445435, | |
| "eval_macro avg": { | |
| "f1-score": 0.9034828924822836, | |
| "precision": 0.9059400056015551, | |
| "recall": 0.9023057692571781, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.5116, | |
| "eval_samples_per_second": 14.515, | |
| "eval_steps_per_second": 1.814, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9266264019680934, | |
| "precision": 0.9273686789700647, | |
| "recall": 0.9272857435058294, | |
| "support": 29334.0 | |
| }, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_B": { | |
| "f1-score": 0.865451388888889, | |
| "precision": 0.8514090520922288, | |
| "recall": 0.8799646954986761, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9478711568207105, | |
| "precision": 0.943136407819419, | |
| "recall": 0.9526536846124475, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8980679546968688, | |
| "precision": 0.9084499740798341, | |
| "recall": 0.8879205512768544, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9280698165950774, | |
| "eval_loss": 0.463104248046875, | |
| "eval_macro avg": { | |
| "f1-score": 0.9037968334688228, | |
| "precision": 0.9009984779971606, | |
| "recall": 0.9068463104626593, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.477, | |
| "eval_samples_per_second": 14.607, | |
| "eval_steps_per_second": 1.826, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9279338964530544, | |
| "precision": 0.9279249527781314, | |
| "recall": 0.9280698165950774, | |
| "support": 29334.0 | |
| }, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_B": { | |
| "f1-score": 0.8670469504168494, | |
| "precision": 0.8621291448516579, | |
| "recall": 0.8720211827007943, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.949639510706667, | |
| "precision": 0.9403208556149732, | |
| "recall": 0.9591447117220313, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.8996693531721429, | |
| "precision": 0.917685497470489, | |
| "recall": 0.8823469801378192, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9299447739824095, | |
| "eval_loss": 0.4684809744358063, | |
| "eval_macro avg": { | |
| "f1-score": 0.9054519380985532, | |
| "precision": 0.9067118326457067, | |
| "recall": 0.9045042915202149, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.5034, | |
| "eval_samples_per_second": 14.536, | |
| "eval_steps_per_second": 1.817, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9296394123443894, | |
| "precision": 0.9296862022276206, | |
| "recall": 0.9299447739824095, | |
| "support": 29334.0 | |
| }, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_B": { | |
| "f1-score": 0.8695278969957082, | |
| "precision": 0.8462823725981621, | |
| "recall": 0.8940864960282436, | |
| "support": 1133.0 | |
| }, | |
| "eval_I": { | |
| "f1-score": 0.9479728646973986, | |
| "precision": 0.9287246847035429, | |
| "recall": 0.9680357824687722, | |
| "support": 18333.0 | |
| }, | |
| "eval_O": { | |
| "f1-score": 0.892887383573243, | |
| "precision": 0.9344262295081968, | |
| "recall": 0.8548844750709363, | |
| "support": 9868.0 | |
| }, | |
| "eval_accuracy": 0.9271152928342538, | |
| "eval_loss": 0.5304692983627319, | |
| "eval_macro avg": { | |
| "f1-score": 0.9034627150887832, | |
| "precision": 0.9031444289366339, | |
| "recall": 0.9056689178559841, | |
| "support": 29334.0 | |
| }, | |
| "eval_runtime": 5.4944, | |
| "eval_samples_per_second": 14.56, | |
| "eval_steps_per_second": 1.82, | |
| "eval_weighted avg": { | |
| "f1-score": 0.9264121612086422, | |
| "precision": 0.927458430681484, | |
| "recall": 0.9271152928342538, | |
| "support": 29334.0 | |
| }, | |
| "step": 1377 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 1620, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 2444373096505200.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |