|
{ |
|
"best_metric": 0.06875239312648773, |
|
"best_model_checkpoint": "/data/jcanete/all_results/pos/beto_uncased/epochs_4_bs_32_lr_5e-5/checkpoint-850", |
|
"epoch": 4.0, |
|
"global_step": 1792, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.9416445623342176, |
|
"eval_f1": 0.9315524877309181, |
|
"eval_loss": 0.20549777150154114, |
|
"eval_precision": 0.9304758488761358, |
|
"eval_recall": 0.9326316209954462, |
|
"eval_runtime": 2.2237, |
|
"eval_samples_per_second": 743.792, |
|
"eval_steps_per_second": 23.384, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.9580969009166302, |
|
"eval_f1": 0.9511250424261785, |
|
"eval_loss": 0.14205431938171387, |
|
"eval_precision": 0.9507643795154272, |
|
"eval_recall": 0.9514859790684669, |
|
"eval_runtime": 2.1719, |
|
"eval_samples_per_second": 761.545, |
|
"eval_steps_per_second": 23.942, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.963922371822852, |
|
"eval_f1": 0.9588180131287536, |
|
"eval_loss": 0.12034180760383606, |
|
"eval_precision": 0.9578433326689256, |
|
"eval_recall": 0.9597946792362387, |
|
"eval_runtime": 2.8499, |
|
"eval_samples_per_second": 580.367, |
|
"eval_steps_per_second": 18.246, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.9677668468589464, |
|
"eval_f1": 0.9631556392700891, |
|
"eval_loss": 0.10953102260828018, |
|
"eval_precision": 0.9611918922682156, |
|
"eval_recall": 0.9651274266996884, |
|
"eval_runtime": 3.0514, |
|
"eval_samples_per_second": 542.046, |
|
"eval_steps_per_second": 17.041, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.9704865191552228, |
|
"eval_f1": 0.9662253768243082, |
|
"eval_loss": 0.1019054427742958, |
|
"eval_precision": 0.9645330785765465, |
|
"eval_recall": 0.9679236238715347, |
|
"eval_runtime": 3.1025, |
|
"eval_samples_per_second": 533.122, |
|
"eval_steps_per_second": 16.761, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9732061914514992, |
|
"eval_f1": 0.9694056641404069, |
|
"eval_loss": 0.09022298455238342, |
|
"eval_precision": 0.9680156137974986, |
|
"eval_recall": 0.970799712391148, |
|
"eval_runtime": 3.1303, |
|
"eval_samples_per_second": 528.378, |
|
"eval_steps_per_second": 16.612, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.973928079777054, |
|
"eval_f1": 0.9705615354688107, |
|
"eval_loss": 0.08854629099369049, |
|
"eval_precision": 0.9695266567015446, |
|
"eval_recall": 0.9715986258688184, |
|
"eval_runtime": 3.1649, |
|
"eval_samples_per_second": 522.602, |
|
"eval_steps_per_second": 16.43, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.9757579827418326, |
|
"eval_f1": 0.9730894722662914, |
|
"eval_loss": 0.08236456662416458, |
|
"eval_precision": 0.972264869499332, |
|
"eval_recall": 0.9739154749540625, |
|
"eval_runtime": 3.2614, |
|
"eval_samples_per_second": 507.147, |
|
"eval_steps_per_second": 15.944, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9756908303394554, |
|
"eval_f1": 0.9728064139767254, |
|
"eval_loss": 0.08048809319734573, |
|
"eval_precision": 0.9714018282118176, |
|
"eval_recall": 0.9742150675081889, |
|
"eval_runtime": 3.0696, |
|
"eval_samples_per_second": 538.832, |
|
"eval_steps_per_second": 16.94, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.607700892857143e-05, |
|
"loss": 0.1891, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.976681328274519, |
|
"eval_f1": 0.9741610570929944, |
|
"eval_loss": 0.08373625576496124, |
|
"eval_precision": 0.9728125560180851, |
|
"eval_recall": 0.9755133019094032, |
|
"eval_runtime": 3.2499, |
|
"eval_samples_per_second": 508.931, |
|
"eval_steps_per_second": 16.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_accuracy": 0.9778229191149314, |
|
"eval_f1": 0.975405828768695, |
|
"eval_loss": 0.08112843334674835, |
|
"eval_precision": 0.9745210231463944, |
|
"eval_recall": 0.9762922425501318, |
|
"eval_runtime": 3.1199, |
|
"eval_samples_per_second": 530.139, |
|
"eval_steps_per_second": 16.667, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.9784944431387033, |
|
"eval_f1": 0.9759250024932683, |
|
"eval_loss": 0.07825577259063721, |
|
"eval_precision": 0.974622524999004, |
|
"eval_recall": 0.9772309658863945, |
|
"eval_runtime": 3.0821, |
|
"eval_samples_per_second": 536.646, |
|
"eval_steps_per_second": 16.872, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_accuracy": 0.9782594097303831, |
|
"eval_f1": 0.976126314948999, |
|
"eval_loss": 0.07251805067062378, |
|
"eval_precision": 0.9755615848062882, |
|
"eval_recall": 0.976691699288967, |
|
"eval_runtime": 3.0567, |
|
"eval_samples_per_second": 541.099, |
|
"eval_steps_per_second": 17.012, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.9793506362690125, |
|
"eval_f1": 0.9770881149585869, |
|
"eval_loss": 0.07318206131458282, |
|
"eval_precision": 0.976367117386622, |
|
"eval_recall": 0.9778101781577055, |
|
"eval_runtime": 3.123, |
|
"eval_samples_per_second": 529.611, |
|
"eval_steps_per_second": 16.65, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_accuracy": 0.9792331195648525, |
|
"eval_f1": 0.9769158636100637, |
|
"eval_loss": 0.07212899625301361, |
|
"eval_precision": 0.9758839239446725, |
|
"eval_recall": 0.9779499880162978, |
|
"eval_runtime": 3.1071, |
|
"eval_samples_per_second": 532.328, |
|
"eval_steps_per_second": 16.736, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.9795856696773327, |
|
"eval_f1": 0.9771997366898726, |
|
"eval_loss": 0.07169780880212784, |
|
"eval_precision": 0.9759732238913017, |
|
"eval_recall": 0.9784293361029001, |
|
"eval_runtime": 3.1916, |
|
"eval_samples_per_second": 518.234, |
|
"eval_steps_per_second": 16.293, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.9804082866064533, |
|
"eval_f1": 0.9784274595889044, |
|
"eval_loss": 0.06875239312648773, |
|
"eval_precision": 0.9776080714627532, |
|
"eval_recall": 0.9792482224175122, |
|
"eval_runtime": 3.2127, |
|
"eval_samples_per_second": 514.836, |
|
"eval_steps_per_second": 16.186, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.9791995433636639, |
|
"eval_f1": 0.977885114366692, |
|
"eval_loss": 0.07294180244207382, |
|
"eval_precision": 0.9772219562789214, |
|
"eval_recall": 0.9785491731245506, |
|
"eval_runtime": 3.0782, |
|
"eval_samples_per_second": 537.322, |
|
"eval_steps_per_second": 16.893, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 0.9806097438135849, |
|
"eval_f1": 0.9782331112086509, |
|
"eval_loss": 0.0767468586564064, |
|
"eval_precision": 0.977180325255102, |
|
"eval_recall": 0.9792881680913957, |
|
"eval_runtime": 3.1248, |
|
"eval_samples_per_second": 529.314, |
|
"eval_steps_per_second": 16.641, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.2126116071428573e-05, |
|
"loss": 0.049, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_accuracy": 0.9794177886713897, |
|
"eval_f1": 0.9773666247530087, |
|
"eval_loss": 0.07647667080163956, |
|
"eval_precision": 0.9766843511907136, |
|
"eval_recall": 0.9780498522010066, |
|
"eval_runtime": 3.0978, |
|
"eval_samples_per_second": 533.936, |
|
"eval_steps_per_second": 16.786, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_accuracy": 0.97977033878387, |
|
"eval_f1": 0.9773353018634531, |
|
"eval_loss": 0.07652360200881958, |
|
"eval_precision": 0.9762834821428571, |
|
"eval_recall": 0.9783893904290165, |
|
"eval_runtime": 3.0494, |
|
"eval_samples_per_second": 542.399, |
|
"eval_steps_per_second": 17.052, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_accuracy": 0.9807272605177451, |
|
"eval_f1": 0.9790142798694754, |
|
"eval_loss": 0.07351543754339218, |
|
"eval_precision": 0.9782821131563728, |
|
"eval_recall": 0.9797475433410562, |
|
"eval_runtime": 2.1592, |
|
"eval_samples_per_second": 766.034, |
|
"eval_steps_per_second": 24.083, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.9812812678373569, |
|
"eval_f1": 0.9792086517548935, |
|
"eval_loss": 0.07407119125127792, |
|
"eval_precision": 0.9782522026870789, |
|
"eval_recall": 0.9801669729168331, |
|
"eval_runtime": 2.3144, |
|
"eval_samples_per_second": 714.646, |
|
"eval_steps_per_second": 22.468, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.9805761676123963, |
|
"eval_f1": 0.9790627120673825, |
|
"eval_loss": 0.07566250115633011, |
|
"eval_precision": 0.9783987553853518, |
|
"eval_recall": 0.9797275705041144, |
|
"eval_runtime": 2.157, |
|
"eval_samples_per_second": 766.796, |
|
"eval_steps_per_second": 24.107, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_accuracy": 0.9812141154349797, |
|
"eval_f1": 0.9793385543971667, |
|
"eval_loss": 0.0741068422794342, |
|
"eval_precision": 0.978372236729324, |
|
"eval_recall": 0.9803067827754254, |
|
"eval_runtime": 2.1422, |
|
"eval_samples_per_second": 772.1, |
|
"eval_steps_per_second": 24.274, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_accuracy": 0.9809287177248766, |
|
"eval_f1": 0.9787904786607873, |
|
"eval_loss": 0.07299875468015671, |
|
"eval_precision": 0.9777954953159258, |
|
"eval_recall": 0.9797874890149397, |
|
"eval_runtime": 2.1492, |
|
"eval_samples_per_second": 769.58, |
|
"eval_steps_per_second": 24.195, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_accuracy": 0.9813652083403284, |
|
"eval_f1": 0.9792876384316073, |
|
"eval_loss": 0.07347211241722107, |
|
"eval_precision": 0.9783700809377617, |
|
"eval_recall": 0.9802069185907166, |
|
"eval_runtime": 2.1559, |
|
"eval_samples_per_second": 767.204, |
|
"eval_steps_per_second": 24.12, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.9818856394587516, |
|
"eval_f1": 0.9799355463098764, |
|
"eval_loss": 0.07577352970838547, |
|
"eval_precision": 0.9790466317111585, |
|
"eval_recall": 0.9808260765359111, |
|
"eval_runtime": 2.1533, |
|
"eval_samples_per_second": 768.11, |
|
"eval_steps_per_second": 24.149, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"eval_accuracy": 0.98168418225162, |
|
"eval_f1": 0.9799245674602383, |
|
"eval_loss": 0.07578457146883011, |
|
"eval_precision": 0.9790844199864418, |
|
"eval_recall": 0.9807661580250859, |
|
"eval_runtime": 2.1906, |
|
"eval_samples_per_second": 755.058, |
|
"eval_steps_per_second": 23.738, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 8.175223214285714e-06, |
|
"loss": 0.0257, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_accuracy": 0.9813316321391398, |
|
"eval_f1": 0.9794737209743247, |
|
"eval_loss": 0.07658305764198303, |
|
"eval_precision": 0.9787217070495563, |
|
"eval_recall": 0.9802268914276584, |
|
"eval_runtime": 2.1425, |
|
"eval_samples_per_second": 771.995, |
|
"eval_steps_per_second": 24.271, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_accuracy": 0.9814659369438942, |
|
"eval_f1": 0.9793369052251389, |
|
"eval_loss": 0.07673583924770355, |
|
"eval_precision": 0.9784485336629518, |
|
"eval_recall": 0.9802268914276584, |
|
"eval_runtime": 2.137, |
|
"eval_samples_per_second": 773.994, |
|
"eval_steps_per_second": 24.334, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_accuracy": 0.9814323607427056, |
|
"eval_f1": 0.9795576307204215, |
|
"eval_loss": 0.07736379653215408, |
|
"eval_precision": 0.9786105850692715, |
|
"eval_recall": 0.980506511144843, |
|
"eval_runtime": 2.2904, |
|
"eval_samples_per_second": 722.158, |
|
"eval_steps_per_second": 22.704, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_accuracy": 0.98168418225162, |
|
"eval_f1": 0.9797461787125354, |
|
"eval_loss": 0.07633356750011444, |
|
"eval_precision": 0.978847687400319, |
|
"eval_recall": 0.9806463210034353, |
|
"eval_runtime": 2.1499, |
|
"eval_samples_per_second": 769.338, |
|
"eval_steps_per_second": 24.187, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_accuracy": 0.9814995131450828, |
|
"eval_f1": 0.9798431356896241, |
|
"eval_loss": 0.07660035789012909, |
|
"eval_precision": 0.9790810832369481, |
|
"eval_recall": 0.9806063753295519, |
|
"eval_runtime": 2.1514, |
|
"eval_samples_per_second": 768.802, |
|
"eval_steps_per_second": 24.17, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_accuracy": 0.9819192156599402, |
|
"eval_f1": 0.9799660780205528, |
|
"eval_loss": 0.07586053013801575, |
|
"eval_precision": 0.979047884853076, |
|
"eval_recall": 0.9808859950467365, |
|
"eval_runtime": 2.182, |
|
"eval_samples_per_second": 758.038, |
|
"eval_steps_per_second": 23.832, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 1792, |
|
"total_flos": 2508921974905632.0, |
|
"train_loss": 0.07637897772448403, |
|
"train_runtime": 8322.2563, |
|
"train_samples_per_second": 6.876, |
|
"train_steps_per_second": 0.215 |
|
} |
|
], |
|
"max_steps": 1792, |
|
"num_train_epochs": 4, |
|
"total_flos": 2508921974905632.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|