{
  "best_metric": 0.06995197385549545,
  "best_model_checkpoint": "output_pipe/prom_300_notata/origin/checkpoint-800",
  "epoch": 4.0,
  "eval_steps": 200,
  "global_step": 2656,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15060240963855423,
      "grad_norm": 1.585874080657959,
      "learning_rate": 2.9424405218726017e-05,
      "loss": 0.3415,
      "step": 100
    },
    {
      "epoch": 0.30120481927710846,
      "grad_norm": 3.314279317855835,
      "learning_rate": 2.827321565617805e-05,
      "loss": 0.1374,
      "step": 200
    },
    {
      "epoch": 0.30120481927710846,
      "eval_accuracy": 0.9636329376295459,
      "eval_f1": 0.9635801730549014,
      "eval_loss": 0.11874407529830933,
      "eval_matthews_correlation": 0.9286443161593607,
      "eval_precision": 0.9653044555881855,
      "eval_recall": 0.9633419342852427,
      "eval_runtime": 2.0547,
      "eval_samples_per_second": 2582.881,
      "eval_steps_per_second": 40.396,
      "step": 200
    },
    {
      "epoch": 0.45180722891566266,
      "grad_norm": 1.1476503610610962,
      "learning_rate": 2.7122026093630083e-05,
      "loss": 0.1131,
      "step": 300
    },
    {
      "epoch": 0.6024096385542169,
      "grad_norm": 1.4456894397735596,
      "learning_rate": 2.597083653108212e-05,
      "loss": 0.0844,
      "step": 400
    },
    {
      "epoch": 0.6024096385542169,
      "eval_accuracy": 0.9715470133785566,
      "eval_f1": 0.971545978842977,
      "eval_loss": 0.07299650460481644,
      "eval_matthews_correlation": 0.9431284574559641,
      "eval_precision": 0.9715395817834918,
      "eval_recall": 0.9715888769607457,
      "eval_runtime": 2.0485,
      "eval_samples_per_second": 2590.731,
      "eval_steps_per_second": 40.518,
      "step": 400
    },
    {
      "epoch": 0.7530120481927711,
      "grad_norm": 0.18486830592155457,
      "learning_rate": 2.481964696853415e-05,
      "loss": 0.0896,
      "step": 500
    },
    {
      "epoch": 0.9036144578313253,
      "grad_norm": 0.22208698093891144,
      "learning_rate": 2.3668457405986186e-05,
      "loss": 0.0785,
      "step": 600
    },
    {
      "epoch": 0.9036144578313253,
      "eval_accuracy": 0.9739966082532504,
      "eval_f1": 0.9739843921840996,
      "eval_loss": 0.08863991498947144,
      "eval_matthews_correlation": 0.948218039694491,
      "eval_precision": 0.974341521021252,
      "eval_recall": 0.9738766326350019,
      "eval_runtime": 2.0499,
      "eval_samples_per_second": 2588.955,
      "eval_steps_per_second": 40.491,
      "step": 600
    },
    {
      "epoch": 1.0542168674698795,
      "grad_norm": 2.187502384185791,
      "learning_rate": 2.251726784343822e-05,
      "loss": 0.0722,
      "step": 700
    },
    {
      "epoch": 1.2048192771084336,
      "grad_norm": 2.979804515838623,
      "learning_rate": 2.1366078280890252e-05,
      "loss": 0.0416,
      "step": 800
    },
    {
      "epoch": 1.2048192771084336,
      "eval_accuracy": 0.9781420765027322,
      "eval_f1": 0.9781331874627035,
      "eval_loss": 0.06995197385549545,
      "eval_matthews_correlation": 0.9564543617799154,
      "eval_precision": 0.9784162660123144,
      "eval_recall": 0.9780381704999754,
      "eval_runtime": 2.0486,
      "eval_samples_per_second": 2590.596,
      "eval_steps_per_second": 40.516,
      "step": 800
    },
    {
      "epoch": 1.355421686746988,
      "grad_norm": 3.7425546646118164,
      "learning_rate": 2.021488871834229e-05,
      "loss": 0.0336,
      "step": 900
    },
    {
      "epoch": 1.5060240963855422,
      "grad_norm": 0.8297073245048523,
      "learning_rate": 1.9063699155794322e-05,
      "loss": 0.035,
      "step": 1000
    },
    {
      "epoch": 1.5060240963855422,
      "eval_accuracy": 0.97719992462785,
      "eval_f1": 0.9771963977277897,
      "eval_loss": 0.08680247515439987,
      "eval_matthews_correlation": 0.954401194573866,
      "eval_precision": 0.9772252037763616,
      "eval_recall": 0.9771759920662539,
      "eval_runtime": 2.0487,
      "eval_samples_per_second": 2590.457,
      "eval_steps_per_second": 40.514,
      "step": 1000
    },
    {
      "epoch": 1.6566265060240963,
      "grad_norm": 0.46684613823890686,
      "learning_rate": 1.7912509593246355e-05,
      "loss": 0.0367,
      "step": 1100
    },
    {
      "epoch": 1.8072289156626506,
      "grad_norm": 1.1904711723327637,
      "learning_rate": 1.6761320030698388e-05,
      "loss": 0.0452,
      "step": 1200
    },
    {
      "epoch": 1.8072289156626506,
      "eval_accuracy": 0.979272658752591,
      "eval_f1": 0.9792632572070812,
      "eval_loss": 0.07334966957569122,
      "eval_matthews_correlation": 0.9587606831731992,
      "eval_precision": 0.9796036311772153,
      "eval_recall": 0.9791571559531644,
      "eval_runtime": 2.048,
      "eval_samples_per_second": 2591.3,
      "eval_steps_per_second": 40.527,
      "step": 1200
    },
    {
      "epoch": 1.9578313253012047,
      "grad_norm": 2.6981256008148193,
      "learning_rate": 1.5610130468150424e-05,
      "loss": 0.0308,
      "step": 1300
    },
    {
      "epoch": 2.108433734939759,
      "grad_norm": 0.08918892592191696,
      "learning_rate": 1.4458940905602456e-05,
      "loss": 0.0174,
      "step": 1400
    },
    {
      "epoch": 2.108433734939759,
      "eval_accuracy": 0.978895798002638,
      "eval_f1": 0.9788908805296799,
      "eval_loss": 0.08836409449577332,
      "eval_matthews_correlation": 0.9578287746494739,
      "eval_precision": 0.9789860333608025,
      "eval_recall": 0.9788427520053816,
      "eval_runtime": 2.0489,
      "eval_samples_per_second": 2590.197,
      "eval_steps_per_second": 40.51,
      "step": 1400
    },
    {
      "epoch": 2.2590361445783134,
      "grad_norm": 0.012705490924417973,
      "learning_rate": 1.3307751343054489e-05,
      "loss": 0.0106,
      "step": 1500
    },
    {
      "epoch": 2.4096385542168672,
      "grad_norm": 2.8612990379333496,
      "learning_rate": 1.2156561780506524e-05,
      "loss": 0.0073,
      "step": 1600
    },
    {
      "epoch": 2.4096385542168672,
      "eval_accuracy": 0.9800263802524967,
      "eval_f1": 0.9800205057769635,
      "eval_loss": 0.09836893528699875,
      "eval_matthews_correlation": 0.960131261055992,
      "eval_precision": 0.9801773617563092,
      "eval_recall": 0.9799539252981333,
      "eval_runtime": 2.0486,
      "eval_samples_per_second": 2590.534,
      "eval_steps_per_second": 40.515,
      "step": 1600
    },
    {
      "epoch": 2.5602409638554215,
      "grad_norm": 0.3261496126651764,
      "learning_rate": 1.1005372217958557e-05,
      "loss": 0.0107,
      "step": 1700
    },
    {
      "epoch": 2.710843373493976,
      "grad_norm": 4.168089389801025,
      "learning_rate": 9.854182655410591e-06,
      "loss": 0.0058,
      "step": 1800
    },
    {
      "epoch": 2.710843373493976,
      "eval_accuracy": 0.9787073676276615,
      "eval_f1": 0.978705903879415,
      "eval_loss": 0.10657216608524323,
      "eval_matthews_correlation": 0.9574202184236169,
      "eval_precision": 0.9786936551154061,
      "eval_recall": 0.9787265638737861,
      "eval_runtime": 2.0494,
      "eval_samples_per_second": 2589.545,
      "eval_steps_per_second": 40.5,
      "step": 1800
    },
    {
      "epoch": 2.86144578313253,
      "grad_norm": 0.016729481518268585,
      "learning_rate": 8.702993092862626e-06,
      "loss": 0.0088,
      "step": 1900
    },
    {
      "epoch": 3.0120481927710845,
      "grad_norm": 0.018642086535692215,
      "learning_rate": 7.5518035303146585e-06,
      "loss": 0.0061,
      "step": 2000
    },
    {
      "epoch": 3.0120481927710845,
      "eval_accuracy": 0.9783305068777087,
      "eval_f1": 0.9783205031761819,
      "eval_loss": 0.10219753533601761,
      "eval_matthews_correlation": 0.9568830909886441,
      "eval_precision": 0.9786702504272362,
      "eval_recall": 0.9782129498348509,
      "eval_runtime": 2.0489,
      "eval_samples_per_second": 2590.175,
      "eval_steps_per_second": 40.51,
      "step": 2000
    },
    {
      "epoch": 3.1626506024096384,
      "grad_norm": 0.004093084018677473,
      "learning_rate": 6.4006139677666924e-06,
      "loss": 0.001,
      "step": 2100
    },
    {
      "epoch": 3.3132530120481927,
      "grad_norm": 0.012232447974383831,
      "learning_rate": 5.249424405218726e-06,
      "loss": 0.0024,
      "step": 2200
    },
    {
      "epoch": 3.3132530120481927,
      "eval_accuracy": 0.978895798002638,
      "eval_f1": 0.9788895910096218,
      "eval_loss": 0.11258693039417267,
      "eval_matthews_correlation": 0.957869327344797,
      "eval_precision": 0.9790461316777106,
      "eval_recall": 0.9788232216042883,
      "eval_runtime": 2.0486,
      "eval_samples_per_second": 2590.571,
      "eval_steps_per_second": 40.516,
      "step": 2200
    },
    {
      "epoch": 3.463855421686747,
      "grad_norm": 0.005885094869881868,
      "learning_rate": 4.09823484267076e-06,
      "loss": 0.0005,
      "step": 2300
    },
    {
      "epoch": 3.6144578313253013,
      "grad_norm": 0.00728481262922287,
      "learning_rate": 2.9470452801227938e-06,
      "loss": 0.0023,
      "step": 2400
    },
    {
      "epoch": 3.6144578313253013,
      "eval_accuracy": 0.9790842283776144,
      "eval_f1": 0.9790764991424794,
      "eval_loss": 0.11937826871871948,
      "eval_matthews_correlation": 0.9583067060360234,
      "eval_precision": 0.9793165728379114,
      "eval_recall": 0.9789901887787265,
      "eval_runtime": 2.048,
      "eval_samples_per_second": 2591.335,
      "eval_steps_per_second": 40.528,
      "step": 2400
    },
    {
      "epoch": 3.765060240963855,
      "grad_norm": 0.00713815912604332,
      "learning_rate": 1.7958557175748275e-06,
      "loss": 0.0023,
      "step": 2500
    },
    {
      "epoch": 3.9156626506024095,
      "grad_norm": 0.007936985231935978,
      "learning_rate": 6.446661550268611e-07,
      "loss": 0.0058,
      "step": 2600
    },
    {
      "epoch": 3.9156626506024095,
      "eval_accuracy": 0.979272658752591,
      "eval_f1": 0.9792660147189736,
      "eval_loss": 0.11451391130685806,
      "eval_matthews_correlation": 0.9586433697976791,
      "eval_precision": 0.979451094051508,
      "eval_recall": 0.9791923106751326,
      "eval_runtime": 2.0485,
      "eval_samples_per_second": 2590.613,
      "eval_steps_per_second": 40.516,
      "step": 2600
    },
    {
      "epoch": 4.0,
      "step": 2656,
      "total_flos": 2.833046111484432e+16,
      "train_loss": 0.04600991761446538,
      "train_runtime": 331.0189,
      "train_samples_per_second": 512.986,
      "train_steps_per_second": 8.024
    }
  ],
  "logging_steps": 100,
  "max_steps": 2656,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.833046111484432e+16,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}