Chenghao-Qiu's picture
Upload folder using huggingface_hub
4f2b1a6 verified
{
"best_metric": 0.06995197385549545,
"best_model_checkpoint": "output_pipe/prom_300_notata/origin/checkpoint-800",
"epoch": 4.0,
"eval_steps": 200,
"global_step": 2656,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15060240963855423,
"grad_norm": 1.585874080657959,
"learning_rate": 2.9424405218726017e-05,
"loss": 0.3415,
"step": 100
},
{
"epoch": 0.30120481927710846,
"grad_norm": 3.314279317855835,
"learning_rate": 2.827321565617805e-05,
"loss": 0.1374,
"step": 200
},
{
"epoch": 0.30120481927710846,
"eval_accuracy": 0.9636329376295459,
"eval_f1": 0.9635801730549014,
"eval_loss": 0.11874407529830933,
"eval_matthews_correlation": 0.9286443161593607,
"eval_precision": 0.9653044555881855,
"eval_recall": 0.9633419342852427,
"eval_runtime": 2.0547,
"eval_samples_per_second": 2582.881,
"eval_steps_per_second": 40.396,
"step": 200
},
{
"epoch": 0.45180722891566266,
"grad_norm": 1.1476503610610962,
"learning_rate": 2.7122026093630083e-05,
"loss": 0.1131,
"step": 300
},
{
"epoch": 0.6024096385542169,
"grad_norm": 1.4456894397735596,
"learning_rate": 2.597083653108212e-05,
"loss": 0.0844,
"step": 400
},
{
"epoch": 0.6024096385542169,
"eval_accuracy": 0.9715470133785566,
"eval_f1": 0.971545978842977,
"eval_loss": 0.07299650460481644,
"eval_matthews_correlation": 0.9431284574559641,
"eval_precision": 0.9715395817834918,
"eval_recall": 0.9715888769607457,
"eval_runtime": 2.0485,
"eval_samples_per_second": 2590.731,
"eval_steps_per_second": 40.518,
"step": 400
},
{
"epoch": 0.7530120481927711,
"grad_norm": 0.18486830592155457,
"learning_rate": 2.481964696853415e-05,
"loss": 0.0896,
"step": 500
},
{
"epoch": 0.9036144578313253,
"grad_norm": 0.22208698093891144,
"learning_rate": 2.3668457405986186e-05,
"loss": 0.0785,
"step": 600
},
{
"epoch": 0.9036144578313253,
"eval_accuracy": 0.9739966082532504,
"eval_f1": 0.9739843921840996,
"eval_loss": 0.08863991498947144,
"eval_matthews_correlation": 0.948218039694491,
"eval_precision": 0.974341521021252,
"eval_recall": 0.9738766326350019,
"eval_runtime": 2.0499,
"eval_samples_per_second": 2588.955,
"eval_steps_per_second": 40.491,
"step": 600
},
{
"epoch": 1.0542168674698795,
"grad_norm": 2.187502384185791,
"learning_rate": 2.251726784343822e-05,
"loss": 0.0722,
"step": 700
},
{
"epoch": 1.2048192771084336,
"grad_norm": 2.979804515838623,
"learning_rate": 2.1366078280890252e-05,
"loss": 0.0416,
"step": 800
},
{
"epoch": 1.2048192771084336,
"eval_accuracy": 0.9781420765027322,
"eval_f1": 0.9781331874627035,
"eval_loss": 0.06995197385549545,
"eval_matthews_correlation": 0.9564543617799154,
"eval_precision": 0.9784162660123144,
"eval_recall": 0.9780381704999754,
"eval_runtime": 2.0486,
"eval_samples_per_second": 2590.596,
"eval_steps_per_second": 40.516,
"step": 800
},
{
"epoch": 1.355421686746988,
"grad_norm": 3.7425546646118164,
"learning_rate": 2.021488871834229e-05,
"loss": 0.0336,
"step": 900
},
{
"epoch": 1.5060240963855422,
"grad_norm": 0.8297073245048523,
"learning_rate": 1.9063699155794322e-05,
"loss": 0.035,
"step": 1000
},
{
"epoch": 1.5060240963855422,
"eval_accuracy": 0.97719992462785,
"eval_f1": 0.9771963977277897,
"eval_loss": 0.08680247515439987,
"eval_matthews_correlation": 0.954401194573866,
"eval_precision": 0.9772252037763616,
"eval_recall": 0.9771759920662539,
"eval_runtime": 2.0487,
"eval_samples_per_second": 2590.457,
"eval_steps_per_second": 40.514,
"step": 1000
},
{
"epoch": 1.6566265060240963,
"grad_norm": 0.46684613823890686,
"learning_rate": 1.7912509593246355e-05,
"loss": 0.0367,
"step": 1100
},
{
"epoch": 1.8072289156626506,
"grad_norm": 1.1904711723327637,
"learning_rate": 1.6761320030698388e-05,
"loss": 0.0452,
"step": 1200
},
{
"epoch": 1.8072289156626506,
"eval_accuracy": 0.979272658752591,
"eval_f1": 0.9792632572070812,
"eval_loss": 0.07334966957569122,
"eval_matthews_correlation": 0.9587606831731992,
"eval_precision": 0.9796036311772153,
"eval_recall": 0.9791571559531644,
"eval_runtime": 2.048,
"eval_samples_per_second": 2591.3,
"eval_steps_per_second": 40.527,
"step": 1200
},
{
"epoch": 1.9578313253012047,
"grad_norm": 2.6981256008148193,
"learning_rate": 1.5610130468150424e-05,
"loss": 0.0308,
"step": 1300
},
{
"epoch": 2.108433734939759,
"grad_norm": 0.08918892592191696,
"learning_rate": 1.4458940905602456e-05,
"loss": 0.0174,
"step": 1400
},
{
"epoch": 2.108433734939759,
"eval_accuracy": 0.978895798002638,
"eval_f1": 0.9788908805296799,
"eval_loss": 0.08836409449577332,
"eval_matthews_correlation": 0.9578287746494739,
"eval_precision": 0.9789860333608025,
"eval_recall": 0.9788427520053816,
"eval_runtime": 2.0489,
"eval_samples_per_second": 2590.197,
"eval_steps_per_second": 40.51,
"step": 1400
},
{
"epoch": 2.2590361445783134,
"grad_norm": 0.012705490924417973,
"learning_rate": 1.3307751343054489e-05,
"loss": 0.0106,
"step": 1500
},
{
"epoch": 2.4096385542168672,
"grad_norm": 2.8612990379333496,
"learning_rate": 1.2156561780506524e-05,
"loss": 0.0073,
"step": 1600
},
{
"epoch": 2.4096385542168672,
"eval_accuracy": 0.9800263802524967,
"eval_f1": 0.9800205057769635,
"eval_loss": 0.09836893528699875,
"eval_matthews_correlation": 0.960131261055992,
"eval_precision": 0.9801773617563092,
"eval_recall": 0.9799539252981333,
"eval_runtime": 2.0486,
"eval_samples_per_second": 2590.534,
"eval_steps_per_second": 40.515,
"step": 1600
},
{
"epoch": 2.5602409638554215,
"grad_norm": 0.3261496126651764,
"learning_rate": 1.1005372217958557e-05,
"loss": 0.0107,
"step": 1700
},
{
"epoch": 2.710843373493976,
"grad_norm": 4.168089389801025,
"learning_rate": 9.854182655410591e-06,
"loss": 0.0058,
"step": 1800
},
{
"epoch": 2.710843373493976,
"eval_accuracy": 0.9787073676276615,
"eval_f1": 0.978705903879415,
"eval_loss": 0.10657216608524323,
"eval_matthews_correlation": 0.9574202184236169,
"eval_precision": 0.9786936551154061,
"eval_recall": 0.9787265638737861,
"eval_runtime": 2.0494,
"eval_samples_per_second": 2589.545,
"eval_steps_per_second": 40.5,
"step": 1800
},
{
"epoch": 2.86144578313253,
"grad_norm": 0.016729481518268585,
"learning_rate": 8.702993092862626e-06,
"loss": 0.0088,
"step": 1900
},
{
"epoch": 3.0120481927710845,
"grad_norm": 0.018642086535692215,
"learning_rate": 7.5518035303146585e-06,
"loss": 0.0061,
"step": 2000
},
{
"epoch": 3.0120481927710845,
"eval_accuracy": 0.9783305068777087,
"eval_f1": 0.9783205031761819,
"eval_loss": 0.10219753533601761,
"eval_matthews_correlation": 0.9568830909886441,
"eval_precision": 0.9786702504272362,
"eval_recall": 0.9782129498348509,
"eval_runtime": 2.0489,
"eval_samples_per_second": 2590.175,
"eval_steps_per_second": 40.51,
"step": 2000
},
{
"epoch": 3.1626506024096384,
"grad_norm": 0.004093084018677473,
"learning_rate": 6.4006139677666924e-06,
"loss": 0.001,
"step": 2100
},
{
"epoch": 3.3132530120481927,
"grad_norm": 0.012232447974383831,
"learning_rate": 5.249424405218726e-06,
"loss": 0.0024,
"step": 2200
},
{
"epoch": 3.3132530120481927,
"eval_accuracy": 0.978895798002638,
"eval_f1": 0.9788895910096218,
"eval_loss": 0.11258693039417267,
"eval_matthews_correlation": 0.957869327344797,
"eval_precision": 0.9790461316777106,
"eval_recall": 0.9788232216042883,
"eval_runtime": 2.0486,
"eval_samples_per_second": 2590.571,
"eval_steps_per_second": 40.516,
"step": 2200
},
{
"epoch": 3.463855421686747,
"grad_norm": 0.005885094869881868,
"learning_rate": 4.09823484267076e-06,
"loss": 0.0005,
"step": 2300
},
{
"epoch": 3.6144578313253013,
"grad_norm": 0.00728481262922287,
"learning_rate": 2.9470452801227938e-06,
"loss": 0.0023,
"step": 2400
},
{
"epoch": 3.6144578313253013,
"eval_accuracy": 0.9790842283776144,
"eval_f1": 0.9790764991424794,
"eval_loss": 0.11937826871871948,
"eval_matthews_correlation": 0.9583067060360234,
"eval_precision": 0.9793165728379114,
"eval_recall": 0.9789901887787265,
"eval_runtime": 2.048,
"eval_samples_per_second": 2591.335,
"eval_steps_per_second": 40.528,
"step": 2400
},
{
"epoch": 3.765060240963855,
"grad_norm": 0.00713815912604332,
"learning_rate": 1.7958557175748275e-06,
"loss": 0.0023,
"step": 2500
},
{
"epoch": 3.9156626506024095,
"grad_norm": 0.007936985231935978,
"learning_rate": 6.446661550268611e-07,
"loss": 0.0058,
"step": 2600
},
{
"epoch": 3.9156626506024095,
"eval_accuracy": 0.979272658752591,
"eval_f1": 0.9792660147189736,
"eval_loss": 0.11451391130685806,
"eval_matthews_correlation": 0.9586433697976791,
"eval_precision": 0.979451094051508,
"eval_recall": 0.9791923106751326,
"eval_runtime": 2.0485,
"eval_samples_per_second": 2590.613,
"eval_steps_per_second": 40.516,
"step": 2600
},
{
"epoch": 4.0,
"step": 2656,
"total_flos": 2.833046111484432e+16,
"train_loss": 0.04600991761446538,
"train_runtime": 331.0189,
"train_samples_per_second": 512.986,
"train_steps_per_second": 8.024
}
],
"logging_steps": 100,
"max_steps": 2656,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.833046111484432e+16,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}