{ "best_metric": 0.31758370995521545, "best_model_checkpoint": "output_pipe/prom_core_notata/origin/checkpoint-600", "epoch": 4.0, "eval_steps": 200, "global_step": 2656, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15060240963855423, "grad_norm": 2.6122450828552246, "learning_rate": 2.9424405218726017e-05, "loss": 0.492, "step": 100 }, { "epoch": 0.30120481927710846, "grad_norm": 3.331883668899536, "learning_rate": 2.827321565617805e-05, "loss": 0.3811, "step": 200 }, { "epoch": 0.30120481927710846, "eval_accuracy": 0.844168079894479, "eval_f1": 0.8438998231922501, "eval_loss": 0.3547656834125519, "eval_matthews_correlation": 0.6941732916502827, "eval_precision": 0.8488631662773443, "eval_recall": 0.8453191719712101, "eval_runtime": 1.2495, "eval_samples_per_second": 4247.344, "eval_steps_per_second": 66.427, "step": 200 }, { "epoch": 0.45180722891566266, "grad_norm": 1.565757155418396, "learning_rate": 2.7122026093630083e-05, "loss": 0.3604, "step": 300 }, { "epoch": 0.6024096385542169, "grad_norm": 2.0966413021087646, "learning_rate": 2.597083653108212e-05, "loss": 0.3337, "step": 400 }, { "epoch": 0.6024096385542169, "eval_accuracy": 0.8618805351422649, "eval_f1": 0.8613509125193604, "eval_loss": 0.32256171107292175, "eval_matthews_correlation": 0.7257705478226737, "eval_precision": 0.8647857006053843, "eval_recall": 0.8609947478839368, "eval_runtime": 1.2685, "eval_samples_per_second": 4183.807, "eval_steps_per_second": 65.434, "step": 400 }, { "epoch": 0.7530120481927711, "grad_norm": 1.1383172273635864, "learning_rate": 2.481964696853415e-05, "loss": 0.3397, "step": 500 }, { "epoch": 0.9036144578313253, "grad_norm": 3.0221428871154785, "learning_rate": 2.3668457405986186e-05, "loss": 0.3159, "step": 600 }, { "epoch": 0.9036144578313253, "eval_accuracy": 0.866968155266629, "eval_f1": 0.8669429793472192, "eval_loss": 0.31758370995521545, "eval_matthews_correlation": 0.7358394132396464, "eval_precision": 0.8682598011305405, "eval_recall": 0.8675799261924673, "eval_runtime": 1.2509, "eval_samples_per_second": 4242.581, "eval_steps_per_second": 66.353, "step": 600 }, { "epoch": 1.0542168674698795, "grad_norm": 1.109787106513977, "learning_rate": 2.251726784343822e-05, "loss": 0.2776, "step": 700 }, { "epoch": 1.2048192771084336, "grad_norm": 3.197042465209961, "learning_rate": 2.1366078280890252e-05, "loss": 0.2057, "step": 800 }, { "epoch": 1.2048192771084336, "eval_accuracy": 0.866591294516676, "eval_f1": 0.8659984222336663, "eval_loss": 0.3502357602119446, "eval_matthews_correlation": 0.7358186808574204, "eval_precision": 0.8702189774301005, "eval_recall": 0.8656141122593988, "eval_runtime": 1.2536, "eval_samples_per_second": 4233.519, "eval_steps_per_second": 66.211, "step": 800 }, { "epoch": 1.355421686746988, "grad_norm": 2.543020486831665, "learning_rate": 2.021488871834229e-05, "loss": 0.2111, "step": 900 }, { "epoch": 1.5060240963855422, "grad_norm": 2.1183950901031494, "learning_rate": 1.9063699155794322e-05, "loss": 0.201, "step": 1000 }, { "epoch": 1.5060240963855422, "eval_accuracy": 0.8677218767665348, "eval_f1": 0.8677173631147045, "eval_loss": 0.33947083353996277, "eval_matthews_correlation": 0.7357722352510246, "eval_precision": 0.8678261795869671, "eval_recall": 0.8679460654310853, "eval_runtime": 1.2471, "eval_samples_per_second": 4255.356, "eval_steps_per_second": 66.553, "step": 1000 }, { "epoch": 1.6566265060240963, "grad_norm": 1.9308195114135742, "learning_rate": 1.7912509593246355e-05, "loss": 0.1937, "step": 1100 }, { "epoch": 1.8072289156626506, "grad_norm": 3.0469839572906494, "learning_rate": 1.6761320030698388e-05, "loss": 0.2008, "step": 1200 }, { "epoch": 1.8072289156626506, "eval_accuracy": 0.8679103071415112, "eval_f1": 0.867875610956949, "eval_loss": 0.3506290316581726, "eval_matthews_correlation": 0.7357734812824095, "eval_precision": 0.8678440441253761, "eval_recall": 0.8679294421129176, "eval_runtime": 1.2694, "eval_samples_per_second": 4180.735, "eval_steps_per_second": 65.386, "step": 1200 }, { "epoch": 1.9578313253012047, "grad_norm": 2.669092893600464, "learning_rate": 1.5610130468150424e-05, "loss": 0.2032, "step": 1300 }, { "epoch": 2.108433734939759, "grad_norm": 1.018779993057251, "learning_rate": 1.4458940905602456e-05, "loss": 0.0857, "step": 1400 }, { "epoch": 2.108433734939759, "eval_accuracy": 0.866968155266629, "eval_f1": 0.8669120125170288, "eval_loss": 0.5369625091552734, "eval_matthews_correlation": 0.7338250107809877, "eval_precision": 0.8669238704323894, "eval_recall": 0.8669011407006174, "eval_runtime": 1.25, "eval_samples_per_second": 4245.587, "eval_steps_per_second": 66.4, "step": 1400 }, { "epoch": 2.2590361445783134, "grad_norm": 0.7886415719985962, "learning_rate": 1.3307751343054489e-05, "loss": 0.0448, "step": 1500 }, { "epoch": 2.4096385542168672, "grad_norm": 1.834030032157898, "learning_rate": 1.2156561780506524e-05, "loss": 0.0436, "step": 1600 }, { "epoch": 2.4096385542168672, "eval_accuracy": 0.8639532692670059, "eval_f1": 0.8635816191675164, "eval_loss": 0.5898475646972656, "eval_matthews_correlation": 0.7289885454866504, "eval_precision": 0.8657342466278587, "eval_recall": 0.8632585028272428, "eval_runtime": 1.2478, "eval_samples_per_second": 4253.059, "eval_steps_per_second": 66.517, "step": 1600 }, { "epoch": 2.5602409638554215, "grad_norm": 4.06243896484375, "learning_rate": 1.1005372217958557e-05, "loss": 0.0421, "step": 1700 }, { "epoch": 2.710843373493976, "grad_norm": 1.1769578456878662, "learning_rate": 9.854182655410591e-06, "loss": 0.039, "step": 1800 }, { "epoch": 2.710843373493976, "eval_accuracy": 0.8662144337667232, "eval_f1": 0.8662079304389323, "eval_loss": 0.5943960547447205, "eval_matthews_correlation": 0.7327003607551781, "eval_precision": 0.8662847178923143, "eval_recall": 0.8664156545623336, "eval_runtime": 1.2548, "eval_samples_per_second": 4229.392, "eval_steps_per_second": 66.147, "step": 1800 }, { "epoch": 2.86144578313253, "grad_norm": 2.9796667098999023, "learning_rate": 8.702993092862626e-06, "loss": 0.0431, "step": 1900 }, { "epoch": 3.0120481927710845, "grad_norm": 0.06011148542165756, "learning_rate": 7.5518035303146585e-06, "loss": 0.0404, "step": 2000 }, { "epoch": 3.0120481927710845, "eval_accuracy": 0.861503674392312, "eval_f1": 0.8614255609018682, "eval_loss": 0.5846816897392273, "eval_matthews_correlation": 0.7228781268765005, "eval_precision": 0.8615070227953028, "eval_recall": 0.8613711168568119, "eval_runtime": 1.2512, "eval_samples_per_second": 4241.427, "eval_steps_per_second": 66.335, "step": 2000 }, { "epoch": 3.1626506024096384, "grad_norm": 2.1890785694122314, "learning_rate": 6.412125863392172e-06, "loss": 0.0103, "step": 2100 }, { "epoch": 3.3132530120481927, "grad_norm": 0.45505666732788086, "learning_rate": 5.260936300844206e-06, "loss": 0.0131, "step": 2200 }, { "epoch": 3.3132530120481927, "eval_accuracy": 0.8599962313925005, "eval_f1": 0.8599338477012316, "eval_loss": 0.6354616284370422, "eval_matthews_correlation": 0.719870690073612, "eval_precision": 0.8599553933898891, "eval_recall": 0.8599152978003515, "eval_runtime": 1.255, "eval_samples_per_second": 4228.711, "eval_steps_per_second": 66.136, "step": 2200 }, { "epoch": 3.463855421686747, "grad_norm": 0.060754936188459396, "learning_rate": 4.109746738296239e-06, "loss": 0.01, "step": 2300 }, { "epoch": 3.6144578313253013, "grad_norm": 1.6662099361419678, "learning_rate": 2.9585571757482733e-06, "loss": 0.0096, "step": 2400 }, { "epoch": 3.6144578313253013, "eval_accuracy": 0.8605615225174298, "eval_f1": 0.8604790858249838, "eval_loss": 0.6781471371650696, "eval_matthews_correlation": 0.7209934067313212, "eval_precision": 0.8605760169206037, "eval_recall": 0.8604174072568029, "eval_runtime": 1.2679, "eval_samples_per_second": 4185.581, "eval_steps_per_second": 65.461, "step": 2400 }, { "epoch": 3.765060240963855, "grad_norm": 0.031053327023983, "learning_rate": 1.807367613200307e-06, "loss": 0.0097, "step": 2500 }, { "epoch": 3.9156626506024095, "grad_norm": 1.6371121406555176, "learning_rate": 6.561780506523408e-07, "loss": 0.0075, "step": 2600 }, { "epoch": 3.9156626506024095, "eval_accuracy": 0.8620689655172413, "eval_f1": 0.8620189892409078, "eval_loss": 0.7003814578056335, "eval_matthews_correlation": 0.7240389585546707, "eval_precision": 0.8620090897337722, "eval_recall": 0.862029869119075, "eval_runtime": 1.2529, "eval_samples_per_second": 4235.844, "eval_steps_per_second": 66.247, "step": 2600 }, { "epoch": 4.0, "step": 2656, "total_flos": 8449435771093920.0, "train_loss": 0.15504802793758102, "train_runtime": 245.9871, "train_samples_per_second": 690.313, "train_steps_per_second": 10.797 } ], "logging_steps": 100, "max_steps": 2656, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8449435771093920.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }