{ "best_metric": 0.18491357564926147, "best_model_checkpoint": "output_pipe/prom_300_all/origin/checkpoint-1400", "epoch": 4.0, "eval_steps": 200, "global_step": 2960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13513513513513514, "grad_norm": 10.229140281677246, "learning_rate": 2.951546391752577e-05, "loss": 0.3957, "step": 100 }, { "epoch": 0.2702702702702703, "grad_norm": 4.366018772125244, "learning_rate": 2.8484536082474226e-05, "loss": 0.3029, "step": 200 }, { "epoch": 0.2702702702702703, "eval_accuracy": 0.8190878378378378, "eval_f1": 0.8137850769429716, "eval_loss": 0.37689200043678284, "eval_matthews_correlation": 0.675083279534266, "eval_precision": 0.8579374628996808, "eval_recall": 0.8183080017768842, "eval_runtime": 1.6963, "eval_samples_per_second": 3490.009, "eval_steps_per_second": 54.826, "step": 200 }, { "epoch": 0.40540540540540543, "grad_norm": 12.47264575958252, "learning_rate": 2.745360824742268e-05, "loss": 0.2656, "step": 300 }, { "epoch": 0.5405405405405406, "grad_norm": 8.677423477172852, "learning_rate": 2.6422680412371135e-05, "loss": 0.2217, "step": 400 }, { "epoch": 0.5405405405405406, "eval_accuracy": 0.9070945945945946, "eval_f1": 0.9068358241206858, "eval_loss": 0.23628243803977966, "eval_matthews_correlation": 0.8197057914004126, "eval_precision": 0.9123568337823056, "eval_recall": 0.9073641621822256, "eval_runtime": 1.6952, "eval_samples_per_second": 3492.293, "eval_steps_per_second": 54.862, "step": 400 }, { "epoch": 0.6756756756756757, "grad_norm": 10.468666076660156, "learning_rate": 2.5391752577319586e-05, "loss": 0.2133, "step": 500 }, { "epoch": 0.8108108108108109, "grad_norm": 6.7936506271362305, "learning_rate": 2.436082474226804e-05, "loss": 0.2092, "step": 600 }, { "epoch": 0.8108108108108109, "eval_accuracy": 0.9152027027027027, "eval_f1": 0.9149447781009592, "eval_loss": 0.2045987993478775, "eval_matthews_correlation": 0.8364946316412692, "eval_precision": 0.921029377746935, "eval_recall": 0.9154836370974333, "eval_runtime": 1.7024, "eval_samples_per_second": 3477.472, "eval_steps_per_second": 54.629, "step": 600 }, { "epoch": 0.9459459459459459, "grad_norm": 7.481766700744629, "learning_rate": 2.3329896907216496e-05, "loss": 0.2038, "step": 700 }, { "epoch": 1.0810810810810811, "grad_norm": 6.448531627655029, "learning_rate": 2.229896907216495e-05, "loss": 0.1432, "step": 800 }, { "epoch": 1.0810810810810811, "eval_accuracy": 0.93125, "eval_f1": 0.9312120773759067, "eval_loss": 0.20802178978919983, "eval_matthews_correlation": 0.8630798400949948, "eval_precision": 0.9319203976686419, "eval_recall": 0.9311597775881583, "eval_runtime": 1.7038, "eval_samples_per_second": 3474.605, "eval_steps_per_second": 54.584, "step": 800 }, { "epoch": 1.2162162162162162, "grad_norm": 5.360595703125, "learning_rate": 2.1268041237113405e-05, "loss": 0.1225, "step": 900 }, { "epoch": 1.3513513513513513, "grad_norm": 12.399473190307617, "learning_rate": 2.0237113402061856e-05, "loss": 0.119, "step": 1000 }, { "epoch": 1.3513513513513513, "eval_accuracy": 0.9273648648648649, "eval_f1": 0.9273253739163184, "eval_loss": 0.1945222169160843, "eval_matthews_correlation": 0.8561116827116911, "eval_precision": 0.9286165491178344, "eval_recall": 0.9274958671007523, "eval_runtime": 1.7116, "eval_samples_per_second": 3458.664, "eval_steps_per_second": 54.334, "step": 1000 }, { "epoch": 1.4864864864864864, "grad_norm": 4.339570045471191, "learning_rate": 1.9206185567010307e-05, "loss": 0.1273, "step": 1100 }, { "epoch": 1.6216216216216215, "grad_norm": 14.022846221923828, "learning_rate": 1.8175257731958762e-05, "loss": 0.1153, "step": 1200 }, { "epoch": 1.6216216216216215, "eval_accuracy": 0.9302364864864865, "eval_f1": 0.9302364685710435, "eval_loss": 0.18970273435115814, "eval_matthews_correlation": 0.8605061164956768, "eval_precision": 0.9302512167180906, "eval_recall": 0.9302548997854683, "eval_runtime": 1.7115, "eval_samples_per_second": 3458.921, "eval_steps_per_second": 54.338, "step": 1200 }, { "epoch": 1.7567567567567568, "grad_norm": 4.394283771514893, "learning_rate": 1.7144329896907217e-05, "loss": 0.1036, "step": 1300 }, { "epoch": 1.8918918918918919, "grad_norm": 5.471324920654297, "learning_rate": 1.611340206185567e-05, "loss": 0.1189, "step": 1400 }, { "epoch": 1.8918918918918919, "eval_accuracy": 0.9305743243243243, "eval_f1": 0.9305326495433668, "eval_loss": 0.18491357564926147, "eval_matthews_correlation": 0.8617933500441142, "eval_precision": 0.9313144616824476, "eval_recall": 0.9304792930448134, "eval_runtime": 1.7174, "eval_samples_per_second": 3447.102, "eval_steps_per_second": 54.152, "step": 1400 }, { "epoch": 2.027027027027027, "grad_norm": 5.195973873138428, "learning_rate": 1.5082474226804124e-05, "loss": 0.0993, "step": 1500 }, { "epoch": 2.1621621621621623, "grad_norm": 3.4343478679656982, "learning_rate": 1.4051546391752577e-05, "loss": 0.0416, "step": 1600 }, { "epoch": 2.1621621621621623, "eval_accuracy": 0.9320945945945946, "eval_f1": 0.9320657433152479, "eval_loss": 0.2865821123123169, "eval_matthews_correlation": 0.8646019805851967, "eval_precision": 0.9325841926158089, "eval_recall": 0.9320179733750436, "eval_runtime": 1.7142, "eval_samples_per_second": 3453.6, "eval_steps_per_second": 54.254, "step": 1600 }, { "epoch": 2.2972972972972974, "grad_norm": 1.7518272399902344, "learning_rate": 1.3020618556701032e-05, "loss": 0.0321, "step": 1700 }, { "epoch": 2.4324324324324325, "grad_norm": 0.545198380947113, "learning_rate": 1.1989690721649485e-05, "loss": 0.0472, "step": 1800 }, { "epoch": 2.4324324324324325, "eval_accuracy": 0.9346283783783784, "eval_f1": 0.9346277050025539, "eval_loss": 0.2627970576286316, "eval_matthews_correlation": 0.8693732402984062, "eval_precision": 0.9347087744082508, "eval_recall": 0.9346644670192129, "eval_runtime": 1.7138, "eval_samples_per_second": 3454.269, "eval_steps_per_second": 54.265, "step": 1800 }, { "epoch": 2.5675675675675675, "grad_norm": 8.60261344909668, "learning_rate": 1.0958762886597938e-05, "loss": 0.0463, "step": 1900 }, { "epoch": 2.7027027027027026, "grad_norm": 5.734014511108398, "learning_rate": 9.927835051546392e-06, "loss": 0.0426, "step": 2000 }, { "epoch": 2.7027027027027026, "eval_accuracy": 0.9390202702702702, "eval_f1": 0.9390032120412144, "eval_loss": 0.24185040593147278, "eval_matthews_correlation": 0.8782767377163032, "eval_precision": 0.9393146284000857, "eval_recall": 0.9389621800341589, "eval_runtime": 1.7176, "eval_samples_per_second": 3446.664, "eval_steps_per_second": 54.145, "step": 2000 }, { "epoch": 2.8378378378378377, "grad_norm": 16.915462493896484, "learning_rate": 8.896907216494845e-06, "loss": 0.0404, "step": 2100 }, { "epoch": 2.972972972972973, "grad_norm": 11.942590713500977, "learning_rate": 7.8659793814433e-06, "loss": 0.0449, "step": 2200 }, { "epoch": 2.972972972972973, "eval_accuracy": 0.9346283783783784, "eval_f1": 0.9346085835481779, "eval_loss": 0.26332417130470276, "eval_matthews_correlation": 0.8695174028730603, "eval_precision": 0.9349504933868377, "eval_recall": 0.9345669940571169, "eval_runtime": 1.7174, "eval_samples_per_second": 3447.038, "eval_steps_per_second": 54.151, "step": 2200 }, { "epoch": 3.108108108108108, "grad_norm": 0.1922147125005722, "learning_rate": 6.835051546391753e-06, "loss": 0.0183, "step": 2300 }, { "epoch": 3.2432432432432434, "grad_norm": 0.016464663669466972, "learning_rate": 5.804123711340207e-06, "loss": 0.0151, "step": 2400 }, { "epoch": 3.2432432432432434, "eval_accuracy": 0.935304054054054, "eval_f1": 0.9352742660769024, "eval_loss": 0.3918153643608093, "eval_matthews_correlation": 0.8710722575664533, "eval_precision": 0.9358489722798395, "eval_recall": 0.9352235098392906, "eval_runtime": 1.7154, "eval_samples_per_second": 3451.033, "eval_steps_per_second": 54.214, "step": 2400 }, { "epoch": 3.3783783783783785, "grad_norm": 0.8072592616081238, "learning_rate": 4.77319587628866e-06, "loss": 0.0086, "step": 2500 }, { "epoch": 3.5135135135135136, "grad_norm": 11.552366256713867, "learning_rate": 3.7422680412371135e-06, "loss": 0.013, "step": 2600 }, { "epoch": 3.5135135135135136, "eval_accuracy": 0.9363175675675676, "eval_f1": 0.936317042424479, "eval_loss": 0.35771170258522034, "eval_matthews_correlation": 0.872640550366574, "eval_precision": 0.9363156189326801, "eval_recall": 0.9363249314835842, "eval_runtime": 1.7187, "eval_samples_per_second": 3444.564, "eval_steps_per_second": 54.112, "step": 2600 }, { "epoch": 3.6486486486486487, "grad_norm": 0.023688938468694687, "learning_rate": 2.711340206185567e-06, "loss": 0.0097, "step": 2700 }, { "epoch": 3.7837837837837838, "grad_norm": 0.010523764416575432, "learning_rate": 1.6804123711340206e-06, "loss": 0.011, "step": 2800 }, { "epoch": 3.7837837837837838, "eval_accuracy": 0.9363175675675676, "eval_f1": 0.9363174785300759, "eval_loss": 0.3880373537540436, "eval_matthews_correlation": 0.8727004149732407, "eval_precision": 0.9363563085660243, "eval_recall": 0.9363441064925211, "eval_runtime": 1.7189, "eval_samples_per_second": 3444.029, "eval_steps_per_second": 54.104, "step": 2800 }, { "epoch": 3.918918918918919, "grad_norm": 0.0027509788051247597, "learning_rate": 6.494845360824742e-07, "loss": 0.0095, "step": 2900 }, { "epoch": 4.0, "step": 2960, "total_flos": 9268766323310592.0, "train_loss": 0.10634732993470655, "train_runtime": 216.3397, "train_samples_per_second": 875.586, "train_steps_per_second": 13.682 } ], "logging_steps": 100, "max_steps": 2960, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9268766323310592.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }