{ "best_metric": 0.511212944984436, "best_model_checkpoint": "output_pipe/H3K4me1/origin/checkpoint-800", "epoch": 4.0, "eval_steps": 200, "global_step": 1584, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25252525252525254, "grad_norm": 2.8624508380889893, "learning_rate": 2.9022164276401565e-05, "loss": 0.6197, "step": 100 }, { "epoch": 0.5050505050505051, "grad_norm": 3.410557270050049, "learning_rate": 2.7066492829204693e-05, "loss": 0.5415, "step": 200 }, { "epoch": 0.5050505050505051, "eval_accuracy": 0.7616792929292929, "eval_f1": 0.7607729152020279, "eval_loss": 0.5343340635299683, "eval_matthews_correlation": 0.5216009133490279, "eval_precision": 0.7606072333239515, "eval_recall": 0.760993823287651, "eval_runtime": 1.923, "eval_samples_per_second": 1647.436, "eval_steps_per_second": 26.001, "step": 200 }, { "epoch": 0.7575757575757576, "grad_norm": 1.4580295085906982, "learning_rate": 2.511082138200782e-05, "loss": 0.5157, "step": 300 }, { "epoch": 1.0101010101010102, "grad_norm": 2.2710134983062744, "learning_rate": 2.3155149934810953e-05, "loss": 0.5295, "step": 400 }, { "epoch": 1.0101010101010102, "eval_accuracy": 0.7761994949494949, "eval_f1": 0.7725553629449594, "eval_loss": 0.5141143202781677, "eval_matthews_correlation": 0.5506653820018602, "eval_precision": 0.7795613839562128, "eval_recall": 0.7711679619732008, "eval_runtime": 1.9228, "eval_samples_per_second": 1647.574, "eval_steps_per_second": 26.003, "step": 400 }, { "epoch": 1.2626262626262625, "grad_norm": 1.2207655906677246, "learning_rate": 2.119947848761408e-05, "loss": 0.4729, "step": 500 }, { "epoch": 1.5151515151515151, "grad_norm": 1.5364807844161987, "learning_rate": 1.924380704041721e-05, "loss": 0.4629, "step": 600 }, { "epoch": 1.5151515151515151, "eval_accuracy": 0.7755681818181818, "eval_f1": 0.7714154656610406, "eval_loss": 0.5175797343254089, "eval_matthews_correlation": 0.5500618417358605, "eval_precision": 0.7801796351451973, "eval_recall": 0.7699768218138402, "eval_runtime": 4.2491, "eval_samples_per_second": 745.574, "eval_steps_per_second": 11.767, "step": 600 }, { "epoch": 1.7676767676767677, "grad_norm": 3.395381212234497, "learning_rate": 1.728813559322034e-05, "loss": 0.4766, "step": 700 }, { "epoch": 2.0202020202020203, "grad_norm": 1.879275918006897, "learning_rate": 1.533246414602347e-05, "loss": 0.4686, "step": 800 }, { "epoch": 2.0202020202020203, "eval_accuracy": 0.7746212121212122, "eval_f1": 0.7727430555555556, "eval_loss": 0.511212944984436, "eval_matthews_correlation": 0.5463418750991603, "eval_precision": 0.7743439291360682, "eval_recall": 0.7720029612344936, "eval_runtime": 1.9185, "eval_samples_per_second": 1651.262, "eval_steps_per_second": 26.062, "step": 800 }, { "epoch": 2.2727272727272725, "grad_norm": 4.8738484382629395, "learning_rate": 1.3376792698826597e-05, "loss": 0.342, "step": 900 }, { "epoch": 2.525252525252525, "grad_norm": 2.3210840225219727, "learning_rate": 1.1421121251629727e-05, "loss": 0.3251, "step": 1000 }, { "epoch": 2.525252525252525, "eval_accuracy": 0.7657828282828283, "eval_f1": 0.7601378281776338, "eval_loss": 0.5649052858352661, "eval_matthews_correlation": 0.5320366808458905, "eval_precision": 0.7733505537083345, "eval_recall": 0.7588828026186666, "eval_runtime": 1.9198, "eval_samples_per_second": 1650.147, "eval_steps_per_second": 26.044, "step": 1000 }, { "epoch": 2.7777777777777777, "grad_norm": 5.395249843597412, "learning_rate": 9.465449804432857e-06, "loss": 0.3096, "step": 1100 }, { "epoch": 3.0303030303030303, "grad_norm": 1.488246202468872, "learning_rate": 7.509778357235985e-06, "loss": 0.3006, "step": 1200 }, { "epoch": 3.0303030303030303, "eval_accuracy": 0.7705176767676768, "eval_f1": 0.7669679176367754, "eval_loss": 0.6334746479988098, "eval_matthews_correlation": 0.5388668724823326, "eval_precision": 0.773245758524131, "eval_recall": 0.7656743034432558, "eval_runtime": 1.9184, "eval_samples_per_second": 1651.336, "eval_steps_per_second": 26.063, "step": 1200 }, { "epoch": 3.282828282828283, "grad_norm": 3.6328012943267822, "learning_rate": 5.554106910039114e-06, "loss": 0.0893, "step": 1300 }, { "epoch": 3.5353535353535355, "grad_norm": 3.510218381881714, "learning_rate": 3.6179921773142113e-06, "loss": 0.0743, "step": 1400 }, { "epoch": 3.5353535353535355, "eval_accuracy": 0.76010101010101, "eval_f1": 0.7579302456892183, "eval_loss": 0.9358654618263245, "eval_matthews_correlation": 0.5169954898766244, "eval_precision": 0.7598433049182238, "eval_recall": 0.7571591527410038, "eval_runtime": 1.9208, "eval_samples_per_second": 1649.325, "eval_steps_per_second": 26.031, "step": 1400 }, { "epoch": 3.787878787878788, "grad_norm": 4.385198593139648, "learning_rate": 1.6623207301173403e-06, "loss": 0.0826, "step": 1500 }, { "epoch": 4.0, "step": 1584, "total_flos": 2.581211751008796e+16, "train_loss": 0.35833474180915137, "train_runtime": 278.9972, "train_samples_per_second": 363.315, "train_steps_per_second": 5.677 } ], "logging_steps": 100, "max_steps": 1584, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.581211751008796e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }