{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 1384, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "learning_rate": 9.421965317919077e-06, "loss": 1.814, "step": 100 }, { "epoch": 0.58, "learning_rate": 8.84393063583815e-06, "loss": 1.1008, "step": 200 }, { "epoch": 0.87, "learning_rate": 8.265895953757226e-06, "loss": 0.8649, "step": 300 }, { "epoch": 1.0, "eval_accuracy": 0.762, "eval_f1": 0.7443581909981544, "eval_loss": 0.7707217931747437, "eval_precision": 0.8007820246885388, "eval_recall": 0.7255360611559928, "eval_runtime": 10.5048, "eval_samples_per_second": 47.598, "eval_steps_per_second": 3.998, "step": 346 }, { "epoch": 1.16, "learning_rate": 7.687861271676302e-06, "loss": 0.7136, "step": 400 }, { "epoch": 1.45, "learning_rate": 7.109826589595377e-06, "loss": 0.6514, "step": 500 }, { "epoch": 1.73, "learning_rate": 6.531791907514451e-06, "loss": 0.6135, "step": 600 }, { "epoch": 2.0, "eval_accuracy": 0.784, "eval_f1": 0.7845289578609783, "eval_loss": 0.6748060584068298, "eval_precision": 0.7856247693531655, "eval_recall": 0.7918236543574249, "eval_runtime": 10.4972, "eval_samples_per_second": 47.632, "eval_steps_per_second": 4.001, "step": 692 }, { "epoch": 2.02, "learning_rate": 5.9537572254335265e-06, "loss": 0.623, "step": 700 }, { "epoch": 2.31, "learning_rate": 5.375722543352601e-06, "loss": 0.4996, "step": 800 }, { "epoch": 2.6, "learning_rate": 4.797687861271676e-06, "loss": 0.4835, "step": 900 }, { "epoch": 2.89, "learning_rate": 4.219653179190752e-06, "loss": 0.4899, "step": 1000 }, { "epoch": 3.0, "eval_accuracy": 0.79, "eval_f1": 0.7805436767451568, "eval_loss": 0.6808088421821594, "eval_precision": 0.7803281881073597, "eval_recall": 0.7887121939209444, "eval_runtime": 10.4962, "eval_samples_per_second": 47.636, "eval_steps_per_second": 4.001, "step": 1038 }, { "epoch": 3.18, "learning_rate": 3.641618497109827e-06, "loss": 0.4174, "step": 1100 }, { "epoch": 3.47, "learning_rate": 3.063583815028902e-06, "loss": 0.3569, "step": 1200 }, { "epoch": 3.76, "learning_rate": 2.485549132947977e-06, "loss": 0.3678, "step": 1300 }, { "epoch": 4.0, "eval_accuracy": 0.792, "eval_f1": 0.7970230838037085, "eval_loss": 0.7039059996604919, "eval_precision": 0.802843584794583, "eval_recall": 0.7992789320511213, "eval_runtime": 10.5145, "eval_samples_per_second": 47.554, "eval_steps_per_second": 3.994, "step": 1384 } ], "logging_steps": 100, "max_steps": 1730, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 5820434589204480.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }