{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.013036534889026497, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.518267444513249e-05, "eval_loss": 1.887902021408081, "eval_runtime": 333.4715, "eval_samples_per_second": 19.372, "eval_steps_per_second": 9.686, "step": 1 }, { "epoch": 0.0006518267444513248, "grad_norm": 0.9258672595024109, "learning_rate": 0.0002, "loss": 1.8293, "step": 10 }, { "epoch": 0.0013036534889026496, "grad_norm": 0.7830431461334229, "learning_rate": 0.0002, "loss": 1.7838, "step": 20 }, { "epoch": 0.0019554802333539745, "grad_norm": 0.9389762282371521, "learning_rate": 0.0002, "loss": 1.7733, "step": 30 }, { "epoch": 0.002607306977805299, "grad_norm": 0.9967517852783203, "learning_rate": 0.0002, "loss": 1.7201, "step": 40 }, { "epoch": 0.0032591337222566243, "grad_norm": 0.8355724215507507, "learning_rate": 0.0002, "loss": 1.5985, "step": 50 }, { "epoch": 0.0032591337222566243, "eval_loss": 1.7376784086227417, "eval_runtime": 333.6711, "eval_samples_per_second": 19.36, "eval_steps_per_second": 9.68, "step": 50 }, { "epoch": 0.003910960466707949, "grad_norm": 0.9053980708122253, "learning_rate": 0.0002, "loss": 1.7509, "step": 60 }, { "epoch": 0.004562787211159274, "grad_norm": 1.0709302425384521, "learning_rate": 0.0002, "loss": 1.7415, "step": 70 }, { "epoch": 0.005214613955610598, "grad_norm": 0.8847922682762146, "learning_rate": 0.0002, "loss": 1.8675, "step": 80 }, { "epoch": 0.005866440700061924, "grad_norm": 0.8659989237785339, "learning_rate": 0.0002, "loss": 1.7278, "step": 90 }, { "epoch": 0.006518267444513249, "grad_norm": 1.0513916015625, "learning_rate": 0.0002, "loss": 1.787, "step": 100 }, { "epoch": 0.006518267444513249, "eval_loss": 1.726641058921814, "eval_runtime": 333.7788, "eval_samples_per_second": 19.354, "eval_steps_per_second": 9.677, "step": 100 }, { "epoch": 0.007170094188964573, "grad_norm": 0.9510365128517151, "learning_rate": 0.0002, "loss": 1.6355, "step": 110 }, { "epoch": 0.007821920933415898, "grad_norm": 0.7684184312820435, "learning_rate": 0.0002, "loss": 1.6816, "step": 120 }, { "epoch": 0.008473747677867223, "grad_norm": 0.7511278390884399, "learning_rate": 0.0002, "loss": 1.7392, "step": 130 }, { "epoch": 0.009125574422318547, "grad_norm": 1.2092989683151245, "learning_rate": 0.0002, "loss": 1.5957, "step": 140 }, { "epoch": 0.009777401166769872, "grad_norm": 0.7998082041740417, "learning_rate": 0.0002, "loss": 1.5553, "step": 150 }, { "epoch": 0.009777401166769872, "eval_loss": 1.726781964302063, "eval_runtime": 333.6825, "eval_samples_per_second": 19.36, "eval_steps_per_second": 9.68, "step": 150 }, { "epoch": 0.010429227911221197, "grad_norm": 1.0430855751037598, "learning_rate": 0.0002, "loss": 1.7549, "step": 160 }, { "epoch": 0.011081054655672521, "grad_norm": 1.1361061334609985, "learning_rate": 0.0002, "loss": 1.6927, "step": 170 }, { "epoch": 0.011732881400123848, "grad_norm": 0.9144191741943359, "learning_rate": 0.0002, "loss": 1.7662, "step": 180 }, { "epoch": 0.012384708144575172, "grad_norm": 0.8297296762466431, "learning_rate": 0.0002, "loss": 1.7277, "step": 190 }, { "epoch": 0.013036534889026497, "grad_norm": 1.0495644807815552, "learning_rate": 0.0002, "loss": 1.7698, "step": 200 }, { "epoch": 0.013036534889026497, "eval_loss": 1.7259982824325562, "eval_runtime": 333.5702, "eval_samples_per_second": 19.366, "eval_steps_per_second": 9.683, "step": 200 } ], "logging_steps": 10, 
"max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.386991357034496e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }