{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.005792903692976104, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005792903692976104, "grad_norm": 0.8005829453468323, "learning_rate": 2e-05, "loss": 2.9056, "step": 1 }, { "epoch": 0.0005792903692976104, "eval_loss": 2.9295616149902344, "eval_runtime": 93.9662, "eval_samples_per_second": 7.737, "eval_steps_per_second": 3.874, "step": 1 }, { "epoch": 0.0011585807385952208, "grad_norm": 0.7147985696792603, "learning_rate": 4e-05, "loss": 2.9177, "step": 2 }, { "epoch": 0.0017378711078928314, "grad_norm": 0.8883288502693176, "learning_rate": 6e-05, "loss": 2.4781, "step": 3 }, { "epoch": 0.0017378711078928314, "eval_loss": 2.9244561195373535, "eval_runtime": 95.1184, "eval_samples_per_second": 7.643, "eval_steps_per_second": 3.827, "step": 3 }, { "epoch": 0.0023171614771904415, "grad_norm": 0.9748960733413696, "learning_rate": 8e-05, "loss": 3.1738, "step": 4 }, { "epoch": 0.002896451846488052, "grad_norm": 0.7598217725753784, "learning_rate": 0.0001, "loss": 2.4379, "step": 5 }, { "epoch": 0.0034757422157856628, "grad_norm": 1.0207710266113281, "learning_rate": 0.00012, "loss": 2.9731, "step": 6 }, { "epoch": 0.0034757422157856628, "eval_loss": 2.835926055908203, "eval_runtime": 95.2842, "eval_samples_per_second": 7.63, "eval_steps_per_second": 3.82, "step": 6 }, { "epoch": 0.004055032585083273, "grad_norm": 1.317775011062622, "learning_rate": 0.00014, "loss": 2.9932, "step": 7 }, { "epoch": 0.004634322954380883, "grad_norm": 1.117720365524292, "learning_rate": 0.00016, "loss": 2.3084, "step": 8 }, { "epoch": 0.0052136133236784935, "grad_norm": 1.2141731977462769, "learning_rate": 0.00018, "loss": 2.8136, "step": 9 }, { "epoch": 0.0052136133236784935, "eval_loss": 2.5741186141967773, "eval_runtime": 95.2372, "eval_samples_per_second": 7.634, "eval_steps_per_second": 3.822, "step": 9 }, { "epoch": 0.005792903692976104, "grad_norm": 1.5056668519973755, "learning_rate": 0.0002, "loss": 2.2225, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1849564248145920.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }