{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9869281045751634, "eval_steps": 500, "global_step": 228, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13071895424836602, "grad_norm": 13.640703803287584, "learning_rate": 5e-06, "loss": 0.9844, "step": 10 }, { "epoch": 0.26143790849673204, "grad_norm": 0.9447823765191696, "learning_rate": 5e-06, "loss": 0.8863, "step": 20 }, { "epoch": 0.39215686274509803, "grad_norm": 1.038467083575288, "learning_rate": 5e-06, "loss": 0.8449, "step": 30 }, { "epoch": 0.5228758169934641, "grad_norm": 1.6427820915875102, "learning_rate": 5e-06, "loss": 0.8257, "step": 40 }, { "epoch": 0.6535947712418301, "grad_norm": 1.7936488531165335, "learning_rate": 5e-06, "loss": 0.8169, "step": 50 }, { "epoch": 0.7843137254901961, "grad_norm": 0.7697816321414731, "learning_rate": 5e-06, "loss": 0.8056, "step": 60 }, { "epoch": 0.9150326797385621, "grad_norm": 1.250994290613137, "learning_rate": 5e-06, "loss": 0.7971, "step": 70 }, { "epoch": 1.0490196078431373, "grad_norm": 0.843433753244107, "learning_rate": 5e-06, "loss": 0.8367, "step": 80 }, { "epoch": 1.1797385620915033, "grad_norm": 0.6027656670101825, "learning_rate": 5e-06, "loss": 0.7483, "step": 90 }, { "epoch": 1.3104575163398693, "grad_norm": 0.8024576817469242, "learning_rate": 5e-06, "loss": 0.7428, "step": 100 }, { "epoch": 1.4411764705882353, "grad_norm": 0.7594730875779195, "learning_rate": 5e-06, "loss": 0.7401, "step": 110 }, { "epoch": 1.5718954248366013, "grad_norm": 0.5293998500966177, "learning_rate": 5e-06, "loss": 0.7426, "step": 120 }, { "epoch": 1.7026143790849673, "grad_norm": 0.9249042901353932, "learning_rate": 5e-06, "loss": 0.7406, "step": 130 }, { "epoch": 1.8333333333333335, "grad_norm": 0.5767932991870924, "learning_rate": 5e-06, "loss": 0.7379, "step": 140 }, { "epoch": 1.9640522875816995, "grad_norm": 0.5974379920218519, "learning_rate": 5e-06, "loss": 0.7337, "step": 150 }, { "epoch": 2.0980392156862746, "grad_norm": 1.3799060310730653, "learning_rate": 5e-06, "loss": 0.7541, "step": 160 }, { "epoch": 2.2287581699346406, "grad_norm": 0.8440697657265467, "learning_rate": 5e-06, "loss": 0.6849, "step": 170 }, { "epoch": 2.3594771241830066, "grad_norm": 0.6523664577578698, "learning_rate": 5e-06, "loss": 0.6828, "step": 180 }, { "epoch": 2.4901960784313726, "grad_norm": 0.5604368514967889, "learning_rate": 5e-06, "loss": 0.6833, "step": 190 }, { "epoch": 2.6209150326797386, "grad_norm": 0.651015676014187, "learning_rate": 5e-06, "loss": 0.6825, "step": 200 }, { "epoch": 2.7516339869281046, "grad_norm": 0.6331718263692562, "learning_rate": 5e-06, "loss": 0.6826, "step": 210 }, { "epoch": 2.8823529411764706, "grad_norm": 0.6748382635791591, "learning_rate": 5e-06, "loss": 0.6867, "step": 220 }, { "epoch": 2.9869281045751634, "step": 228, "total_flos": 381489732648960.0, "train_loss": 0.7633350188272041, "train_runtime": 3333.6415, "train_samples_per_second": 35.22, "train_steps_per_second": 0.068 } ], "logging_steps": 10, "max_steps": 228, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 381489732648960.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }