{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4665267086540704, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.023326335432703522, "grad_norm": 3.1133477687835693, "learning_rate": 4.8e-05, "loss": 3.3258, "step": 25 }, { "epoch": 0.046652670865407045, "grad_norm": 1.9544230699539185, "learning_rate": 9.8e-05, "loss": 1.7406, "step": 50 }, { "epoch": 0.06997900629811056, "grad_norm": 0.8057882785797119, "learning_rate": 9.764936336924584e-05, "loss": 0.5676, "step": 75 }, { "epoch": 0.09330534173081409, "grad_norm": 0.6476953029632568, "learning_rate": 9.520078354554359e-05, "loss": 0.4746, "step": 100 }, { "epoch": 0.1166316771635176, "grad_norm": 0.5200537443161011, "learning_rate": 9.275220372184134e-05, "loss": 0.4277, "step": 125 }, { "epoch": 0.13995801259622112, "grad_norm": 0.7842611074447632, "learning_rate": 9.030362389813909e-05, "loss": 0.3958, "step": 150 }, { "epoch": 0.16328434802892466, "grad_norm": 0.4216076731681824, "learning_rate": 8.785504407443683e-05, "loss": 0.2831, "step": 175 }, { "epoch": 0.18661068346162818, "grad_norm": 0.5324825048446655, "learning_rate": 8.540646425073458e-05, "loss": 0.2866, "step": 200 }, { "epoch": 0.2099370188943317, "grad_norm": 0.5478129386901855, "learning_rate": 8.295788442703233e-05, "loss": 0.2698, "step": 225 }, { "epoch": 0.2332633543270352, "grad_norm": 0.584348738193512, "learning_rate": 8.050930460333007e-05, "loss": 0.2681, "step": 250 }, { "epoch": 0.2565896897597387, "grad_norm": 0.7180460691452026, "learning_rate": 7.806072477962783e-05, "loss": 0.2535, "step": 275 }, { "epoch": 0.27991602519244224, "grad_norm": 0.4909381568431854, "learning_rate": 7.561214495592557e-05, "loss": 0.2714, "step": 300 }, { "epoch": 0.3032423606251458, "grad_norm": 0.4367845058441162, "learning_rate": 7.316356513222332e-05, "loss": 0.2535, "step": 325 }, { "epoch": 0.32656869605784933, "grad_norm": 0.4265742897987366, "learning_rate": 7.071498530852107e-05, "loss": 0.2695, "step": 350 }, { "epoch": 0.34989503149055284, "grad_norm": 0.4465666115283966, "learning_rate": 6.82664054848188e-05, "loss": 0.2757, "step": 375 }, { "epoch": 0.37322136692325636, "grad_norm": 0.5847889184951782, "learning_rate": 6.581782566111655e-05, "loss": 0.2701, "step": 400 }, { "epoch": 0.3965477023559599, "grad_norm": 0.4934362769126892, "learning_rate": 6.33692458374143e-05, "loss": 0.2555, "step": 425 }, { "epoch": 0.4198740377886634, "grad_norm": 0.5802527070045471, "learning_rate": 6.092066601371205e-05, "loss": 0.263, "step": 450 }, { "epoch": 0.4432003732213669, "grad_norm": 0.4295060932636261, "learning_rate": 5.84720861900098e-05, "loss": 0.2778, "step": 475 }, { "epoch": 0.4665267086540704, "grad_norm": 0.5922301411628723, "learning_rate": 5.602350636630754e-05, "loss": 0.2624, "step": 500 } ], "logging_steps": 25, "max_steps": 1071, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1038595224637952e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }