{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2, "eval_steps": 500, "global_step": 267, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00749063670411985, "grad_norm": 4.558547019958496, "learning_rate": 4.831460674157304e-05, "loss": 3.2586, "step": 10 }, { "epoch": 0.0149812734082397, "grad_norm": 4.060137748718262, "learning_rate": 4.644194756554308e-05, "loss": 2.9414, "step": 20 }, { "epoch": 0.02247191011235955, "grad_norm": 4.330236434936523, "learning_rate": 4.456928838951311e-05, "loss": 2.7364, "step": 30 }, { "epoch": 0.0299625468164794, "grad_norm": 3.8646533489227295, "learning_rate": 4.269662921348315e-05, "loss": 2.6617, "step": 40 }, { "epoch": 0.03745318352059925, "grad_norm": 4.332949161529541, "learning_rate": 4.082397003745319e-05, "loss": 2.5693, "step": 50 }, { "epoch": 0.0449438202247191, "grad_norm": 3.7690067291259766, "learning_rate": 3.8951310861423226e-05, "loss": 2.6648, "step": 60 }, { "epoch": 0.052434456928838954, "grad_norm": 3.539844274520874, "learning_rate": 3.7078651685393264e-05, "loss": 2.6171, "step": 70 }, { "epoch": 0.0599250936329588, "grad_norm": 4.24369478225708, "learning_rate": 3.52059925093633e-05, "loss": 2.4795, "step": 80 }, { "epoch": 0.06741573033707865, "grad_norm": 4.186371326446533, "learning_rate": 3.3333333333333335e-05, "loss": 2.6167, "step": 90 }, { "epoch": 0.0749063670411985, "grad_norm": 3.6899819374084473, "learning_rate": 3.1460674157303374e-05, "loss": 2.4597, "step": 100 }, { "epoch": 0.08239700374531835, "grad_norm": 4.035410404205322, "learning_rate": 2.958801498127341e-05, "loss": 2.3755, "step": 110 }, { "epoch": 0.0898876404494382, "grad_norm": 3.86106538772583, "learning_rate": 2.7715355805243448e-05, "loss": 2.4617, "step": 120 }, { "epoch": 0.09737827715355805, "grad_norm": 5.214311599731445, "learning_rate": 2.5842696629213486e-05, "loss": 2.3476, "step": 130 }, { "epoch": 0.10486891385767791, "grad_norm": 4.270963191986084, "learning_rate": 2.3970037453183522e-05, "loss": 2.4596, "step": 140 }, { "epoch": 0.11235955056179775, "grad_norm": 3.6309258937835693, "learning_rate": 2.209737827715356e-05, "loss": 2.3118, "step": 150 }, { "epoch": 0.1198501872659176, "grad_norm": 3.888986349105835, "learning_rate": 2.0224719101123596e-05, "loss": 2.3443, "step": 160 }, { "epoch": 0.12734082397003746, "grad_norm": 2.937702178955078, "learning_rate": 1.8352059925093635e-05, "loss": 2.4183, "step": 170 }, { "epoch": 0.1348314606741573, "grad_norm": 3.4217689037323, "learning_rate": 1.647940074906367e-05, "loss": 2.4306, "step": 180 }, { "epoch": 0.14232209737827714, "grad_norm": 3.5683271884918213, "learning_rate": 1.4606741573033709e-05, "loss": 2.3407, "step": 190 }, { "epoch": 0.149812734082397, "grad_norm": 3.605642318725586, "learning_rate": 1.2734082397003746e-05, "loss": 2.3402, "step": 200 }, { "epoch": 0.15730337078651685, "grad_norm": 2.3034493923187256, "learning_rate": 1.0861423220973783e-05, "loss": 2.1756, "step": 210 }, { "epoch": 0.1647940074906367, "grad_norm": 3.481696605682373, "learning_rate": 8.98876404494382e-06, "loss": 2.1487, "step": 220 }, { "epoch": 0.17228464419475656, "grad_norm": 3.515986919403076, "learning_rate": 7.116104868913858e-06, "loss": 2.2253, "step": 230 }, { "epoch": 0.1797752808988764, "grad_norm": 3.26123046875, "learning_rate": 5.243445692883896e-06, "loss": 2.3215, "step": 240 }, { "epoch": 0.18726591760299627, "grad_norm": 4.033005714416504, "learning_rate": 3.3707865168539327e-06, "loss": 2.2975, "step": 250 }, { "epoch": 0.1947565543071161, "grad_norm": 3.7090864181518555, "learning_rate": 1.4981273408239701e-06, "loss": 2.2616, "step": 260 } ], "logging_steps": 10, "max_steps": 267, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1032285369139200.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }