{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9970501474926253, "eval_steps": 500, "global_step": 169, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.029498525073746312, "grad_norm": 15.842525482177734, "learning_rate": 0.0002, "loss": 1.3605, "step": 5 }, { "epoch": 0.058997050147492625, "grad_norm": 1.1057137250900269, "learning_rate": 0.0002, "loss": 0.6189, "step": 10 }, { "epoch": 0.08849557522123894, "grad_norm": 0.7712059020996094, "learning_rate": 0.0002, "loss": 0.4282, "step": 15 }, { "epoch": 0.11799410029498525, "grad_norm": 1.1073462963104248, "learning_rate": 0.0002, "loss": 0.4143, "step": 20 }, { "epoch": 0.14749262536873156, "grad_norm": 0.6461352705955505, "learning_rate": 0.0002, "loss": 0.3758, "step": 25 }, { "epoch": 0.17699115044247787, "grad_norm": 0.6348222494125366, "learning_rate": 0.0002, "loss": 0.3761, "step": 30 }, { "epoch": 0.20648967551622419, "grad_norm": 0.558731198310852, "learning_rate": 0.0002, "loss": 0.3652, "step": 35 }, { "epoch": 0.2359882005899705, "grad_norm": 0.5885202884674072, "learning_rate": 0.0002, "loss": 0.3611, "step": 40 }, { "epoch": 0.26548672566371684, "grad_norm": 0.5200619697570801, "learning_rate": 0.0002, "loss": 0.3584, "step": 45 }, { "epoch": 0.2949852507374631, "grad_norm": 0.6615349650382996, "learning_rate": 0.0002, "loss": 0.3352, "step": 50 }, { "epoch": 0.32448377581120946, "grad_norm": 0.6481220126152039, "learning_rate": 0.0002, "loss": 0.3245, "step": 55 }, { "epoch": 0.35398230088495575, "grad_norm": 0.5189281702041626, "learning_rate": 0.0002, "loss": 0.3277, "step": 60 }, { "epoch": 0.3834808259587021, "grad_norm": 0.5450770854949951, "learning_rate": 0.0002, "loss": 0.3394, "step": 65 }, { "epoch": 0.41297935103244837, "grad_norm": 0.5216971635818481, "learning_rate": 0.0002, "loss": 0.3303, "step": 70 }, { "epoch": 0.4424778761061947, "grad_norm": 0.5718809962272644, "learning_rate": 0.0002, "loss": 0.3503, "step": 75 }, { "epoch": 0.471976401179941, "grad_norm": 0.46456339955329895, "learning_rate": 0.0002, "loss": 0.3363, "step": 80 }, { "epoch": 0.5014749262536873, "grad_norm": 0.8590409755706787, "learning_rate": 0.0002, "loss": 0.3111, "step": 85 }, { "epoch": 0.5309734513274337, "grad_norm": 0.4512447714805603, "learning_rate": 0.0002, "loss": 0.3142, "step": 90 }, { "epoch": 0.56047197640118, "grad_norm": 1.0052748918533325, "learning_rate": 0.0002, "loss": 0.3146, "step": 95 }, { "epoch": 0.5899705014749262, "grad_norm": 0.41449859738349915, "learning_rate": 0.0002, "loss": 0.2949, "step": 100 }, { "epoch": 0.6194690265486725, "grad_norm": 4.5440354347229, "learning_rate": 0.0002, "loss": 0.2879, "step": 105 }, { "epoch": 0.6489675516224189, "grad_norm": 0.37434083223342896, "learning_rate": 0.0002, "loss": 0.2943, "step": 110 }, { "epoch": 0.6784660766961652, "grad_norm": 0.3563149571418762, "learning_rate": 0.0002, "loss": 0.3082, "step": 115 }, { "epoch": 0.7079646017699115, "grad_norm": 0.40530529618263245, "learning_rate": 0.0002, "loss": 0.308, "step": 120 }, { "epoch": 0.7374631268436578, "grad_norm": 0.39032211899757385, "learning_rate": 0.0002, "loss": 0.3079, "step": 125 }, { "epoch": 0.7669616519174042, "grad_norm": 0.3619149327278137, "learning_rate": 0.0002, "loss": 0.2861, "step": 130 }, { "epoch": 0.7964601769911505, "grad_norm": 0.3519650995731354, "learning_rate": 0.0002, "loss": 0.2942, "step": 135 }, { "epoch": 0.8259587020648967, "grad_norm": 0.33940911293029785, "learning_rate": 0.0002, "loss": 0.288, "step": 140 }, { "epoch": 0.855457227138643, "grad_norm": 0.31343740224838257, "learning_rate": 0.0002, "loss": 0.2961, "step": 145 }, { "epoch": 0.8849557522123894, "grad_norm": 0.4065489172935486, "learning_rate": 0.0002, "loss": 0.2824, "step": 150 }, { "epoch": 0.9144542772861357, "grad_norm": 0.35969874262809753, "learning_rate": 0.0002, "loss": 0.2918, "step": 155 }, { "epoch": 0.943952802359882, "grad_norm": 0.35074159502983093, "learning_rate": 0.0002, "loss": 0.285, "step": 160 }, { "epoch": 0.9734513274336283, "grad_norm": 0.3305889964103699, "learning_rate": 0.0002, "loss": 0.2761, "step": 165 }, { "epoch": 0.9970501474926253, "step": 169, "total_flos": 1.3410656498535629e+17, "train_loss": 0.3628283569798667, "train_runtime": 1720.2693, "train_samples_per_second": 1.574, "train_steps_per_second": 0.098 } ], "logging_steps": 5, "max_steps": 169, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3410656498535629e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }