{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.1966101694915254, "eval_steps": 8, "global_step": 88, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013559322033898305, "eval_loss": 10.379331588745117, "eval_runtime": 1.8159, "eval_samples_per_second": 137.125, "eval_steps_per_second": 68.838, "step": 1 }, { "epoch": 0.04067796610169491, "grad_norm": 0.04114081338047981, "learning_rate": 3e-05, "loss": 10.3795, "step": 3 }, { "epoch": 0.08135593220338982, "grad_norm": 0.03384939953684807, "learning_rate": 6e-05, "loss": 10.3792, "step": 6 }, { "epoch": 0.10847457627118644, "eval_loss": 10.378857612609863, "eval_runtime": 1.8064, "eval_samples_per_second": 137.842, "eval_steps_per_second": 69.198, "step": 8 }, { "epoch": 0.12203389830508475, "grad_norm": 0.03384710103273392, "learning_rate": 9e-05, "loss": 10.3782, "step": 9 }, { "epoch": 0.16271186440677965, "grad_norm": 0.034946031868457794, "learning_rate": 0.00012, "loss": 10.3786, "step": 12 }, { "epoch": 0.2033898305084746, "grad_norm": 0.033232275396585464, "learning_rate": 0.00015000000000000001, "loss": 10.3783, "step": 15 }, { "epoch": 0.21694915254237288, "eval_loss": 10.377367973327637, "eval_runtime": 1.8091, "eval_samples_per_second": 137.638, "eval_steps_per_second": 69.096, "step": 16 }, { "epoch": 0.2440677966101695, "grad_norm": 0.03324678912758827, "learning_rate": 0.00018, "loss": 10.3774, "step": 18 }, { "epoch": 0.2847457627118644, "grad_norm": 0.04449416324496269, "learning_rate": 0.00019989930665413147, "loss": 10.3754, "step": 21 }, { "epoch": 0.3254237288135593, "grad_norm": 0.048610322177410126, "learning_rate": 0.00019839295885986296, "loss": 10.3766, "step": 24 }, { "epoch": 0.3254237288135593, "eval_loss": 10.374491691589355, "eval_runtime": 1.8087, "eval_samples_per_second": 137.666, "eval_steps_per_second": 69.11, "step": 24 }, { "epoch": 0.36610169491525424, "grad_norm": 0.056761473417282104, "learning_rate": 0.00019510565162951537, "loss": 10.3758, "step": 27 }, { "epoch": 0.4067796610169492, "grad_norm": 0.07433681935071945, "learning_rate": 0.0001900968867902419, "loss": 10.3736, "step": 30 }, { "epoch": 0.43389830508474575, "eval_loss": 10.369266510009766, "eval_runtime": 1.8126, "eval_samples_per_second": 137.368, "eval_steps_per_second": 68.96, "step": 32 }, { "epoch": 0.44745762711864406, "grad_norm": 0.08046111464500427, "learning_rate": 0.00018345732537213027, "loss": 10.3698, "step": 33 }, { "epoch": 0.488135593220339, "grad_norm": 0.10006483644247055, "learning_rate": 0.00017530714660036112, "loss": 10.3664, "step": 36 }, { "epoch": 0.5288135593220339, "grad_norm": 0.11119823902845383, "learning_rate": 0.00016579387259397127, "loss": 10.365, "step": 39 }, { "epoch": 0.5423728813559322, "eval_loss": 10.36050796508789, "eval_runtime": 1.8025, "eval_samples_per_second": 138.142, "eval_steps_per_second": 69.348, "step": 40 }, { "epoch": 0.5694915254237288, "grad_norm": 0.10603281110525131, "learning_rate": 0.00015508969814521025, "loss": 10.3608, "step": 42 }, { "epoch": 0.6101694915254238, "grad_norm": 0.09946656972169876, "learning_rate": 0.00014338837391175582, "loss": 10.356, "step": 45 }, { "epoch": 0.6508474576271186, "grad_norm": 0.09944617748260498, "learning_rate": 0.00013090169943749476, "loss": 10.3547, "step": 48 }, { "epoch": 0.6508474576271186, "eval_loss": 10.353257179260254, "eval_runtime": 1.8077, "eval_samples_per_second": 137.745, "eval_steps_per_second": 69.149, "step": 48 }, { "epoch": 0.6915254237288135, "grad_norm": 0.07900821417570114, "learning_rate": 0.00011785568947986367, "loss": 10.3534, "step": 51 }, { "epoch": 0.7322033898305085, "grad_norm": 0.054558683186769485, "learning_rate": 0.00010448648303505151, "loss": 10.3522, "step": 54 }, { "epoch": 0.7593220338983051, "eval_loss": 10.35043716430664, "eval_runtime": 1.8076, "eval_samples_per_second": 137.75, "eval_steps_per_second": 69.152, "step": 56 }, { "epoch": 0.7728813559322034, "grad_norm": 0.07926557213068008, "learning_rate": 9.103606910965666e-05, "loss": 10.3519, "step": 57 }, { "epoch": 0.8135593220338984, "grad_norm": 0.07362005114555359, "learning_rate": 7.774790660436858e-05, "loss": 10.3487, "step": 60 }, { "epoch": 0.8542372881355932, "grad_norm": 0.054365601390600204, "learning_rate": 6.486251759186572e-05, "loss": 10.3515, "step": 63 }, { "epoch": 0.8677966101694915, "eval_loss": 10.349444389343262, "eval_runtime": 1.8121, "eval_samples_per_second": 137.407, "eval_steps_per_second": 68.979, "step": 64 }, { "epoch": 0.8949152542372881, "grad_norm": 0.061908673495054245, "learning_rate": 5.261313375270014e-05, "loss": 10.35, "step": 66 }, { "epoch": 0.9355932203389831, "grad_norm": 0.05369194224476814, "learning_rate": 4.12214747707527e-05, "loss": 10.3489, "step": 69 }, { "epoch": 0.976271186440678, "grad_norm": 0.046110741794109344, "learning_rate": 3.089373510131354e-05, "loss": 10.3496, "step": 72 }, { "epoch": 0.976271186440678, "eval_loss": 10.34906005859375, "eval_runtime": 1.8082, "eval_samples_per_second": 137.71, "eval_steps_per_second": 69.131, "step": 72 }, { "epoch": 1.0203389830508474, "grad_norm": 0.05189204961061478, "learning_rate": 2.181685175319702e-05, "loss": 11.9302, "step": 75 }, { "epoch": 1.0610169491525423, "grad_norm": 0.046334102749824524, "learning_rate": 1.415512063981339e-05, "loss": 10.4762, "step": 78 }, { "epoch": 1.088135593220339, "eval_loss": 10.34891128540039, "eval_runtime": 1.8111, "eval_samples_per_second": 137.482, "eval_steps_per_second": 69.017, "step": 80 }, { "epoch": 1.1016949152542372, "grad_norm": 0.0535203292965889, "learning_rate": 8.047222744854943e-06, "loss": 10.3181, "step": 81 }, { "epoch": 1.1423728813559322, "grad_norm": 0.07268232852220535, "learning_rate": 3.6037139304146762e-06, "loss": 10.3939, "step": 84 }, { "epoch": 1.1830508474576271, "grad_norm": 0.0458713173866272, "learning_rate": 9.0502382320653e-07, "loss": 10.3273, "step": 87 }, { "epoch": 1.1966101694915254, "eval_loss": 10.348885536193848, "eval_runtime": 1.801, "eval_samples_per_second": 138.257, "eval_steps_per_second": 69.406, "step": 88 } ], "logging_steps": 3, "max_steps": 90, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 8, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 69137241341952.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }