{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9935205183585313, "eval_steps": 500, "global_step": 115, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08639308855291576, "grad_norm": 408.058349609375, "learning_rate": 1.2e-05, "loss": 9.3465, "step": 10 }, { "epoch": 0.17278617710583152, "grad_norm": 1.8677339553833008, "learning_rate": 5.2000000000000004e-05, "loss": 1.8486, "step": 20 }, { "epoch": 0.2591792656587473, "grad_norm": 0.6196443438529968, "learning_rate": 9.200000000000001e-05, "loss": 0.4004, "step": 30 }, { "epoch": 0.34557235421166305, "grad_norm": 0.8679957985877991, "learning_rate": 0.000132, "loss": 0.3089, "step": 40 }, { "epoch": 0.4319654427645788, "grad_norm": 0.4516398310661316, "learning_rate": 0.000172, "loss": 0.2921, "step": 50 }, { "epoch": 0.5183585313174947, "grad_norm": 1.6636766195297241, "learning_rate": 0.00019994896932810338, "loss": 0.4872, "step": 60 }, { "epoch": 0.6047516198704104, "grad_norm": 0.6236905455589294, "learning_rate": 0.0001990432055368971, "loss": 0.3284, "step": 70 }, { "epoch": 0.6911447084233261, "grad_norm": 0.8019087314605713, "learning_rate": 0.00019701524265130086, "loss": 0.4159, "step": 80 }, { "epoch": 0.7775377969762419, "grad_norm": 0.460245817899704, "learning_rate": 0.0001938880583011413, "loss": 0.2771, "step": 90 }, { "epoch": 0.8639308855291576, "grad_norm": 0.2711161673069, "learning_rate": 0.00018969708473466529, "loss": 0.2995, "step": 100 }, { "epoch": 0.9503239740820735, "grad_norm": 0.5418670773506165, "learning_rate": 0.000184489807357009, "loss": 0.2693, "step": 110 } ], "logging_steps": 10, "max_steps": 345, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.1844961129332736e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }