{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9935205183585314, "eval_steps": 500, "global_step": 230, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08639308855291576, "grad_norm": 408.058349609375, "learning_rate": 1.2e-05, "loss": 9.3465, "step": 10 }, { "epoch": 0.17278617710583152, "grad_norm": 1.8677339553833008, "learning_rate": 5.2000000000000004e-05, "loss": 1.8486, "step": 20 }, { "epoch": 0.2591792656587473, "grad_norm": 0.6196443438529968, "learning_rate": 9.200000000000001e-05, "loss": 0.4004, "step": 30 }, { "epoch": 0.34557235421166305, "grad_norm": 0.8679957985877991, "learning_rate": 0.000132, "loss": 0.3089, "step": 40 }, { "epoch": 0.4319654427645788, "grad_norm": 0.4516398310661316, "learning_rate": 0.000172, "loss": 0.2921, "step": 50 }, { "epoch": 0.5183585313174947, "grad_norm": 1.6636766195297241, "learning_rate": 0.00019994896932810338, "loss": 0.4872, "step": 60 }, { "epoch": 0.6047516198704104, "grad_norm": 0.6236905455589294, "learning_rate": 0.0001990432055368971, "loss": 0.3284, "step": 70 }, { "epoch": 0.6911447084233261, "grad_norm": 0.8019087314605713, "learning_rate": 0.00019701524265130086, "loss": 0.4159, "step": 80 }, { "epoch": 0.7775377969762419, "grad_norm": 0.460245817899704, "learning_rate": 0.0001938880583011413, "loss": 0.2771, "step": 90 }, { "epoch": 0.8639308855291576, "grad_norm": 0.2711161673069, "learning_rate": 0.00018969708473466529, "loss": 0.2995, "step": 100 }, { "epoch": 0.9503239740820735, "grad_norm": 0.5418670773506165, "learning_rate": 0.000184489807357009, "loss": 0.2693, "step": 110 }, { "epoch": 1.043196544276458, "grad_norm": 1.3273216485977173, "learning_rate": 0.00017832522670158368, "loss": 0.2968, "step": 120 }, { "epoch": 1.1295896328293737, "grad_norm": 0.30387449264526367, "learning_rate": 0.00017127318993045686, "loss": 0.2649, "step": 130 }, { "epoch": 1.2159827213822894, "grad_norm": 0.872471272945404, "learning_rate": 0.00016341359943809628, "loss": 0.2696, "step": 140 }, { "epoch": 1.3023758099352052, "grad_norm": 0.4214591383934021, "learning_rate": 0.00015483550752531334, "loss": 0.27, "step": 150 }, { "epoch": 1.388768898488121, "grad_norm": 0.3436877131462097, "learning_rate": 0.00014563610740111162, "loss": 0.2544, "step": 160 }, { "epoch": 1.4751619870410368, "grad_norm": 0.6171785593032837, "learning_rate": 0.00013591963194479198, "loss": 0.2665, "step": 170 }, { "epoch": 1.5615550755939525, "grad_norm": 0.4470198452472687, "learning_rate": 0.0001257961727057812, "loss": 0.2732, "step": 180 }, { "epoch": 1.6479481641468683, "grad_norm": 0.5304989814758301, "learning_rate": 0.00011538043252238628, "loss": 0.2622, "step": 190 }, { "epoch": 1.734341252699784, "grad_norm": 0.35694748163223267, "learning_rate": 0.0001047904258928037, "loss": 0.2568, "step": 200 }, { "epoch": 1.8207343412526997, "grad_norm": 0.22506724298000336, "learning_rate": 9.414614182369862e-05, "loss": 0.2505, "step": 210 }, { "epoch": 1.9071274298056156, "grad_norm": 0.36761364340782166, "learning_rate": 8.356818430681408e-05, "loss": 0.2773, "step": 220 }, { "epoch": 1.9935205183585314, "grad_norm": 0.27311214804649353, "learning_rate": 7.317640582755372e-05, "loss": 0.2631, "step": 230 } ], "logging_steps": 10, "max_steps": 345, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.368992225866547e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }