{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9935205183585314, "eval_steps": 500, "global_step": 345, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08639308855291576, "grad_norm": 408.058349609375, "learning_rate": 1.2e-05, "loss": 9.3465, "step": 10 }, { "epoch": 0.17278617710583152, "grad_norm": 1.8677339553833008, "learning_rate": 5.2000000000000004e-05, "loss": 1.8486, "step": 20 }, { "epoch": 0.2591792656587473, "grad_norm": 0.6196443438529968, "learning_rate": 9.200000000000001e-05, "loss": 0.4004, "step": 30 }, { "epoch": 0.34557235421166305, "grad_norm": 0.8679957985877991, "learning_rate": 0.000132, "loss": 0.3089, "step": 40 }, { "epoch": 0.4319654427645788, "grad_norm": 0.4516398310661316, "learning_rate": 0.000172, "loss": 0.2921, "step": 50 }, { "epoch": 0.5183585313174947, "grad_norm": 1.6636766195297241, "learning_rate": 0.00019994896932810338, "loss": 0.4872, "step": 60 }, { "epoch": 0.6047516198704104, "grad_norm": 0.6236905455589294, "learning_rate": 0.0001990432055368971, "loss": 0.3284, "step": 70 }, { "epoch": 0.6911447084233261, "grad_norm": 0.8019087314605713, "learning_rate": 0.00019701524265130086, "loss": 0.4159, "step": 80 }, { "epoch": 0.7775377969762419, "grad_norm": 0.460245817899704, "learning_rate": 0.0001938880583011413, "loss": 0.2771, "step": 90 }, { "epoch": 0.8639308855291576, "grad_norm": 0.2711161673069, "learning_rate": 0.00018969708473466529, "loss": 0.2995, "step": 100 }, { "epoch": 0.9503239740820735, "grad_norm": 0.5418670773506165, "learning_rate": 0.000184489807357009, "loss": 0.2693, "step": 110 }, { "epoch": 1.043196544276458, "grad_norm": 1.3273216485977173, "learning_rate": 0.00017832522670158368, "loss": 0.2968, "step": 120 }, { "epoch": 1.1295896328293737, "grad_norm": 0.30387449264526367, "learning_rate": 0.00017127318993045686, "loss": 0.2649, "step": 130 }, { "epoch": 1.2159827213822894, "grad_norm": 0.872471272945404, "learning_rate": 0.00016341359943809628, "loss": 0.2696, "step": 140 }, { "epoch": 1.3023758099352052, "grad_norm": 0.4214591383934021, "learning_rate": 0.00015483550752531334, "loss": 0.27, "step": 150 }, { "epoch": 1.388768898488121, "grad_norm": 0.3436877131462097, "learning_rate": 0.00014563610740111162, "loss": 0.2544, "step": 160 }, { "epoch": 1.4751619870410368, "grad_norm": 0.6171785593032837, "learning_rate": 0.00013591963194479198, "loss": 0.2665, "step": 170 }, { "epoch": 1.5615550755939525, "grad_norm": 0.4470198452472687, "learning_rate": 0.0001257961727057812, "loss": 0.2732, "step": 180 }, { "epoch": 1.6479481641468683, "grad_norm": 0.5304989814758301, "learning_rate": 0.00011538043252238628, "loss": 0.2622, "step": 190 }, { "epoch": 1.734341252699784, "grad_norm": 0.35694748163223267, "learning_rate": 0.0001047904258928037, "loss": 0.2568, "step": 200 }, { "epoch": 1.8207343412526997, "grad_norm": 0.22506724298000336, "learning_rate": 9.414614182369862e-05, "loss": 0.2505, "step": 210 }, { "epoch": 1.9071274298056156, "grad_norm": 0.36761364340782166, "learning_rate": 8.356818430681408e-05, "loss": 0.2773, "step": 220 }, { "epoch": 1.9935205183585314, "grad_norm": 0.27311214804649353, "learning_rate": 7.317640582755372e-05, "loss": 0.2631, "step": 230 }, { "epoch": 2.086393088552916, "grad_norm": 0.6581523418426514, "learning_rate": 6.30885493884316e-05, "loss": 0.2726, "step": 240 }, { "epoch": 2.1727861771058317, "grad_norm": 0.2702872157096863, "learning_rate": 5.3418914433805846e-05, "loss": 0.239, "step": 250 }, { "epoch": 2.2591792656587475, "grad_norm": 0.19420155882835388, "learning_rate": 4.4277061791502473e-05, "loss": 0.2457, "step": 260 }, { "epoch": 2.345572354211663, "grad_norm": 0.249477818608284, "learning_rate": 3.576657230485775e-05, "loss": 0.2324, "step": 270 }, { "epoch": 2.4319654427645787, "grad_norm": 0.3278191387653351, "learning_rate": 2.7983873220374413e-05, "loss": 0.2474, "step": 280 }, { "epoch": 2.5183585313174945, "grad_norm": 0.23057521879673004, "learning_rate": 2.101714562848841e-05, "loss": 0.2505, "step": 290 }, { "epoch": 2.6047516198704104, "grad_norm": 0.2476508766412735, "learning_rate": 1.494532533657893e-05, "loss": 0.2468, "step": 300 }, { "epoch": 2.691144708423326, "grad_norm": 0.25082650780677795, "learning_rate": 9.837208494729566e-06, "loss": 0.2375, "step": 310 }, { "epoch": 2.777537796976242, "grad_norm": 0.1992403268814087, "learning_rate": 5.7506721078584344e-06, "loss": 0.2535, "step": 320 }, { "epoch": 2.8639308855291574, "grad_norm": 0.20463792979717255, "learning_rate": 2.7320182661258685e-06, "loss": 0.2344, "step": 330 }, { "epoch": 2.9503239740820737, "grad_norm": 0.2505795657634735, "learning_rate": 8.154495237515436e-07, "loss": 0.2285, "step": 340 } ], "logging_steps": 10, "max_steps": 345, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.539702641341235e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }