{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.97196261682243, "eval_steps": 500, "global_step": 240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12461059190031153, "grad_norm": 0.9156144261360168, "learning_rate": 0.00019250000000000002, "loss": 0.3465, "step": 10 }, { "epoch": 0.24922118380062305, "grad_norm": 0.8195157647132874, "learning_rate": 0.00018416666666666665, "loss": 0.329, "step": 20 }, { "epoch": 0.37383177570093457, "grad_norm": 0.6960992813110352, "learning_rate": 0.00017583333333333334, "loss": 0.2746, "step": 30 }, { "epoch": 0.4984423676012461, "grad_norm": 0.6025754809379578, "learning_rate": 0.0001675, "loss": 0.311, "step": 40 }, { "epoch": 0.6230529595015576, "grad_norm": 0.8380621075630188, "learning_rate": 0.00015916666666666667, "loss": 0.324, "step": 50 }, { "epoch": 0.7476635514018691, "grad_norm": 0.7013344764709473, "learning_rate": 0.00015083333333333333, "loss": 0.3203, "step": 60 }, { "epoch": 0.8722741433021807, "grad_norm": 0.7946021556854248, "learning_rate": 0.00014250000000000002, "loss": 0.3048, "step": 70 }, { "epoch": 0.9968847352024922, "grad_norm": 0.7311714887619019, "learning_rate": 0.00013416666666666666, "loss": 0.2809, "step": 80 }, { "epoch": 1.1121495327102804, "grad_norm": 0.8304562568664551, "learning_rate": 0.00012583333333333335, "loss": 0.2608, "step": 90 }, { "epoch": 1.236760124610592, "grad_norm": 0.6632652878761292, "learning_rate": 0.00011750000000000001, "loss": 0.2839, "step": 100 }, { "epoch": 1.3613707165109035, "grad_norm": 0.6893765330314636, "learning_rate": 0.00010916666666666666, "loss": 0.2732, "step": 110 }, { "epoch": 1.485981308411215, "grad_norm": 0.7527514100074768, "learning_rate": 0.00010083333333333334, "loss": 0.2954, "step": 120 }, { "epoch": 1.6105919003115265, "grad_norm": 0.6240414977073669, "learning_rate": 9.250000000000001e-05, "loss": 0.2624, "step": 130 }, { "epoch": 1.735202492211838, "grad_norm": 0.7276539206504822, "learning_rate": 8.416666666666668e-05, "loss": 0.2713, "step": 140 }, { "epoch": 1.8598130841121496, "grad_norm": 0.7341501712799072, "learning_rate": 7.583333333333334e-05, "loss": 0.2519, "step": 150 }, { "epoch": 1.9844236760124612, "grad_norm": 0.8342993259429932, "learning_rate": 6.750000000000001e-05, "loss": 0.2686, "step": 160 }, { "epoch": 2.0996884735202492, "grad_norm": 0.6449198126792908, "learning_rate": 5.916666666666667e-05, "loss": 0.2368, "step": 170 }, { "epoch": 2.2242990654205608, "grad_norm": 0.5292518734931946, "learning_rate": 5.0833333333333333e-05, "loss": 0.2497, "step": 180 }, { "epoch": 2.3489096573208723, "grad_norm": 0.7724623084068298, "learning_rate": 4.25e-05, "loss": 0.2412, "step": 190 }, { "epoch": 2.473520249221184, "grad_norm": 0.7042115330696106, "learning_rate": 3.4166666666666666e-05, "loss": 0.2488, "step": 200 }, { "epoch": 2.5981308411214954, "grad_norm": 0.675959050655365, "learning_rate": 2.5833333333333336e-05, "loss": 0.2772, "step": 210 }, { "epoch": 2.722741433021807, "grad_norm": 0.6327322721481323, "learning_rate": 1.75e-05, "loss": 0.2684, "step": 220 }, { "epoch": 2.8473520249221185, "grad_norm": 0.4853314757347107, "learning_rate": 9.166666666666666e-06, "loss": 0.2581, "step": 230 }, { "epoch": 2.97196261682243, "grad_norm": 0.7433005571365356, "learning_rate": 8.333333333333333e-07, "loss": 0.2624, "step": 240 } ], "logging_steps": 10, "max_steps": 240, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 30, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.596380699384218e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }