{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1362654536202431, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003406636340506077, "grad_norm": 0.8387678265571594, "learning_rate": 5.4421768707483e-06, "loss": 0.8629, "step": 25 }, { "epoch": 0.006813272681012154, "grad_norm": 0.5580975413322449, "learning_rate": 1.1111111111111112e-05, "loss": 0.8351, "step": 50 }, { "epoch": 0.010219909021518231, "grad_norm": 0.4770251214504242, "learning_rate": 1.6780045351473924e-05, "loss": 0.8023, "step": 75 }, { "epoch": 0.013626545362024308, "grad_norm": 0.35437077283859253, "learning_rate": 2.2448979591836737e-05, "loss": 0.7808, "step": 100 }, { "epoch": 0.017033181702530386, "grad_norm": 0.37850669026374817, "learning_rate": 2.811791383219955e-05, "loss": 0.7554, "step": 125 }, { "epoch": 0.020439818043036462, "grad_norm": 0.40117064118385315, "learning_rate": 3.378684807256236e-05, "loss": 0.7419, "step": 150 }, { "epoch": 0.02384645438354254, "grad_norm": 0.4868236780166626, "learning_rate": 3.945578231292517e-05, "loss": 0.751, "step": 175 }, { "epoch": 0.027253090724048617, "grad_norm": 0.3966948390007019, "learning_rate": 4.512471655328798e-05, "loss": 0.7251, "step": 200 }, { "epoch": 0.030659727064554693, "grad_norm": 0.3908109664916992, "learning_rate": 5.0793650793650794e-05, "loss": 0.7088, "step": 225 }, { "epoch": 0.03406636340506077, "grad_norm": 0.3687989115715027, "learning_rate": 5.646258503401361e-05, "loss": 0.7115, "step": 250 }, { "epoch": 0.03747299974556685, "grad_norm": 0.3919059634208679, "learning_rate": 6.213151927437642e-05, "loss": 0.7026, "step": 275 }, { "epoch": 0.040879636086072924, "grad_norm": 0.42019009590148926, "learning_rate": 6.780045351473924e-05, "loss": 0.6967, "step": 300 }, { "epoch": 0.044286272426579, "grad_norm": 0.4229620695114136, "learning_rate": 7.346938775510205e-05, "loss": 0.7148, "step": 325 }, { "epoch": 0.04769290876708508, "grad_norm": 0.39575278759002686, "learning_rate": 7.913832199546486e-05, "loss": 0.744, "step": 350 }, { "epoch": 0.051099545107591154, "grad_norm": 0.45732468366622925, "learning_rate": 8.480725623582767e-05, "loss": 0.7216, "step": 375 }, { "epoch": 0.054506181448097234, "grad_norm": 0.3956912159919739, "learning_rate": 9.047619047619048e-05, "loss": 0.6953, "step": 400 }, { "epoch": 0.05791281778860331, "grad_norm": 0.3948104679584503, "learning_rate": 9.61451247165533e-05, "loss": 0.7235, "step": 425 }, { "epoch": 0.061319454129109385, "grad_norm": 0.3913336396217346, "learning_rate": 9.99438004917457e-05, "loss": 0.6676, "step": 450 }, { "epoch": 0.06472609046961547, "grad_norm": 0.3908584713935852, "learning_rate": 9.9768177028451e-05, "loss": 0.7158, "step": 475 }, { "epoch": 0.06813272681012154, "grad_norm": 0.4225063920021057, "learning_rate": 9.959255356515631e-05, "loss": 0.7129, "step": 500 }, { "epoch": 0.07153936315062762, "grad_norm": 2.2388832569122314, "learning_rate": 9.941693010186162e-05, "loss": 0.7199, "step": 525 }, { "epoch": 0.0749459994911337, "grad_norm": 0.39503997564315796, "learning_rate": 9.924130663856692e-05, "loss": 0.7298, "step": 550 }, { "epoch": 0.07835263583163977, "grad_norm": 2.1647109985351562, "learning_rate": 9.906568317527221e-05, "loss": 0.7499, "step": 575 }, { "epoch": 0.08175927217214585, "grad_norm": 0.36966434121131897, "learning_rate": 9.889005971197752e-05, "loss": 0.7212, "step": 600 }, { "epoch": 
0.08516590851265193, "grad_norm": 0.36990946531295776, "learning_rate": 9.871443624868283e-05, "loss": 0.7214, "step": 625 }, { "epoch": 0.088572544853158, "grad_norm": 0.4158572852611542, "learning_rate": 9.853881278538813e-05, "loss": 0.6942, "step": 650 }, { "epoch": 0.09197918119366408, "grad_norm": 0.3846476972103119, "learning_rate": 9.836318932209344e-05, "loss": 0.7218, "step": 675 }, { "epoch": 0.09538581753417016, "grad_norm": 0.33537471294403076, "learning_rate": 9.818756585879874e-05, "loss": 0.7115, "step": 700 }, { "epoch": 0.09879245387467624, "grad_norm": 0.3672342896461487, "learning_rate": 9.801194239550405e-05, "loss": 0.7225, "step": 725 }, { "epoch": 0.10219909021518231, "grad_norm": 0.3498263955116272, "learning_rate": 9.783631893220935e-05, "loss": 0.7124, "step": 750 }, { "epoch": 0.1056057265556884, "grad_norm": 0.3860284388065338, "learning_rate": 9.766069546891466e-05, "loss": 0.7018, "step": 775 }, { "epoch": 0.10901236289619447, "grad_norm": 0.33633533120155334, "learning_rate": 9.748507200561996e-05, "loss": 0.6962, "step": 800 }, { "epoch": 0.11241899923670054, "grad_norm": 0.3424709439277649, "learning_rate": 9.730944854232526e-05, "loss": 0.7068, "step": 825 }, { "epoch": 0.11582563557720663, "grad_norm": 0.3627208173274994, "learning_rate": 9.713382507903056e-05, "loss": 0.6738, "step": 850 }, { "epoch": 0.1192322719177127, "grad_norm": 0.3304712176322937, "learning_rate": 9.695820161573587e-05, "loss": 0.7004, "step": 875 }, { "epoch": 0.12263890825821877, "grad_norm": 0.37575623393058777, "learning_rate": 9.678257815244117e-05, "loss": 0.711, "step": 900 }, { "epoch": 0.12604554459872486, "grad_norm": 0.37238940596580505, "learning_rate": 9.660695468914648e-05, "loss": 0.7172, "step": 925 }, { "epoch": 0.12945218093923094, "grad_norm": 0.39354655146598816, "learning_rate": 9.643133122585177e-05, "loss": 0.6949, "step": 950 }, { "epoch": 0.132858817279737, "grad_norm": 0.34536346793174744, "learning_rate": 9.625570776255708e-05, "loss": 0.6871, "step": 975 }, { "epoch": 0.1362654536202431, "grad_norm": 0.3518439829349518, "learning_rate": 9.608008429926238e-05, "loss": 0.7168, "step": 1000 } ], "logging_steps": 25, "max_steps": 14676, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.2600066061911e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }