|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.1362654536202431, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003406636340506077, |
|
"grad_norm": 0.8387678265571594, |
|
"learning_rate": 5.4421768707483e-06, |
|
"loss": 0.8629, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.006813272681012154, |
|
"grad_norm": 0.5580975413322449, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.8351, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.010219909021518231, |
|
"grad_norm": 0.4770251214504242, |
|
"learning_rate": 1.6780045351473924e-05, |
|
"loss": 0.8023, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.013626545362024308, |
|
"grad_norm": 0.35437077283859253, |
|
"learning_rate": 2.2448979591836737e-05, |
|
"loss": 0.7808, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.017033181702530386, |
|
"grad_norm": 0.37850669026374817, |
|
"learning_rate": 2.811791383219955e-05, |
|
"loss": 0.7554, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.020439818043036462, |
|
"grad_norm": 0.40117064118385315, |
|
"learning_rate": 3.378684807256236e-05, |
|
"loss": 0.7419, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02384645438354254, |
|
"grad_norm": 0.4868236780166626, |
|
"learning_rate": 3.945578231292517e-05, |
|
"loss": 0.751, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.027253090724048617, |
|
"grad_norm": 0.3966948390007019, |
|
"learning_rate": 4.512471655328798e-05, |
|
"loss": 0.7251, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.030659727064554693, |
|
"grad_norm": 0.3908109664916992, |
|
"learning_rate": 5.0793650793650794e-05, |
|
"loss": 0.7088, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.03406636340506077, |
|
"grad_norm": 0.3687989115715027, |
|
"learning_rate": 5.646258503401361e-05, |
|
"loss": 0.7115, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.03747299974556685, |
|
"grad_norm": 0.3919059634208679, |
|
"learning_rate": 6.213151927437642e-05, |
|
"loss": 0.7026, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.040879636086072924, |
|
"grad_norm": 0.42019009590148926, |
|
"learning_rate": 6.780045351473924e-05, |
|
"loss": 0.6967, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.044286272426579, |
|
"grad_norm": 0.4229620695114136, |
|
"learning_rate": 7.346938775510205e-05, |
|
"loss": 0.7148, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.04769290876708508, |
|
"grad_norm": 0.39575278759002686, |
|
"learning_rate": 7.913832199546486e-05, |
|
"loss": 0.744, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.051099545107591154, |
|
"grad_norm": 0.45732468366622925, |
|
"learning_rate": 8.480725623582767e-05, |
|
"loss": 0.7216, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.054506181448097234, |
|
"grad_norm": 0.3956912159919739, |
|
"learning_rate": 9.047619047619048e-05, |
|
"loss": 0.6953, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.05791281778860331, |
|
"grad_norm": 0.3948104679584503, |
|
"learning_rate": 9.61451247165533e-05, |
|
"loss": 0.7235, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.061319454129109385, |
|
"grad_norm": 0.3913336396217346, |
|
"learning_rate": 9.99438004917457e-05, |
|
"loss": 0.6676, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.06472609046961547, |
|
"grad_norm": 0.3908584713935852, |
|
"learning_rate": 9.9768177028451e-05, |
|
"loss": 0.7158, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.06813272681012154, |
|
"grad_norm": 0.4225063920021057, |
|
"learning_rate": 9.959255356515631e-05, |
|
"loss": 0.7129, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07153936315062762, |
|
"grad_norm": 2.2388832569122314, |
|
"learning_rate": 9.941693010186162e-05, |
|
"loss": 0.7199, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.0749459994911337, |
|
"grad_norm": 0.39503997564315796, |
|
"learning_rate": 9.924130663856692e-05, |
|
"loss": 0.7298, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.07835263583163977, |
|
"grad_norm": 2.1647109985351562, |
|
"learning_rate": 9.906568317527221e-05, |
|
"loss": 0.7499, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.08175927217214585, |
|
"grad_norm": 0.36966434121131897, |
|
"learning_rate": 9.889005971197752e-05, |
|
"loss": 0.7212, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.08516590851265193, |
|
"grad_norm": 0.36990946531295776, |
|
"learning_rate": 9.871443624868283e-05, |
|
"loss": 0.7214, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.088572544853158, |
|
"grad_norm": 0.4158572852611542, |
|
"learning_rate": 9.853881278538813e-05, |
|
"loss": 0.6942, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.09197918119366408, |
|
"grad_norm": 0.3846476972103119, |
|
"learning_rate": 9.836318932209344e-05, |
|
"loss": 0.7218, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.09538581753417016, |
|
"grad_norm": 0.33537471294403076, |
|
"learning_rate": 9.818756585879874e-05, |
|
"loss": 0.7115, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.09879245387467624, |
|
"grad_norm": 0.3672342896461487, |
|
"learning_rate": 9.801194239550405e-05, |
|
"loss": 0.7225, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.10219909021518231, |
|
"grad_norm": 0.3498263955116272, |
|
"learning_rate": 9.783631893220935e-05, |
|
"loss": 0.7124, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.1056057265556884, |
|
"grad_norm": 0.3860284388065338, |
|
"learning_rate": 9.766069546891466e-05, |
|
"loss": 0.7018, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.10901236289619447, |
|
"grad_norm": 0.33633533120155334, |
|
"learning_rate": 9.748507200561996e-05, |
|
"loss": 0.6962, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.11241899923670054, |
|
"grad_norm": 0.3424709439277649, |
|
"learning_rate": 9.730944854232526e-05, |
|
"loss": 0.7068, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.11582563557720663, |
|
"grad_norm": 0.3627208173274994, |
|
"learning_rate": 9.713382507903056e-05, |
|
"loss": 0.6738, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1192322719177127, |
|
"grad_norm": 0.3304712176322937, |
|
"learning_rate": 9.695820161573587e-05, |
|
"loss": 0.7004, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.12263890825821877, |
|
"grad_norm": 0.37575623393058777, |
|
"learning_rate": 9.678257815244117e-05, |
|
"loss": 0.711, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.12604554459872486, |
|
"grad_norm": 0.37238940596580505, |
|
"learning_rate": 9.660695468914648e-05, |
|
"loss": 0.7172, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.12945218093923094, |
|
"grad_norm": 0.39354655146598816, |
|
"learning_rate": 9.643133122585177e-05, |
|
"loss": 0.6949, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.132858817279737, |
|
"grad_norm": 0.34536346793174744, |
|
"learning_rate": 9.625570776255708e-05, |
|
"loss": 0.6871, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.1362654536202431, |
|
"grad_norm": 0.3518439829349518, |
|
"learning_rate": 9.608008429926238e-05, |
|
"loss": 0.7168, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 14676, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.2600066061911e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|