TULU-3-SFT-LoRA / checkpoint-1000 /trainer_state.json
bnjmnmarie's picture
Upload folder using huggingface_hub
47bac12 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.1362654536202431,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003406636340506077,
"grad_norm": 0.8387678265571594,
"learning_rate": 5.4421768707483e-06,
"loss": 0.8629,
"step": 25
},
{
"epoch": 0.006813272681012154,
"grad_norm": 0.5580975413322449,
"learning_rate": 1.1111111111111112e-05,
"loss": 0.8351,
"step": 50
},
{
"epoch": 0.010219909021518231,
"grad_norm": 0.4770251214504242,
"learning_rate": 1.6780045351473924e-05,
"loss": 0.8023,
"step": 75
},
{
"epoch": 0.013626545362024308,
"grad_norm": 0.35437077283859253,
"learning_rate": 2.2448979591836737e-05,
"loss": 0.7808,
"step": 100
},
{
"epoch": 0.017033181702530386,
"grad_norm": 0.37850669026374817,
"learning_rate": 2.811791383219955e-05,
"loss": 0.7554,
"step": 125
},
{
"epoch": 0.020439818043036462,
"grad_norm": 0.40117064118385315,
"learning_rate": 3.378684807256236e-05,
"loss": 0.7419,
"step": 150
},
{
"epoch": 0.02384645438354254,
"grad_norm": 0.4868236780166626,
"learning_rate": 3.945578231292517e-05,
"loss": 0.751,
"step": 175
},
{
"epoch": 0.027253090724048617,
"grad_norm": 0.3966948390007019,
"learning_rate": 4.512471655328798e-05,
"loss": 0.7251,
"step": 200
},
{
"epoch": 0.030659727064554693,
"grad_norm": 0.3908109664916992,
"learning_rate": 5.0793650793650794e-05,
"loss": 0.7088,
"step": 225
},
{
"epoch": 0.03406636340506077,
"grad_norm": 0.3687989115715027,
"learning_rate": 5.646258503401361e-05,
"loss": 0.7115,
"step": 250
},
{
"epoch": 0.03747299974556685,
"grad_norm": 0.3919059634208679,
"learning_rate": 6.213151927437642e-05,
"loss": 0.7026,
"step": 275
},
{
"epoch": 0.040879636086072924,
"grad_norm": 0.42019009590148926,
"learning_rate": 6.780045351473924e-05,
"loss": 0.6967,
"step": 300
},
{
"epoch": 0.044286272426579,
"grad_norm": 0.4229620695114136,
"learning_rate": 7.346938775510205e-05,
"loss": 0.7148,
"step": 325
},
{
"epoch": 0.04769290876708508,
"grad_norm": 0.39575278759002686,
"learning_rate": 7.913832199546486e-05,
"loss": 0.744,
"step": 350
},
{
"epoch": 0.051099545107591154,
"grad_norm": 0.45732468366622925,
"learning_rate": 8.480725623582767e-05,
"loss": 0.7216,
"step": 375
},
{
"epoch": 0.054506181448097234,
"grad_norm": 0.3956912159919739,
"learning_rate": 9.047619047619048e-05,
"loss": 0.6953,
"step": 400
},
{
"epoch": 0.05791281778860331,
"grad_norm": 0.3948104679584503,
"learning_rate": 9.61451247165533e-05,
"loss": 0.7235,
"step": 425
},
{
"epoch": 0.061319454129109385,
"grad_norm": 0.3913336396217346,
"learning_rate": 9.99438004917457e-05,
"loss": 0.6676,
"step": 450
},
{
"epoch": 0.06472609046961547,
"grad_norm": 0.3908584713935852,
"learning_rate": 9.9768177028451e-05,
"loss": 0.7158,
"step": 475
},
{
"epoch": 0.06813272681012154,
"grad_norm": 0.4225063920021057,
"learning_rate": 9.959255356515631e-05,
"loss": 0.7129,
"step": 500
},
{
"epoch": 0.07153936315062762,
"grad_norm": 2.2388832569122314,
"learning_rate": 9.941693010186162e-05,
"loss": 0.7199,
"step": 525
},
{
"epoch": 0.0749459994911337,
"grad_norm": 0.39503997564315796,
"learning_rate": 9.924130663856692e-05,
"loss": 0.7298,
"step": 550
},
{
"epoch": 0.07835263583163977,
"grad_norm": 2.1647109985351562,
"learning_rate": 9.906568317527221e-05,
"loss": 0.7499,
"step": 575
},
{
"epoch": 0.08175927217214585,
"grad_norm": 0.36966434121131897,
"learning_rate": 9.889005971197752e-05,
"loss": 0.7212,
"step": 600
},
{
"epoch": 0.08516590851265193,
"grad_norm": 0.36990946531295776,
"learning_rate": 9.871443624868283e-05,
"loss": 0.7214,
"step": 625
},
{
"epoch": 0.088572544853158,
"grad_norm": 0.4158572852611542,
"learning_rate": 9.853881278538813e-05,
"loss": 0.6942,
"step": 650
},
{
"epoch": 0.09197918119366408,
"grad_norm": 0.3846476972103119,
"learning_rate": 9.836318932209344e-05,
"loss": 0.7218,
"step": 675
},
{
"epoch": 0.09538581753417016,
"grad_norm": 0.33537471294403076,
"learning_rate": 9.818756585879874e-05,
"loss": 0.7115,
"step": 700
},
{
"epoch": 0.09879245387467624,
"grad_norm": 0.3672342896461487,
"learning_rate": 9.801194239550405e-05,
"loss": 0.7225,
"step": 725
},
{
"epoch": 0.10219909021518231,
"grad_norm": 0.3498263955116272,
"learning_rate": 9.783631893220935e-05,
"loss": 0.7124,
"step": 750
},
{
"epoch": 0.1056057265556884,
"grad_norm": 0.3860284388065338,
"learning_rate": 9.766069546891466e-05,
"loss": 0.7018,
"step": 775
},
{
"epoch": 0.10901236289619447,
"grad_norm": 0.33633533120155334,
"learning_rate": 9.748507200561996e-05,
"loss": 0.6962,
"step": 800
},
{
"epoch": 0.11241899923670054,
"grad_norm": 0.3424709439277649,
"learning_rate": 9.730944854232526e-05,
"loss": 0.7068,
"step": 825
},
{
"epoch": 0.11582563557720663,
"grad_norm": 0.3627208173274994,
"learning_rate": 9.713382507903056e-05,
"loss": 0.6738,
"step": 850
},
{
"epoch": 0.1192322719177127,
"grad_norm": 0.3304712176322937,
"learning_rate": 9.695820161573587e-05,
"loss": 0.7004,
"step": 875
},
{
"epoch": 0.12263890825821877,
"grad_norm": 0.37575623393058777,
"learning_rate": 9.678257815244117e-05,
"loss": 0.711,
"step": 900
},
{
"epoch": 0.12604554459872486,
"grad_norm": 0.37238940596580505,
"learning_rate": 9.660695468914648e-05,
"loss": 0.7172,
"step": 925
},
{
"epoch": 0.12945218093923094,
"grad_norm": 0.39354655146598816,
"learning_rate": 9.643133122585177e-05,
"loss": 0.6949,
"step": 950
},
{
"epoch": 0.132858817279737,
"grad_norm": 0.34536346793174744,
"learning_rate": 9.625570776255708e-05,
"loss": 0.6871,
"step": 975
},
{
"epoch": 0.1362654536202431,
"grad_norm": 0.3518439829349518,
"learning_rate": 9.608008429926238e-05,
"loss": 0.7168,
"step": 1000
}
],
"logging_steps": 25,
"max_steps": 14676,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.2600066061911e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}