|
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 3564,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11231222799382283,
      "grad_norm": 0.39166563749313354,
      "learning_rate": 1.98e-05,
      "loss": 1.4955,
      "step": 100
    },
    {
      "epoch": 0.22462445598764566,
      "grad_norm": 0.4607338309288025,
      "learning_rate": 1.9717948717948722e-05,
      "loss": 1.2752,
      "step": 200
    },
    {
      "epoch": 0.3369366839814685,
      "grad_norm": 0.7553314566612244,
      "learning_rate": 1.9433048433048433e-05,
      "loss": 1.232,
      "step": 300
    },
    {
      "epoch": 0.4492489119752913,
      "grad_norm": 0.8646947145462036,
      "learning_rate": 1.9148148148148148e-05,
      "loss": 1.2408,
      "step": 400
    },
    {
      "epoch": 0.5615611399691142,
      "grad_norm": 0.7261890769004822,
      "learning_rate": 1.8863247863247863e-05,
      "loss": 1.2141,
      "step": 500
    },
    {
      "epoch": 0.673873367962937,
      "grad_norm": 0.8889899253845215,
      "learning_rate": 1.857834757834758e-05,
      "loss": 1.1893,
      "step": 600
    },
    {
      "epoch": 0.7861855959567597,
      "grad_norm": 1.0335707664489746,
      "learning_rate": 1.8293447293447296e-05,
      "loss": 1.2119,
      "step": 700
    },
    {
      "epoch": 0.8984978239505826,
      "grad_norm": 1.185014009475708,
      "learning_rate": 1.800854700854701e-05,
      "loss": 1.1926,
      "step": 800
    },
    {
      "epoch": 1.010108100519444,
      "grad_norm": 1.1347686052322388,
      "learning_rate": 1.7723646723646725e-05,
      "loss": 1.1979,
      "step": 900
    },
    {
      "epoch": 1.1224203285132668,
      "grad_norm": 1.22648286819458,
      "learning_rate": 1.743874643874644e-05,
      "loss": 1.1476,
      "step": 1000
    },
    {
      "epoch": 1.2347325565070897,
      "grad_norm": 1.205437183380127,
      "learning_rate": 1.7153846153846155e-05,
      "loss": 1.1175,
      "step": 1100
    },
    {
      "epoch": 1.3470447845009126,
      "grad_norm": 1.667339563369751,
      "learning_rate": 1.686894586894587e-05,
      "loss": 1.1291,
      "step": 1200
    },
    {
      "epoch": 1.4593570124947353,
      "grad_norm": 1.311352252960205,
      "learning_rate": 1.6584045584045584e-05,
      "loss": 1.1495,
      "step": 1300
    },
    {
      "epoch": 1.5716692404885582,
      "grad_norm": 1.574444055557251,
      "learning_rate": 1.62991452991453e-05,
      "loss": 1.1195,
      "step": 1400
    },
    {
      "epoch": 1.683981468482381,
      "grad_norm": 1.5072523355484009,
      "learning_rate": 1.6014245014245017e-05,
      "loss": 1.1423,
      "step": 1500
    },
    {
      "epoch": 1.7962936964762037,
      "grad_norm": 1.8808943033218384,
      "learning_rate": 1.572934472934473e-05,
      "loss": 1.1365,
      "step": 1600
    },
    {
      "epoch": 1.9086059244700266,
      "grad_norm": 1.6016888618469238,
      "learning_rate": 1.5444444444444446e-05,
      "loss": 1.1399,
      "step": 1700
    },
    {
      "epoch": 2.020216201038888,
      "grad_norm": 1.48503839969635,
      "learning_rate": 1.5159544159544161e-05,
      "loss": 1.1265,
      "step": 1800
    },
    {
      "epoch": 2.132528429032711,
      "grad_norm": 1.6783254146575928,
      "learning_rate": 1.4874643874643874e-05,
      "loss": 1.0324,
      "step": 1900
    },
    {
      "epoch": 2.2448406570265336,
      "grad_norm": 1.9262216091156006,
      "learning_rate": 1.458974358974359e-05,
      "loss": 1.0686,
      "step": 2000
    },
    {
      "epoch": 2.3571528850203567,
      "grad_norm": 2.051445960998535,
      "learning_rate": 1.4304843304843305e-05,
      "loss": 1.0045,
      "step": 2100
    },
    {
      "epoch": 2.4694651130141794,
      "grad_norm": 2.8972795009613037,
      "learning_rate": 1.401994301994302e-05,
      "loss": 1.0181,
      "step": 2200
    },
    {
      "epoch": 2.581777341008002,
      "grad_norm": 1.876657247543335,
      "learning_rate": 1.3735042735042737e-05,
      "loss": 1.065,
      "step": 2300
    },
    {
      "epoch": 2.694089569001825,
      "grad_norm": 2.7444064617156982,
      "learning_rate": 1.3450142450142451e-05,
      "loss": 1.0346,
      "step": 2400
    },
    {
      "epoch": 2.806401796995648,
      "grad_norm": 2.054774761199951,
      "learning_rate": 1.3165242165242168e-05,
      "loss": 1.059,
      "step": 2500
    },
    {
      "epoch": 2.9187140249894705,
      "grad_norm": 2.7013003826141357,
      "learning_rate": 1.2880341880341882e-05,
      "loss": 1.0289,
      "step": 2600
    },
    {
      "epoch": 3.0303243015583323,
      "grad_norm": 2.9840266704559326,
      "learning_rate": 1.2595441595441595e-05,
      "loss": 1.0349,
      "step": 2700
    },
    {
      "epoch": 3.142636529552155,
      "grad_norm": 2.664555549621582,
      "learning_rate": 1.231054131054131e-05,
      "loss": 0.9633,
      "step": 2800
    },
    {
      "epoch": 3.2549487575459777,
      "grad_norm": 2.709664821624756,
      "learning_rate": 1.2025641025641027e-05,
      "loss": 0.9699,
      "step": 2900
    },
    {
      "epoch": 3.367260985539801,
      "grad_norm": 3.7343850135803223,
      "learning_rate": 1.1740740740740741e-05,
      "loss": 0.9587,
      "step": 3000
    },
    {
      "epoch": 3.4795732135336235,
      "grad_norm": 2.5082879066467285,
      "learning_rate": 1.1455840455840456e-05,
      "loss": 0.9365,
      "step": 3100
    },
    {
      "epoch": 3.591885441527446,
      "grad_norm": 2.3327536582946777,
      "learning_rate": 1.1170940170940173e-05,
      "loss": 0.9446,
      "step": 3200
    },
    {
      "epoch": 3.7041976695212693,
      "grad_norm": 3.731090784072876,
      "learning_rate": 1.0886039886039887e-05,
      "loss": 0.9709,
      "step": 3300
    },
    {
      "epoch": 3.816509897515092,
      "grad_norm": 2.961172103881836,
      "learning_rate": 1.0601139601139602e-05,
      "loss": 0.9459,
      "step": 3400
    },
    {
      "epoch": 3.9288221255089146,
      "grad_norm": 4.010087013244629,
      "learning_rate": 1.0316239316239318e-05,
      "loss": 0.9509,
      "step": 3500
    }
  ],
  "logging_steps": 100,
  "max_steps": 7120,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.5973626313699164e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|