|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.15687043482523652, |
|
"eval_steps": 9, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0015687043482523653, |
|
"eval_loss": 1.8242242336273193, |
|
"eval_runtime": 142.2157, |
|
"eval_samples_per_second": 15.104, |
|
"eval_steps_per_second": 7.552, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004706113044757096, |
|
"grad_norm": 0.7702743411064148, |
|
"learning_rate": 3e-05, |
|
"loss": 1.8967, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.009412226089514192, |
|
"grad_norm": 0.7187179327011108, |
|
"learning_rate": 6e-05, |
|
"loss": 1.7129, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.014118339134271287, |
|
"grad_norm": 0.5630563497543335, |
|
"learning_rate": 9e-05, |
|
"loss": 1.6705, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.014118339134271287, |
|
"eval_loss": 1.6580013036727905, |
|
"eval_runtime": 137.3695, |
|
"eval_samples_per_second": 15.637, |
|
"eval_steps_per_second": 7.818, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.018824452179028384, |
|
"grad_norm": 0.6352736353874207, |
|
"learning_rate": 0.00012, |
|
"loss": 1.6757, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02353056522378548, |
|
"grad_norm": 0.5756219029426575, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 1.6138, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.028236678268542575, |
|
"grad_norm": 0.5083643794059753, |
|
"learning_rate": 0.00018, |
|
"loss": 1.6546, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.028236678268542575, |
|
"eval_loss": 1.5876401662826538, |
|
"eval_runtime": 145.4617, |
|
"eval_samples_per_second": 14.767, |
|
"eval_steps_per_second": 7.383, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03294279131329967, |
|
"grad_norm": 0.4145301878452301, |
|
"learning_rate": 0.0001999229036240723, |
|
"loss": 1.5349, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03764890435805677, |
|
"grad_norm": 0.42459216713905334, |
|
"learning_rate": 0.00019876883405951377, |
|
"loss": 1.5664, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.04235501740281386, |
|
"grad_norm": 0.4329008460044861, |
|
"learning_rate": 0.00019624552364536473, |
|
"loss": 1.5261, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04235501740281386, |
|
"eval_loss": 1.5502554178237915, |
|
"eval_runtime": 148.1658, |
|
"eval_samples_per_second": 14.497, |
|
"eval_steps_per_second": 7.249, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04706113044757096, |
|
"grad_norm": 0.4267719089984894, |
|
"learning_rate": 0.0001923879532511287, |
|
"loss": 1.6148, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05176724349232806, |
|
"grad_norm": 0.4181731641292572, |
|
"learning_rate": 0.00018724960070727972, |
|
"loss": 1.5572, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.05647335653708515, |
|
"grad_norm": 0.4355478584766388, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 1.4369, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05647335653708515, |
|
"eval_loss": 1.531390905380249, |
|
"eval_runtime": 137.4641, |
|
"eval_samples_per_second": 15.626, |
|
"eval_steps_per_second": 7.813, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.06117946958184225, |
|
"grad_norm": 0.4404115080833435, |
|
"learning_rate": 0.00017343225094356855, |
|
"loss": 1.515, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.06588558262659934, |
|
"grad_norm": 0.39275091886520386, |
|
"learning_rate": 0.00016494480483301836, |
|
"loss": 1.5592, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.07059169567135644, |
|
"grad_norm": 0.37436187267303467, |
|
"learning_rate": 0.00015555702330196023, |
|
"loss": 1.5643, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.07059169567135644, |
|
"eval_loss": 1.515994906425476, |
|
"eval_runtime": 137.4794, |
|
"eval_samples_per_second": 15.624, |
|
"eval_steps_per_second": 7.812, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.07529780871611354, |
|
"grad_norm": 0.39492252469062805, |
|
"learning_rate": 0.00014539904997395468, |
|
"loss": 1.5264, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.08000392176087064, |
|
"grad_norm": 0.39957186579704285, |
|
"learning_rate": 0.0001346117057077493, |
|
"loss": 1.5242, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.08471003480562772, |
|
"grad_norm": 0.3886667490005493, |
|
"learning_rate": 0.00012334453638559057, |
|
"loss": 1.4754, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.08471003480562772, |
|
"eval_loss": 1.5080097913742065, |
|
"eval_runtime": 148.2053, |
|
"eval_samples_per_second": 14.493, |
|
"eval_steps_per_second": 7.247, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.08941614785038482, |
|
"grad_norm": 0.3844744861125946, |
|
"learning_rate": 0.00011175373974578378, |
|
"loss": 1.4846, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.09412226089514192, |
|
"grad_norm": 0.37326759099960327, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4674, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09882837393989902, |
|
"grad_norm": 0.3983359634876251, |
|
"learning_rate": 8.824626025421626e-05, |
|
"loss": 1.4584, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.09882837393989902, |
|
"eval_loss": 1.5008231401443481, |
|
"eval_runtime": 148.4011, |
|
"eval_samples_per_second": 14.474, |
|
"eval_steps_per_second": 7.237, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.10353448698465612, |
|
"grad_norm": 0.3726574778556824, |
|
"learning_rate": 7.66554636144095e-05, |
|
"loss": 1.4221, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.1082406000294132, |
|
"grad_norm": 0.3979525566101074, |
|
"learning_rate": 6.538829429225069e-05, |
|
"loss": 1.5156, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.1129467130741703, |
|
"grad_norm": 0.3521616756916046, |
|
"learning_rate": 5.4600950026045326e-05, |
|
"loss": 1.4337, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1129467130741703, |
|
"eval_loss": 1.4966624975204468, |
|
"eval_runtime": 137.8159, |
|
"eval_samples_per_second": 15.586, |
|
"eval_steps_per_second": 7.793, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1176528261189274, |
|
"grad_norm": 0.3657877743244171, |
|
"learning_rate": 4.444297669803981e-05, |
|
"loss": 1.5035, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1223589391636845, |
|
"grad_norm": 0.3593926727771759, |
|
"learning_rate": 3.5055195166981645e-05, |
|
"loss": 1.4785, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.12706505220844158, |
|
"grad_norm": 0.37910911440849304, |
|
"learning_rate": 2.6567749056431467e-05, |
|
"loss": 1.5454, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.12706505220844158, |
|
"eval_loss": 1.4934673309326172, |
|
"eval_runtime": 137.7475, |
|
"eval_samples_per_second": 15.594, |
|
"eval_steps_per_second": 7.797, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.13177116525319868, |
|
"grad_norm": 0.38638541102409363, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 1.4935, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.13647727829795578, |
|
"grad_norm": 0.3478032350540161, |
|
"learning_rate": 1.2750399292720283e-05, |
|
"loss": 1.4452, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.14118339134271288, |
|
"grad_norm": 0.34858161211013794, |
|
"learning_rate": 7.612046748871327e-06, |
|
"loss": 1.4752, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14118339134271288, |
|
"eval_loss": 1.492301106452942, |
|
"eval_runtime": 148.3193, |
|
"eval_samples_per_second": 14.482, |
|
"eval_steps_per_second": 7.241, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14588950438746998, |
|
"grad_norm": 0.37905701994895935, |
|
"learning_rate": 3.7544763546352834e-06, |
|
"loss": 1.5333, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.15059561743222707, |
|
"grad_norm": 0.37757179141044617, |
|
"learning_rate": 1.231165940486234e-06, |
|
"loss": 1.4955, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.15530173047698417, |
|
"grad_norm": 0.3686583936214447, |
|
"learning_rate": 7.709637592770991e-08, |
|
"loss": 1.4875, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.15530173047698417, |
|
"eval_loss": 1.4919251203536987, |
|
"eval_runtime": 145.2178, |
|
"eval_samples_per_second": 14.792, |
|
"eval_steps_per_second": 7.396, |
|
"step": 99 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 9, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.589828125877862e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|