{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7194244604316546,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.007194244604316547,
      "eval_loss": 1.3505613803863525,
      "eval_runtime": 18.9626,
      "eval_samples_per_second": 12.34,
      "eval_steps_per_second": 1.582,
      "step": 1
    },
    {
      "epoch": 0.02158273381294964,
      "grad_norm": 7.126910209655762,
      "learning_rate": 1.5e-05,
      "loss": 5.4633,
      "step": 3
    },
    {
      "epoch": 0.04316546762589928,
      "grad_norm": 5.5504536628723145,
      "learning_rate": 3e-05,
      "loss": 5.4233,
      "step": 6
    },
    {
      "epoch": 0.06474820143884892,
      "grad_norm": 4.061417579650879,
      "learning_rate": 4.5e-05,
      "loss": 5.131,
      "step": 9
    },
    {
      "epoch": 0.06474820143884892,
      "eval_loss": 1.1897938251495361,
      "eval_runtime": 19.3146,
      "eval_samples_per_second": 12.115,
      "eval_steps_per_second": 1.553,
      "step": 9
    },
    {
      "epoch": 0.08633093525179857,
      "grad_norm": 2.9950151443481445,
      "learning_rate": 4.993910125649561e-05,
      "loss": 4.8732,
      "step": 12
    },
    {
      "epoch": 0.1079136690647482,
      "grad_norm": 3.6927051544189453,
      "learning_rate": 4.962019382530521e-05,
      "loss": 4.6017,
      "step": 15
    },
    {
      "epoch": 0.12949640287769784,
      "grad_norm": 3.0494635105133057,
      "learning_rate": 4.9031542398457974e-05,
      "loss": 4.6042,
      "step": 18
    },
    {
      "epoch": 0.12949640287769784,
      "eval_loss": 1.0919393301010132,
      "eval_runtime": 19.3623,
      "eval_samples_per_second": 12.085,
      "eval_steps_per_second": 1.549,
      "step": 18
    },
    {
      "epoch": 0.1510791366906475,
      "grad_norm": 2.4984405040740967,
      "learning_rate": 4.817959636416969e-05,
      "loss": 4.4278,
      "step": 21
    },
    {
      "epoch": 0.17266187050359713,
      "grad_norm": 2.3186981678009033,
      "learning_rate": 4.707368982147318e-05,
      "loss": 4.4248,
      "step": 24
    },
    {
      "epoch": 0.19424460431654678,
      "grad_norm": 2.286144733428955,
      "learning_rate": 4.572593931387604e-05,
      "loss": 4.4557,
      "step": 27
    },
    {
      "epoch": 0.19424460431654678,
      "eval_loss": 1.0573580265045166,
      "eval_runtime": 19.3833,
      "eval_samples_per_second": 12.072,
      "eval_steps_per_second": 1.548,
      "step": 27
    },
    {
      "epoch": 0.2158273381294964,
      "grad_norm": 2.0901005268096924,
      "learning_rate": 4.415111107797445e-05,
      "loss": 4.397,
      "step": 30
    },
    {
      "epoch": 0.23741007194244604,
      "grad_norm": 3.103538751602173,
      "learning_rate": 4.2366459261474933e-05,
      "loss": 4.2221,
      "step": 33
    },
    {
      "epoch": 0.2589928057553957,
      "grad_norm": 1.9494324922561646,
      "learning_rate": 4.039153688314145e-05,
      "loss": 4.3688,
      "step": 36
    },
    {
      "epoch": 0.2589928057553957,
      "eval_loss": 1.0416053533554077,
      "eval_runtime": 19.3673,
      "eval_samples_per_second": 12.082,
      "eval_steps_per_second": 1.549,
      "step": 36
    },
    {
      "epoch": 0.2805755395683453,
      "grad_norm": 2.292299270629883,
      "learning_rate": 3.824798160583012e-05,
      "loss": 4.446,
      "step": 39
    },
    {
      "epoch": 0.302158273381295,
      "grad_norm": 2.913973808288574,
      "learning_rate": 3.5959278669726935e-05,
      "loss": 4.0109,
      "step": 42
    },
    {
      "epoch": 0.3237410071942446,
      "grad_norm": 2.1814777851104736,
      "learning_rate": 3.355050358314172e-05,
      "loss": 4.2421,
      "step": 45
    },
    {
      "epoch": 0.3237410071942446,
      "eval_loss": 1.0296789407730103,
      "eval_runtime": 19.3399,
      "eval_samples_per_second": 12.099,
      "eval_steps_per_second": 1.551,
      "step": 45
    },
    {
      "epoch": 0.34532374100719426,
      "grad_norm": 2.3433644771575928,
      "learning_rate": 3.104804738999169e-05,
      "loss": 4.616,
      "step": 48
    },
    {
      "epoch": 0.3669064748201439,
      "grad_norm": 1.8417110443115234,
      "learning_rate": 2.8479327524001636e-05,
      "loss": 3.9965,
      "step": 51
    },
    {
      "epoch": 0.38848920863309355,
      "grad_norm": 2.258572578430176,
      "learning_rate": 2.587248741756253e-05,
      "loss": 4.2638,
      "step": 54
    },
    {
      "epoch": 0.38848920863309355,
      "eval_loss": 1.0222728252410889,
      "eval_runtime": 19.3615,
      "eval_samples_per_second": 12.086,
      "eval_steps_per_second": 1.549,
      "step": 54
    },
    {
      "epoch": 0.41007194244604317,
      "grad_norm": 2.020596504211426,
      "learning_rate": 2.3256088156396868e-05,
      "loss": 4.1487,
      "step": 57
    },
    {
      "epoch": 0.4316546762589928,
      "grad_norm": 1.7905324697494507,
      "learning_rate": 2.0658795558326743e-05,
      "loss": 4.1138,
      "step": 60
    },
    {
      "epoch": 0.45323741007194246,
      "grad_norm": 2.2661585807800293,
      "learning_rate": 1.8109066104575023e-05,
      "loss": 4.1022,
      "step": 63
    },
    {
      "epoch": 0.45323741007194246,
      "eval_loss": 1.0169346332550049,
      "eval_runtime": 19.3659,
      "eval_samples_per_second": 12.083,
      "eval_steps_per_second": 1.549,
      "step": 63
    },
    {
      "epoch": 0.4748201438848921,
      "grad_norm": 2.1625380516052246,
      "learning_rate": 1.56348351646022e-05,
      "loss": 4.3836,
      "step": 66
    },
    {
      "epoch": 0.49640287769784175,
      "grad_norm": 2.1454732418060303,
      "learning_rate": 1.3263210930352737e-05,
      "loss": 4.2899,
      "step": 69
    },
    {
      "epoch": 0.5179856115107914,
      "grad_norm": 2.406993865966797,
      "learning_rate": 1.1020177413231334e-05,
      "loss": 3.9958,
      "step": 72
    },
    {
      "epoch": 0.5179856115107914,
      "eval_loss": 1.0135488510131836,
      "eval_runtime": 19.3546,
      "eval_samples_per_second": 12.09,
      "eval_steps_per_second": 1.55,
      "step": 72
    },
    {
      "epoch": 0.539568345323741,
      "grad_norm": 2.6071054935455322,
      "learning_rate": 8.930309757836517e-06,
      "loss": 4.2177,
      "step": 75
    },
    {
      "epoch": 0.5611510791366906,
      "grad_norm": 2.0609230995178223,
      "learning_rate": 7.016504991533726e-06,
      "loss": 4.2697,
      "step": 78
    },
    {
      "epoch": 0.5827338129496403,
      "grad_norm": 2.143162250518799,
      "learning_rate": 5.299731159831953e-06,
      "loss": 4.122,
      "step": 81
    },
    {
      "epoch": 0.5827338129496403,
      "eval_loss": 1.011610507965088,
      "eval_runtime": 19.3757,
      "eval_samples_per_second": 12.077,
      "eval_steps_per_second": 1.548,
      "step": 81
    },
    {
      "epoch": 0.60431654676259,
      "grad_norm": 1.9092354774475098,
      "learning_rate": 3.798797596089351e-06,
      "loss": 3.9762,
      "step": 84
    },
    {
      "epoch": 0.6258992805755396,
      "grad_norm": 2.086115837097168,
      "learning_rate": 2.5301488425208296e-06,
      "loss": 4.1775,
      "step": 87
    },
    {
      "epoch": 0.6474820143884892,
      "grad_norm": 2.177617311477661,
      "learning_rate": 1.5076844803522922e-06,
      "loss": 4.2366,
      "step": 90
    },
    {
      "epoch": 0.6474820143884892,
      "eval_loss": 1.0106626749038696,
      "eval_runtime": 19.3765,
      "eval_samples_per_second": 12.076,
      "eval_steps_per_second": 1.548,
      "step": 90
    },
    {
      "epoch": 0.6690647482014388,
      "grad_norm": 2.176861047744751,
      "learning_rate": 7.426068431000882e-07,
      "loss": 4.1401,
      "step": 93
    },
    {
      "epoch": 0.6906474820143885,
      "grad_norm": 1.9706419706344604,
      "learning_rate": 2.4329828146074095e-07,
      "loss": 3.9663,
      "step": 96
    },
    {
      "epoch": 0.7122302158273381,
      "grad_norm": 2.0750808715820312,
      "learning_rate": 1.522932452260595e-08,
      "loss": 4.1725,
      "step": 99
    },
    {
      "epoch": 0.7122302158273381,
      "eval_loss": 1.0105128288269043,
      "eval_runtime": 19.3722,
      "eval_samples_per_second": 12.079,
      "eval_steps_per_second": 1.549,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.708604681795666e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}