|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 14.0, |
|
"eval_steps": 500, |
|
"global_step": 323078, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.1666594444685182e-09, |
|
"loss": 0.0003, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5.000649997833341e-06, |
|
"loss": 0.0017, |
|
"step": 2308 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.0001299995666681e-05, |
|
"loss": 0.0017, |
|
"step": 4616 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.5001949993500023e-05, |
|
"loss": 0.0016, |
|
"step": 6924 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0002599991333363e-05, |
|
"loss": 0.0016, |
|
"step": 9232 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5003249989166704e-05, |
|
"loss": 0.0016, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.0003899987000046e-05, |
|
"loss": 0.0015, |
|
"step": 13848 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.5004549984833384e-05, |
|
"loss": 0.0016, |
|
"step": 16156 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.0005199982666725e-05, |
|
"loss": 0.0017, |
|
"step": 18464 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.500584998050007e-05, |
|
"loss": 0.0017, |
|
"step": 20772 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.00893497932702303, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 18.8658, |
|
"eval_samples_per_second": 13.676, |
|
"eval_steps_per_second": 0.954, |
|
"step": 23077 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.000649997833341e-05, |
|
"loss": 0.0018, |
|
"step": 23080 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.500714997616675e-05, |
|
"loss": 0.0017, |
|
"step": 25388 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.000779997400009e-05, |
|
"loss": 0.0017, |
|
"step": 27696 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.500844997183343e-05, |
|
"loss": 0.0017, |
|
"step": 30004 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 7.000909996966677e-05, |
|
"loss": 0.0017, |
|
"step": 32312 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.50097499675001e-05, |
|
"loss": 0.0017, |
|
"step": 34620 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.001039996533345e-05, |
|
"loss": 0.0018, |
|
"step": 36928 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.501104996316679e-05, |
|
"loss": 0.0017, |
|
"step": 39236 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.001169996100013e-05, |
|
"loss": 0.0018, |
|
"step": 41544 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.501234995883348e-05, |
|
"loss": 0.0019, |
|
"step": 43852 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.008421082980930805, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 18.622, |
|
"eval_samples_per_second": 13.855, |
|
"eval_steps_per_second": 0.967, |
|
"step": 46154 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.999855556037036e-05, |
|
"loss": 0.0018, |
|
"step": 46160 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.944292778283332e-05, |
|
"loss": 0.0019, |
|
"step": 48468 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 9.888730000529628e-05, |
|
"loss": 0.0018, |
|
"step": 50776 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 9.833167222775925e-05, |
|
"loss": 0.0019, |
|
"step": 53084 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.777604445022221e-05, |
|
"loss": 0.0021, |
|
"step": 55392 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.722041667268516e-05, |
|
"loss": 0.0021, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.666478889514812e-05, |
|
"loss": 0.0021, |
|
"step": 60008 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.610916111761109e-05, |
|
"loss": 0.002, |
|
"step": 62316 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 9.555353334007405e-05, |
|
"loss": 0.0022, |
|
"step": 64624 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.499790556253701e-05, |
|
"loss": 0.002, |
|
"step": 66932 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.008709550835192204, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 17.0366, |
|
"eval_samples_per_second": 15.144, |
|
"eval_steps_per_second": 1.057, |
|
"step": 69231 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.444227778499997e-05, |
|
"loss": 0.0022, |
|
"step": 69240 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.388665000746294e-05, |
|
"loss": 0.0018, |
|
"step": 71548 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 9.33310222299259e-05, |
|
"loss": 0.0019, |
|
"step": 73856 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 9.277539445238886e-05, |
|
"loss": 0.0019, |
|
"step": 76164 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 9.221976667485182e-05, |
|
"loss": 0.0019, |
|
"step": 78472 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 9.166413889731479e-05, |
|
"loss": 0.0018, |
|
"step": 80780 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.110851111977775e-05, |
|
"loss": 0.002, |
|
"step": 83088 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 9.055288334224071e-05, |
|
"loss": 0.0019, |
|
"step": 85396 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 8.999725556470368e-05, |
|
"loss": 0.002, |
|
"step": 87704 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 8.944162778716664e-05, |
|
"loss": 0.0021, |
|
"step": 90012 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.011967115104198456, |
|
"eval_max_distance": 4, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 18.1135, |
|
"eval_samples_per_second": 14.243, |
|
"eval_steps_per_second": 0.994, |
|
"step": 92308 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.88860000096296e-05, |
|
"loss": 0.0021, |
|
"step": 92320 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 8.833037223209256e-05, |
|
"loss": 0.0018, |
|
"step": 94628 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 8.777474445455553e-05, |
|
"loss": 0.0018, |
|
"step": 96936 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 8.721911667701849e-05, |
|
"loss": 0.0018, |
|
"step": 99244 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 8.666348889948145e-05, |
|
"loss": 0.0018, |
|
"step": 101552 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 8.610786112194442e-05, |
|
"loss": 0.0017, |
|
"step": 103860 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.555223334440738e-05, |
|
"loss": 0.0019, |
|
"step": 106168 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 8.499660556687034e-05, |
|
"loss": 0.0019, |
|
"step": 108476 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 8.44409777893333e-05, |
|
"loss": 0.002, |
|
"step": 110784 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 8.388535001179625e-05, |
|
"loss": 0.0019, |
|
"step": 113092 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.010002830997109413, |
|
"eval_max_distance": 4, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 17.3823, |
|
"eval_samples_per_second": 14.843, |
|
"eval_steps_per_second": 1.036, |
|
"step": 115385 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 8.332972223425922e-05, |
|
"loss": 0.0019, |
|
"step": 115400 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 8.277409445672218e-05, |
|
"loss": 0.0017, |
|
"step": 117708 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 8.221846667918514e-05, |
|
"loss": 0.0017, |
|
"step": 120016 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 8.16628389016481e-05, |
|
"loss": 0.0017, |
|
"step": 122324 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 8.110721112411107e-05, |
|
"loss": 0.0017, |
|
"step": 124632 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 8.055158334657403e-05, |
|
"loss": 0.0018, |
|
"step": 126940 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 7.999595556903699e-05, |
|
"loss": 0.0017, |
|
"step": 129248 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 7.944032779149996e-05, |
|
"loss": 0.0017, |
|
"step": 131556 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 7.888470001396292e-05, |
|
"loss": 0.0017, |
|
"step": 133864 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 7.832907223642588e-05, |
|
"loss": 0.0018, |
|
"step": 136172 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.011129369959235191, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 17.0083, |
|
"eval_samples_per_second": 15.169, |
|
"eval_steps_per_second": 1.058, |
|
"step": 138462 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 7.777344445888884e-05, |
|
"loss": 0.0018, |
|
"step": 138480 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 7.721781668135181e-05, |
|
"loss": 0.0016, |
|
"step": 140788 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 7.666218890381477e-05, |
|
"loss": 0.0016, |
|
"step": 143096 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 7.610656112627773e-05, |
|
"loss": 0.0017, |
|
"step": 145404 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 7.55509333487407e-05, |
|
"loss": 0.0017, |
|
"step": 147712 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 7.499530557120366e-05, |
|
"loss": 0.0017, |
|
"step": 150020 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 7.443967779366662e-05, |
|
"loss": 0.0016, |
|
"step": 152328 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 7.388405001612958e-05, |
|
"loss": 0.0016, |
|
"step": 154636 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 7.332842223859255e-05, |
|
"loss": 0.0017, |
|
"step": 156944 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 7.277279446105551e-05, |
|
"loss": 0.0017, |
|
"step": 159252 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.007010257337242365, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 16.7205, |
|
"eval_samples_per_second": 15.43, |
|
"eval_steps_per_second": 1.077, |
|
"step": 161539 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 7.221716668351847e-05, |
|
"loss": 0.0017, |
|
"step": 161560 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 7.166153890598143e-05, |
|
"loss": 0.0015, |
|
"step": 163868 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 7.11059111284444e-05, |
|
"loss": 0.0015, |
|
"step": 166176 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 7.055028335090735e-05, |
|
"loss": 0.0015, |
|
"step": 168484 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 6.999465557337031e-05, |
|
"loss": 0.0015, |
|
"step": 170792 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 6.943902779583327e-05, |
|
"loss": 0.0015, |
|
"step": 173100 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 6.888340001829624e-05, |
|
"loss": 0.0016, |
|
"step": 175408 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 6.83277722407592e-05, |
|
"loss": 0.0015, |
|
"step": 177716 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 6.777214446322216e-05, |
|
"loss": 0.0016, |
|
"step": 180024 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 6.721651668568512e-05, |
|
"loss": 0.0017, |
|
"step": 182332 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.01417616382241249, |
|
"eval_max_distance": 4, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 17.4179, |
|
"eval_samples_per_second": 14.812, |
|
"eval_steps_per_second": 1.033, |
|
"step": 184616 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6.666088890814809e-05, |
|
"loss": 0.0016, |
|
"step": 184640 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 6.610526113061105e-05, |
|
"loss": 0.0014, |
|
"step": 186948 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 6.554963335307401e-05, |
|
"loss": 0.0014, |
|
"step": 189256 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 6.499400557553698e-05, |
|
"loss": 0.0014, |
|
"step": 191564 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 6.443837779799994e-05, |
|
"loss": 0.0015, |
|
"step": 193872 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 6.38827500204629e-05, |
|
"loss": 0.0014, |
|
"step": 196180 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 6.332712224292586e-05, |
|
"loss": 0.0014, |
|
"step": 198488 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 6.277149446538883e-05, |
|
"loss": 0.0015, |
|
"step": 200796 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 6.221586668785179e-05, |
|
"loss": 0.0015, |
|
"step": 203104 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 6.166023891031475e-05, |
|
"loss": 0.0014, |
|
"step": 205412 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.011828480288386345, |
|
"eval_max_distance": 4, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 17.0049, |
|
"eval_samples_per_second": 15.172, |
|
"eval_steps_per_second": 1.059, |
|
"step": 207693 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 6.110461113277771e-05, |
|
"loss": 0.0015, |
|
"step": 207720 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 6.054898335524067e-05, |
|
"loss": 0.0014, |
|
"step": 210028 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 5.9993355577703634e-05, |
|
"loss": 0.0013, |
|
"step": 212336 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 5.9437727800166596e-05, |
|
"loss": 0.0014, |
|
"step": 214644 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 5.888210002262956e-05, |
|
"loss": 0.0014, |
|
"step": 216952 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 5.832647224509252e-05, |
|
"loss": 0.0013, |
|
"step": 219260 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 5.7770844467555485e-05, |
|
"loss": 0.0013, |
|
"step": 221568 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 5.721521669001845e-05, |
|
"loss": 0.0013, |
|
"step": 223876 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 5.665958891248141e-05, |
|
"loss": 0.0014, |
|
"step": 226184 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 5.610396113494437e-05, |
|
"loss": 0.0014, |
|
"step": 228492 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.011539922095835209, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 17.0363, |
|
"eval_samples_per_second": 15.144, |
|
"eval_steps_per_second": 1.057, |
|
"step": 230770 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5.5548333357407336e-05, |
|
"loss": 0.0015, |
|
"step": 230800 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 5.49927055798703e-05, |
|
"loss": 0.0013, |
|
"step": 233108 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 5.443707780233326e-05, |
|
"loss": 0.0012, |
|
"step": 235416 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 5.388145002479622e-05, |
|
"loss": 0.0013, |
|
"step": 237724 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 5.332582224725918e-05, |
|
"loss": 0.0013, |
|
"step": 240032 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 5.277019446972214e-05, |
|
"loss": 0.0014, |
|
"step": 242340 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 5.2214566692185106e-05, |
|
"loss": 0.0013, |
|
"step": 244648 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 5.165893891464807e-05, |
|
"loss": 0.0013, |
|
"step": 246956 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 5.110331113711103e-05, |
|
"loss": 0.0013, |
|
"step": 249264 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 5.0547683359573994e-05, |
|
"loss": 0.0013, |
|
"step": 251572 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.011254764162003994, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 16.9212, |
|
"eval_samples_per_second": 15.247, |
|
"eval_steps_per_second": 1.064, |
|
"step": 253847 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.999205558203695e-05, |
|
"loss": 0.0012, |
|
"step": 253880 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 4.943642780449991e-05, |
|
"loss": 0.0012, |
|
"step": 256188 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.8880800026962876e-05, |
|
"loss": 0.0012, |
|
"step": 258496 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 4.832517224942584e-05, |
|
"loss": 0.0013, |
|
"step": 260804 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 4.77695444718888e-05, |
|
"loss": 0.0012, |
|
"step": 263112 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 4.7213916694351764e-05, |
|
"loss": 0.0012, |
|
"step": 265420 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 4.665828891681473e-05, |
|
"loss": 0.0012, |
|
"step": 267728 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 4.610266113927768e-05, |
|
"loss": 0.0013, |
|
"step": 270036 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 4.5547033361740646e-05, |
|
"loss": 0.0012, |
|
"step": 272344 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 4.499140558420361e-05, |
|
"loss": 0.0012, |
|
"step": 274652 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.012018387205898762, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 16.9292, |
|
"eval_samples_per_second": 15.24, |
|
"eval_steps_per_second": 1.063, |
|
"step": 276924 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.443577780666657e-05, |
|
"loss": 0.0013, |
|
"step": 276960 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 4.3880150029129535e-05, |
|
"loss": 0.0012, |
|
"step": 279268 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 4.33245222515925e-05, |
|
"loss": 0.0011, |
|
"step": 281576 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 4.276889447405546e-05, |
|
"loss": 0.0012, |
|
"step": 283884 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 4.221326669651842e-05, |
|
"loss": 0.0012, |
|
"step": 286192 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 4.1657638918981386e-05, |
|
"loss": 0.0012, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 4.110201114144435e-05, |
|
"loss": 0.0011, |
|
"step": 290808 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 4.054638336390731e-05, |
|
"loss": 0.0011, |
|
"step": 293116 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 3.9990755586370274e-05, |
|
"loss": 0.0012, |
|
"step": 295424 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 3.943512780883323e-05, |
|
"loss": 0.0012, |
|
"step": 297732 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.013248566538095474, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 16.6859, |
|
"eval_samples_per_second": 15.462, |
|
"eval_steps_per_second": 1.079, |
|
"step": 300001 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.887950003129619e-05, |
|
"loss": 0.0012, |
|
"step": 300040 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"learning_rate": 3.8323872253759156e-05, |
|
"loss": 0.0011, |
|
"step": 302348 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 3.776824447622212e-05, |
|
"loss": 0.0011, |
|
"step": 304656 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 3.721261669868508e-05, |
|
"loss": 0.0011, |
|
"step": 306964 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 3.6656988921148044e-05, |
|
"loss": 0.0012, |
|
"step": 309272 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 3.954328163153008e-06, |
|
"loss": 0.0012, |
|
"step": 311580 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 3.16057364927606e-06, |
|
"loss": 0.0011, |
|
"step": 313888 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 2.3668191353991126e-06, |
|
"loss": 0.0011, |
|
"step": 316196 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 1.5730646215221654e-06, |
|
"loss": 0.001, |
|
"step": 318504 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 7.79310107645218e-07, |
|
"loss": 0.001, |
|
"step": 320812 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.011406470090150833, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 18.3924, |
|
"eval_samples_per_second": 14.028, |
|
"eval_steps_per_second": 0.979, |
|
"step": 323078 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"step": 323078, |
|
"total_flos": 8.183655700394803e+16, |
|
"train_loss": 4.6522844528394624e-05, |
|
"train_runtime": 1031.7354, |
|
"train_samples_per_second": 4696.956, |
|
"train_steps_per_second": 313.14 |
|
} |
|
], |
|
"logging_steps": 2308, |
|
"max_steps": 323078, |
|
"num_train_epochs": 14, |
|
"save_steps": 4616, |
|
"total_flos": 8.183655700394803e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|