{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 50, "global_step": 485, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008247422680412371, "grad_norm": 16.375, "learning_rate": 6.666666666666667e-06, "loss": 0.8858, "num_input_tokens_seen": 1413808, "step": 4 }, { "epoch": 0.016494845360824743, "grad_norm": 4.1875, "learning_rate": 1.3333333333333333e-05, "loss": 0.4088, "num_input_tokens_seen": 2866496, "step": 8 }, { "epoch": 0.024742268041237112, "grad_norm": 3.03125, "learning_rate": 2e-05, "loss": 0.2731, "num_input_tokens_seen": 4305104, "step": 12 }, { "epoch": 0.032989690721649485, "grad_norm": 1.6484375, "learning_rate": 2.6666666666666667e-05, "loss": 0.2563, "num_input_tokens_seen": 5594128, "step": 16 }, { "epoch": 0.041237113402061855, "grad_norm": 1.46875, "learning_rate": 3.3333333333333335e-05, "loss": 0.2536, "num_input_tokens_seen": 6683376, "step": 20 }, { "epoch": 0.049484536082474224, "grad_norm": 1.2421875, "learning_rate": 4e-05, "loss": 0.229, "num_input_tokens_seen": 8030336, "step": 24 }, { "epoch": 0.0577319587628866, "grad_norm": 1.2578125, "learning_rate": 3.9992569962849926e-05, "loss": 0.2212, "num_input_tokens_seen": 9395728, "step": 28 }, { "epoch": 0.06597938144329897, "grad_norm": 1.3671875, "learning_rate": 3.99702853719449e-05, "loss": 0.2275, "num_input_tokens_seen": 10689344, "step": 32 }, { "epoch": 0.07422680412371134, "grad_norm": 1.328125, "learning_rate": 3.9933162784818745e-05, "loss": 0.2262, "num_input_tokens_seen": 11936704, "step": 36 }, { "epoch": 0.08247422680412371, "grad_norm": 1.484375, "learning_rate": 3.988122978369162e-05, "loss": 0.2254, "num_input_tokens_seen": 13217248, "step": 40 }, { "epoch": 0.09072164948453608, "grad_norm": 1.15625, "learning_rate": 3.981452495497628e-05, "loss": 0.2186, "num_input_tokens_seen": 14587328, "step": 44 }, { "epoch": 0.09896907216494845, "grad_norm": 1.109375, "learning_rate": 3.973309786060829e-05, "loss": 0.1971, "num_input_tokens_seen": 15976464, "step": 48 }, { "epoch": 0.10721649484536082, "grad_norm": 1.125, "learning_rate": 3.963700900122124e-05, "loss": 0.2231, "num_input_tokens_seen": 17262576, "step": 52 }, { "epoch": 0.1154639175257732, "grad_norm": 0.9765625, "learning_rate": 3.952632977119465e-05, "loss": 0.2029, "num_input_tokens_seen": 18801264, "step": 56 }, { "epoch": 0.12371134020618557, "grad_norm": 1.0625, "learning_rate": 3.9401142405607594e-05, "loss": 0.2033, "num_input_tokens_seen": 20158000, "step": 60 }, { "epoch": 0.13195876288659794, "grad_norm": 1.09375, "learning_rate": 3.9261539919137776e-05, "loss": 0.2278, "num_input_tokens_seen": 21322240, "step": 64 }, { "epoch": 0.1402061855670103, "grad_norm": 1.0, "learning_rate": 3.9107626036951266e-05, "loss": 0.1998, "num_input_tokens_seen": 22631360, "step": 68 }, { "epoch": 0.14845360824742268, "grad_norm": 1.109375, "learning_rate": 3.8939515117634326e-05, "loss": 0.2148, "num_input_tokens_seen": 23848496, "step": 72 }, { "epoch": 0.15670103092783505, "grad_norm": 1.1484375, "learning_rate": 3.875733206822452e-05, "loss": 0.2246, "num_input_tokens_seen": 25148336, "step": 76 }, { "epoch": 0.16494845360824742, "grad_norm": 1.203125, "learning_rate": 3.8561212251404406e-05, "loss": 0.2056, "num_input_tokens_seen": 26427264, "step": 80 }, { "epoch": 0.1731958762886598, "grad_norm": 1.1328125, "learning_rate": 3.835130138492644e-05, "loss": 0.203, "num_input_tokens_seen": 27833024, "step": 84 }, { "epoch": 0.18144329896907216, "grad_norm": 1.1015625, "learning_rate": 3.812775543334425e-05, "loss": 0.1912, "num_input_tokens_seen": 29273008, "step": 88 }, { "epoch": 0.18969072164948453, "grad_norm": 1.2265625, "learning_rate": 3.789074049213033e-05, "loss": 0.2182, "num_input_tokens_seen": 30624112, "step": 92 }, { "epoch": 0.1979381443298969, "grad_norm": 1.1796875, "learning_rate": 3.7640432664266514e-05, "loss": 0.216, "num_input_tokens_seen": 31857552, "step": 96 }, { "epoch": 0.20618556701030927, "grad_norm": 1.125, "learning_rate": 3.737701792939881e-05, "loss": 0.2065, "num_input_tokens_seen": 33116768, "step": 100 }, { "epoch": 0.21443298969072164, "grad_norm": 1.0, "learning_rate": 3.7100692005653796e-05, "loss": 0.206, "num_input_tokens_seen": 34461024, "step": 104 }, { "epoch": 0.22268041237113403, "grad_norm": 1.0703125, "learning_rate": 3.681166020421938e-05, "loss": 0.1912, "num_input_tokens_seen": 35915264, "step": 108 }, { "epoch": 0.2309278350515464, "grad_norm": 1.0625, "learning_rate": 3.6510137276797786e-05, "loss": 0.1952, "num_input_tokens_seen": 37264080, "step": 112 }, { "epoch": 0.23917525773195877, "grad_norm": 1.109375, "learning_rate": 3.6196347256044236e-05, "loss": 0.2273, "num_input_tokens_seen": 38539072, "step": 116 }, { "epoch": 0.24742268041237114, "grad_norm": 1.109375, "learning_rate": 3.5870523289109886e-05, "loss": 0.2041, "num_input_tokens_seen": 39930480, "step": 120 }, { "epoch": 0.2556701030927835, "grad_norm": 1.15625, "learning_rate": 3.553290746441261e-05, "loss": 0.2065, "num_input_tokens_seen": 41066544, "step": 124 }, { "epoch": 0.2639175257731959, "grad_norm": 1.0390625, "learning_rate": 3.5183750631764406e-05, "loss": 0.1979, "num_input_tokens_seen": 42372160, "step": 128 }, { "epoch": 0.2721649484536082, "grad_norm": 1.0703125, "learning_rate": 3.4823312215989046e-05, "loss": 0.2079, "num_input_tokens_seen": 43644832, "step": 132 }, { "epoch": 0.2804123711340206, "grad_norm": 1.0078125, "learning_rate": 3.445186002416849e-05, "loss": 0.2058, "num_input_tokens_seen": 44948816, "step": 136 }, { "epoch": 0.28865979381443296, "grad_norm": 1.09375, "learning_rate": 3.4069670046661197e-05, "loss": 0.1857, "num_input_tokens_seen": 46404048, "step": 140 }, { "epoch": 0.29690721649484536, "grad_norm": 1.1015625, "learning_rate": 3.3677026252040306e-05, "loss": 0.212, "num_input_tokens_seen": 47646208, "step": 144 }, { "epoch": 0.30515463917525776, "grad_norm": 1.0234375, "learning_rate": 3.327422037610389e-05, "loss": 0.1983, "num_input_tokens_seen": 49010928, "step": 148 }, { "epoch": 0.3134020618556701, "grad_norm": 0.9375, "learning_rate": 3.286155170511419e-05, "loss": 0.197, "num_input_tokens_seen": 50440128, "step": 152 }, { "epoch": 0.3216494845360825, "grad_norm": 1.140625, "learning_rate": 3.2439326853426824e-05, "loss": 0.2028, "num_input_tokens_seen": 51797840, "step": 156 }, { "epoch": 0.32989690721649484, "grad_norm": 0.984375, "learning_rate": 3.200785953567517e-05, "loss": 0.196, "num_input_tokens_seen": 53109456, "step": 160 }, { "epoch": 0.33814432989690724, "grad_norm": 1.0390625, "learning_rate": 3.156747033367922e-05, "loss": 0.2016, "num_input_tokens_seen": 54440768, "step": 164 }, { "epoch": 0.3463917525773196, "grad_norm": 0.97265625, "learning_rate": 3.1118486458252094e-05, "loss": 0.1975, "num_input_tokens_seen": 55879424, "step": 168 }, { "epoch": 0.354639175257732, "grad_norm": 1.0234375, "learning_rate": 3.0661241506081236e-05, "loss": 0.1965, "num_input_tokens_seen": 57154384, "step": 172 }, { "epoch": 0.3628865979381443, "grad_norm": 0.95703125, "learning_rate": 3.019607521186475e-05, "loss": 0.2078, "num_input_tokens_seen": 58470672, "step": 176 }, { "epoch": 0.3711340206185567, "grad_norm": 1.0625, "learning_rate": 2.972333319588736e-05, "loss": 0.2092, "num_input_tokens_seen": 59684416, "step": 180 }, { "epoch": 0.37938144329896906, "grad_norm": 0.96484375, "learning_rate": 2.9243366707223165e-05, "loss": 0.2018, "num_input_tokens_seen": 61002832, "step": 184 }, { "epoch": 0.38762886597938145, "grad_norm": 1.0546875, "learning_rate": 2.875653236275632e-05, "loss": 0.2072, "num_input_tokens_seen": 62262064, "step": 188 }, { "epoch": 0.3958762886597938, "grad_norm": 0.92578125, "learning_rate": 2.8263191882213362e-05, "loss": 0.1936, "num_input_tokens_seen": 63678896, "step": 192 }, { "epoch": 0.4041237113402062, "grad_norm": 0.9609375, "learning_rate": 2.7763711819404098e-05, "loss": 0.2069, "num_input_tokens_seen": 64844672, "step": 196 }, { "epoch": 0.41237113402061853, "grad_norm": 1.0546875, "learning_rate": 2.7258463289870764e-05, "loss": 0.1924, "num_input_tokens_seen": 66274544, "step": 200 }, { "epoch": 0.42061855670103093, "grad_norm": 0.88671875, "learning_rate": 2.6747821695147806e-05, "loss": 0.1949, "num_input_tokens_seen": 67683072, "step": 204 }, { "epoch": 0.4288659793814433, "grad_norm": 1.125, "learning_rate": 2.623216644383715e-05, "loss": 0.2092, "num_input_tokens_seen": 68860288, "step": 208 }, { "epoch": 0.43711340206185567, "grad_norm": 1.078125, "learning_rate": 2.5711880669706172e-05, "loss": 0.1959, "num_input_tokens_seen": 70182736, "step": 212 }, { "epoch": 0.44536082474226807, "grad_norm": 0.9296875, "learning_rate": 2.5187350947017918e-05, "loss": 0.2101, "num_input_tokens_seen": 71494624, "step": 216 }, { "epoch": 0.4536082474226804, "grad_norm": 0.86328125, "learning_rate": 2.4658967003304986e-05, "loss": 0.1925, "num_input_tokens_seen": 72877248, "step": 220 }, { "epoch": 0.4618556701030928, "grad_norm": 1.0078125, "learning_rate": 2.4127121429800498e-05, "loss": 0.1841, "num_input_tokens_seen": 74118560, "step": 224 }, { "epoch": 0.47010309278350515, "grad_norm": 0.88671875, "learning_rate": 2.3592209389741372e-05, "loss": 0.174, "num_input_tokens_seen": 75598912, "step": 228 }, { "epoch": 0.47835051546391755, "grad_norm": 1.0234375, "learning_rate": 2.30546283247606e-05, "loss": 0.207, "num_input_tokens_seen": 76742752, "step": 232 }, { "epoch": 0.4865979381443299, "grad_norm": 1.015625, "learning_rate": 2.251477765958655e-05, "loss": 0.1932, "num_input_tokens_seen": 78206256, "step": 236 }, { "epoch": 0.4948453608247423, "grad_norm": 1.0546875, "learning_rate": 2.1973058505269007e-05, "loss": 0.1946, "num_input_tokens_seen": 79491408, "step": 240 }, { "epoch": 0.5030927835051546, "grad_norm": 1.0546875, "learning_rate": 2.1429873361152124e-05, "loss": 0.1975, "num_input_tokens_seen": 80718320, "step": 244 }, { "epoch": 0.511340206185567, "grad_norm": 0.91015625, "learning_rate": 2.088562581581592e-05, "loss": 0.1964, "num_input_tokens_seen": 81915456, "step": 248 }, { "epoch": 0.5195876288659794, "grad_norm": 1.1015625, "learning_rate": 2.0340720247208447e-05, "loss": 0.191, "num_input_tokens_seen": 83180624, "step": 252 }, { "epoch": 0.5278350515463918, "grad_norm": 0.90234375, "learning_rate": 1.9795561522191523e-05, "loss": 0.1832, "num_input_tokens_seen": 84571536, "step": 256 }, { "epoch": 0.5360824742268041, "grad_norm": 1.046875, "learning_rate": 1.9250554695723107e-05, "loss": 0.1964, "num_input_tokens_seen": 85841328, "step": 260 }, { "epoch": 0.5443298969072164, "grad_norm": 0.953125, "learning_rate": 1.8706104709899964e-05, "loss": 0.1875, "num_input_tokens_seen": 87241616, "step": 264 }, { "epoch": 0.5525773195876289, "grad_norm": 0.92578125, "learning_rate": 1.816261609308419e-05, "loss": 0.1809, "num_input_tokens_seen": 88600352, "step": 268 }, { "epoch": 0.5608247422680412, "grad_norm": 0.87109375, "learning_rate": 1.7620492659337155e-05, "loss": 0.1793, "num_input_tokens_seen": 90051376, "step": 272 }, { "epoch": 0.5690721649484536, "grad_norm": 1.0390625, "learning_rate": 1.7080137208384122e-05, "loss": 0.1865, "num_input_tokens_seen": 91429472, "step": 276 }, { "epoch": 0.5773195876288659, "grad_norm": 0.9140625, "learning_rate": 1.6541951226332565e-05, "loss": 0.1745, "num_input_tokens_seen": 92791856, "step": 280 }, { "epoch": 0.5855670103092784, "grad_norm": 0.875, "learning_rate": 1.600633458736653e-05, "loss": 0.1925, "num_input_tokens_seen": 94068304, "step": 284 }, { "epoch": 0.5938144329896907, "grad_norm": 0.98828125, "learning_rate": 1.5473685256638572e-05, "loss": 0.1903, "num_input_tokens_seen": 95338656, "step": 288 }, { "epoch": 0.6020618556701031, "grad_norm": 0.90625, "learning_rate": 1.4944398994580232e-05, "loss": 0.1834, "num_input_tokens_seen": 96565872, "step": 292 }, { "epoch": 0.6103092783505155, "grad_norm": 1.0, "learning_rate": 1.4418869062850514e-05, "loss": 0.211, "num_input_tokens_seen": 97845776, "step": 296 }, { "epoch": 0.6185567010309279, "grad_norm": 0.92578125, "learning_rate": 1.3897485932141042e-05, "loss": 0.1872, "num_input_tokens_seen": 99080048, "step": 300 }, { "epoch": 0.6268041237113402, "grad_norm": 0.88671875, "learning_rate": 1.3380636992054878e-05, "loss": 0.17, "num_input_tokens_seen": 100563184, "step": 304 }, { "epoch": 0.6350515463917525, "grad_norm": 0.9375, "learning_rate": 1.2868706263274602e-05, "loss": 0.1935, "num_input_tokens_seen": 101820432, "step": 308 }, { "epoch": 0.643298969072165, "grad_norm": 0.88671875, "learning_rate": 1.236207411223353e-05, "loss": 0.1833, "num_input_tokens_seen": 103280736, "step": 312 }, { "epoch": 0.6515463917525773, "grad_norm": 0.88671875, "learning_rate": 1.1861116968502015e-05, "loss": 0.1815, "num_input_tokens_seen": 104563920, "step": 316 }, { "epoch": 0.6597938144329897, "grad_norm": 0.875, "learning_rate": 1.136620704509892e-05, "loss": 0.1816, "num_input_tokens_seen": 105869408, "step": 320 }, { "epoch": 0.668041237113402, "grad_norm": 0.9296875, "learning_rate": 1.087771206193593e-05, "loss": 0.1837, "num_input_tokens_seen": 107213792, "step": 324 }, { "epoch": 0.6762886597938145, "grad_norm": 0.96484375, "learning_rate": 1.0395994972600285e-05, "loss": 0.1775, "num_input_tokens_seen": 108623536, "step": 328 }, { "epoch": 0.6845360824742268, "grad_norm": 0.97265625, "learning_rate": 9.921413694678959e-06, "loss": 0.2035, "num_input_tokens_seen": 109750560, "step": 332 }, { "epoch": 0.6927835051546392, "grad_norm": 0.921875, "learning_rate": 9.454320843824512e-06, "loss": 0.1862, "num_input_tokens_seen": 111023152, "step": 336 }, { "epoch": 0.7010309278350515, "grad_norm": 0.95703125, "learning_rate": 8.995063471760377e-06, "loss": 0.1927, "num_input_tokens_seen": 112284320, "step": 340 }, { "epoch": 0.709278350515464, "grad_norm": 0.96484375, "learning_rate": 8.543982808420156e-06, "loss": 0.1856, "num_input_tokens_seen": 113630688, "step": 344 }, { "epoch": 0.7175257731958763, "grad_norm": 0.953125, "learning_rate": 8.101414008412469e-06, "loss": 0.1792, "num_input_tokens_seen": 114946320, "step": 348 }, { "epoch": 0.7257731958762886, "grad_norm": 0.94921875, "learning_rate": 7.667685901999875e-06, "loss": 0.1891, "num_input_tokens_seen": 116220208, "step": 352 }, { "epoch": 0.734020618556701, "grad_norm": 0.8671875, "learning_rate": 7.24312075077674e-06, "loss": 0.1891, "num_input_tokens_seen": 117614672, "step": 356 }, { "epoch": 0.7422680412371134, "grad_norm": 1.0859375, "learning_rate": 6.828034008227678e-06, "loss": 0.1714, "num_input_tokens_seen": 118996816, "step": 360 }, { "epoch": 0.7505154639175258, "grad_norm": 0.90234375, "learning_rate": 6.422734085344464e-06, "loss": 0.1871, "num_input_tokens_seen": 120229232, "step": 364 }, { "epoch": 0.7587628865979381, "grad_norm": 0.8203125, "learning_rate": 6.027522121475482e-06, "loss": 0.1795, "num_input_tokens_seen": 121495936, "step": 368 }, { "epoch": 0.7670103092783506, "grad_norm": 0.95703125, "learning_rate": 5.642691760578116e-06, "loss": 0.1833, "num_input_tokens_seen": 122787872, "step": 372 }, { "epoch": 0.7752577319587629, "grad_norm": 0.83984375, "learning_rate": 5.268528933040147e-06, "loss": 0.1673, "num_input_tokens_seen": 124257600, "step": 376 }, { "epoch": 0.7835051546391752, "grad_norm": 0.875, "learning_rate": 4.905311643232464e-06, "loss": 0.1763, "num_input_tokens_seen": 125705408, "step": 380 }, { "epoch": 0.7917525773195876, "grad_norm": 0.9765625, "learning_rate": 4.553309762950739e-06, "loss": 0.1877, "num_input_tokens_seen": 126862272, "step": 384 }, { "epoch": 0.8, "grad_norm": 0.90625, "learning_rate": 4.212784830899725e-06, "loss": 0.1795, "num_input_tokens_seen": 128153600, "step": 388 }, { "epoch": 0.8082474226804124, "grad_norm": 0.828125, "learning_rate": 3.8839898583689725e-06, "loss": 0.1803, "num_input_tokens_seen": 129461872, "step": 392 }, { "epoch": 0.8164948453608247, "grad_norm": 0.88671875, "learning_rate": 3.567169141244562e-06, "loss": 0.179, "num_input_tokens_seen": 130662064, "step": 396 }, { "epoch": 0.8247422680412371, "grad_norm": 0.8359375, "learning_rate": 3.262558078496301e-06, "loss": 0.1679, "num_input_tokens_seen": 131997568, "step": 400 }, { "epoch": 0.8329896907216495, "grad_norm": 0.94921875, "learning_rate": 2.9703829972754407e-06, "loss": 0.1974, "num_input_tokens_seen": 133415744, "step": 404 }, { "epoch": 0.8412371134020619, "grad_norm": 0.828125, "learning_rate": 2.69086098475277e-06, "loss": 0.1699, "num_input_tokens_seen": 134815840, "step": 408 }, { "epoch": 0.8494845360824742, "grad_norm": 0.9921875, "learning_rate": 2.4241997268220096e-06, "loss": 0.1815, "num_input_tokens_seen": 136262128, "step": 412 }, { "epoch": 0.8577319587628865, "grad_norm": 0.96484375, "learning_rate": 2.1705973537884615e-06, "loss": 0.1781, "num_input_tokens_seen": 137430160, "step": 416 }, { "epoch": 0.865979381443299, "grad_norm": 0.95703125, "learning_rate": 1.9302422931574183e-06, "loss": 0.1899, "num_input_tokens_seen": 138709200, "step": 420 }, { "epoch": 0.8742268041237113, "grad_norm": 0.83984375, "learning_rate": 1.7033131296318473e-06, "loss": 0.1795, "num_input_tokens_seen": 140033680, "step": 424 }, { "epoch": 0.8824742268041237, "grad_norm": 0.8203125, "learning_rate": 1.4899784724232968e-06, "loss": 0.1749, "num_input_tokens_seen": 141348848, "step": 428 }, { "epoch": 0.8907216494845361, "grad_norm": 0.83984375, "learning_rate": 1.2903968299746094e-06, "loss": 0.171, "num_input_tokens_seen": 142797664, "step": 432 }, { "epoch": 0.8989690721649485, "grad_norm": 0.89453125, "learning_rate": 1.104716492187574e-06, "loss": 0.1812, "num_input_tokens_seen": 144154208, "step": 436 }, { "epoch": 0.9072164948453608, "grad_norm": 1.03125, "learning_rate": 9.330754202429726e-07, "loss": 0.1891, "num_input_tokens_seen": 145332560, "step": 440 }, { "epoch": 0.9154639175257732, "grad_norm": 0.98046875, "learning_rate": 7.756011440948996e-07, "loss": 0.1902, "num_input_tokens_seen": 146527344, "step": 444 }, { "epoch": 0.9237113402061856, "grad_norm": 1.0, "learning_rate": 6.324106677155573e-07, "loss": 0.1793, "num_input_tokens_seen": 147821568, "step": 448 }, { "epoch": 0.931958762886598, "grad_norm": 0.88671875, "learning_rate": 5.036103821608485e-07, "loss": 0.1844, "num_input_tokens_seen": 149191664, "step": 452 }, { "epoch": 0.9402061855670103, "grad_norm": 0.8515625, "learning_rate": 3.892959865214363e-07, "loss": 0.1795, "num_input_tokens_seen": 150526864, "step": 456 }, { "epoch": 0.9484536082474226, "grad_norm": 0.9453125, "learning_rate": 2.8955241681795534e-07, "loss": 0.1859, "num_input_tokens_seen": 151861952, "step": 460 }, { "epoch": 0.9567010309278351, "grad_norm": 0.71875, "learning_rate": 2.044537828932458e-07, "loss": 0.1787, "num_input_tokens_seen": 153201488, "step": 464 }, { "epoch": 0.9649484536082474, "grad_norm": 0.87109375, "learning_rate": 1.3406331334845813e-07, "loss": 0.1884, "num_input_tokens_seen": 154511184, "step": 468 }, { "epoch": 0.9731958762886598, "grad_norm": 0.796875, "learning_rate": 7.843330856396103e-08, "loss": 0.1858, "num_input_tokens_seen": 155737200, "step": 472 }, { "epoch": 0.9814432989690721, "grad_norm": 0.8671875, "learning_rate": 3.760510183997701e-08, "loss": 0.183, "num_input_tokens_seen": 157084960, "step": 476 }, { "epoch": 0.9896907216494846, "grad_norm": 0.93359375, "learning_rate": 1.160902868577951e-08, "loss": 0.1908, "num_input_tokens_seen": 158349904, "step": 480 }, { "epoch": 0.9979381443298969, "grad_norm": 0.8984375, "learning_rate": 4.64404280295927e-10, "loss": 0.1764, "num_input_tokens_seen": 159669152, "step": 484 }, { "epoch": 1.0, "eval_loss": 0.0952233299612999, "eval_runtime": 83.4314, "eval_samples_per_second": 12.453, "eval_steps_per_second": 0.396, "num_input_tokens_seen": 160041824, "step": 485 }, { "epoch": 1.0, "num_input_tokens_seen": 160041824, "step": 485, "total_flos": 9.01334912177537e+17, "train_loss": 0.20323730206366666, "train_runtime": 14397.8817, "train_samples_per_second": 4.306, "train_steps_per_second": 0.034, "train_tokens_per_second": 1386.226 } ], "logging_steps": 4, "max_steps": 485, "num_input_tokens_seen": 160041824, "num_train_epochs": 1, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.01334912177537e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }