{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 3728,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.000536480686695279,
      "grad_norm": 2.159003831528304,
      "learning_rate": 2.6809651474530834e-08,
      "loss": 0.2763,
      "step": 1
    },
    {
      "epoch": 0.002682403433476395,
      "grad_norm": 2.119515955240221,
      "learning_rate": 1.3404825737265417e-07,
      "loss": 0.2808,
      "step": 5
    },
    {
      "epoch": 0.00536480686695279,
      "grad_norm": 2.049414637279413,
      "learning_rate": 2.6809651474530835e-07,
      "loss": 0.27,
      "step": 10
    },
    {
      "epoch": 0.008047210300429184,
      "grad_norm": 1.8565631149959,
      "learning_rate": 4.021447721179625e-07,
      "loss": 0.271,
      "step": 15
    },
    {
      "epoch": 0.01072961373390558,
      "grad_norm": 1.8099188776354862,
      "learning_rate": 5.361930294906167e-07,
      "loss": 0.275,
      "step": 20
    },
    {
      "epoch": 0.013412017167381975,
      "grad_norm": 2.2990684991495063,
      "learning_rate": 6.702412868632709e-07,
      "loss": 0.2717,
      "step": 25
    },
    {
      "epoch": 0.016094420600858368,
      "grad_norm": 2.819543495503733,
      "learning_rate": 8.04289544235925e-07,
      "loss": 0.275,
      "step": 30
    },
    {
      "epoch": 0.018776824034334765,
      "grad_norm": 1.9480081947921484,
      "learning_rate": 9.383378016085791e-07,
      "loss": 0.2749,
      "step": 35
    },
    {
      "epoch": 0.02145922746781116,
      "grad_norm": 1.95437235684509,
      "learning_rate": 1.0723860589812334e-06,
      "loss": 0.262,
      "step": 40
    },
    {
      "epoch": 0.024141630901287552,
      "grad_norm": 2.117514986106169,
      "learning_rate": 1.2064343163538874e-06,
      "loss": 0.2779,
      "step": 45
    },
    {
      "epoch": 0.02682403433476395,
      "grad_norm": 1.9883094297371926,
      "learning_rate": 1.3404825737265418e-06,
      "loss": 0.2671,
      "step": 50
    },
    {
      "epoch": 0.029506437768240343,
      "grad_norm": 1.6309537225417674,
      "learning_rate": 1.4745308310991958e-06,
      "loss": 0.2705,
      "step": 55
    },
    {
      "epoch": 0.032188841201716736,
      "grad_norm": 1.9374566786579963,
      "learning_rate": 1.60857908847185e-06,
      "loss": 0.2572,
      "step": 60
    },
    {
      "epoch": 0.03487124463519313,
      "grad_norm": 1.9521782298827601,
      "learning_rate": 1.7426273458445042e-06,
      "loss": 0.2765,
      "step": 65
    },
    {
      "epoch": 0.03755364806866953,
      "grad_norm": 1.8946479594126726,
      "learning_rate": 1.8766756032171582e-06,
      "loss": 0.2688,
      "step": 70
    },
    {
      "epoch": 0.040236051502145924,
      "grad_norm": 2.0890536076880837,
      "learning_rate": 2.0107238605898126e-06,
      "loss": 0.2616,
      "step": 75
    },
    {
      "epoch": 0.04291845493562232,
      "grad_norm": 1.9780793853161633,
      "learning_rate": 2.1447721179624668e-06,
      "loss": 0.2669,
      "step": 80
    },
    {
      "epoch": 0.04560085836909871,
      "grad_norm": 1.8946762410648703,
      "learning_rate": 2.278820375335121e-06,
      "loss": 0.2607,
      "step": 85
    },
    {
      "epoch": 0.048283261802575105,
      "grad_norm": 2.047411640091833,
      "learning_rate": 2.4128686327077747e-06,
      "loss": 0.2685,
      "step": 90
    },
    {
      "epoch": 0.050965665236051505,
      "grad_norm": 1.9316747211791316,
      "learning_rate": 2.5469168900804294e-06,
      "loss": 0.2713,
      "step": 95
    },
    {
      "epoch": 0.0536480686695279,
      "grad_norm": 1.994496620507049,
      "learning_rate": 2.6809651474530836e-06,
      "loss": 0.2677,
      "step": 100
    },
    {
      "epoch": 0.05633047210300429,
      "grad_norm": 1.905981925726772,
      "learning_rate": 2.8150134048257378e-06,
      "loss": 0.2711,
      "step": 105
    },
    {
      "epoch": 0.059012875536480686,
      "grad_norm": 3.1892641200581644,
      "learning_rate": 2.9490616621983915e-06,
      "loss": 0.2666,
      "step": 110
    },
    {
      "epoch": 0.06169527896995708,
      "grad_norm": 2.0282068508306743,
      "learning_rate": 3.0831099195710457e-06,
      "loss": 0.2737,
      "step": 115
    },
    {
      "epoch": 0.06437768240343347,
      "grad_norm": 2.0791848571473674,
      "learning_rate": 3.2171581769437e-06,
      "loss": 0.27,
      "step": 120
    },
    {
      "epoch": 0.06706008583690987,
      "grad_norm": 2.0328809616002643,
      "learning_rate": 3.351206434316354e-06,
      "loss": 0.2752,
      "step": 125
    },
    {
      "epoch": 0.06974248927038626,
      "grad_norm": 1.8534693767597294,
      "learning_rate": 3.4852546916890083e-06,
      "loss": 0.2744,
      "step": 130
    },
    {
      "epoch": 0.07242489270386267,
      "grad_norm": 2.048030125810557,
      "learning_rate": 3.6193029490616625e-06,
      "loss": 0.2709,
      "step": 135
    },
    {
      "epoch": 0.07510729613733906,
      "grad_norm": 1.9867486569477613,
      "learning_rate": 3.7533512064343163e-06,
      "loss": 0.2736,
      "step": 140
    },
    {
      "epoch": 0.07778969957081545,
      "grad_norm": 2.1276602780145204,
      "learning_rate": 3.8873994638069705e-06,
      "loss": 0.2774,
      "step": 145
    },
    {
      "epoch": 0.08047210300429185,
      "grad_norm": 2.033354215180648,
      "learning_rate": 4.021447721179625e-06,
      "loss": 0.2725,
      "step": 150
    },
    {
      "epoch": 0.08315450643776824,
      "grad_norm": 2.0256563782144648,
      "learning_rate": 4.155495978552279e-06,
      "loss": 0.2772,
      "step": 155
    },
    {
      "epoch": 0.08583690987124463,
      "grad_norm": 1.9813469413072111,
      "learning_rate": 4.2895442359249335e-06,
      "loss": 0.2698,
      "step": 160
    },
    {
      "epoch": 0.08851931330472103,
      "grad_norm": 2.122915981942998,
      "learning_rate": 4.423592493297587e-06,
      "loss": 0.2755,
      "step": 165
    },
    {
      "epoch": 0.09120171673819742,
      "grad_norm": 2.2985675444327898,
      "learning_rate": 4.557640750670242e-06,
      "loss": 0.2841,
      "step": 170
    },
    {
      "epoch": 0.09388412017167382,
      "grad_norm": 2.164820695566957,
      "learning_rate": 4.691689008042896e-06,
      "loss": 0.2793,
      "step": 175
    },
    {
      "epoch": 0.09656652360515021,
      "grad_norm": 2.183339227249909,
      "learning_rate": 4.8257372654155495e-06,
      "loss": 0.2865,
      "step": 180
    },
    {
      "epoch": 0.0992489270386266,
      "grad_norm": 2.1531262710694614,
      "learning_rate": 4.959785522788204e-06,
      "loss": 0.2787,
      "step": 185
    },
    {
      "epoch": 0.10193133047210301,
      "grad_norm": 2.1290726250486802,
      "learning_rate": 5.093833780160859e-06,
      "loss": 0.2861,
      "step": 190
    },
    {
      "epoch": 0.1046137339055794,
      "grad_norm": 2.0983336189626653,
      "learning_rate": 5.2278820375335125e-06,
      "loss": 0.281,
      "step": 195
    },
    {
      "epoch": 0.1072961373390558,
      "grad_norm": 2.3359344902177757,
      "learning_rate": 5.361930294906167e-06,
      "loss": 0.286,
      "step": 200
    },
    {
      "epoch": 0.10997854077253219,
      "grad_norm": 2.2647245539596295,
      "learning_rate": 5.495978552278821e-06,
      "loss": 0.2928,
      "step": 205
    },
    {
      "epoch": 0.11266094420600858,
      "grad_norm": 2.3160976702027503,
      "learning_rate": 5.6300268096514755e-06,
      "loss": 0.2987,
      "step": 210
    },
    {
      "epoch": 0.11534334763948498,
      "grad_norm": 2.167097469659948,
      "learning_rate": 5.764075067024129e-06,
      "loss": 0.2856,
      "step": 215
    },
    {
      "epoch": 0.11802575107296137,
      "grad_norm": 2.6336660536832714,
      "learning_rate": 5.898123324396783e-06,
      "loss": 0.2789,
      "step": 220
    },
    {
      "epoch": 0.12070815450643776,
      "grad_norm": 2.2300194118786694,
      "learning_rate": 6.032171581769437e-06,
      "loss": 0.2783,
      "step": 225
    },
    {
      "epoch": 0.12339055793991416,
      "grad_norm": 2.369209579489948,
      "learning_rate": 6.1662198391420915e-06,
      "loss": 0.2831,
      "step": 230
    },
    {
      "epoch": 0.12607296137339055,
      "grad_norm": 2.176255475510943,
      "learning_rate": 6.300268096514745e-06,
      "loss": 0.2871,
      "step": 235
    },
    {
      "epoch": 0.12875536480686695,
      "grad_norm": 2.4540441563893953,
      "learning_rate": 6.4343163538874e-06,
      "loss": 0.2986,
      "step": 240
    },
    {
      "epoch": 0.13143776824034334,
      "grad_norm": 2.276069551949472,
      "learning_rate": 6.5683646112600545e-06,
      "loss": 0.2877,
      "step": 245
    },
    {
      "epoch": 0.13412017167381973,
      "grad_norm": 2.3971991612994317,
      "learning_rate": 6.702412868632708e-06,
      "loss": 0.2981,
      "step": 250
    },
    {
      "epoch": 0.13680257510729613,
      "grad_norm": 2.269508436580868,
      "learning_rate": 6.836461126005363e-06,
      "loss": 0.2861,
      "step": 255
    },
    {
      "epoch": 0.13948497854077252,
      "grad_norm": 2.374887657608865,
      "learning_rate": 6.970509383378017e-06,
      "loss": 0.2966,
      "step": 260
    },
    {
      "epoch": 0.14216738197424894,
      "grad_norm": 2.361108272529918,
      "learning_rate": 7.104557640750671e-06,
      "loss": 0.2964,
      "step": 265
    },
    {
      "epoch": 0.14484978540772533,
      "grad_norm": 2.347968771429336,
      "learning_rate": 7.238605898123325e-06,
      "loss": 0.2984,
      "step": 270
    },
    {
      "epoch": 0.14753218884120173,
      "grad_norm": 2.3043139124066627,
      "learning_rate": 7.37265415549598e-06,
      "loss": 0.2942,
      "step": 275
    },
    {
      "epoch": 0.15021459227467812,
      "grad_norm": 2.4465652779296616,
      "learning_rate": 7.506702412868633e-06,
      "loss": 0.301,
      "step": 280
    },
    {
      "epoch": 0.15289699570815452,
      "grad_norm": 2.3140350505254363,
      "learning_rate": 7.640750670241287e-06,
      "loss": 0.2946,
      "step": 285
    },
    {
      "epoch": 0.1555793991416309,
      "grad_norm": 2.413194168004288,
      "learning_rate": 7.774798927613941e-06,
      "loss": 0.2955,
      "step": 290
    },
    {
      "epoch": 0.1582618025751073,
      "grad_norm": 2.450841329989038,
      "learning_rate": 7.908847184986595e-06,
      "loss": 0.3017,
      "step": 295
    },
    {
      "epoch": 0.1609442060085837,
      "grad_norm": 2.4699230159692926,
      "learning_rate": 8.04289544235925e-06,
      "loss": 0.2938,
      "step": 300
    },
    {
      "epoch": 0.1636266094420601,
      "grad_norm": 2.470762830402983,
      "learning_rate": 8.176943699731904e-06,
      "loss": 0.3046,
      "step": 305
    },
    {
      "epoch": 0.16630901287553648,
      "grad_norm": 2.430350728811769,
      "learning_rate": 8.310991957104558e-06,
      "loss": 0.3089,
      "step": 310
    },
    {
      "epoch": 0.16899141630901288,
      "grad_norm": 2.489809860770138,
      "learning_rate": 8.445040214477213e-06,
      "loss": 0.3112,
      "step": 315
    },
    {
      "epoch": 0.17167381974248927,
      "grad_norm": 2.439575335695019,
      "learning_rate": 8.579088471849867e-06,
      "loss": 0.3079,
      "step": 320
    },
    {
      "epoch": 0.17435622317596566,
      "grad_norm": 2.320975818019913,
      "learning_rate": 8.71313672922252e-06,
      "loss": 0.2981,
      "step": 325
    },
    {
      "epoch": 0.17703862660944206,
      "grad_norm": 2.5159282882296825,
      "learning_rate": 8.847184986595175e-06,
      "loss": 0.3082,
      "step": 330
    },
    {
      "epoch": 0.17972103004291845,
      "grad_norm": 2.444727008718694,
      "learning_rate": 8.98123324396783e-06,
      "loss": 0.295,
      "step": 335
    },
    {
      "epoch": 0.18240343347639484,
      "grad_norm": 2.5465767182435455,
      "learning_rate": 9.115281501340484e-06,
      "loss": 0.304,
      "step": 340
    },
    {
      "epoch": 0.18508583690987124,
      "grad_norm": 2.4341632455203,
      "learning_rate": 9.249329758713138e-06,
      "loss": 0.3125,
      "step": 345
    },
    {
      "epoch": 0.18776824034334763,
      "grad_norm": 2.5356465792325547,
      "learning_rate": 9.383378016085791e-06,
      "loss": 0.3109,
      "step": 350
    },
    {
      "epoch": 0.19045064377682402,
      "grad_norm": 2.638456801298276,
      "learning_rate": 9.517426273458445e-06,
      "loss": 0.3161,
      "step": 355
    },
    {
      "epoch": 0.19313304721030042,
      "grad_norm": 2.5434891205716235,
      "learning_rate": 9.651474530831099e-06,
      "loss": 0.3178,
      "step": 360
    },
    {
      "epoch": 0.1958154506437768,
      "grad_norm": 2.4996245795516443,
      "learning_rate": 9.785522788203754e-06,
      "loss": 0.315,
      "step": 365
    },
    {
      "epoch": 0.1984978540772532,
      "grad_norm": 2.4323732408263585,
      "learning_rate": 9.919571045576408e-06,
      "loss": 0.32,
      "step": 370
    },
    {
      "epoch": 0.20118025751072963,
      "grad_norm": 2.639735876078223,
      "learning_rate": 9.999991231716779e-06,
      "loss": 0.318,
      "step": 375
    },
    {
      "epoch": 0.20386266094420602,
      "grad_norm": 2.489134849192087,
      "learning_rate": 9.999892588883699e-06,
      "loss": 0.3099,
      "step": 380
    },
    {
      "epoch": 0.2065450643776824,
      "grad_norm": 2.651853212530237,
      "learning_rate": 9.99968434503304e-06,
      "loss": 0.3168,
      "step": 385
    },
    {
      "epoch": 0.2092274678111588,
      "grad_norm": 2.7127492692381283,
      "learning_rate": 9.999366504729645e-06,
      "loss": 0.3294,
      "step": 390
    },
    {
      "epoch": 0.2119098712446352,
      "grad_norm": 2.5252271002385616,
      "learning_rate": 9.998939074940788e-06,
      "loss": 0.3169,
      "step": 395
    },
    {
      "epoch": 0.2145922746781116,
      "grad_norm": 2.6399721708520385,
      "learning_rate": 9.998402065036018e-06,
      "loss": 0.3381,
      "step": 400
    },
    {
      "epoch": 0.217274678111588,
      "grad_norm": 2.7003365423717516,
      "learning_rate": 9.997755486786954e-06,
      "loss": 0.3142,
      "step": 405
    },
    {
      "epoch": 0.21995708154506438,
      "grad_norm": 2.6868806638224707,
      "learning_rate": 9.996999354367028e-06,
      "loss": 0.3148,
      "step": 410
    },
    {
      "epoch": 0.22263948497854077,
      "grad_norm": 2.624156923872402,
      "learning_rate": 9.996133684351172e-06,
      "loss": 0.3222,
      "step": 415
    },
    {
      "epoch": 0.22532188841201717,
      "grad_norm": 2.652116200690864,
      "learning_rate": 9.995158495715459e-06,
      "loss": 0.33,
      "step": 420
    },
    {
      "epoch": 0.22800429184549356,
      "grad_norm": 2.823316936363453,
      "learning_rate": 9.994073809836677e-06,
      "loss": 0.3243,
      "step": 425
    },
    {
      "epoch": 0.23068669527896996,
      "grad_norm": 2.7125330278262756,
      "learning_rate": 9.992879650491877e-06,
      "loss": 0.3264,
      "step": 430
    },
    {
      "epoch": 0.23336909871244635,
      "grad_norm": 2.5801002778784174,
      "learning_rate": 9.991576043857833e-06,
      "loss": 0.3228,
      "step": 435
    },
    {
      "epoch": 0.23605150214592274,
      "grad_norm": 2.7292886989062333,
      "learning_rate": 9.990163018510484e-06,
      "loss": 0.3202,
      "step": 440
    },
    {
      "epoch": 0.23873390557939914,
      "grad_norm": 2.5525025047817516,
      "learning_rate": 9.988640605424298e-06,
      "loss": 0.3297,
      "step": 445
    },
    {
      "epoch": 0.24141630901287553,
      "grad_norm": 2.584026491785491,
      "learning_rate": 9.987008837971595e-06,
      "loss": 0.3236,
      "step": 450
    },
    {
      "epoch": 0.24409871244635192,
      "grad_norm": 2.672529304821864,
      "learning_rate": 9.98526775192182e-06,
      "loss": 0.33,
      "step": 455
    },
    {
      "epoch": 0.24678111587982832,
      "grad_norm": 2.533369244922767,
      "learning_rate": 9.983417385440755e-06,
      "loss": 0.3252,
      "step": 460
    },
    {
      "epoch": 0.2494635193133047,
      "grad_norm": 2.5020721525093954,
      "learning_rate": 9.981457779089678e-06,
      "loss": 0.3403,
      "step": 465
    },
    {
      "epoch": 0.2521459227467811,
      "grad_norm": 2.5877996033422206,
      "learning_rate": 9.979388975824485e-06,
      "loss": 0.3228,
      "step": 470
    },
    {
      "epoch": 0.2548283261802575,
      "grad_norm": 2.5896411045127437,
      "learning_rate": 9.977211020994735e-06,
      "loss": 0.3281,
      "step": 475
    },
    {
      "epoch": 0.2575107296137339,
      "grad_norm": 2.6763575148290397,
      "learning_rate": 9.97492396234267e-06,
      "loss": 0.3232,
      "step": 480
    },
    {
      "epoch": 0.2601931330472103,
      "grad_norm": 2.69005708064464,
      "learning_rate": 9.972527850002154e-06,
      "loss": 0.3205,
      "step": 485
    },
    {
      "epoch": 0.2628755364806867,
      "grad_norm": 2.4610294792240426,
      "learning_rate": 9.970022736497588e-06,
      "loss": 0.3307,
      "step": 490
    },
    {
      "epoch": 0.2655579399141631,
      "grad_norm": 2.6457425081371166,
      "learning_rate": 9.96740867674275e-06,
      "loss": 0.3225,
      "step": 495
    },
    {
      "epoch": 0.26824034334763946,
      "grad_norm": 2.55189454700473,
      "learning_rate": 9.964685728039596e-06,
      "loss": 0.3319,
      "step": 500
    },
    {
      "epoch": 0.2709227467811159,
      "grad_norm": 2.501583579859887,
      "learning_rate": 9.961853950076992e-06,
      "loss": 0.3253,
      "step": 505
    },
    {
      "epoch": 0.27360515021459225,
      "grad_norm": 2.784849014755361,
      "learning_rate": 9.958913404929423e-06,
      "loss": 0.3246,
      "step": 510
    },
    {
      "epoch": 0.2762875536480687,
      "grad_norm": 2.4210533045680944,
      "learning_rate": 9.955864157055623e-06,
      "loss": 0.3319,
      "step": 515
    },
    {
      "epoch": 0.27896995708154504,
      "grad_norm": 2.7378291124313887,
      "learning_rate": 9.95270627329716e-06,
      "loss": 0.3258,
      "step": 520
    },
    {
      "epoch": 0.28165236051502146,
      "grad_norm": 2.7361011558607267,
      "learning_rate": 9.949439822876975e-06,
      "loss": 0.3322,
      "step": 525
    },
    {
      "epoch": 0.2843347639484979,
      "grad_norm": 2.6713473598948827,
      "learning_rate": 9.94606487739787e-06,
      "loss": 0.3416,
      "step": 530
    },
    {
      "epoch": 0.28701716738197425,
      "grad_norm": 2.451525698431377,
      "learning_rate": 9.942581510840919e-06,
      "loss": 0.3373,
      "step": 535
    },
    {
      "epoch": 0.28969957081545067,
      "grad_norm": 2.576977808650475,
      "learning_rate": 9.93898979956387e-06,
      "loss": 0.3294,
      "step": 540
    },
    {
      "epoch": 0.29238197424892703,
      "grad_norm": 2.466121086053238,
      "learning_rate": 9.935289822299456e-06,
      "loss": 0.329,
      "step": 545
    },
    {
      "epoch": 0.29506437768240346,
      "grad_norm": 2.4714805916085925,
      "learning_rate": 9.931481660153672e-06,
      "loss": 0.3254,
      "step": 550
    },
    {
      "epoch": 0.2977467811158798,
      "grad_norm": 2.547149501856792,
      "learning_rate": 9.927565396604001e-06,
      "loss": 0.3314,
      "step": 555
    },
    {
      "epoch": 0.30042918454935624,
      "grad_norm": 2.51150785881792,
      "learning_rate": 9.923541117497586e-06,
      "loss": 0.3436,
      "step": 560
    },
    {
      "epoch": 0.3031115879828326,
      "grad_norm": 2.5691062074531907,
      "learning_rate": 9.919408911049333e-06,
      "loss": 0.3382,
      "step": 565
    },
    {
      "epoch": 0.30579399141630903,
      "grad_norm": 2.5952061487754143,
      "learning_rate": 9.915168867839997e-06,
      "loss": 0.3389,
      "step": 570
    },
    {
      "epoch": 0.3084763948497854,
      "grad_norm": 2.648649412592521,
      "learning_rate": 9.910821080814184e-06,
      "loss": 0.3342,
      "step": 575
    },
    {
      "epoch": 0.3111587982832618,
      "grad_norm": 2.572014571107132,
      "learning_rate": 9.90636564527832e-06,
      "loss": 0.3343,
      "step": 580
    },
    {
      "epoch": 0.3138412017167382,
      "grad_norm": 2.48746737206051,
      "learning_rate": 9.901802658898552e-06,
      "loss": 0.3227,
      "step": 585
    },
    {
      "epoch": 0.3165236051502146,
      "grad_norm": 2.6265939637641784,
      "learning_rate": 9.897132221698624e-06,
      "loss": 0.3367,
      "step": 590
    },
    {
      "epoch": 0.31920600858369097,
      "grad_norm": 2.8015089450095574,
      "learning_rate": 9.892354436057665e-06,
      "loss": 0.3358,
      "step": 595
    },
    {
      "epoch": 0.3218884120171674,
      "grad_norm": 2.5365509224123413,
      "learning_rate": 9.887469406707962e-06,
      "loss": 0.3422,
      "step": 600
    },
    {
      "epoch": 0.32457081545064376,
      "grad_norm": 2.5432604350567134,
      "learning_rate": 9.882477240732652e-06,
      "loss": 0.3413,
      "step": 605
    },
    {
      "epoch": 0.3272532188841202,
      "grad_norm": 2.6042699504210565,
      "learning_rate": 9.877378047563378e-06,
      "loss": 0.3399,
      "step": 610
    },
    {
      "epoch": 0.32993562231759654,
      "grad_norm": 2.364935157785183,
      "learning_rate": 9.872171938977895e-06,
      "loss": 0.3397,
      "step": 615
    },
    {
      "epoch": 0.33261802575107297,
      "grad_norm": 2.4849267513553723,
      "learning_rate": 9.866859029097613e-06,
      "loss": 0.3442,
      "step": 620
    },
    {
      "epoch": 0.33530042918454933,
      "grad_norm": 2.38272866705992,
      "learning_rate": 9.8614394343851e-06,
      "loss": 0.3318,
      "step": 625
    },
    {
      "epoch": 0.33798283261802575,
      "grad_norm": 2.6011694495652393,
      "learning_rate": 9.855913273641531e-06,
      "loss": 0.3476,
      "step": 630
    },
    {
      "epoch": 0.3406652360515021,
      "grad_norm": 2.4610293329339217,
      "learning_rate": 9.850280668004072e-06,
      "loss": 0.3406,
      "step": 635
    },
    {
      "epoch": 0.34334763948497854,
      "grad_norm": 2.5247360326975348,
      "learning_rate": 9.844541740943239e-06,
      "loss": 0.327,
      "step": 640
    },
    {
      "epoch": 0.34603004291845496,
      "grad_norm": 2.4461747371459275,
      "learning_rate": 9.838696618260182e-06,
      "loss": 0.3266,
      "step": 645
    },
    {
      "epoch": 0.3487124463519313,
      "grad_norm": 2.494089826777542,
      "learning_rate": 9.832745428083934e-06,
      "loss": 0.3368,
      "step": 650
    },
    {
      "epoch": 0.35139484978540775,
      "grad_norm": 2.3501397778118216,
      "learning_rate": 9.826688300868597e-06,
      "loss": 0.3404,
      "step": 655
    },
    {
      "epoch": 0.3540772532188841,
      "grad_norm": 2.4817858772039276,
      "learning_rate": 9.820525369390486e-06,
      "loss": 0.3345,
      "step": 660
    },
    {
      "epoch": 0.35675965665236054,
      "grad_norm": 2.591760409478092,
      "learning_rate": 9.814256768745212e-06,
      "loss": 0.3354,
      "step": 665
    },
    {
      "epoch": 0.3594420600858369,
      "grad_norm": 2.5590717559961362,
      "learning_rate": 9.80788263634473e-06,
      "loss": 0.3347,
      "step": 670
    },
    {
      "epoch": 0.3621244635193133,
      "grad_norm": 2.572364951646433,
      "learning_rate": 9.801403111914324e-06,
      "loss": 0.3348,
      "step": 675
    },
    {
      "epoch": 0.3648068669527897,
      "grad_norm": 2.5379695715853785,
      "learning_rate": 9.794818337489535e-06,
      "loss": 0.3376,
      "step": 680
    },
    {
      "epoch": 0.3674892703862661,
      "grad_norm": 2.581994043768899,
      "learning_rate": 9.788128457413064e-06,
      "loss": 0.3404,
      "step": 685
    },
    {
      "epoch": 0.3701716738197425,
      "grad_norm": 2.7753171889008184,
      "learning_rate": 9.78133361833159e-06,
      "loss": 0.338,
      "step": 690
    },
    {
      "epoch": 0.3728540772532189,
      "grad_norm": 2.6265307697974984,
      "learning_rate": 9.774433969192569e-06,
      "loss": 0.3362,
      "step": 695
    },
    {
      "epoch": 0.37553648068669526,
      "grad_norm": 2.4003652471685357,
      "learning_rate": 9.767429661240966e-06,
      "loss": 0.3391,
      "step": 700
    },
    {
      "epoch": 0.3782188841201717,
      "grad_norm": 2.6405132785898835,
      "learning_rate": 9.760320848015932e-06,
      "loss": 0.3366,
      "step": 705
    },
    {
      "epoch": 0.38090128755364805,
      "grad_norm": 2.4198402675372073,
      "learning_rate": 9.75310768534745e-06,
      "loss": 0.3255,
      "step": 710
    },
    {
      "epoch": 0.38358369098712447,
      "grad_norm": 2.4622204650478308,
      "learning_rate": 9.745790331352907e-06,
      "loss": 0.3317,
      "step": 715
    },
    {
      "epoch": 0.38626609442060084,
      "grad_norm": 2.4677135301200903,
      "learning_rate": 9.73836894643364e-06,
      "loss": 0.34,
      "step": 720
    },
    {
      "epoch": 0.38894849785407726,
      "grad_norm": 2.5286539668499852,
      "learning_rate": 9.730843693271413e-06,
      "loss": 0.3463,
      "step": 725
    },
    {
      "epoch": 0.3916309012875536,
      "grad_norm": 2.604595633759426,
      "learning_rate": 9.723214736824847e-06,
      "loss": 0.3251,
      "step": 730
    },
    {
      "epoch": 0.39431330472103004,
      "grad_norm": 2.4610667705486624,
      "learning_rate": 9.715482244325816e-06,
      "loss": 0.3429,
      "step": 735
    },
    {
      "epoch": 0.3969957081545064,
      "grad_norm": 2.5095556348211034,
      "learning_rate": 9.707646385275766e-06,
      "loss": 0.3464,
      "step": 740
    },
    {
      "epoch": 0.39967811158798283,
      "grad_norm": 2.4852177129885216,
      "learning_rate": 9.699707331442016e-06,
      "loss": 0.3398,
      "step": 745
    },
    {
      "epoch": 0.40236051502145925,
      "grad_norm": 2.514520206838667,
      "learning_rate": 9.691665256853978e-06,
      "loss": 0.3393,
      "step": 750
    },
    {
      "epoch": 0.4050429184549356,
      "grad_norm": 2.395761943700656,
      "learning_rate": 9.683520337799353e-06,
      "loss": 0.3334,
      "step": 755
    },
    {
      "epoch": 0.40772532188841204,
      "grad_norm": 2.671227379259191,
      "learning_rate": 9.675272752820258e-06,
      "loss": 0.3363,
      "step": 760
    },
    {
      "epoch": 0.4104077253218884,
      "grad_norm": 2.466935609738587,
      "learning_rate": 9.666922682709317e-06,
      "loss": 0.3417,
      "step": 765
    },
    {
      "epoch": 0.4130901287553648,
      "grad_norm": 2.635659874813337,
      "learning_rate": 9.6584703105057e-06,
      "loss": 0.3359,
      "step": 770
    },
    {
      "epoch": 0.4157725321888412,
      "grad_norm": 2.3825641605764485,
      "learning_rate": 9.649915821491107e-06,
      "loss": 0.3348,
      "step": 775
    },
    {
      "epoch": 0.4184549356223176,
      "grad_norm": 2.4764099222684703,
      "learning_rate": 9.641259403185706e-06,
      "loss": 0.3391,
      "step": 780
    },
    {
      "epoch": 0.421137339055794,
      "grad_norm": 2.330923472419889,
      "learning_rate": 9.632501245344024e-06,
      "loss": 0.3425,
      "step": 785
    },
    {
      "epoch": 0.4238197424892704,
      "grad_norm": 2.548809870463951,
      "learning_rate": 9.623641539950787e-06,
      "loss": 0.3467,
      "step": 790
    },
    {
      "epoch": 0.42650214592274677,
      "grad_norm": 2.4334962792786903,
      "learning_rate": 9.614680481216712e-06,
      "loss": 0.3392,
      "step": 795
    },
    {
      "epoch": 0.4291845493562232,
      "grad_norm": 2.5285826005022773,
      "learning_rate": 9.60561826557425e-06,
      "loss": 0.3313,
      "step": 800
    },
    {
      "epoch": 0.43186695278969955,
      "grad_norm": 2.5379862001062197,
      "learning_rate": 9.596455091673282e-06,
      "loss": 0.3366,
      "step": 805
    },
    {
      "epoch": 0.434549356223176,
      "grad_norm": 2.4524344493146626,
      "learning_rate": 9.587191160376758e-06,
      "loss": 0.3373,
      "step": 810
    },
    {
      "epoch": 0.43723175965665234,
      "grad_norm": 2.4382184535841294,
      "learning_rate": 9.577826674756301e-06,
      "loss": 0.3339,
      "step": 815
    },
    {
      "epoch": 0.43991416309012876,
      "grad_norm": 2.3919293748346244,
      "learning_rate": 9.56836184008775e-06,
      "loss": 0.3372,
      "step": 820
    },
    {
      "epoch": 0.44259656652360513,
      "grad_norm": 2.42205214064704,
      "learning_rate": 9.558796863846663e-06,
      "loss": 0.3336,
      "step": 825
    },
    {
      "epoch": 0.44527896995708155,
      "grad_norm": 2.476393254920334,
      "learning_rate": 9.549131955703772e-06,
      "loss": 0.3257,
      "step": 830
    },
    {
      "epoch": 0.4479613733905579,
      "grad_norm": 2.444593559511972,
      "learning_rate": 9.539367327520382e-06,
      "loss": 0.3406,
      "step": 835
    },
    {
      "epoch": 0.45064377682403434,
      "grad_norm": 2.5344722013029526,
      "learning_rate": 9.529503193343726e-06,
      "loss": 0.3382,
      "step": 840
    },
    {
      "epoch": 0.4533261802575107,
      "grad_norm": 2.454951192856544,
      "learning_rate": 9.519539769402282e-06,
      "loss": 0.3377,
      "step": 845
    },
    {
      "epoch": 0.4560085836909871,
      "grad_norm": 2.4418321575893582,
      "learning_rate": 9.509477274101019e-06,
      "loss": 0.334,
      "step": 850
    },
    {
      "epoch": 0.45869098712446355,
      "grad_norm": 2.3404132744062354,
      "learning_rate": 9.499315928016619e-06,
      "loss": 0.3285,
      "step": 855
    },
    {
      "epoch": 0.4613733905579399,
      "grad_norm": 2.520810375507073,
      "learning_rate": 9.489055953892644e-06,
      "loss": 0.3289,
      "step": 860
    },
    {
      "epoch": 0.46405579399141633,
      "grad_norm": 2.4546553731791243,
      "learning_rate": 9.478697576634646e-06,
      "loss": 0.332,
      "step": 865
    },
    {
      "epoch": 0.4667381974248927,
      "grad_norm": 2.5171909876978424,
      "learning_rate": 9.46824102330524e-06,
      "loss": 0.3377,
      "step": 870
    },
    {
      "epoch": 0.4694206008583691,
      "grad_norm": 2.3511374545845323,
      "learning_rate": 9.457686523119128e-06,
      "loss": 0.3348,
      "step": 875
    },
    {
      "epoch": 0.4721030042918455,
      "grad_norm": 2.4494832727820297,
      "learning_rate": 9.447034307438068e-06,
      "loss": 0.3289,
      "step": 880
    },
    {
      "epoch": 0.4747854077253219,
      "grad_norm": 2.25895629515223,
      "learning_rate": 9.436284609765818e-06,
      "loss": 0.3352,
      "step": 885
    },
    {
      "epoch": 0.47746781115879827,
      "grad_norm": 2.4046948933529206,
      "learning_rate": 9.425437665742998e-06,
      "loss": 0.3388,
      "step": 890
    },
    {
      "epoch": 0.4801502145922747,
      "grad_norm": 2.438756929093279,
      "learning_rate": 9.414493713141936e-06,
      "loss": 0.332,
      "step": 895
    },
    {
      "epoch": 0.48283261802575106,
      "grad_norm": 2.441067720550207,
      "learning_rate": 9.403452991861452e-06,
      "loss": 0.3366,
      "step": 900
    },
    {
      "epoch": 0.4855150214592275,
      "grad_norm": 2.283848440660158,
      "learning_rate": 9.392315743921606e-06,
      "loss": 0.3442,
      "step": 905
    },
    {
      "epoch": 0.48819742489270385,
      "grad_norm": 2.278674924582739,
      "learning_rate": 9.381082213458384e-06,
      "loss": 0.336,
      "step": 910
    },
    {
      "epoch": 0.49087982832618027,
      "grad_norm": 2.350164757811854,
      "learning_rate": 9.36975264671835e-06,
      "loss": 0.3373,
      "step": 915
    },
    {
      "epoch": 0.49356223175965663,
      "grad_norm": 2.4724701188818137,
      "learning_rate": 9.358327292053244e-06,
      "loss": 0.3348,
      "step": 920
    },
    {
      "epoch": 0.49624463519313305,
      "grad_norm": 2.6152902868933343,
      "learning_rate": 9.346806399914547e-06,
      "loss": 0.3376,
      "step": 925
    },
    {
      "epoch": 0.4989270386266094,
      "grad_norm": 2.4063532283395443,
      "learning_rate": 9.335190222847988e-06,
      "loss": 0.3199,
      "step": 930
    },
    {
      "epoch": 0.5016094420600858,
      "grad_norm": 5.48681879827562,
      "learning_rate": 9.323479015488e-06,
      "loss": 0.3204,
      "step": 935
    },
    {
      "epoch": 0.5042918454935622,
      "grad_norm": 2.3776182827904178,
      "learning_rate": 9.311673034552146e-06,
      "loss": 0.3334,
      "step": 940
    },
    {
      "epoch": 0.5069742489270386,
      "grad_norm": 2.4035106668570427,
      "learning_rate": 9.299772538835492e-06,
      "loss": 0.3365,
      "step": 945
    },
    {
      "epoch": 0.509656652360515,
      "grad_norm": 2.350065809667788,
      "learning_rate": 9.28777778920493e-06,
      "loss": 0.3255,
      "step": 950
    },
    {
      "epoch": 0.5123390557939914,
      "grad_norm": 2.4013909228188326,
      "learning_rate": 9.27568904859346e-06,
      "loss": 0.3343,
      "step": 955
    },
    {
      "epoch": 0.5150214592274678,
      "grad_norm": 2.3927042326444763,
      "learning_rate": 9.26350658199443e-06,
      "loss": 0.3354,
      "step": 960
    },
    {
      "epoch": 0.5177038626609443,
      "grad_norm": 2.477304846192379,
      "learning_rate": 9.251230656455722e-06,
      "loss": 0.3354,
      "step": 965
    },
    {
      "epoch": 0.5203862660944206,
      "grad_norm": 2.352254471197396,
      "learning_rate": 9.238861541073909e-06,
      "loss": 0.3421,
      "step": 970
    },
    {
      "epoch": 0.523068669527897,
      "grad_norm": 2.4226810897336204,
      "learning_rate": 9.226399506988336e-06,
      "loss": 0.3284,
      "step": 975
    },
    {
      "epoch": 0.5257510729613734,
      "grad_norm": 2.389275785852567,
      "learning_rate": 9.213844827375196e-06,
      "loss": 0.3333,
      "step": 980
    },
    {
      "epoch": 0.5284334763948498,
      "grad_norm": 2.5370966583431094,
      "learning_rate": 9.201197777441533e-06,
      "loss": 0.3255,
      "step": 985
    },
    {
      "epoch": 0.5311158798283262,
      "grad_norm": 2.3882400135221094,
      "learning_rate": 9.188458634419213e-06,
      "loss": 0.3369,
      "step": 990
    },
    {
      "epoch": 0.5337982832618026,
      "grad_norm": 2.4052438157749068,
      "learning_rate": 9.175627677558842e-06,
      "loss": 0.3234,
      "step": 995
    },
    {
      "epoch": 0.5364806866952789,
      "grad_norm": 2.2811116038015666,
      "learning_rate": 9.162705188123647e-06,
      "loss": 0.3191,
      "step": 1000
    },
    {
      "epoch": 0.5391630901287554,
      "grad_norm": 2.4314020545974895,
      "learning_rate": 9.149691449383313e-06,
      "loss": 0.3312,
      "step": 1005
    },
    {
      "epoch": 0.5418454935622318,
      "grad_norm": 2.307909222042768,
      "learning_rate": 9.136586746607767e-06,
      "loss": 0.3232,
      "step": 1010
    },
    {
      "epoch": 0.5445278969957081,
      "grad_norm": 2.4715790680560223,
      "learning_rate": 9.123391367060937e-06,
      "loss": 0.3391,
      "step": 1015
    },
    {
      "epoch": 0.5472103004291845,
      "grad_norm": 2.4269503060009154,
      "learning_rate": 9.110105599994436e-06,
      "loss": 0.3441,
      "step": 1020
    },
    {
      "epoch": 0.549892703862661,
      "grad_norm": 2.3116975773169983,
      "learning_rate": 9.096729736641242e-06,
      "loss": 0.3359,
      "step": 1025
    },
    {
      "epoch": 0.5525751072961373,
      "grad_norm": 2.2974328635126873,
      "learning_rate": 9.0832640702093e-06,
      "loss": 0.3408,
      "step": 1030
    },
    {
      "epoch": 0.5552575107296137,
      "grad_norm": 2.2521763335833986,
      "learning_rate": 9.0697088958751e-06,
      "loss": 0.3374,
      "step": 1035
    },
    {
      "epoch": 0.5579399141630901,
      "grad_norm": 2.2458595897611704,
      "learning_rate": 9.056064510777204e-06,
      "loss": 0.3273,
      "step": 1040
    },
    {
      "epoch": 0.5606223175965666,
      "grad_norm": 2.504303257938807,
      "learning_rate": 9.042331214009736e-06,
      "loss": 0.3305,
      "step": 1045
    },
    {
      "epoch": 0.5633047210300429,
      "grad_norm": 2.3598031278666225,
      "learning_rate": 9.028509306615825e-06,
      "loss": 0.3329,
      "step": 1050
    },
    {
      "epoch": 0.5659871244635193,
      "grad_norm": 2.34151755224379,
      "learning_rate": 9.014599091581e-06,
      "loss": 0.3281,
      "step": 1055
    },
    {
      "epoch": 0.5686695278969958,
      "grad_norm": 2.299760982732921,
      "learning_rate": 9.000600873826558e-06,
      "loss": 0.3361,
      "step": 1060
    },
    {
      "epoch": 0.5713519313304721,
      "grad_norm": 2.3580709482131947,
      "learning_rate": 8.98651496020287e-06,
      "loss": 0.3291,
      "step": 1065
    },
    {
      "epoch": 0.5740343347639485,
      "grad_norm": 2.3920031667806403,
      "learning_rate": 8.972341659482666e-06,
      "loss": 0.3312,
      "step": 1070
    },
    {
      "epoch": 0.5767167381974249,
      "grad_norm": 2.172659970001439,
      "learning_rate": 8.958081282354253e-06,
      "loss": 0.3316,
      "step": 1075
    },
    {
      "epoch": 0.5793991416309013,
      "grad_norm": 2.5308502890379985,
      "learning_rate": 8.943734141414719e-06,
      "loss": 0.33,
      "step": 1080
    },
    {
      "epoch": 0.5820815450643777,
      "grad_norm": 2.2745279713011572,
      "learning_rate": 8.929300551163068e-06,
      "loss": 0.3314,
      "step": 1085
    },
    {
      "epoch": 0.5847639484978541,
      "grad_norm": 2.3751907668094265,
      "learning_rate": 8.914780827993332e-06,
      "loss": 0.3202,
      "step": 1090
    },
    {
      "epoch": 0.5874463519313304,
      "grad_norm": 2.3870442220469124,
      "learning_rate": 8.900175290187636e-06,
      "loss": 0.3378,
      "step": 1095
    },
    {
      "epoch": 0.5901287553648069,
      "grad_norm": 2.42798532433966,
      "learning_rate": 8.885484257909218e-06,
      "loss": 0.3361,
      "step": 1100
    },
    {
      "epoch": 0.5928111587982833,
      "grad_norm": 2.4376462794372844,
      "learning_rate": 8.870708053195414e-06,
      "loss": 0.3308,
      "step": 1105
    },
    {
      "epoch": 0.5954935622317596,
      "grad_norm": 2.403977385099425,
      "learning_rate": 8.855846999950595e-06,
      "loss": 0.3354,
      "step": 1110
    },
    {
      "epoch": 0.598175965665236,
      "grad_norm": 2.354408960185111,
      "learning_rate": 8.840901423939075e-06,
      "loss": 0.3229,
      "step": 1115
    },
    {
      "epoch": 0.6008583690987125,
      "grad_norm": 2.394846637874159,
      "learning_rate": 8.825871652777955e-06,
      "loss": 0.3283,
      "step": 1120
    },
    {
      "epoch": 0.6035407725321889,
      "grad_norm": 2.5095631272468117,
      "learning_rate": 8.81075801592996e-06,
      "loss": 0.3319,
      "step": 1125
    },
    {
      "epoch": 0.6062231759656652,
      "grad_norm": 2.134318384151151,
      "learning_rate": 8.795560844696198e-06,
      "loss": 0.3265,
      "step": 1130
    },
    {
      "epoch": 0.6089055793991416,
      "grad_norm": 2.3159996051520855,
      "learning_rate": 8.780280472208915e-06,
      "loss": 0.3217,
      "step": 1135
    },
    {
      "epoch": 0.6115879828326181,
      "grad_norm": 2.2868744865845403,
      "learning_rate": 8.764917233424179e-06,
      "loss": 0.3246,
      "step": 1140
    },
    {
      "epoch": 0.6142703862660944,
      "grad_norm": 2.2853556512157978,
      "learning_rate": 8.749471465114548e-06,
      "loss": 0.3354,
      "step": 1145
    },
    {
      "epoch": 0.6169527896995708,
      "grad_norm": 3.2543312254858385,
      "learning_rate": 8.73394350586168e-06,
      "loss": 0.3309,
      "step": 1150
    },
    {
      "epoch": 0.6196351931330472,
      "grad_norm": 2.4093185978818097,
      "learning_rate": 8.71833369604891e-06,
      "loss": 0.331,
      "step": 1155
    },
    {
      "epoch": 0.6223175965665236,
      "grad_norm": 2.2160284457021153,
      "learning_rate": 8.702642377853803e-06,
      "loss": 0.3276,
      "step": 1160
    },
    {
      "epoch": 0.625,
      "grad_norm": 2.3699457374589716,
      "learning_rate": 8.686869895240631e-06,
      "loss": 0.3203,
      "step": 1165
    },
    {
      "epoch": 0.6276824034334764,
      "grad_norm": 2.3124144778635913,
      "learning_rate": 8.671016593952853e-06,
      "loss": 0.3265,
      "step": 1170
    },
    {
      "epoch": 0.6303648068669528,
      "grad_norm": 2.216575126739186,
      "learning_rate": 8.655082821505524e-06,
      "loss": 0.3218,
      "step": 1175
    },
    {
      "epoch": 0.6330472103004292,
      "grad_norm": 2.4834733324750307,
      "learning_rate": 8.639068927177684e-06,
      "loss": 0.3188,
      "step": 1180
    },
    {
      "epoch": 0.6357296137339056,
      "grad_norm": 2.2746731949635435,
      "learning_rate": 8.622975262004694e-06,
      "loss": 0.3222,
      "step": 1185
    },
    {
      "epoch": 0.6384120171673819,
      "grad_norm": 2.3470187447777113,
      "learning_rate": 8.606802178770551e-06,
      "loss": 0.3187,
      "step": 1190
    },
    {
      "epoch": 0.6410944206008584,
      "grad_norm": 2.3886423981993175,
      "learning_rate": 8.590550032000146e-06,
      "loss": 0.3166,
      "step": 1195
    },
    {
      "epoch": 0.6437768240343348,
      "grad_norm": 2.207150506736818,
      "learning_rate": 8.574219177951495e-06,
      "loss": 0.319,
      "step": 1200
    },
    {
      "epoch": 0.6464592274678111,
      "grad_norm": 2.3199216358477064,
      "learning_rate": 8.557809974607936e-06,
      "loss": 0.3203,
      "step": 1205
    },
    {
      "epoch": 0.6491416309012875,
      "grad_norm": 2.218942159747723,
      "learning_rate": 8.541322781670272e-06,
      "loss": 0.3211,
      "step": 1210
    },
    {
      "epoch": 0.651824034334764,
      "grad_norm": 2.2351329682592835,
      "learning_rate": 8.524757960548888e-06,
      "loss": 0.3307,
      "step": 1215
    },
    {
      "epoch": 0.6545064377682404,
      "grad_norm": 2.3264994838320066,
      "learning_rate": 8.50811587435584e-06,
      "loss": 0.3327,
      "step": 1220
    },
    {
      "epoch": 0.6571888412017167,
      "grad_norm": 2.1810997672668577,
      "learning_rate": 8.491396887896878e-06,
      "loss": 0.321,
      "step": 1225
    },
    {
      "epoch": 0.6598712446351931,
      "grad_norm": 2.050613817522075,
      "learning_rate": 8.474601367663463e-06,
      "loss": 0.3382,
      "step": 1230
    },
    {
      "epoch": 0.6625536480686696,
      "grad_norm": 2.3394223996144876,
      "learning_rate": 8.457729681824722e-06,
      "loss": 0.3298,
      "step": 1235
    },
    {
      "epoch": 0.6652360515021459,
      "grad_norm": 2.2199101705255067,
      "learning_rate": 8.440782200219391e-06,
      "loss": 0.3331,
      "step": 1240
    },
    {
      "epoch": 0.6679184549356223,
      "grad_norm": 2.253788914495007,
      "learning_rate": 8.423759294347693e-06,
      "loss": 0.3281,
      "step": 1245
    },
    {
      "epoch": 0.6706008583690987,
      "grad_norm": 2.346252917613176,
      "learning_rate": 8.40666133736321e-06,
      "loss": 0.3332,
      "step": 1250
    },
    {
      "epoch": 0.6732832618025751,
      "grad_norm": 2.128207834307521,
      "learning_rate": 8.389488704064686e-06,
      "loss": 0.3228,
      "step": 1255
    },
    {
      "epoch": 0.6759656652360515,
      "grad_norm": 2.2645538762664814,
      "learning_rate": 8.372241770887826e-06,
      "loss": 0.3301,
      "step": 1260
    },
    {
      "epoch": 0.6786480686695279,
      "grad_norm": 2.2435703531759357,
      "learning_rate": 8.354920915897038e-06,
      "loss": 0.3188,
      "step": 1265
    },
    {
      "epoch": 0.6813304721030042,
      "grad_norm": 2.105214900619832,
      "learning_rate": 8.337526518777143e-06,
      "loss": 0.3154,
      "step": 1270
    },
    {
      "epoch": 0.6840128755364807,
      "grad_norm": 2.1810993246076613,
      "learning_rate": 8.32005896082506e-06,
      "loss": 0.3272,
      "step": 1275
    },
    {
      "epoch": 0.6866952789699571,
      "grad_norm": 2.192939248565909,
      "learning_rate": 8.302518624941435e-06,
      "loss": 0.3305,
      "step": 1280
    },
    {
      "epoch": 0.6893776824034334,
      "grad_norm": 2.260655896974623,
      "learning_rate": 8.284905895622265e-06,
      "loss": 0.3255,
      "step": 1285
    },
    {
      "epoch": 0.6920600858369099,
      "grad_norm": 2.0785182248129384,
      "learning_rate": 8.26722115895045e-06,
      "loss": 0.3156,
      "step": 1290
    },
    {
      "epoch": 0.6947424892703863,
      "grad_norm": 2.2508854283227264,
      "learning_rate": 8.249464802587353e-06,
      "loss": 0.3372,
      "step": 1295
    },
    {
      "epoch": 0.6974248927038627,
      "grad_norm": 2.1542862894893546,
      "learning_rate": 8.231637215764273e-06,
      "loss": 0.3294,
      "step": 1300
    },
    {
      "epoch": 0.700107296137339,
      "grad_norm": 2.1378147185453824,
      "learning_rate": 8.21373878927394e-06,
      "loss": 0.317,
      "step": 1305
    },
    {
      "epoch": 0.7027896995708155,
      "grad_norm": 2.1169330120219167,
      "learning_rate": 8.195769915461931e-06,
      "loss": 0.3167,
      "step": 1310
    },
    {
      "epoch": 0.7054721030042919,
      "grad_norm": 2.3529873888716817,
      "learning_rate": 8.177730988218083e-06,
      "loss": 0.3212,
      "step": 1315
    },
    {
      "epoch": 0.7081545064377682,
      "grad_norm": 2.0874448659072566,
      "learning_rate": 8.159622402967841e-06,
      "loss": 0.3235,
      "step": 1320
    },
    {
      "epoch": 0.7108369098712446,
      "grad_norm": 2.195062749072692,
      "learning_rate": 8.141444556663612e-06,
      "loss": 0.3283,
      "step": 1325
    },
    {
      "epoch": 0.7135193133047211,
      "grad_norm": 2.2007491873002176,
      "learning_rate": 8.123197847776043e-06,
      "loss": 0.3158,
      "step": 1330
    },
    {
      "epoch": 0.7162017167381974,
      "grad_norm": 2.070167655453875,
      "learning_rate": 8.104882676285301e-06,
      "loss": 0.324,
      "step": 1335
    },
    {
      "epoch": 0.7188841201716738,
      "grad_norm": 2.206341175300775,
      "learning_rate": 8.086499443672297e-06,
      "loss": 0.3309,
      "step": 1340
    },
    {
      "epoch": 0.7215665236051502,
      "grad_norm": 2.3276835684793973,
      "learning_rate": 8.068048552909887e-06,
      "loss": 0.3144,
      "step": 1345
    },
    {
      "epoch": 0.7242489270386266,
      "grad_norm": 2.2451541102581882,
      "learning_rate": 8.049530408454041e-06,
      "loss": 0.3187,
      "step": 1350
    },
    {
      "epoch": 0.726931330472103,
      "grad_norm": 2.1237145058481413,
      "learning_rate": 8.030945416234971e-06,
      "loss": 0.3081,
      "step": 1355
    },
    {
      "epoch": 0.7296137339055794,
      "grad_norm": 2.1759630049594114,
      "learning_rate": 8.012293983648247e-06,
      "loss": 0.313,
      "step": 1360
    },
    {
      "epoch": 0.7322961373390557,
      "grad_norm": 2.113909462324413,
      "learning_rate": 7.993576519545844e-06,
      "loss": 0.3141,
      "step": 1365
    },
    {
      "epoch": 0.7349785407725322,
      "grad_norm": 2.2445859762038674,
      "learning_rate": 7.974793434227203e-06,
      "loss": 0.3246,
      "step": 1370
    },
    {
      "epoch": 0.7376609442060086,
      "grad_norm": 2.2483908013869622,
      "learning_rate": 7.955945139430221e-06,
      "loss": 0.3135,
      "step": 1375
    },
    {
      "epoch": 0.740343347639485,
      "grad_norm": 2.060646727580543,
      "learning_rate": 7.937032048322231e-06,
      "loss": 0.3115,
      "step": 1380
    },
    {
      "epoch": 0.7430257510729614,
      "grad_norm": 2.152482541863031,
      "learning_rate": 7.918054575490943e-06,
      "loss": 0.3229,
      "step": 1385
    },
    {
      "epoch": 0.7457081545064378,
      "grad_norm": 2.2154788334750344,
      "learning_rate": 7.899013136935365e-06,
      "loss": 0.3099,
      "step": 1390
    },
    {
      "epoch": 0.7483905579399142,
      "grad_norm": 2.0667260969991346,
      "learning_rate": 7.879908150056668e-06,
      "loss": 0.3126,
      "step": 1395
    },
    {
      "epoch": 0.7510729613733905,
      "grad_norm": 2.2189857771638954,
      "learning_rate": 7.860740033649053e-06,
      "loss": 0.3227,
      "step": 1400
    },
    {
      "epoch": 0.753755364806867,
      "grad_norm": 2.0623159952073,
      "learning_rate": 7.841509207890555e-06,
      "loss": 0.3085,
      "step": 1405
    },
    {
      "epoch": 0.7564377682403434,
      "grad_norm": 2.1912171583314515,
      "learning_rate": 7.822216094333847e-06,
      "loss": 0.3122,
      "step": 1410
    },
    {
      "epoch": 0.7591201716738197,
      "grad_norm": 2.0734397425784423,
      "learning_rate": 7.802861115896988e-06,
      "loss": 0.3147,
      "step": 1415
    },
    {
      "epoch": 0.7618025751072961,
      "grad_norm": 2.035982714898064,
      "learning_rate": 7.783444696854161e-06,
      "loss": 0.3134,
      "step": 1420
    },
    {
      "epoch": 0.7644849785407726,
      "grad_norm": 2.207731900654122,
      "learning_rate": 7.763967262826363e-06,
      "loss": 0.3067,
      "step": 1425
    },
    {
      "epoch": 0.7671673819742489,
      "grad_norm": 2.1989811003411455,
      "learning_rate": 7.74442924077209e-06,
      "loss": 0.309,
      "step": 1430
    },
    {
      "epoch": 0.7698497854077253,
      "grad_norm": 2.206268292717341,
      "learning_rate": 7.724831058977955e-06,
      "loss": 0.3161,
      "step": 1435
    },
    {
      "epoch": 0.7725321888412017,
      "grad_norm": 2.025984448452977,
      "learning_rate": 7.705173147049326e-06,
      "loss": 0.3206,
      "step": 1440
    },
    {
      "epoch": 0.7752145922746781,
      "grad_norm": 1.9929564637912078,
      "learning_rate": 7.685455935900886e-06,
      "loss": 0.3079,
      "step": 1445
    },
    {
      "epoch": 0.7778969957081545,
      "grad_norm": 2.0314175301171407,
      "learning_rate": 7.665679857747204e-06,
      "loss": 0.3237,
      "step": 1450
    },
    {
      "epoch": 0.7805793991416309,
      "grad_norm": 2.0949460645723184,
      "learning_rate": 7.645845346093246e-06,
      "loss": 0.3145,
      "step": 1455
    },
    {
      "epoch": 0.7832618025751072,
      "grad_norm": 2.014695355073373,
      "learning_rate": 7.625952835724892e-06,
      "loss": 0.3149,
      "step": 1460
    },
    {
      "epoch": 0.7859442060085837,
      "grad_norm": 2.2387937166404632,
      "learning_rate": 7.606002762699378e-06,
      "loss": 0.3162,
      "step": 1465
    },
    {
      "epoch": 0.7886266094420601,
      "grad_norm": 2.15393507156796,
      "learning_rate": 7.585995564335764e-06,
      "loss": 0.3118,
      "step": 1470
    },
    {
      "epoch": 0.7913090128755365,
      "grad_norm": 2.1595216508348765,
      "learning_rate": 7.565931679205329e-06,
      "loss": 0.3219,
      "step": 1475
    },
    {
      "epoch": 0.7939914163090128,
      "grad_norm": 1.9839551549639953,
      "learning_rate": 7.545811547121969e-06,
      "loss": 0.3121,
      "step": 1480
    },
    {
      "epoch": 0.7966738197424893,
      "grad_norm": 2.0558005307909433,
      "learning_rate": 7.525635609132543e-06,
      "loss": 0.3237,
      "step": 1485
    },
    {
      "epoch": 0.7993562231759657,
      "grad_norm": 2.159571084879396,
      "learning_rate": 7.505404307507227e-06,
      "loss": 0.3049,
      "step": 1490
    },
    {
      "epoch": 0.802038626609442,
      "grad_norm": 2.0515725794825674,
      "learning_rate": 7.48511808572979e-06,
      "loss": 0.3146,
      "step": 1495
    },
    {
      "epoch": 0.8047210300429185,
      "grad_norm": 2.0530443893191905,
      "learning_rate": 7.464777388487899e-06,
      "loss": 0.3144,
      "step": 1500
    },
    {
      "epoch": 0.8074034334763949,
      "grad_norm": 2.0905835218223263,
      "learning_rate": 7.4443826616633555e-06,
      "loss": 0.3165,
      "step": 1505
    },
    {
      "epoch": 0.8100858369098712,
      "grad_norm": 2.418027955277794,
      "learning_rate": 7.423934352322324e-06,
      "loss": 0.3192,
      "step": 1510
    },
    {
      "epoch": 0.8127682403433476,
      "grad_norm": 2.037633719640789,
      "learning_rate": 7.403432908705537e-06,
      "loss": 0.3151,
      "step": 1515
    },
    {
      "epoch": 0.8154506437768241,
      "grad_norm": 2.060435827050158,
      "learning_rate": 7.382878780218466e-06,
      "loss": 0.3115,
      "step": 1520
    },
    {
      "epoch": 0.8181330472103004,
      "grad_norm": 2.2566623779713475,
      "learning_rate": 7.362272417421467e-06,
      "loss": 0.3087,
      "step": 1525
    },
    {
      "epoch": 0.8208154506437768,
      "grad_norm": 2.0327268140423347,
      "learning_rate": 7.341614272019912e-06,
      "loss": 0.31,
      "step": 1530
    },
    {
      "epoch": 0.8234978540772532,
      "grad_norm": 1.9848402926650588,
      "learning_rate": 7.3209047968542815e-06,
      "loss": 0.312,
      "step": 1535
    },
    {
      "epoch": 0.8261802575107297,
      "grad_norm": 2.019170925911015,
      "learning_rate": 7.300144445890236e-06,
      "loss": 0.3082,
      "step": 1540
    },
    {
      "epoch": 0.828862660944206,
      "grad_norm": 2.0412528190284243,
      "learning_rate": 7.279333674208671e-06,
      "loss": 0.3094,
      "step": 1545
    },
    {
      "epoch": 0.8315450643776824,
      "grad_norm": 2.1119805444890156,
      "learning_rate": 7.258472937995736e-06,
      "loss": 0.3068,
      "step": 1550
    },
    {
      "epoch": 0.8342274678111588,
      "grad_norm": 2.1314912101337,
      "learning_rate": 7.23756269453284e-06,
      "loss": 0.3176,
      "step": 1555
    },
    {
      "epoch": 0.8369098712446352,
      "grad_norm": 2.153414608569875,
      "learning_rate": 7.216603402186618e-06,
      "loss": 0.32,
      "step": 1560
    },
    {
      "epoch": 0.8395922746781116,
      "grad_norm": 2.096635805637814,
      "learning_rate": 7.195595520398898e-06,
      "loss": 0.3187,
      "step": 1565
    },
    {
      "epoch": 0.842274678111588,
      "grad_norm": 2.0469593757948403,
      "learning_rate": 7.174539509676612e-06,
      "loss": 0.3118,
      "step": 1570
    },
    {
      "epoch": 0.8449570815450643,
      "grad_norm": 2.040350099401206,
      "learning_rate": 7.153435831581722e-06,
      "loss": 0.3168,
      "step": 1575
    },
    {
      "epoch": 0.8476394849785408,
      "grad_norm": 1.9568187008226026,
      "learning_rate": 7.132284948721079e-06,
      "loss": 0.3123,
      "step": 1580
    },
    {
      "epoch": 0.8503218884120172,
      "grad_norm": 2.072053485372317,
      "learning_rate": 7.1110873247363035e-06,
      "loss": 0.3082,
      "step": 1585
    },
    {
      "epoch": 0.8530042918454935,
      "grad_norm": 2.1483507704916125,
      "learning_rate": 7.089843424293606e-06,
      "loss": 0.3112,
      "step": 1590
    },
    {
      "epoch": 0.85568669527897,
      "grad_norm": 1.9900643394519248,
      "learning_rate": 7.0685537130736145e-06,
      "loss": 0.3146,
      "step": 1595
    },
    {
      "epoch": 0.8583690987124464,
      "grad_norm": 1.9798174728928966,
      "learning_rate": 7.047218657761156e-06,
      "loss": 0.3113,
      "step": 1600
    },
    {
      "epoch": 0.8610515021459227,
      "grad_norm": 2.098703827789463,
      "learning_rate": 7.025838726035032e-06,
      "loss": 0.3142,
      "step": 1605
    },
    {
      "epoch": 0.8637339055793991,
      "grad_norm": 1.9934072120331014,
      "learning_rate": 7.004414386557765e-06,
      "loss": 0.3105,
      "step": 1610
    },
    {
      "epoch": 0.8664163090128756,
      "grad_norm": 2.0766565354886,
      "learning_rate": 6.982946108965326e-06,
      "loss": 0.3126,
      "step": 1615
    },
    {
      "epoch": 0.869098712446352,
      "grad_norm": 1.972285992340838,
      "learning_rate": 6.961434363856836e-06,
      "loss": 0.3011,
      "step": 1620
    },
    {
      "epoch": 0.8717811158798283,
      "grad_norm": 1.9754040092645364,
      "learning_rate": 6.939879622784259e-06,
      "loss": 0.3123,
      "step": 1625
    },
    {
      "epoch": 0.8744635193133047,
      "grad_norm": 1.9267550423839046,
      "learning_rate": 6.918282358242053e-06,
      "loss": 0.3088,
      "step": 1630
    },
    {
      "epoch": 0.8771459227467812,
      "grad_norm": 1.8725581381284804,
      "learning_rate": 6.896643043656826e-06,
      "loss": 0.3017,
      "step": 1635
    },
    {
      "epoch": 0.8798283261802575,
      "grad_norm": 1.9643002106806835,
      "learning_rate": 6.874962153376945e-06,
      "loss": 0.2996,
      "step": 1640
    },
    {
      "epoch": 0.8825107296137339,
      "grad_norm": 2.022563091759606,
      "learning_rate": 6.853240162662149e-06,
      "loss": 0.3011,
      "step": 1645
    },
    {
      "epoch": 0.8851931330472103,
      "grad_norm": 1.9006078199965732,
      "learning_rate": 6.831477547673122e-06,
      "loss": 0.3035,
      "step": 1650
    },
    {
      "epoch": 0.8878755364806867,
      "grad_norm": 1.93406144932169,
      "learning_rate": 6.8096747854610634e-06,
      "loss": 0.3087,
      "step": 1655
    },
    {
      "epoch": 0.8905579399141631,
      "grad_norm": 1.9677125043725916,
      "learning_rate": 6.787832353957225e-06,
      "loss": 0.3139,
      "step": 1660
    },
    {
      "epoch": 0.8932403433476395,
      "grad_norm": 1.9769538748958528,
      "learning_rate": 6.7659507319624355e-06,
      "loss": 0.3087,
      "step": 1665
    },
    {
      "epoch": 0.8959227467811158,
      "grad_norm": 1.9159322818608964,
      "learning_rate": 6.744030399136606e-06,
      "loss": 0.3037,
      "step": 1670
    },
    {
      "epoch": 0.8986051502145923,
      "grad_norm": 1.9936102143043135,
      "learning_rate": 6.722071835988217e-06,
      "loss": 0.3112,
      "step": 1675
    },
    {
      "epoch": 0.9012875536480687,
      "grad_norm": 2.0322088544070893,
      "learning_rate": 6.700075523863783e-06,
      "loss": 0.302,
      "step": 1680
    },
    {
      "epoch": 0.903969957081545,
      "grad_norm": 1.9939197840944125,
      "learning_rate": 6.678041944937297e-06,
      "loss": 0.3052,
      "step": 1685
    },
    {
      "epoch": 0.9066523605150214,
      "grad_norm": 1.9871201030132006,
      "learning_rate": 6.655971582199672e-06,
      "loss": 0.3029,
      "step": 1690
    },
    {
      "epoch": 0.9093347639484979,
      "grad_norm": 1.9797369349804896,
      "learning_rate": 6.633864919448143e-06,
      "loss": 0.318,
      "step": 1695
    },
    {
      "epoch": 0.9120171673819742,
      "grad_norm": 1.9742118421301116,
      "learning_rate": 6.611722441275666e-06,
      "loss": 0.3006,
      "step": 1700
    },
    {
      "epoch": 0.9146995708154506,
      "grad_norm": 2.008210387553669,
      "learning_rate": 6.589544633060298e-06,
      "loss": 0.3045,
      "step": 1705
    },
    {
      "epoch": 0.9173819742489271,
      "grad_norm": 1.9781519372178997,
      "learning_rate": 6.5673319809545496e-06,
      "loss": 0.3128,
      "step": 1710
    },
    {
      "epoch": 0.9200643776824035,
      "grad_norm": 2.0383463571510623,
      "learning_rate": 6.545084971874738e-06,
      "loss": 0.3192,
      "step": 1715
    },
    {
      "epoch": 0.9227467811158798,
      "grad_norm": 1.9857479052458995,
      "learning_rate": 6.522804093490305e-06,
      "loss": 0.3101,
      "step": 1720
    },
    {
      "epoch": 0.9254291845493562,
      "grad_norm": 1.9349821078553695,
      "learning_rate": 6.50048983421313e-06,
      "loss": 0.3114,
      "step": 1725
    },
    {
      "epoch": 0.9281115879828327,
      "grad_norm": 2.0349951403647797,
      "learning_rate": 6.478142683186827e-06,
      "loss": 0.3012,
      "step": 1730
    },
    {
      "epoch": 0.930793991416309,
      "grad_norm": 2.168702581095354,
      "learning_rate": 6.455763130276019e-06,
      "loss": 0.3128,
      "step": 1735
    },
    {
      "epoch": 0.9334763948497854,
      "grad_norm": 1.8274183403812996,
      "learning_rate": 6.433351666055598e-06,
      "loss": 0.3,
      "step": 1740
    },
    {
      "epoch": 0.9361587982832618,
      "grad_norm": 1.958511869945403,
      "learning_rate": 6.410908781799974e-06,
      "loss": 0.2981,
      "step": 1745
    },
    {
      "epoch": 0.9388412017167382,
      "grad_norm": 1.9921302391659297,
      "learning_rate": 6.388434969472307e-06,
      "loss": 0.3064,
      "step": 1750
    },
    {
      "epoch": 0.9415236051502146,
      "grad_norm": 1.9525782653355659,
      "learning_rate": 6.365930721713718e-06,
      "loss": 0.3083,
      "step": 1755
    },
    {
      "epoch": 0.944206008583691,
      "grad_norm": 1.9063351592546531,
      "learning_rate": 6.343396531832497e-06,
      "loss": 0.3101,
      "step": 1760
    },
    {
      "epoch": 0.9468884120171673,
      "grad_norm": 1.9035930701540635,
      "learning_rate": 6.320832893793285e-06,
      "loss": 0.31,
      "step": 1765
    },
    {
      "epoch": 0.9495708154506438,
      "grad_norm": 1.945671534097237,
      "learning_rate": 6.298240302206242e-06,
      "loss": 0.3041,
      "step": 1770
    },
    {
      "epoch": 0.9522532188841202,
      "grad_norm": 1.8898899891062726,
      "learning_rate": 6.275619252316213e-06,
      "loss": 0.3058,
      "step": 1775
    },
    {
      "epoch": 0.9549356223175965,
      "grad_norm": 1.9935515629356342,
      "learning_rate": 6.25297023999187e-06,
      "loss": 0.3123,
      "step": 1780
    },
    {
      "epoch": 0.9576180257510729,
      "grad_norm": 1.9456154724465158,
      "learning_rate": 6.2302937617148365e-06,
      "loss": 0.3112,
      "step": 1785
    },
    {
      "epoch": 0.9603004291845494,
      "grad_norm": 1.8797779722740051,
      "learning_rate": 6.20759031456881e-06,
      "loss": 0.3096,
      "step": 1790
    },
    {
      "epoch": 0.9629828326180258,
      "grad_norm": 1.8224411214112828,
      "learning_rate": 6.184860396228664e-06,
      "loss": 0.3101,
      "step": 1795
    },
    {
      "epoch": 0.9656652360515021,
      "grad_norm": 1.8297668810578698,
      "learning_rate": 6.1621045049495376e-06,
      "loss": 0.2847,
      "step": 1800
    },
    {
      "epoch": 0.9683476394849786,
      "grad_norm": 1.9939840383368617,
      "learning_rate": 6.139323139555914e-06,
      "loss": 0.3053,
      "step": 1805
    },
    {
      "epoch": 0.971030042918455,
      "grad_norm": 1.9372274381006631,
      "learning_rate": 6.116516799430689e-06,
      "loss": 0.3037,
      "step": 1810
    },
    {
      "epoch": 0.9737124463519313,
      "grad_norm": 1.9192694952750877,
      "learning_rate": 6.0936859845042164e-06,
      "loss": 0.2981,
      "step": 1815
    },
    {
      "epoch": 0.9763948497854077,
      "grad_norm": 1.9654236048530014,
      "learning_rate": 6.07083119524336e-06,
      "loss": 0.2996,
      "step": 1820
    },
    {
      "epoch": 0.9790772532188842,
      "grad_norm": 1.9546774064799297,
      "learning_rate": 6.047952932640513e-06,
      "loss": 0.2956,
      "step": 1825
    },
    {
      "epoch": 0.9817596566523605,
      "grad_norm": 1.9640715199592982,
      "learning_rate": 6.0250516982026205e-06,
      "loss": 0.3125,
      "step": 1830
    },
    {
      "epoch": 0.9844420600858369,
      "grad_norm": 1.9162326350650847,
      "learning_rate": 6.002127993940187e-06,
      "loss": 0.2951,
      "step": 1835
    },
    {
      "epoch": 0.9871244635193133,
      "grad_norm": 1.8860432495404378,
      "learning_rate": 5.979182322356269e-06,
      "loss": 0.314,
      "step": 1840
    },
    {
      "epoch": 0.9898068669527897,
|
"grad_norm": 1.9983248565141436, |
|
"learning_rate": 5.956215186435464e-06, |
|
"loss": 0.3043, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.9924892703862661, |
|
"grad_norm": 2.0141784906862767, |
|
"learning_rate": 5.9332270896328815e-06, |
|
"loss": 0.2936, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9951716738197425, |
|
"grad_norm": 1.9984535593073485, |
|
"learning_rate": 5.910218535863106e-06, |
|
"loss": 0.2933, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.9978540772532188, |
|
"grad_norm": 1.8433003614212762, |
|
"learning_rate": 5.8871900294891525e-06, |
|
"loss": 0.3006, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_runtime": 264.5555, |
|
"eval_samples_per_second": 3.78, |
|
"eval_steps_per_second": 0.945, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 1.0005364806866952, |
|
"grad_norm": 2.120694854811861, |
|
"learning_rate": 5.864142075311414e-06, |
|
"loss": 0.2825, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 1.0032188841201717, |
|
"grad_norm": 2.593879533700858, |
|
"learning_rate": 5.84107517855659e-06, |
|
"loss": 0.2044, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.0059012875536482, |
|
"grad_norm": 1.9446143140239203, |
|
"learning_rate": 5.817989844866613e-06, |
|
"loss": 0.1927, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.0085836909871244, |
|
"grad_norm": 1.9575627873101618, |
|
"learning_rate": 5.794886580287565e-06, |
|
"loss": 0.1991, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.011266094420601, |
|
"grad_norm": 1.9244368381378054, |
|
"learning_rate": 5.77176589125859e-06, |
|
"loss": 0.1978, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 1.0139484978540771, |
|
"grad_norm": 2.0898969362860784, |
|
"learning_rate": 5.7486282846007835e-06, |
|
"loss": 0.1898, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.0166309012875536, |
|
"grad_norm": 1.951202789453023, |
|
"learning_rate": 5.725474267506088e-06, |
|
"loss": 0.204, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 1.01931330472103, |
|
"grad_norm": 2.2177150362230034, |
|
"learning_rate": 5.702304347526172e-06, |
|
"loss": 0.1872, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0219957081545064, |
|
"grad_norm": 2.0803411169100894, |
|
"learning_rate": 5.679119032561311e-06, |
|
"loss": 0.1852, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 1.0246781115879828, |
|
"grad_norm": 1.9125376121986404, |
|
"learning_rate": 5.655918830849243e-06, |
|
"loss": 0.1902, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0273605150214593, |
|
"grad_norm": 1.8075399566893977, |
|
"learning_rate": 5.632704250954039e-06, |
|
"loss": 0.1967, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 1.0300429184549356, |
|
"grad_norm": 1.9242799652725364, |
|
"learning_rate": 5.6094758017549436e-06, |
|
"loss": 0.1911, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.032725321888412, |
|
"grad_norm": 1.895262655048775, |
|
"learning_rate": 5.5862339924352306e-06, |
|
"loss": 0.1946, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.0354077253218885, |
|
"grad_norm": 1.9103776813332407, |
|
"learning_rate": 5.562979332471035e-06, |
|
"loss": 0.1851, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.0380901287553648, |
|
"grad_norm": 1.899712434621443, |
|
"learning_rate": 5.539712331620186e-06, |
|
"loss": 0.1956, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 1.0407725321888412, |
|
"grad_norm": 1.9925712479858069, |
|
"learning_rate": 5.516433499911035e-06, |
|
"loss": 0.1925, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.0434549356223175, |
|
"grad_norm": 1.9792754941714568, |
|
"learning_rate": 5.493143347631272e-06, |
|
"loss": 0.189, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 1.046137339055794, |
|
"grad_norm": 1.7987985947822989, |
|
"learning_rate": 5.4698423853167425e-06, |
|
"loss": 0.1919, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.0488197424892705, |
|
"grad_norm": 1.8860226753914688, |
|
"learning_rate": 5.446531123740257e-06, |
|
"loss": 0.1927, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 1.0515021459227467, |
|
"grad_norm": 1.933052623626728, |
|
"learning_rate": 5.4232100739003855e-06, |
|
"loss": 0.1928, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.0541845493562232, |
|
"grad_norm": 1.8922542086105854, |
|
"learning_rate": 5.399879747010275e-06, |
|
"loss": 0.2012, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 1.0568669527896997, |
|
"grad_norm": 1.923309236602501, |
|
"learning_rate": 5.376540654486422e-06, |
|
"loss": 0.1893, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.059549356223176, |
|
"grad_norm": 2.0308005017940705, |
|
"learning_rate": 5.353193307937477e-06, |
|
"loss": 0.198, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.0622317596566524, |
|
"grad_norm": 1.9695909084757388, |
|
"learning_rate": 5.32983821915302e-06, |
|
"loss": 0.1919, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.0649141630901287, |
|
"grad_norm": 1.8477356570414893, |
|
"learning_rate": 5.306475900092348e-06, |
|
"loss": 0.1919, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 1.0675965665236051, |
|
"grad_norm": 1.9140227431562078, |
|
"learning_rate": 5.283106862873253e-06, |
|
"loss": 0.1837, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.0702789699570816, |
|
"grad_norm": 1.9336338988842239, |
|
"learning_rate": 5.259731619760792e-06, |
|
"loss": 0.1838, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 1.0729613733905579, |
|
"grad_norm": 1.9850056413345472, |
|
"learning_rate": 5.236350683156055e-06, |
|
"loss": 0.1888, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0756437768240343, |
|
"grad_norm": 1.8801399490119242, |
|
"learning_rate": 5.212964565584944e-06, |
|
"loss": 0.1917, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 1.0783261802575108, |
|
"grad_norm": 1.855460220415442, |
|
"learning_rate": 5.189573779686929e-06, |
|
"loss": 0.1953, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.081008583690987, |
|
"grad_norm": 1.9054566980250076, |
|
"learning_rate": 5.166178838203808e-06, |
|
"loss": 0.1961, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 1.0836909871244635, |
|
"grad_norm": 1.8220275412712565, |
|
"learning_rate": 5.142780253968481e-06, |
|
"loss": 0.1869, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.0863733905579398, |
|
"grad_norm": 1.982688826449, |
|
"learning_rate": 5.119378539893693e-06, |
|
"loss": 0.1953, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.0890557939914163, |
|
"grad_norm": 2.0137134540791184, |
|
"learning_rate": 5.095974208960799e-06, |
|
"loss": 0.1931, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.0917381974248928, |
|
"grad_norm": 2.061966599930734, |
|
"learning_rate": 5.072567774208518e-06, |
|
"loss": 0.1902, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 1.094420600858369, |
|
"grad_norm": 1.8435117939697738, |
|
"learning_rate": 5.049159748721685e-06, |
|
"loss": 0.1891, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.0971030042918455, |
|
"grad_norm": 1.9585163553081393, |
|
"learning_rate": 5.025750645620004e-06, |
|
"loss": 0.1847, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 1.099785407725322, |
|
"grad_norm": 1.8838790580051563, |
|
"learning_rate": 5.002340978046807e-06, |
|
"loss": 0.1862, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.1024678111587982, |
|
"grad_norm": 2.032558796353021, |
|
"learning_rate": 4.978931259157791e-06, |
|
"loss": 0.1829, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 1.1051502145922747, |
|
"grad_norm": 1.9909681018156133, |
|
"learning_rate": 4.955522002109782e-06, |
|
"loss": 0.1896, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.1078326180257512, |
|
"grad_norm": 1.8872844795934913, |
|
"learning_rate": 4.932113720049485e-06, |
|
"loss": 0.1945, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 1.1105150214592274, |
|
"grad_norm": 1.8664340403182569, |
|
"learning_rate": 4.908706926102229e-06, |
|
"loss": 0.1832, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.113197424892704, |
|
"grad_norm": 1.9652896251498833, |
|
"learning_rate": 4.885302133360722e-06, |
|
"loss": 0.1935, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.1158798283261802, |
|
"grad_norm": 1.94462644957755, |
|
"learning_rate": 4.8618998548738065e-06, |
|
"loss": 0.19, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.1185622317596566, |
|
"grad_norm": 1.8921831498846893, |
|
"learning_rate": 4.8385006036352104e-06, |
|
"loss": 0.1797, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 1.121244635193133, |
|
"grad_norm": 1.8719163544870403, |
|
"learning_rate": 4.8151048925723014e-06, |
|
"loss": 0.1944, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.1239270386266094, |
|
"grad_norm": 1.9222116405084952, |
|
"learning_rate": 4.791713234534844e-06, |
|
"loss": 0.1837, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 1.1266094420600858, |
|
"grad_norm": 1.8232958991070136, |
|
"learning_rate": 4.768326142283757e-06, |
|
"loss": 0.1831, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.1292918454935623, |
|
"grad_norm": 1.9510100553537169, |
|
"learning_rate": 4.744944128479879e-06, |
|
"loss": 0.1862, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 1.1319742489270386, |
|
"grad_norm": 1.8528114260849555, |
|
"learning_rate": 4.7215677056727185e-06, |
|
"loss": 0.1875, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.134656652360515, |
|
"grad_norm": 1.963211096765823, |
|
"learning_rate": 4.698197386289232e-06, |
|
"loss": 0.1888, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 1.1373390557939915, |
|
"grad_norm": 1.9707829205681304, |
|
"learning_rate": 4.674833682622577e-06, |
|
"loss": 0.1939, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.1400214592274678, |
|
"grad_norm": 2.0735011250590416, |
|
"learning_rate": 4.6514771068209e-06, |
|
"loss": 0.1844, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.1427038626609443, |
|
"grad_norm": 1.882540207560406, |
|
"learning_rate": 4.628128170876093e-06, |
|
"loss": 0.1915, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.1453862660944205, |
|
"grad_norm": 1.8962743473329966, |
|
"learning_rate": 4.604787386612579e-06, |
|
"loss": 0.1851, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 1.148068669527897, |
|
"grad_norm": 1.926874846100442, |
|
"learning_rate": 4.581455265676089e-06, |
|
"loss": 0.1924, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.1507510729613735, |
|
"grad_norm": 1.8048828318833432, |
|
"learning_rate": 4.558132319522451e-06, |
|
"loss": 0.1745, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 1.1534334763948497, |
|
"grad_norm": 1.7715458946715994, |
|
"learning_rate": 4.534819059406374e-06, |
|
"loss": 0.1825, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.1561158798283262, |
|
"grad_norm": 2.1246170652187217, |
|
"learning_rate": 4.511515996370244e-06, |
|
"loss": 0.1969, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 1.1587982832618025, |
|
"grad_norm": 1.9981213878090085, |
|
"learning_rate": 4.488223641232915e-06, |
|
"loss": 0.1906, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.161480686695279, |
|
"grad_norm": 1.944388352609748, |
|
"learning_rate": 4.464942504578524e-06, |
|
"loss": 0.1904, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 1.1641630901287554, |
|
"grad_norm": 1.9404504924040458, |
|
"learning_rate": 4.441673096745287e-06, |
|
"loss": 0.1923, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.1668454935622319, |
|
"grad_norm": 1.958136355382909, |
|
"learning_rate": 4.418415927814315e-06, |
|
"loss": 0.1849, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.1695278969957081, |
|
"grad_norm": 1.9020309954682937, |
|
"learning_rate": 4.395171507598441e-06, |
|
"loss": 0.185, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.1722103004291846, |
|
"grad_norm": 2.054733450508494, |
|
"learning_rate": 4.371940345631027e-06, |
|
"loss": 0.1959, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 1.1748927038626609, |
|
"grad_norm": 1.7682045147485477, |
|
"learning_rate": 4.348722951154816e-06, |
|
"loss": 0.1829, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.1775751072961373, |
|
"grad_norm": 1.8451190552804573, |
|
"learning_rate": 4.3255198331107485e-06, |
|
"loss": 0.1886, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 1.1802575107296138, |
|
"grad_norm": 1.888436127522195, |
|
"learning_rate": 4.302331500126824e-06, |
|
"loss": 0.1874, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.18293991416309, |
|
"grad_norm": 1.8508208572393086, |
|
"learning_rate": 4.279158460506939e-06, |
|
"loss": 0.1839, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 1.1856223175965666, |
|
"grad_norm": 1.8752610650517156, |
|
"learning_rate": 4.256001222219751e-06, |
|
"loss": 0.1836, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.1883047210300428, |
|
"grad_norm": 1.9447253067539931, |
|
"learning_rate": 4.232860292887537e-06, |
|
"loss": 0.1793, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 1.1909871244635193, |
|
"grad_norm": 1.9831882071869358, |
|
"learning_rate": 4.2097361797750815e-06, |
|
"loss": 0.1818, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.1936695278969958, |
|
"grad_norm": 1.7843813575545122, |
|
"learning_rate": 4.1866293897785356e-06, |
|
"loss": 0.1854, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.196351931330472, |
|
"grad_norm": 1.8215117169741488, |
|
"learning_rate": 4.16354042941432e-06, |
|
"loss": 0.1823, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.1990343347639485, |
|
"grad_norm": 1.794689138477906, |
|
"learning_rate": 4.1404698048080175e-06, |
|
"loss": 0.1931, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 1.201716738197425, |
|
"grad_norm": 2.007288084700448, |
|
"learning_rate": 4.117418021683278e-06, |
|
"loss": 0.1903, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.2043991416309012, |
|
"grad_norm": 1.8915363768220448, |
|
"learning_rate": 4.094385585350736e-06, |
|
"loss": 0.1903, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 1.2070815450643777, |
|
"grad_norm": 1.8027413880341696, |
|
"learning_rate": 4.0713730006969285e-06, |
|
"loss": 0.1829, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.2097639484978542, |
|
"grad_norm": 1.916547673365756, |
|
"learning_rate": 4.048380772173231e-06, |
|
"loss": 0.1925, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 1.2124463519313304, |
|
"grad_norm": 2.003487538577625, |
|
"learning_rate": 4.0254094037848005e-06, |
|
"loss": 0.187, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.215128755364807, |
|
"grad_norm": 1.9436303434588267, |
|
"learning_rate": 4.002459399079523e-06, |
|
"loss": 0.1865, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 1.2178111587982832, |
|
"grad_norm": 1.875264231892694, |
|
"learning_rate": 3.979531261136981e-06, |
|
"loss": 0.1914, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.2204935622317596, |
|
"grad_norm": 1.9733578626559716, |
|
"learning_rate": 3.956625492557417e-06, |
|
"loss": 0.1832, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.2231759656652361, |
|
"grad_norm": 1.8441757005377994, |
|
"learning_rate": 3.933742595450733e-06, |
|
"loss": 0.1836, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.2258583690987124, |
|
"grad_norm": 1.9466583484674584, |
|
"learning_rate": 3.910883071425463e-06, |
|
"loss": 0.1871, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 1.2285407725321889, |
|
"grad_norm": 1.8304359569142867, |
|
"learning_rate": 3.8880474215777915e-06, |
|
"loss": 0.1838, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.2312231759656653, |
|
"grad_norm": 1.8139303304810939, |
|
"learning_rate": 3.865236146480562e-06, |
|
"loss": 0.1843, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.2339055793991416, |
|
"grad_norm": 1.9435776514438137, |
|
"learning_rate": 3.842449746172311e-06, |
|
"loss": 0.1808, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.236587982832618, |
|
"grad_norm": 1.8980721011139456, |
|
"learning_rate": 3.8196887201463e-06, |
|
"loss": 0.1876, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 1.2392703862660945, |
|
"grad_norm": 1.9447417697477043, |
|
"learning_rate": 3.796953567339571e-06, |
|
"loss": 0.1934, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.2419527896995708, |
|
"grad_norm": 1.8857226737287016, |
|
"learning_rate": 3.7742447861220027e-06, |
|
"loss": 0.1849, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 1.2446351931330473, |
|
"grad_norm": 1.8457486545476387, |
|
"learning_rate": 3.7515628742854006e-06, |
|
"loss": 0.1857, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.2473175965665235, |
|
"grad_norm": 1.8824623465626855, |
|
"learning_rate": 3.7289083290325668e-06, |
|
"loss": 0.1829, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.7720169972710078, |
|
"learning_rate": 3.706281646966409e-06, |
|
"loss": 0.1795, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.2526824034334765, |
|
"grad_norm": 1.8581809489899828, |
|
"learning_rate": 3.6836833240790625e-06, |
|
"loss": 0.1858, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 1.2553648068669527, |
|
"grad_norm": 1.870206295355774, |
|
"learning_rate": 3.6611138557410047e-06, |
|
"loss": 0.1871, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.2580472103004292, |
|
"grad_norm": 1.8107941795756974, |
|
"learning_rate": 3.638573736690202e-06, |
|
"loss": 0.1835, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.2607296137339055, |
|
"grad_norm": 1.8600512142142989, |
|
"learning_rate": 3.6160634610212642e-06, |
|
"loss": 0.1792, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.263412017167382, |
|
"grad_norm": 1.9037185417067675, |
|
"learning_rate": 3.5935835221746183e-06, |
|
"loss": 0.1804, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.2660944206008584, |
|
"grad_norm": 1.8190831838228707, |
|
"learning_rate": 3.5711344129256832e-06, |
|
"loss": 0.1855, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.268776824034335, |
|
"grad_norm": 1.8604510433648678, |
|
"learning_rate": 3.548716625374074e-06, |
|
"loss": 0.1854, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.2714592274678111, |
|
"grad_norm": 1.7922678088212196, |
|
"learning_rate": 3.5263306509328103e-06, |
|
"loss": 0.191, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.2741416309012876, |
|
"grad_norm": 1.8236523117518297, |
|
"learning_rate": 3.5039769803175545e-06, |
|
"loss": 0.1836, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.2768240343347639, |
|
"grad_norm": 1.823103504438182, |
|
"learning_rate": 3.481656103535839e-06, |
|
"loss": 0.185, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.2795064377682404, |
|
"grad_norm": 1.888033508047175, |
|
"learning_rate": 3.459368509876338e-06, |
|
"loss": 0.1856, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.2821888412017168, |
|
"grad_norm": 1.890507608231082, |
|
"learning_rate": 3.437114687898132e-06, |
|
"loss": 0.1836, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.284871244635193, |
|
"grad_norm": 1.7520046095037403, |
|
"learning_rate": 3.414895125420013e-06, |
|
"loss": 0.1827, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 1.2875536480686696, |
|
"grad_norm": 1.8360885026352816, |
|
"learning_rate": 3.3927103095097725e-06, |
|
"loss": 0.1835, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.2902360515021458, |
|
"grad_norm": 1.8064157609962674, |
|
"learning_rate": 3.370560726473537e-06, |
|
"loss": 0.1873, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 1.2929184549356223, |
|
"grad_norm": 1.8383747146999474, |
|
"learning_rate": 3.348446861845106e-06, |
|
"loss": 0.1845, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.2956008583690988, |
|
"grad_norm": 1.8630841468301194, |
|
"learning_rate": 3.3263692003753056e-06, |
|
"loss": 0.1821, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.298283261802575, |
|
"grad_norm": 1.932110484350197, |
|
"learning_rate": 3.304328226021365e-06, |
|
"loss": 0.1923, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.3009656652360515, |
|
"grad_norm": 1.7606005669099878, |
|
"learning_rate": 3.282324421936307e-06, |
|
"loss": 0.183, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.3036480686695278, |
|
"grad_norm": 2.0056231457186673, |
|
"learning_rate": 3.2603582704583547e-06, |
|
"loss": 0.1778, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.3063304721030042, |
|
"grad_norm": 1.9561084586712767, |
|
"learning_rate": 3.2384302531003676e-06, |
|
"loss": 0.1799, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 1.3090128755364807, |
|
"grad_norm": 1.8638952268827136, |
|
"learning_rate": 3.216540850539272e-06, |
|
"loss": 0.1858, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.3116952789699572, |
|
"grad_norm": 1.917284911434071, |
|
"learning_rate": 3.1946905426055353e-06, |
|
"loss": 0.186, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.3143776824034334, |
|
"grad_norm": 1.8768203193899429, |
|
"learning_rate": 3.172879808272642e-06, |
|
"loss": 0.1881, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.31706008583691, |
|
"grad_norm": 1.7922207687968186, |
|
"learning_rate": 3.151109125646601e-06, |
|
"loss": 0.1794, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 1.3197424892703862, |
|
"grad_norm": 1.6790144782004666, |
|
"learning_rate": 3.1293789719554562e-06, |
|
"loss": 0.1843, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.3224248927038627, |
|
"grad_norm": 1.8512736337657654, |
|
"learning_rate": 3.107689823538833e-06, |
|
"loss": 0.1848, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 1.3251072961373391, |
|
"grad_norm": 1.778250965646014, |
|
"learning_rate": 3.086042155837491e-06, |
|
"loss": 0.1817, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.3277896995708154, |
|
"grad_norm": 1.8512377977977972, |
|
"learning_rate": 3.0644364433829076e-06, |
|
"loss": 0.1874, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.3304721030042919, |
|
"grad_norm": 1.7483705296612935, |
|
"learning_rate": 3.0428731597868706e-06, |
|
"loss": 0.1777, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.3331545064377681, |
|
"grad_norm": 1.88152399160428, |
|
"learning_rate": 3.021352777731096e-06, |
|
"loss": 0.1899, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.3358369098712446, |
|
"grad_norm": 2.044481463245228, |
|
"learning_rate": 2.9998757689568775e-06, |
|
"loss": 0.1816, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.338519313304721, |
|
"grad_norm": 1.7647457877389827, |
|
"learning_rate": 2.978442604254729e-06, |
|
"loss": 0.1818, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 1.3412017167381975, |
|
"grad_norm": 1.8065349625848237, |
|
"learning_rate": 2.9570537534540765e-06, |
|
"loss": 0.1867, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.3438841201716738, |
|
"grad_norm": 1.8115902616120596, |
|
"learning_rate": 2.935709685412954e-06, |
|
"loss": 0.1826, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 1.3465665236051503, |
|
"grad_norm": 1.8408207995961785, |
|
"learning_rate": 2.9144108680077288e-06, |
|
"loss": 0.1818, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.3492489270386265, |
|
"grad_norm": 1.8831016606974356, |
|
"learning_rate": 2.8931577681228407e-06, |
|
"loss": 0.18, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 1.351931330472103, |
|
"grad_norm": 1.9489521406118477, |
|
"learning_rate": 2.871950851640577e-06, |
|
"loss": 0.1859, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.3546137339055795, |
|
"grad_norm": 1.7722721486632287, |
|
"learning_rate": 2.8507905834308417e-06, |
|
"loss": 0.1881, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.3572961373390557, |
|
"grad_norm": 1.6851782754641256, |
|
"learning_rate": 2.8296774273409944e-06, |
|
"loss": 0.1818, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.3599785407725322, |
|
"grad_norm": 1.916422047214028, |
|
"learning_rate": 2.8086118461856494e-06, |
|
"loss": 0.1792, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 1.3626609442060085, |
|
"grad_norm": 1.9594890392126139, |
|
"learning_rate": 2.787594301736556e-06, |
|
"loss": 0.1913, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.365343347639485, |
|
"grad_norm": 1.8980441354405482, |
|
"learning_rate": 2.7666252547124596e-06, |
|
"loss": 0.1834, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 1.3680257510729614, |
|
"grad_norm": 1.7545189156866563, |
|
"learning_rate": 2.745705164769015e-06, |
|
"loss": 0.1801, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.370708154506438, |
|
"grad_norm": 1.850269965863506, |
|
"learning_rate": 2.724834490488705e-06, |
|
"loss": 0.1839, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.3733905579399142, |
|
"grad_norm": 1.855740355586457, |
|
"learning_rate": 2.7040136893707813e-06, |
|
"loss": 0.1777, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.3760729613733906, |
|
"grad_norm": 1.8352138794321073, |
|
"learning_rate": 2.683243217821248e-06, |
|
"loss": 0.1767, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 1.378755364806867, |
|
"grad_norm": 1.8325611316525643, |
|
"learning_rate": 2.66252353114285e-06, |
|
"loss": 0.1835, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.3814377682403434, |
|
"grad_norm": 1.80493984379527, |
|
"learning_rate": 2.6418550835250946e-06, |
|
"loss": 0.1814, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.3841201716738198, |
|
"grad_norm": 1.858890843757945, |
|
"learning_rate": 2.621238328034289e-06, |
|
"loss": 0.1821, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.386802575107296, |
|
"grad_norm": 1.6380462360496462, |
|
"learning_rate": 2.60067371660362e-06, |
|
"loss": 0.1735, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 1.3894849785407726, |
|
"grad_norm": 1.8867445454415144, |
|
"learning_rate": 2.5801617000232416e-06, |
|
"loss": 0.189, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.3921673819742488, |
|
"grad_norm": 1.7765522026919163, |
|
"learning_rate": 2.559702727930386e-06, |
|
"loss": 0.1759, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 1.3948497854077253, |
|
"grad_norm": 1.792662418009236, |
|
"learning_rate": 2.5392972487995247e-06, |
|
"loss": 0.1795, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.3975321888412018, |
|
"grad_norm": 1.8090026006827873, |
|
"learning_rate": 2.5189457099325153e-06, |
|
"loss": 0.1775, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 1.400214592274678, |
|
"grad_norm": 1.8203891838432023, |
|
"learning_rate": 2.498648557448824e-06, |
|
"loss": 0.178, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.4028969957081545, |
|
"grad_norm": 1.72821875442422, |
|
"learning_rate": 2.4784062362757156e-06, |
|
"loss": 0.1771, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 1.4055793991416308, |
|
"grad_norm": 1.8579489870699128, |
|
"learning_rate": 2.458219190138526e-06, |
|
"loss": 0.1808, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.4082618025751072, |
|
"grad_norm": 1.8627368501971178, |
|
"learning_rate": 2.4380878615509156e-06, |
|
"loss": 0.1787, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.4109442060085837, |
|
"grad_norm": 1.7845043882521765, |
|
"learning_rate": 2.418012691805191e-06, |
|
"loss": 0.1799, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.4136266094420602, |
|
"grad_norm": 1.788591369135296, |
|
"learning_rate": 2.3979941209626072e-06, |
|
"loss": 0.1777, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 1.4163090128755365, |
|
"grad_norm": 2.0687601856442948, |
|
"learning_rate": 2.3780325878437415e-06, |
|
"loss": 0.1788, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.418991416309013, |
|
"grad_norm": 1.8934836343713255, |
|
"learning_rate": 2.358128530018858e-06, |
|
"loss": 0.1852, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 1.4216738197424892, |
|
"grad_norm": 1.966625879583478, |
|
"learning_rate": 2.3382823837983314e-06, |
|
"loss": 0.1764, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.4243562231759657, |
|
"grad_norm": 1.9063104586942645, |
|
"learning_rate": 2.318494584223072e-06, |
|
"loss": 0.1859, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 1.4270386266094421, |
|
"grad_norm": 1.769887559359207, |
|
"learning_rate": 2.2987655650549862e-06, |
|
"loss": 0.1773, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.4297210300429184, |
|
"grad_norm": 1.814494467956324, |
|
"learning_rate": 2.2790957587674876e-06, |
|
"loss": 0.175, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 1.4324034334763949, |
|
"grad_norm": 1.696652358329876, |
|
"learning_rate": 2.2594855965359906e-06, |
|
"loss": 0.1788, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.4350858369098711, |
|
"grad_norm": 1.8559660922469163, |
|
"learning_rate": 2.2399355082284804e-06, |
|
"loss": 0.1811, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.4377682403433476, |
|
"grad_norm": 1.9239538845551258, |
|
"learning_rate": 2.2204459223960716e-06, |
|
"loss": 0.1793, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.440450643776824, |
|
"grad_norm": 1.8209833290463602, |
|
"learning_rate": 2.2010172662636377e-06, |
|
"loss": 0.1856, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 1.4431330472103006, |
|
"grad_norm": 1.8780321758764846, |
|
"learning_rate": 2.1816499657204183e-06, |
|
"loss": 0.1759, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.4458154506437768, |
|
"grad_norm": 1.8081298619358337, |
|
"learning_rate": 2.1623444453107067e-06, |
|
"loss": 0.1818, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.4484978540772533, |
|
"grad_norm": 1.8393403734990783, |
|
"learning_rate": 2.1431011282245274e-06, |
|
"loss": 0.1818, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.4511802575107295, |
|
"grad_norm": 1.7677912528124624, |
|
"learning_rate": 2.12392043628837e-06, |
|
"loss": 0.1782, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 1.453862660944206, |
|
"grad_norm": 1.8305928226025625, |
|
"learning_rate": 2.10480278995594e-06, |
|
"loss": 0.1802, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.4565450643776825, |
|
"grad_norm": 1.7672537137117097, |
|
"learning_rate": 2.0857486082989344e-06, |
|
"loss": 0.185, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 1.4592274678111588, |
|
"grad_norm": 1.8149425510442876, |
|
"learning_rate": 2.0667583089978673e-06, |
|
"loss": 0.1801, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.4619098712446352, |
|
"grad_norm": 1.8898302264278783, |
|
"learning_rate": 2.0478323083329072e-06, |
|
"loss": 0.1729, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.4645922746781115, |
|
"grad_norm": 1.7928411416585508, |
|
"learning_rate": 2.028971021174754e-06, |
|
"loss": 0.187, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.467274678111588, |
|
"grad_norm": 1.777771108928477, |
|
"learning_rate": 2.0101748609755407e-06, |
|
"loss": 0.1842, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 1.4699570815450644, |
|
"grad_norm": 1.7918282362479463, |
|
"learning_rate": 1.9914442397597756e-06, |
|
"loss": 0.1756, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.4726394849785407, |
|
"grad_norm": 1.8575352923188138, |
|
"learning_rate": 1.9727795681153083e-06, |
|
"loss": 0.1711, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 1.4753218884120172, |
|
"grad_norm": 1.7880373191062, |
|
"learning_rate": 1.954181255184331e-06, |
|
"loss": 0.1761, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.4780042918454936, |
|
"grad_norm": 1.6912764756456815, |
|
"learning_rate": 1.935649708654403e-06, |
|
"loss": 0.18, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 1.48068669527897, |
|
"grad_norm": 1.750214167289921, |
|
"learning_rate": 1.9171853347495234e-06, |
|
"loss": 0.1758, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.4833690987124464, |
|
"grad_norm": 1.742571882789752, |
|
"learning_rate": 1.8987885382212235e-06, |
|
"loss": 0.177, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.4860515021459229, |
|
"grad_norm": 1.747985211748792, |
|
"learning_rate": 1.8804597223396865e-06, |
|
"loss": 0.1827, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.488733905579399, |
|
"grad_norm": 1.7365122211636692, |
|
"learning_rate": 1.8621992888849217e-06, |
|
"loss": 0.1813, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.4914163090128756, |
|
"grad_norm": 1.7225741986182896, |
|
"learning_rate": 1.8440076381379395e-06, |
|
"loss": 0.1761, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.4940987124463518, |
|
"grad_norm": 1.7524421394229264, |
|
"learning_rate": 1.8258851688720009e-06, |
|
"loss": 0.1719, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 1.4967811158798283, |
|
"grad_norm": 1.8688103054066454, |
|
"learning_rate": 1.807832278343849e-06, |
|
"loss": 0.1815, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.4994635193133048, |
|
"grad_norm": 1.814078295814406, |
|
"learning_rate": 1.7898493622850227e-06, |
|
"loss": 0.1763, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 1.5021459227467813, |
|
"grad_norm": 1.7624214805914273, |
|
"learning_rate": 1.771936814893167e-06, |
|
"loss": 0.1766, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.5048283261802575, |
|
"grad_norm": 1.836237270092974, |
|
"learning_rate": 1.7540950288234033e-06, |
|
"loss": 0.1854, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 1.5075107296137338, |
|
"grad_norm": 1.8024815301300612, |
|
"learning_rate": 1.7363243951797155e-06, |
|
"loss": 0.1762, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.5101931330472103, |
|
"grad_norm": 1.6738381918826155, |
|
"learning_rate": 1.7186253035063738e-06, |
|
"loss": 0.1693, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 1.5128755364806867, |
|
"grad_norm": 1.710012695779441, |
|
"learning_rate": 1.7009981417794114e-06, |
|
"loss": 0.1718, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.5155579399141632, |
|
"grad_norm": 1.764745052249843, |
|
"learning_rate": 1.6834432963980957e-06, |
|
"loss": 0.1754, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.5182403433476395, |
|
"grad_norm": 1.7903434110070675, |
|
"learning_rate": 1.6659611521764807e-06, |
|
"loss": 0.1747, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.5209227467811157, |
|
"grad_norm": 1.768530343927696, |
|
"learning_rate": 1.6485520923349529e-06, |
|
"loss": 0.1705, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.5236051502145922, |
|
"grad_norm": 1.7394186867583092, |
|
"learning_rate": 1.6312164984918516e-06, |
|
"loss": 0.162, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.5262875536480687, |
|
"grad_norm": 1.802460018224268, |
|
"learning_rate": 1.6139547506550808e-06, |
|
"loss": 0.181, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 1.5289699570815452, |
|
"grad_norm": 1.7800548331331065, |
|
"learning_rate": 1.5967672272137968e-06, |
|
"loss": 0.1824, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.5316523605150214, |
|
"grad_norm": 1.8431118044667758, |
|
"learning_rate": 1.5796543049301033e-06, |
|
"loss": 0.1745, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 1.5343347639484979, |
|
"grad_norm": 1.933099270127094, |
|
"learning_rate": 1.5626163589307991e-06, |
|
"loss": 0.1822, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.5370171673819741, |
|
"grad_norm": 1.776366621098978, |
|
"learning_rate": 1.5456537626991525e-06, |
|
"loss": 0.1702, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 1.5396995708154506, |
|
"grad_norm": 1.7328445156009145, |
|
"learning_rate": 1.5287668880667107e-06, |
|
"loss": 0.1717, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.542381974248927, |
|
"grad_norm": 1.8204881586425163, |
|
"learning_rate": 1.5119561052051546e-06, |
|
"loss": 0.1743, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.5450643776824036, |
|
"grad_norm": 1.9765707252109948, |
|
"learning_rate": 1.495221782618183e-06, |
|
"loss": 0.18, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.5477467811158798, |
|
"grad_norm": 1.8401668550635137, |
|
"learning_rate": 1.4785642871334349e-06, |
|
"loss": 0.1705, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 1.550429184549356, |
|
"grad_norm": 1.833961707251297, |
|
"learning_rate": 1.4619839838944416e-06, |
|
"loss": 0.181, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.5531115879828326, |
|
"grad_norm": 1.7665847583946384, |
|
"learning_rate": 1.4454812363526339e-06, |
|
"loss": 0.1737, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 1.555793991416309, |
|
"grad_norm": 1.7847703805357666, |
|
"learning_rate": 1.429056406259368e-06, |
|
"loss": 0.1644, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.5584763948497855, |
|
"grad_norm": 1.8318156839510908, |
|
"learning_rate": 1.4127098536579982e-06, |
|
"loss": 0.1786, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.5611587982832618, |
|
"grad_norm": 1.844245320322203, |
|
"learning_rate": 1.3964419368759786e-06, |
|
"loss": 0.172, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.5638412017167382, |
|
"grad_norm": 1.7841736172536102, |
|
"learning_rate": 1.380253012517019e-06, |
|
"loss": 0.1708, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 1.5665236051502145, |
|
"grad_norm": 1.7429571392033343, |
|
"learning_rate": 1.3641434354532595e-06, |
|
"loss": 0.1751, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.569206008583691, |
|
"grad_norm": 1.7709929022213504, |
|
"learning_rate": 1.3481135588174926e-06, |
|
"loss": 0.1732, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.5718884120171674, |
|
"grad_norm": 1.8229215381144042, |
|
"learning_rate": 1.332163733995427e-06, |
|
"loss": 0.178, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.574570815450644, |
|
"grad_norm": 1.8414185467503128, |
|
"learning_rate": 1.3162943106179748e-06, |
|
"loss": 0.1759, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 1.5772532188841202, |
|
"grad_norm": 1.8031832543461532, |
|
"learning_rate": 1.3005056365536067e-06, |
|
"loss": 0.1734, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.5799356223175964, |
|
"grad_norm": 1.8394143663010076, |
|
"learning_rate": 1.2847980579007003e-06, |
|
"loss": 0.1703, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 1.582618025751073, |
|
"grad_norm": 1.8514796820816015, |
|
"learning_rate": 1.2691719189799774e-06, |
|
"loss": 0.1789, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.5853004291845494, |
|
"grad_norm": 1.7324391573834732, |
|
"learning_rate": 1.253627562326936e-06, |
|
"loss": 0.1688, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 1.5879828326180259, |
|
"grad_norm": 1.6329352950537614, |
|
"learning_rate": 1.2381653286843648e-06, |
|
"loss": 0.1714, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.5906652360515021, |
|
"grad_norm": 1.8077758359457528, |
|
"learning_rate": 1.2227855569948477e-06, |
|
"loss": 0.1789, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 1.5933476394849786, |
|
"grad_norm": 1.7200363797199907, |
|
"learning_rate": 1.2074885843933542e-06, |
|
"loss": 0.1682, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.5960300429184548, |
|
"grad_norm": 1.7933536337032043, |
|
"learning_rate": 1.1922747461998425e-06, |
|
"loss": 0.1784, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.5987124463519313, |
|
"grad_norm": 1.7094305976943212, |
|
"learning_rate": 1.1771443759119028e-06, |
|
"loss": 0.178, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.6013948497854078, |
|
"grad_norm": 1.7614127239043487, |
|
"learning_rate": 1.162097805197459e-06, |
|
"loss": 0.1696, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 1.6040772532188843, |
|
"grad_norm": 1.7080432529090546, |
|
"learning_rate": 1.147135363887485e-06, |
|
"loss": 0.1713, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.6067596566523605, |
|
"grad_norm": 1.8100144082110108, |
|
"learning_rate": 1.1322573799687904e-06, |
|
"loss": 0.1817, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 1.6094420600858368, |
|
"grad_norm": 1.7730855447142229, |
|
"learning_rate": 1.1174641795768132e-06, |
|
"loss": 0.175, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.6121244635193133, |
|
"grad_norm": 1.7814093742215265, |
|
"learning_rate": 1.1027560869884845e-06, |
|
"loss": 0.1799, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 1.6148068669527897, |
|
"grad_norm": 1.7611202217473185, |
|
"learning_rate": 1.0881334246151114e-06, |
|
"loss": 0.1771, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.6174892703862662, |
|
"grad_norm": 1.9085949877488397, |
|
"learning_rate": 1.073596512995317e-06, |
|
"loss": 0.1766, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 1.6201716738197425, |
|
"grad_norm": 1.6313988420619614, |
|
"learning_rate": 1.0591456707880077e-06, |
|
"loss": 0.1673, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.6228540772532187, |
|
"grad_norm": 1.8333189107212091, |
|
"learning_rate": 1.0447812147653885e-06, |
|
"loss": 0.1672, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.6255364806866952, |
|
"grad_norm": 1.802930250142997, |
|
"learning_rate": 1.0305034598060254e-06, |
|
"loss": 0.1857, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.6282188841201717, |
|
"grad_norm": 1.7222446800259414, |
|
"learning_rate": 1.0163127188879352e-06, |
|
"loss": 0.1709, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 1.6309012875536482, |
|
"grad_norm": 1.7539149339913624, |
|
"learning_rate": 1.0022093030817316e-06, |
|
"loss": 0.1721, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.6335836909871244, |
|
"grad_norm": 1.76300637184033, |
|
"learning_rate": 9.88193521543797e-07, |
|
"loss": 0.1725, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 1.636266094420601, |
|
"grad_norm": 1.6781734808752238, |
|
"learning_rate": 9.742656815095175e-07, |
|
"loss": 0.1683, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.6389484978540771, |
|
"grad_norm": 1.6651389758811728, |
|
"learning_rate": 9.604260882865395e-07, |
|
"loss": 0.1725, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 1.6416309012875536, |
|
"grad_norm": 1.7341038389120755, |
|
"learning_rate": 9.466750452480816e-07, |
|
"loss": 0.1672, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.64431330472103, |
|
"grad_norm": 1.7243001528617379, |
|
"learning_rate": 9.330128538262784e-07, |
|
"loss": 0.1698, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 1.6469957081545066, |
|
"grad_norm": 1.7466343759077627, |
|
"learning_rate": 9.194398135055815e-07, |
|
"loss": 0.1748, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.6496781115879828, |
|
"grad_norm": 1.7394790981646728, |
|
"learning_rate": 9.059562218161894e-07, |
|
"loss": 0.1714, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.652360515021459, |
|
"grad_norm": 1.7335156997700363, |
|
"learning_rate": 8.925623743275235e-07, |
|
"loss": 0.1785, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.6550429184549356, |
|
"grad_norm": 1.7957055473541403, |
|
"learning_rate": 8.792585646417568e-07, |
|
"loss": 0.1784, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 1.657725321888412, |
|
"grad_norm": 1.7723875901313646, |
|
"learning_rate": 8.660450843873647e-07, |
|
"loss": 0.1785, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.6604077253218885, |
|
"grad_norm": 1.7939885647592386, |
|
"learning_rate": 8.529222232127526e-07, |
|
"loss": 0.1745, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 1.6630901287553648, |
|
"grad_norm": 1.807074126619144, |
|
"learning_rate": 8.398902687798832e-07, |
|
"loss": 0.1723, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.6657725321888412, |
|
"grad_norm": 1.7133609299716994, |
|
"learning_rate": 8.269495067579891e-07, |
|
"loss": 0.183, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 1.6684549356223175, |
|
"grad_norm": 1.841552312150271, |
|
"learning_rate": 8.141002208172977e-07, |
|
"loss": 0.174, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.671137339055794, |
|
"grad_norm": 1.801392312052175, |
|
"learning_rate": 8.013426926228274e-07, |
|
"loss": 0.1728, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 1.6738197424892705, |
|
"grad_norm": 1.9076893725432649, |
|
"learning_rate": 7.886772018281969e-07, |
|
"loss": 0.1785, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.676502145922747, |
|
"grad_norm": 1.7208262593130386, |
|
"learning_rate": 7.761040260695074e-07, |
|
"loss": 0.1671, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.6791845493562232, |
|
"grad_norm": 1.8329842884681125, |
|
"learning_rate": 7.636234409592524e-07, |
|
"loss": 0.1716, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.6818669527896994, |
|
"grad_norm": 1.7441404712147288, |
|
"learning_rate": 7.512357200802722e-07, |
|
"loss": 0.1734, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 1.684549356223176, |
|
"grad_norm": 1.9083885712113065, |
|
"learning_rate": 7.389411349797654e-07, |
|
"loss": 0.1728, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.6872317596566524, |
|
"grad_norm": 1.7874625869773564, |
|
"learning_rate": 7.267399551633253e-07, |
|
"loss": 0.1712, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 1.6899141630901289, |
|
"grad_norm": 1.9439352861683026, |
|
"learning_rate": 7.146324480890476e-07, |
|
"loss": 0.177, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.6925965665236051, |
|
"grad_norm": 1.7964941099594292, |
|
"learning_rate": 7.026188791616484e-07, |
|
"loss": 0.1784, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 1.6952789699570814, |
|
"grad_norm": 1.6119663887100937, |
|
"learning_rate": 6.906995117266641e-07, |
|
"loss": 0.1736, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.6979613733905579, |
|
"grad_norm": 1.7991944548752512, |
|
"learning_rate": 6.788746070646646e-07, |
|
"loss": 0.1784, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 1.7006437768240343, |
|
"grad_norm": 1.7108398560281477, |
|
"learning_rate": 6.671444243855368e-07, |
|
"loss": 0.1674, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.7033261802575108, |
|
"grad_norm": 1.7468306887337883, |
|
"learning_rate": 6.555092208227953e-07, |
|
"loss": 0.1703, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.7060085836909873, |
|
"grad_norm": 1.8045524345001702, |
|
"learning_rate": 6.439692514279516e-07, |
|
"loss": 0.1674, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.7086909871244635, |
|
"grad_norm": 1.7384877006545285, |
|
"learning_rate": 6.325247691649139e-07, |
|
"loss": 0.1701, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 1.7113733905579398, |
|
"grad_norm": 1.7869927843133888, |
|
"learning_rate": 6.211760249044535e-07, |
|
"loss": 0.1681, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.7140557939914163, |
|
"grad_norm": 1.7353614017887253, |
|
"learning_rate": 6.099232674187e-07, |
|
"loss": 0.1709, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 1.7167381974248928, |
|
"grad_norm": 1.6623728003314253, |
|
"learning_rate": 5.987667433756844e-07, |
|
"loss": 0.1707, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.7194206008583692, |
|
"grad_norm": 1.7371350671015628, |
|
"learning_rate": 5.877066973339379e-07, |
|
"loss": 0.1745, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 1.7221030042918455, |
|
"grad_norm": 1.701180995240129, |
|
"learning_rate": 5.767433717371301e-07, |
|
"loss": 0.172, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.7247854077253217, |
|
"grad_norm": 1.6948875584483825, |
|
"learning_rate": 5.658770069087521e-07, |
|
"loss": 0.1703, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 1.7274678111587982, |
|
"grad_norm": 1.7145556224150973, |
|
"learning_rate": 5.551078410468486e-07, |
|
"loss": 0.167, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.7301502145922747, |
|
"grad_norm": 1.8591358327221823, |
|
"learning_rate": 5.444361102187979e-07, |
|
"loss": 0.1713, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.7328326180257512, |
|
"grad_norm": 1.8192517669139423, |
|
"learning_rate": 5.338620483561386e-07, |
|
"loss": 0.1759, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.7355150214592274, |
|
"grad_norm": 1.7855530826157633, |
|
"learning_rate": 5.233858872494357e-07, |
|
"loss": 0.1748, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 1.738197424892704, |
|
"grad_norm": 1.81751812762985, |
|
"learning_rate": 5.130078565432089e-07, |
|
"loss": 0.1703, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.7408798283261802, |
|
"grad_norm": 1.6529197916217437, |
|
"learning_rate": 5.027281837308873e-07, |
|
"loss": 0.163, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 1.7435622317596566, |
|
"grad_norm": 1.855614807864555, |
|
"learning_rate": 4.925470941498345e-07, |
|
"loss": 0.1702, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.746244635193133, |
|
"grad_norm": 1.6148931678290364, |
|
"learning_rate": 4.824648109763991e-07, |
|
"loss": 0.1691, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 1.7489270386266096, |
|
"grad_norm": 1.7526231976044122, |
|
"learning_rate": 4.724815552210288e-07, |
|
"loss": 0.1697, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.7516094420600858, |
|
"grad_norm": 1.7624722668231747, |
|
"learning_rate": 4.6259754572342e-07, |
|
"loss": 0.1719, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 1.754291845493562, |
|
"grad_norm": 1.7200492383966377, |
|
"learning_rate": 4.5281299914773146e-07, |
|
"loss": 0.1738, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.7569742489270386, |
|
"grad_norm": 1.6761900052291174, |
|
"learning_rate": 4.43128129977819e-07, |
|
"loss": 0.1727, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.759656652360515, |
|
"grad_norm": 1.7044511967244793, |
|
"learning_rate": 4.3354315051254927e-07, |
|
"loss": 0.1774, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.7623390557939915, |
|
"grad_norm": 1.6843092740663743, |
|
"learning_rate": 4.2405827086113406e-07, |
|
"loss": 0.1715, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 1.7650214592274678, |
|
"grad_norm": 1.8382574118111221, |
|
"learning_rate": 4.146736989385336e-07, |
|
"loss": 0.1596, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.7677038626609443, |
|
"grad_norm": 1.7505575915211584, |
|
"learning_rate": 4.0538964046089426e-07, |
|
"loss": 0.1775, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 1.7703862660944205, |
|
"grad_norm": 1.7658082592766489, |
|
"learning_rate": 3.962062989410359e-07, |
|
"loss": 0.1713, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.773068669527897, |
|
"grad_norm": 1.8213010707609696, |
|
"learning_rate": 3.871238756840029e-07, |
|
"loss": 0.1734, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 1.7757510729613735, |
|
"grad_norm": 1.7743468035626766, |
|
"learning_rate": 3.7814256978263465e-07, |
|
"loss": 0.1709, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.77843347639485, |
|
"grad_norm": 1.7598019660058082, |
|
"learning_rate": 3.6926257811321585e-07, |
|
"loss": 0.1636, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 1.7811158798283262, |
|
"grad_norm": 1.8342773904197431, |
|
"learning_rate": 3.604840953311506e-07, |
|
"loss": 0.167, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.7837982832618025, |
|
"grad_norm": 1.7806767995597308, |
|
"learning_rate": 3.518073138667044e-07, |
|
"loss": 0.1713, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.786480686695279, |
|
"grad_norm": 1.726871526920193, |
|
"learning_rate": 3.4323242392077737e-07, |
|
"loss": 0.1681, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.7891630901287554, |
|
"grad_norm": 1.6372149878592794, |
|
"learning_rate": 3.347596134607406e-07, |
|
"loss": 0.1683, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 1.7918454935622319, |
|
"grad_norm": 1.8105431005325472, |
|
"learning_rate": 3.263890682163129e-07, |
|
"loss": 0.1752, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.7945278969957081, |
|
"grad_norm": 1.725058931174304, |
|
"learning_rate": 3.1812097167549127e-07, |
|
"loss": 0.1713, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 1.7972103004291844, |
|
"grad_norm": 1.6434589482561779, |
|
"learning_rate": 3.0995550508052976e-07, |
|
"loss": 0.1733, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.7998927038626609, |
|
"grad_norm": 1.650071186502983, |
|
"learning_rate": 3.018928474239613e-07, |
|
"loss": 0.1662, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 1.8025751072961373, |
|
"grad_norm": 1.7187038525710812, |
|
"learning_rate": 2.9393317544468003e-07, |
|
"loss": 0.1754, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.8052575107296138, |
|
"grad_norm": 1.7402435710253121, |
|
"learning_rate": 2.860766636240636e-07, |
|
"loss": 0.1732, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 1.80793991416309, |
|
"grad_norm": 1.6401645883053977, |
|
"learning_rate": 2.7832348418215084e-07, |
|
"loss": 0.1717, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.8106223175965666, |
|
"grad_norm": 1.7511299185732079, |
|
"learning_rate": 2.7067380707386235e-07, |
|
"loss": 0.1732, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.8133047210300428, |
|
"grad_norm": 1.7136871103356945, |
|
"learning_rate": 2.631277999852799e-07, |
|
"loss": 0.1735, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.8159871244635193, |
|
"grad_norm": 1.7871702437992416, |
|
"learning_rate": 2.556856283299691e-07, |
|
"loss": 0.1684, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 1.8186695278969958, |
|
"grad_norm": 1.6593429116360952, |
|
"learning_rate": 2.483474552453513e-07, |
|
"loss": 0.1745, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.8213519313304722, |
|
"grad_norm": 1.7978050169175657, |
|
"learning_rate": 2.4111344158912863e-07, |
|
"loss": 0.1723, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 1.8240343347639485, |
|
"grad_norm": 1.6990756617234288, |
|
"learning_rate": 2.3398374593576022e-07, |
|
"loss": 0.1625, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.8267167381974247, |
|
"grad_norm": 1.6914886507967104, |
|
"learning_rate": 2.2695852457298328e-07, |
|
"loss": 0.1719, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 1.8293991416309012, |
|
"grad_norm": 1.819281521215906, |
|
"learning_rate": 2.2003793149838692e-07, |
|
"loss": 0.1697, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.8320815450643777, |
|
"grad_norm": 1.7816762381539413, |
|
"learning_rate": 2.1322211841604046e-07, |
|
"loss": 0.1697, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 1.8347639484978542, |
|
"grad_norm": 1.824171974983062, |
|
"learning_rate": 2.0651123473316103e-07, |
|
"loss": 0.1788, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.8374463519313304, |
|
"grad_norm": 1.7295079329400838, |
|
"learning_rate": 1.9990542755684738e-07, |
|
"loss": 0.1667, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.840128755364807, |
|
"grad_norm": 1.7347345339228704, |
|
"learning_rate": 1.9340484169084627e-07, |
|
"loss": 0.1715, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.8428111587982832, |
|
"grad_norm": 1.7439628045359357, |
|
"learning_rate": 1.870096196323856e-07, |
|
"loss": 0.1754, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 1.8454935622317596, |
|
"grad_norm": 1.6912422808683083, |
|
"learning_rate": 1.8071990156904362e-07, |
|
"loss": 0.169, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.8481759656652361, |
|
"grad_norm": 1.7086838463786134, |
|
"learning_rate": 1.7453582537568404e-07, |
|
"loss": 0.169, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 1.8508583690987126, |
|
"grad_norm": 1.7247723681966847, |
|
"learning_rate": 1.6845752661142744e-07, |
|
"loss": 0.1743, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.8535407725321889, |
|
"grad_norm": 1.6883713658463495, |
|
"learning_rate": 1.624851385166809e-07, |
|
"loss": 0.1692, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 1.856223175965665, |
|
"grad_norm": 1.7717537767283589, |
|
"learning_rate": 1.5661879201022135e-07, |
|
"loss": 0.1644, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.8589055793991416, |
|
"grad_norm": 1.8983100667430102, |
|
"learning_rate": 1.5085861568631845e-07, |
|
"loss": 0.1751, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 1.861587982832618, |
|
"grad_norm": 1.7144123953802644, |
|
"learning_rate": 1.4520473581192407e-07, |
|
"loss": 0.1637, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.8642703862660945, |
|
"grad_norm": 1.810798311124524, |
|
"learning_rate": 1.396572763238957e-07, |
|
"loss": 0.1698, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.8669527896995708, |
|
"grad_norm": 1.7067124428040772, |
|
"learning_rate": 1.3421635882628958e-07, |
|
"loss": 0.1689, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.869635193133047, |
|
"grad_norm": 1.884316752905617, |
|
"learning_rate": 1.2888210258768464e-07, |
|
"loss": 0.1719, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 1.8723175965665235, |
|
"grad_norm": 1.72536322397334, |
|
"learning_rate": 1.2365462453857612e-07, |
|
"loss": 0.1736, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 1.8145577367067274, |
|
"learning_rate": 1.1853403926880725e-07, |
|
"loss": 0.1777, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 1.8776824034334765, |
|
"grad_norm": 1.778123472441151, |
|
"learning_rate": 1.1352045902506158e-07, |
|
"loss": 0.1784, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.880364806866953, |
|
"grad_norm": 1.6805472877060268, |
|
"learning_rate": 1.0861399370839964e-07, |
|
"loss": 0.1681, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 1.8830472103004292, |
|
"grad_norm": 1.7387187114111113, |
|
"learning_rate": 1.03814750871849e-07, |
|
"loss": 0.1733, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.8857296137339055, |
|
"grad_norm": 1.7843611800178358, |
|
"learning_rate": 9.912283571805015e-08, |
|
"loss": 0.171, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 1.888412017167382, |
|
"grad_norm": 1.8466504961068093, |
|
"learning_rate": 9.45383510969472e-08, |
|
"loss": 0.1791, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.8910944206008584, |
|
"grad_norm": 1.7171535115000942, |
|
"learning_rate": 9.006139750353526e-08, |
|
"loss": 0.1717, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.893776824034335, |
|
"grad_norm": 1.707432002602561, |
|
"learning_rate": 8.569207307565664e-08, |
|
"loss": 0.1766, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.8964592274678111, |
|
"grad_norm": 1.76598377123232, |
|
"learning_rate": 8.143047359184863e-08, |
|
"loss": 0.1773, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 1.8991416309012874, |
|
"grad_norm": 1.7916356651498915, |
|
"learning_rate": 7.727669246924697e-08, |
|
"loss": 0.1714, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.9018240343347639, |
|
"grad_norm": 1.8057780438533981, |
|
"learning_rate": 7.32308207615351e-08, |
|
"loss": 0.1737, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 1.9045064377682404, |
|
"grad_norm": 1.6924697614273796, |
|
"learning_rate": 6.929294715694923e-08, |
|
"loss": 0.1676, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.9071888412017168, |
|
"grad_norm": 1.6576764316139245, |
|
"learning_rate": 6.54631579763343e-08, |
|
"loss": 0.1704, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 1.909871244635193, |
|
"grad_norm": 1.6988868782886677, |
|
"learning_rate": 6.174153717125264e-08, |
|
"loss": 0.1628, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.9125536480686696, |
|
"grad_norm": 1.6633844266503204, |
|
"learning_rate": 5.812816632214169e-08, |
|
"loss": 0.1735, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 1.9152360515021458, |
|
"grad_norm": 1.7412542978551506, |
|
"learning_rate": 5.4623124636528635e-08, |
|
"loss": 0.1746, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.9179184549356223, |
|
"grad_norm": 1.7163653843898274, |
|
"learning_rate": 5.122648894728854e-08, |
|
"loss": 0.1585, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.9206008583690988, |
|
"grad_norm": 1.8956780429112952, |
|
"learning_rate": 4.7938333710969564e-08, |
|
"loss": 0.1661, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.9232832618025753, |
|
"grad_norm": 1.6583340654394345, |
|
"learning_rate": 4.4758731006149804e-08, |
|
"loss": 0.1653, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 1.9259656652360515, |
|
"grad_norm": 1.7145781241134583, |
|
"learning_rate": 4.16877505318658e-08, |
|
"loss": 0.1662, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.9286480686695278, |
|
"grad_norm": 1.7047184051248314, |
|
"learning_rate": 3.872545960608099e-08, |
|
"loss": 0.1706, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 1.9313304721030042, |
|
"grad_norm": 1.9251593725684037, |
|
"learning_rate": 3.587192316420962e-08, |
|
"loss": 0.184, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.9340128755364807, |
|
"grad_norm": 1.7850667557545585, |
|
"learning_rate": 3.312720375769518e-08, |
|
"loss": 0.1704, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 1.9366952789699572, |
|
"grad_norm": 1.7932962452118937, |
|
"learning_rate": 3.04913615526381e-08, |
|
"loss": 0.173, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.9393776824034334, |
|
"grad_norm": 1.7110116741355956, |
|
"learning_rate": 2.796445432847794e-08, |
|
"loss": 0.1688, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 1.94206008583691, |
|
"grad_norm": 1.6586995566511722, |
|
"learning_rate": 2.554653747672442e-08, |
|
"loss": 0.1692, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.9447424892703862, |
|
"grad_norm": 1.717312821533025, |
|
"learning_rate": 2.323766399974614e-08, |
|
"loss": 0.1694, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.9474248927038627, |
|
"grad_norm": 1.7163974599391538, |
|
"learning_rate": 2.1037884509605976e-08, |
|
"loss": 0.1726, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.9501072961373391, |
|
"grad_norm": 1.7553880101865882, |
|
"learning_rate": 1.8947247226954736e-08, |
|
"loss": 0.1684, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 1.9527896995708156, |
|
"grad_norm": 1.779622377229239, |
|
"learning_rate": 1.6965797979971442e-08, |
|
"loss": 0.1755, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.9554721030042919, |
|
"grad_norm": 1.7439946846126306, |
|
"learning_rate": 1.509358020336027e-08, |
|
"loss": 0.1668, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 1.9581545064377681, |
|
"grad_norm": 1.8649162684099, |
|
"learning_rate": 1.3330634937396835e-08, |
|
"loss": 0.1646, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.9608369098712446, |
|
"grad_norm": 1.7465480225139673, |
|
"learning_rate": 1.1677000827030604e-08, |
|
"loss": 0.1708, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 1.963519313304721, |
|
"grad_norm": 1.7689202198771905, |
|
"learning_rate": 1.0132714121037223e-08, |
|
"loss": 0.1774, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.9662017167381975, |
|
"grad_norm": 1.646042671050445, |
|
"learning_rate": 8.697808671221385e-09, |
|
"loss": 0.1716, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 1.9688841201716738, |
|
"grad_norm": 1.5784998914878945, |
|
"learning_rate": 7.3723159316796414e-09, |
|
"loss": 0.1685, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.97156652360515, |
|
"grad_norm": 1.6910798525884396, |
|
"learning_rate": 6.1562649581059505e-09, |
|
"loss": 0.1683, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.9742489270386265, |
|
"grad_norm": 1.695138209999217, |
|
"learning_rate": 5.049682407157752e-09, |
|
"loss": 0.1714, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.976931330472103, |
|
"grad_norm": 1.7277430907852327, |
|
"learning_rate": 4.052592535871425e-09, |
|
"loss": 0.167, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 1.9796137339055795, |
|
"grad_norm": 1.8699834089528526, |
|
"learning_rate": 3.1650172011293834e-09, |
|
"loss": 0.1802, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.9822961373390557, |
|
"grad_norm": 1.7382756655510954, |
|
"learning_rate": 2.3869758591810177e-09, |
|
"loss": 0.1639, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 1.9849785407725322, |
|
"grad_norm": 1.7097894461766394, |
|
"learning_rate": 1.718485565218031e-09, |
|
"loss": 0.1695, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.9876609442060085, |
|
"grad_norm": 1.7701378490286461, |
|
"learning_rate": 1.15956097299752e-09, |
|
"loss": 0.1661, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 1.990343347639485, |
|
"grad_norm": 1.6908898113769697, |
|
"learning_rate": 7.102143345238955e-10, |
|
"loss": 0.1682, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.9930257510729614, |
|
"grad_norm": 1.758028582044859, |
|
"learning_rate": 3.7045549977909877e-10, |
|
"loss": 0.1718, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 1.995708154506438, |
|
"grad_norm": 1.7293701434808701, |
|
"learning_rate": 1.4029191650555274e-10, |
|
"loss": 0.174, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.9983905579399142, |
|
"grad_norm": 1.7674392540151087, |
|
"learning_rate": 1.9728630044069107e-11, |
|
"loss": 0.1708, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_runtime": 264.2943, |
|
"eval_samples_per_second": 3.784, |
|
"eval_steps_per_second": 0.946, |
|
"step": 3728 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 3728, |
|
"total_flos": 390283678187520.0, |
|
"train_loss": 0.247329246460624, |
|
"train_runtime": 33490.3442, |
|
"train_samples_per_second": 1.78, |
|
"train_steps_per_second": 0.111 |
|
} |
|
  ],
  "logging_steps": 5,
  "max_steps": 3728,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 390283678187520.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}