{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 61000, "global_step": 244570, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.177617859917406e-05, "grad_norm": 6.915310859680176, "learning_rate": 4.088808929958703e-09, "loss": 0.4734, "step": 10 }, { "epoch": 0.00016355235719834813, "grad_norm": 6.224706172943115, "learning_rate": 8.177617859917406e-09, "loss": 0.4838, "step": 20 }, { "epoch": 0.0002453285357975222, "grad_norm": 6.07737398147583, "learning_rate": 1.2266426789876111e-08, "loss": 0.4648, "step": 30 }, { "epoch": 0.00032710471439669625, "grad_norm": 6.497094631195068, "learning_rate": 1.6355235719834812e-08, "loss": 0.5033, "step": 40 }, { "epoch": 0.0004088808929958703, "grad_norm": 6.6569719314575195, "learning_rate": 2.044404464979352e-08, "loss": 0.4993, "step": 50 }, { "epoch": 0.0004906570715950444, "grad_norm": 6.37425422668457, "learning_rate": 2.4532853579752222e-08, "loss": 0.4573, "step": 60 }, { "epoch": 0.0005724332501942184, "grad_norm": 7.456509113311768, "learning_rate": 2.8621662509710925e-08, "loss": 0.4684, "step": 70 }, { "epoch": 0.0006542094287933925, "grad_norm": 8.260735511779785, "learning_rate": 3.2710471439669625e-08, "loss": 0.485, "step": 80 }, { "epoch": 0.0007359856073925666, "grad_norm": 5.983162879943848, "learning_rate": 3.679928036962833e-08, "loss": 0.4809, "step": 90 }, { "epoch": 0.0008177617859917406, "grad_norm": 5.34896993637085, "learning_rate": 4.088808929958704e-08, "loss": 0.4986, "step": 100 }, { "epoch": 0.0008995379645909147, "grad_norm": 4.9466447830200195, "learning_rate": 4.497689822954574e-08, "loss": 0.468, "step": 110 }, { "epoch": 0.0009813141431900888, "grad_norm": 9.193735122680664, "learning_rate": 4.9065707159504443e-08, "loss": 0.4802, "step": 120 }, { "epoch": 0.0010630903217892628, "grad_norm": 6.511040687561035, "learning_rate": 5.315451608946314e-08, "loss": 0.4939, "step": 130 }, { "epoch": 0.0011448665003884369, "grad_norm": 7.231633186340332, "learning_rate": 5.724332501942185e-08, "loss": 0.4837, "step": 140 }, { "epoch": 0.001226642678987611, "grad_norm": 5.278972625732422, "learning_rate": 6.133213394938055e-08, "loss": 0.445, "step": 150 }, { "epoch": 0.001308418857586785, "grad_norm": 4.97473669052124, "learning_rate": 6.542094287933925e-08, "loss": 0.4514, "step": 160 }, { "epoch": 0.001390195036185959, "grad_norm": 6.1382155418396, "learning_rate": 6.950975180929796e-08, "loss": 0.4321, "step": 170 }, { "epoch": 0.0014719712147851331, "grad_norm": 4.931957244873047, "learning_rate": 7.359856073925666e-08, "loss": 0.394, "step": 180 }, { "epoch": 0.0015537473933843072, "grad_norm": 6.497879981994629, "learning_rate": 7.768736966921536e-08, "loss": 0.4118, "step": 190 }, { "epoch": 0.0016355235719834813, "grad_norm": 5.134346961975098, "learning_rate": 8.177617859917407e-08, "loss": 0.3758, "step": 200 }, { "epoch": 0.0017172997505826553, "grad_norm": 5.720001697540283, "learning_rate": 8.586498752913277e-08, "loss": 0.354, "step": 210 }, { "epoch": 0.0017990759291818294, "grad_norm": 4.203914165496826, "learning_rate": 8.995379645909147e-08, "loss": 0.3184, "step": 220 }, { "epoch": 0.0018808521077810034, "grad_norm": 4.407601833343506, "learning_rate": 9.404260538905017e-08, "loss": 0.3275, "step": 230 }, { "epoch": 0.0019626282863801775, "grad_norm": 4.816983699798584, "learning_rate": 9.813141431900889e-08, "loss": 0.3069, "step": 240 }, { "epoch": 0.0020444044649793516, "grad_norm": 4.278802871704102, "learning_rate": 1.0222022324896759e-07, "loss": 0.3007, "step": 250 }, { "epoch": 0.0021261806435785256, "grad_norm": 4.0535054206848145, "learning_rate": 1.0630903217892629e-07, "loss": 0.273, "step": 260 }, { "epoch": 0.0022079568221776997, "grad_norm": 3.145960807800293, "learning_rate": 1.10397841108885e-07, "loss": 0.2518, "step": 270 }, { "epoch": 0.0022897330007768738, "grad_norm": 2.7841999530792236, "learning_rate": 1.144866500388437e-07, "loss": 0.2155, "step": 280 }, { "epoch": 0.002371509179376048, "grad_norm": 2.2798659801483154, "learning_rate": 1.185754589688024e-07, "loss": 0.1804, "step": 290 }, { "epoch": 0.002453285357975222, "grad_norm": 2.135394811630249, "learning_rate": 1.226642678987611e-07, "loss": 0.1813, "step": 300 }, { "epoch": 0.002535061536574396, "grad_norm": 2.3528695106506348, "learning_rate": 1.267530768287198e-07, "loss": 0.1582, "step": 310 }, { "epoch": 0.00261683771517357, "grad_norm": 1.9857685565948486, "learning_rate": 1.308418857586785e-07, "loss": 0.1734, "step": 320 }, { "epoch": 0.002698613893772744, "grad_norm": 2.2221999168395996, "learning_rate": 1.349306946886372e-07, "loss": 0.1427, "step": 330 }, { "epoch": 0.002780390072371918, "grad_norm": 1.8349144458770752, "learning_rate": 1.3901950361859592e-07, "loss": 0.1608, "step": 340 }, { "epoch": 0.002862166250971092, "grad_norm": 1.824054479598999, "learning_rate": 1.431083125485546e-07, "loss": 0.1663, "step": 350 }, { "epoch": 0.0029439424295702663, "grad_norm": 1.8261404037475586, "learning_rate": 1.4719712147851332e-07, "loss": 0.1369, "step": 360 }, { "epoch": 0.0030257186081694403, "grad_norm": 1.5938246250152588, "learning_rate": 1.5128593040847204e-07, "loss": 0.1269, "step": 370 }, { "epoch": 0.0031074947867686144, "grad_norm": 1.5476601123809814, "learning_rate": 1.5537473933843072e-07, "loss": 0.1347, "step": 380 }, { "epoch": 0.0031892709653677884, "grad_norm": 1.6773386001586914, "learning_rate": 1.594635482683894e-07, "loss": 0.1159, "step": 390 }, { "epoch": 0.0032710471439669625, "grad_norm": 1.7451281547546387, "learning_rate": 1.6355235719834815e-07, "loss": 0.1189, "step": 400 }, { "epoch": 0.0033528233225661366, "grad_norm": 2.0135226249694824, "learning_rate": 1.6764116612830684e-07, "loss": 0.1102, "step": 410 }, { "epoch": 0.0034345995011653106, "grad_norm": 1.868825078010559, "learning_rate": 1.7172997505826555e-07, "loss": 0.1076, "step": 420 }, { "epoch": 0.0035163756797644847, "grad_norm": 1.8518611192703247, "learning_rate": 1.7581878398822424e-07, "loss": 0.1115, "step": 430 }, { "epoch": 0.0035981518583636588, "grad_norm": 2.0004355907440186, "learning_rate": 1.7990759291818295e-07, "loss": 0.1004, "step": 440 }, { "epoch": 0.003679928036962833, "grad_norm": 1.4761673212051392, "learning_rate": 1.8399640184814163e-07, "loss": 0.1005, "step": 450 }, { "epoch": 0.003761704215562007, "grad_norm": 1.3519024848937988, "learning_rate": 1.8808521077810035e-07, "loss": 0.0944, "step": 460 }, { "epoch": 0.003843480394161181, "grad_norm": 1.3592880964279175, "learning_rate": 1.9217401970805903e-07, "loss": 0.0905, "step": 470 }, { "epoch": 0.003925256572760355, "grad_norm": 1.5534443855285645, "learning_rate": 1.9626282863801777e-07, "loss": 0.0928, "step": 480 }, { "epoch": 0.004007032751359529, "grad_norm": 1.5873217582702637, "learning_rate": 2.0035163756797646e-07, "loss": 0.0778, "step": 490 }, { "epoch": 0.004088808929958703, "grad_norm": 1.9665029048919678, "learning_rate": 2.0444044649793517e-07, "loss": 0.091, "step": 500 }, { "epoch": 0.004170585108557877, "grad_norm": 1.4012279510498047, "learning_rate": 2.0852925542789386e-07, "loss": 0.0916, "step": 510 }, { "epoch": 0.004252361287157051, "grad_norm": 1.5916063785552979, "learning_rate": 2.1261806435785257e-07, "loss": 0.0814, "step": 520 }, { "epoch": 0.004334137465756225, "grad_norm": 1.2805981636047363, "learning_rate": 2.1670687328781126e-07, "loss": 0.0754, "step": 530 }, { "epoch": 0.004415913644355399, "grad_norm": 1.4810798168182373, "learning_rate": 2.2079568221777e-07, "loss": 0.0904, "step": 540 }, { "epoch": 0.0044976898229545734, "grad_norm": 1.6251362562179565, "learning_rate": 2.2488449114772869e-07, "loss": 0.0843, "step": 550 }, { "epoch": 0.0045794660015537475, "grad_norm": 1.457366704940796, "learning_rate": 2.289733000776874e-07, "loss": 0.0802, "step": 560 }, { "epoch": 0.0046612421801529216, "grad_norm": 1.3614826202392578, "learning_rate": 2.3306210900764608e-07, "loss": 0.0739, "step": 570 }, { "epoch": 0.004743018358752096, "grad_norm": 1.7205181121826172, "learning_rate": 2.371509179376048e-07, "loss": 0.0702, "step": 580 }, { "epoch": 0.00482479453735127, "grad_norm": 1.4927618503570557, "learning_rate": 2.412397268675635e-07, "loss": 0.0807, "step": 590 }, { "epoch": 0.004906570715950444, "grad_norm": 1.6795421838760376, "learning_rate": 2.453285357975222e-07, "loss": 0.0709, "step": 600 }, { "epoch": 0.004988346894549618, "grad_norm": 1.239315390586853, "learning_rate": 2.494173447274809e-07, "loss": 0.0653, "step": 610 }, { "epoch": 0.005070123073148792, "grad_norm": 1.2652521133422852, "learning_rate": 2.535061536574396e-07, "loss": 0.0778, "step": 620 }, { "epoch": 0.005151899251747966, "grad_norm": 1.4729022979736328, "learning_rate": 2.575949625873983e-07, "loss": 0.076, "step": 630 }, { "epoch": 0.00523367543034714, "grad_norm": 1.5114805698394775, "learning_rate": 2.61683771517357e-07, "loss": 0.0647, "step": 640 }, { "epoch": 0.005315451608946314, "grad_norm": 1.518172264099121, "learning_rate": 2.657725804473157e-07, "loss": 0.0644, "step": 650 }, { "epoch": 0.005397227787545488, "grad_norm": 1.5464613437652588, "learning_rate": 2.698613893772744e-07, "loss": 0.0627, "step": 660 }, { "epoch": 0.005479003966144662, "grad_norm": 1.1676665544509888, "learning_rate": 2.739501983072331e-07, "loss": 0.0511, "step": 670 }, { "epoch": 0.005560780144743836, "grad_norm": 1.311774492263794, "learning_rate": 2.7803900723719185e-07, "loss": 0.0715, "step": 680 }, { "epoch": 0.00564255632334301, "grad_norm": 1.6116594076156616, "learning_rate": 2.8212781616715053e-07, "loss": 0.0644, "step": 690 }, { "epoch": 0.005724332501942184, "grad_norm": 2.082928419113159, "learning_rate": 2.862166250971092e-07, "loss": 0.064, "step": 700 }, { "epoch": 0.0058061086805413584, "grad_norm": 1.1010290384292603, "learning_rate": 2.903054340270679e-07, "loss": 0.0598, "step": 710 }, { "epoch": 0.0058878848591405325, "grad_norm": 1.4474798440933228, "learning_rate": 2.9439424295702665e-07, "loss": 0.065, "step": 720 }, { "epoch": 0.0059696610377397066, "grad_norm": 1.8776367902755737, "learning_rate": 2.9848305188698533e-07, "loss": 0.0641, "step": 730 }, { "epoch": 0.006051437216338881, "grad_norm": 1.1593611240386963, "learning_rate": 3.0257186081694407e-07, "loss": 0.058, "step": 740 }, { "epoch": 0.006133213394938055, "grad_norm": 1.5302402973175049, "learning_rate": 3.0666066974690276e-07, "loss": 0.0539, "step": 750 }, { "epoch": 0.006214989573537229, "grad_norm": 1.6658142805099487, "learning_rate": 3.1074947867686145e-07, "loss": 0.0651, "step": 760 }, { "epoch": 0.006296765752136403, "grad_norm": 1.2949118614196777, "learning_rate": 3.148382876068202e-07, "loss": 0.0599, "step": 770 }, { "epoch": 0.006378541930735577, "grad_norm": 1.604233980178833, "learning_rate": 3.189270965367788e-07, "loss": 0.0534, "step": 780 }, { "epoch": 0.006460318109334751, "grad_norm": 1.783679723739624, "learning_rate": 3.2301590546673756e-07, "loss": 0.0511, "step": 790 }, { "epoch": 0.006542094287933925, "grad_norm": 1.130206823348999, "learning_rate": 3.271047143966963e-07, "loss": 0.0521, "step": 800 }, { "epoch": 0.006623870466533099, "grad_norm": 1.473360538482666, "learning_rate": 3.31193523326655e-07, "loss": 0.0519, "step": 810 }, { "epoch": 0.006705646645132273, "grad_norm": 1.5919809341430664, "learning_rate": 3.3528233225661367e-07, "loss": 0.0513, "step": 820 }, { "epoch": 0.006787422823731447, "grad_norm": 1.2469764947891235, "learning_rate": 3.3937114118657236e-07, "loss": 0.0451, "step": 830 }, { "epoch": 0.006869199002330621, "grad_norm": 1.2967842817306519, "learning_rate": 3.434599501165311e-07, "loss": 0.0522, "step": 840 }, { "epoch": 0.006950975180929795, "grad_norm": 1.3128080368041992, "learning_rate": 3.4754875904648984e-07, "loss": 0.0482, "step": 850 }, { "epoch": 0.007032751359528969, "grad_norm": 1.0906916856765747, "learning_rate": 3.5163756797644847e-07, "loss": 0.0641, "step": 860 }, { "epoch": 0.0071145275381281434, "grad_norm": 1.1668692827224731, "learning_rate": 3.557263769064072e-07, "loss": 0.0617, "step": 870 }, { "epoch": 0.0071963037167273175, "grad_norm": 1.108760118484497, "learning_rate": 3.598151858363659e-07, "loss": 0.0439, "step": 880 }, { "epoch": 0.0072780798953264916, "grad_norm": 1.7057290077209473, "learning_rate": 3.6390399476632464e-07, "loss": 0.0458, "step": 890 }, { "epoch": 0.007359856073925666, "grad_norm": 1.2693947553634644, "learning_rate": 3.6799280369628327e-07, "loss": 0.0517, "step": 900 }, { "epoch": 0.00744163225252484, "grad_norm": 1.6022868156433105, "learning_rate": 3.72081612626242e-07, "loss": 0.0525, "step": 910 }, { "epoch": 0.007523408431124014, "grad_norm": 1.3306891918182373, "learning_rate": 3.761704215562007e-07, "loss": 0.044, "step": 920 }, { "epoch": 0.007605184609723188, "grad_norm": 1.3318002223968506, "learning_rate": 3.8025923048615943e-07, "loss": 0.0397, "step": 930 }, { "epoch": 0.007686960788322362, "grad_norm": 1.306094765663147, "learning_rate": 3.8434803941611807e-07, "loss": 0.0516, "step": 940 }, { "epoch": 0.007768736966921536, "grad_norm": 1.2337863445281982, "learning_rate": 3.884368483460768e-07, "loss": 0.0454, "step": 950 }, { "epoch": 0.00785051314552071, "grad_norm": 1.0005780458450317, "learning_rate": 3.9252565727603555e-07, "loss": 0.0472, "step": 960 }, { "epoch": 0.007932289324119884, "grad_norm": 1.1386178731918335, "learning_rate": 3.9661446620599423e-07, "loss": 0.0398, "step": 970 }, { "epoch": 0.008014065502719058, "grad_norm": 1.5848337411880493, "learning_rate": 4.007032751359529e-07, "loss": 0.0393, "step": 980 }, { "epoch": 0.008095841681318232, "grad_norm": 1.2817010879516602, "learning_rate": 4.047920840659116e-07, "loss": 0.0421, "step": 990 }, { "epoch": 0.008177617859917406, "grad_norm": 1.1460365056991577, "learning_rate": 4.0888089299587035e-07, "loss": 0.0419, "step": 1000 }, { "epoch": 0.00825939403851658, "grad_norm": 1.1048555374145508, "learning_rate": 4.129697019258291e-07, "loss": 0.041, "step": 1010 }, { "epoch": 0.008341170217115754, "grad_norm": 0.9925638437271118, "learning_rate": 4.170585108557877e-07, "loss": 0.0269, "step": 1020 }, { "epoch": 0.008422946395714928, "grad_norm": 0.8781243562698364, "learning_rate": 4.2114731978574646e-07, "loss": 0.0437, "step": 1030 }, { "epoch": 0.008504722574314103, "grad_norm": 0.9905436635017395, "learning_rate": 4.2523612871570515e-07, "loss": 0.0408, "step": 1040 }, { "epoch": 0.008586498752913277, "grad_norm": 1.1999629735946655, "learning_rate": 4.293249376456639e-07, "loss": 0.0371, "step": 1050 }, { "epoch": 0.00866827493151245, "grad_norm": 1.7010293006896973, "learning_rate": 4.334137465756225e-07, "loss": 0.0371, "step": 1060 }, { "epoch": 0.008750051110111625, "grad_norm": 0.7821071743965149, "learning_rate": 4.3750255550558126e-07, "loss": 0.041, "step": 1070 }, { "epoch": 0.008831827288710799, "grad_norm": 1.2586233615875244, "learning_rate": 4.4159136443554e-07, "loss": 0.0409, "step": 1080 }, { "epoch": 0.008913603467309973, "grad_norm": 0.9747447967529297, "learning_rate": 4.456801733654987e-07, "loss": 0.0305, "step": 1090 }, { "epoch": 0.008995379645909147, "grad_norm": 1.4229587316513062, "learning_rate": 4.4976898229545737e-07, "loss": 0.0423, "step": 1100 }, { "epoch": 0.009077155824508321, "grad_norm": 1.1502677202224731, "learning_rate": 4.5385779122541606e-07, "loss": 0.0398, "step": 1110 }, { "epoch": 0.009158932003107495, "grad_norm": 0.9817847609519958, "learning_rate": 4.579466001553748e-07, "loss": 0.0391, "step": 1120 }, { "epoch": 0.009240708181706669, "grad_norm": 0.9837647080421448, "learning_rate": 4.620354090853335e-07, "loss": 0.0354, "step": 1130 }, { "epoch": 0.009322484360305843, "grad_norm": 1.0652971267700195, "learning_rate": 4.6612421801529217e-07, "loss": 0.0333, "step": 1140 }, { "epoch": 0.009404260538905017, "grad_norm": 1.0206173658370972, "learning_rate": 4.7021302694525086e-07, "loss": 0.0417, "step": 1150 }, { "epoch": 0.009486036717504191, "grad_norm": 0.9779715538024902, "learning_rate": 4.743018358752096e-07, "loss": 0.0315, "step": 1160 }, { "epoch": 0.009567812896103365, "grad_norm": 1.172609567642212, "learning_rate": 4.783906448051683e-07, "loss": 0.0279, "step": 1170 }, { "epoch": 0.00964958907470254, "grad_norm": 0.9360582232475281, "learning_rate": 4.82479453735127e-07, "loss": 0.0383, "step": 1180 }, { "epoch": 0.009731365253301713, "grad_norm": 1.09714674949646, "learning_rate": 4.865682626650857e-07, "loss": 0.0317, "step": 1190 }, { "epoch": 0.009813141431900888, "grad_norm": 1.1327892541885376, "learning_rate": 4.906570715950444e-07, "loss": 0.0382, "step": 1200 }, { "epoch": 0.009894917610500062, "grad_norm": 1.0385960340499878, "learning_rate": 4.947458805250032e-07, "loss": 0.0334, "step": 1210 }, { "epoch": 0.009976693789099236, "grad_norm": 0.8331118226051331, "learning_rate": 4.988346894549618e-07, "loss": 0.0327, "step": 1220 }, { "epoch": 0.01005846996769841, "grad_norm": 1.3018481731414795, "learning_rate": 5.029234983849206e-07, "loss": 0.0382, "step": 1230 }, { "epoch": 0.010140246146297584, "grad_norm": 0.9364983439445496, "learning_rate": 5.070123073148792e-07, "loss": 0.0273, "step": 1240 }, { "epoch": 0.010222022324896758, "grad_norm": 1.2291910648345947, "learning_rate": 5.111011162448379e-07, "loss": 0.0393, "step": 1250 }, { "epoch": 0.010303798503495932, "grad_norm": 1.304630160331726, "learning_rate": 5.151899251747966e-07, "loss": 0.0292, "step": 1260 }, { "epoch": 0.010385574682095106, "grad_norm": 0.7274601459503174, "learning_rate": 5.192787341047553e-07, "loss": 0.0283, "step": 1270 }, { "epoch": 0.01046735086069428, "grad_norm": 1.3901946544647217, "learning_rate": 5.23367543034714e-07, "loss": 0.0361, "step": 1280 }, { "epoch": 0.010549127039293454, "grad_norm": 1.0442465543746948, "learning_rate": 5.274563519646728e-07, "loss": 0.0421, "step": 1290 }, { "epoch": 0.010630903217892628, "grad_norm": 1.3575109243392944, "learning_rate": 5.315451608946314e-07, "loss": 0.0302, "step": 1300 }, { "epoch": 0.010712679396491802, "grad_norm": 1.4557172060012817, "learning_rate": 5.356339698245902e-07, "loss": 0.0381, "step": 1310 }, { "epoch": 0.010794455575090976, "grad_norm": 1.6049987077713013, "learning_rate": 5.397227787545488e-07, "loss": 0.0319, "step": 1320 }, { "epoch": 0.01087623175369015, "grad_norm": 1.0673428773880005, "learning_rate": 5.438115876845075e-07, "loss": 0.0377, "step": 1330 }, { "epoch": 0.010958007932289324, "grad_norm": 1.0952708721160889, "learning_rate": 5.479003966144662e-07, "loss": 0.0267, "step": 1340 }, { "epoch": 0.011039784110888498, "grad_norm": 0.9603659510612488, "learning_rate": 5.519892055444249e-07, "loss": 0.0338, "step": 1350 }, { "epoch": 0.011121560289487673, "grad_norm": 0.8856310248374939, "learning_rate": 5.560780144743837e-07, "loss": 0.0335, "step": 1360 }, { "epoch": 0.011203336468086847, "grad_norm": 1.5546015501022339, "learning_rate": 5.601668234043424e-07, "loss": 0.0298, "step": 1370 }, { "epoch": 0.01128511264668602, "grad_norm": 1.332705020904541, "learning_rate": 5.642556323343011e-07, "loss": 0.0266, "step": 1380 }, { "epoch": 0.011366888825285195, "grad_norm": 1.551566481590271, "learning_rate": 5.683444412642598e-07, "loss": 0.0325, "step": 1390 }, { "epoch": 0.011448665003884369, "grad_norm": 1.163188099861145, "learning_rate": 5.724332501942184e-07, "loss": 0.0315, "step": 1400 }, { "epoch": 0.011530441182483543, "grad_norm": 1.058802843093872, "learning_rate": 5.765220591241772e-07, "loss": 0.0395, "step": 1410 }, { "epoch": 0.011612217361082717, "grad_norm": 0.7201812267303467, "learning_rate": 5.806108680541358e-07, "loss": 0.0353, "step": 1420 }, { "epoch": 0.011693993539681891, "grad_norm": 0.8713283538818359, "learning_rate": 5.846996769840946e-07, "loss": 0.0258, "step": 1430 }, { "epoch": 0.011775769718281065, "grad_norm": 0.8260728716850281, "learning_rate": 5.887884859140533e-07, "loss": 0.0294, "step": 1440 }, { "epoch": 0.011857545896880239, "grad_norm": 1.02854585647583, "learning_rate": 5.92877294844012e-07, "loss": 0.0229, "step": 1450 }, { "epoch": 0.011939322075479413, "grad_norm": 0.8813720941543579, "learning_rate": 5.969661037739707e-07, "loss": 0.0328, "step": 1460 }, { "epoch": 0.012021098254078587, "grad_norm": 1.127172589302063, "learning_rate": 6.010549127039294e-07, "loss": 0.0306, "step": 1470 }, { "epoch": 0.012102874432677761, "grad_norm": 1.0341534614562988, "learning_rate": 6.051437216338881e-07, "loss": 0.0261, "step": 1480 }, { "epoch": 0.012184650611276935, "grad_norm": 1.05917227268219, "learning_rate": 6.092325305638468e-07, "loss": 0.0328, "step": 1490 }, { "epoch": 0.01226642678987611, "grad_norm": 0.9537339210510254, "learning_rate": 6.133213394938055e-07, "loss": 0.04, "step": 1500 }, { "epoch": 0.012348202968475283, "grad_norm": 0.6900754570960999, "learning_rate": 6.174101484237642e-07, "loss": 0.0264, "step": 1510 }, { "epoch": 0.012429979147074458, "grad_norm": 1.1838163137435913, "learning_rate": 6.214989573537229e-07, "loss": 0.0267, "step": 1520 }, { "epoch": 0.012511755325673632, "grad_norm": 1.3369439840316772, "learning_rate": 6.255877662836817e-07, "loss": 0.0345, "step": 1530 }, { "epoch": 0.012593531504272806, "grad_norm": 1.2623252868652344, "learning_rate": 6.296765752136404e-07, "loss": 0.0293, "step": 1540 }, { "epoch": 0.01267530768287198, "grad_norm": 0.852893054485321, "learning_rate": 6.337653841435991e-07, "loss": 0.0278, "step": 1550 }, { "epoch": 0.012757083861471154, "grad_norm": 0.7341757416725159, "learning_rate": 6.378541930735576e-07, "loss": 0.0269, "step": 1560 }, { "epoch": 0.012838860040070328, "grad_norm": 0.8912689089775085, "learning_rate": 6.419430020035164e-07, "loss": 0.0278, "step": 1570 }, { "epoch": 0.012920636218669502, "grad_norm": 1.0535553693771362, "learning_rate": 6.460318109334751e-07, "loss": 0.0244, "step": 1580 }, { "epoch": 0.013002412397268676, "grad_norm": 0.8526079058647156, "learning_rate": 6.501206198634338e-07, "loss": 0.0323, "step": 1590 }, { "epoch": 0.01308418857586785, "grad_norm": 1.306506633758545, "learning_rate": 6.542094287933926e-07, "loss": 0.0254, "step": 1600 }, { "epoch": 0.013165964754467024, "grad_norm": 1.0552763938903809, "learning_rate": 6.582982377233513e-07, "loss": 0.0287, "step": 1610 }, { "epoch": 0.013247740933066198, "grad_norm": 1.1660277843475342, "learning_rate": 6.6238704665331e-07, "loss": 0.0259, "step": 1620 }, { "epoch": 0.013329517111665372, "grad_norm": 1.1373275518417358, "learning_rate": 6.664758555832688e-07, "loss": 0.0247, "step": 1630 }, { "epoch": 0.013411293290264546, "grad_norm": 1.1496608257293701, "learning_rate": 6.705646645132273e-07, "loss": 0.0247, "step": 1640 }, { "epoch": 0.01349306946886372, "grad_norm": 1.0518120527267456, "learning_rate": 6.74653473443186e-07, "loss": 0.0331, "step": 1650 }, { "epoch": 0.013574845647462894, "grad_norm": 1.0328962802886963, "learning_rate": 6.787422823731447e-07, "loss": 0.0307, "step": 1660 }, { "epoch": 0.013656621826062068, "grad_norm": 1.149566888809204, "learning_rate": 6.828310913031035e-07, "loss": 0.0272, "step": 1670 }, { "epoch": 0.013738398004661243, "grad_norm": 1.4218741655349731, "learning_rate": 6.869199002330622e-07, "loss": 0.0251, "step": 1680 }, { "epoch": 0.013820174183260417, "grad_norm": 1.3190597295761108, "learning_rate": 6.910087091630209e-07, "loss": 0.0224, "step": 1690 }, { "epoch": 0.01390195036185959, "grad_norm": 1.9806984663009644, "learning_rate": 6.950975180929797e-07, "loss": 0.0268, "step": 1700 }, { "epoch": 0.013983726540458765, "grad_norm": 0.9855793714523315, "learning_rate": 6.991863270229384e-07, "loss": 0.0316, "step": 1710 }, { "epoch": 0.014065502719057939, "grad_norm": 1.1324021816253662, "learning_rate": 7.032751359528969e-07, "loss": 0.018, "step": 1720 }, { "epoch": 0.014147278897657113, "grad_norm": 0.9898813962936401, "learning_rate": 7.073639448828556e-07, "loss": 0.0294, "step": 1730 }, { "epoch": 0.014229055076256287, "grad_norm": 1.1737151145935059, "learning_rate": 7.114527538128144e-07, "loss": 0.0186, "step": 1740 }, { "epoch": 0.014310831254855461, "grad_norm": 0.7514078617095947, "learning_rate": 7.155415627427731e-07, "loss": 0.0256, "step": 1750 }, { "epoch": 0.014392607433454635, "grad_norm": 1.0933904647827148, "learning_rate": 7.196303716727318e-07, "loss": 0.0206, "step": 1760 }, { "epoch": 0.014474383612053809, "grad_norm": 1.0706230401992798, "learning_rate": 7.237191806026905e-07, "loss": 0.0227, "step": 1770 }, { "epoch": 0.014556159790652983, "grad_norm": 0.8044664859771729, "learning_rate": 7.278079895326493e-07, "loss": 0.0183, "step": 1780 }, { "epoch": 0.014637935969252157, "grad_norm": 0.8203301429748535, "learning_rate": 7.31896798462608e-07, "loss": 0.0218, "step": 1790 }, { "epoch": 0.014719712147851331, "grad_norm": 0.7524286508560181, "learning_rate": 7.359856073925665e-07, "loss": 0.0202, "step": 1800 }, { "epoch": 0.014801488326450505, "grad_norm": 1.0189841985702515, "learning_rate": 7.400744163225252e-07, "loss": 0.0291, "step": 1810 }, { "epoch": 0.01488326450504968, "grad_norm": 1.3423631191253662, "learning_rate": 7.44163225252484e-07, "loss": 0.0297, "step": 1820 }, { "epoch": 0.014965040683648853, "grad_norm": 0.9403053522109985, "learning_rate": 7.482520341824427e-07, "loss": 0.0241, "step": 1830 }, { "epoch": 0.015046816862248028, "grad_norm": 1.134793996810913, "learning_rate": 7.523408431124014e-07, "loss": 0.0274, "step": 1840 }, { "epoch": 0.015128593040847202, "grad_norm": 0.7618448734283447, "learning_rate": 7.564296520423602e-07, "loss": 0.024, "step": 1850 }, { "epoch": 0.015210369219446376, "grad_norm": 0.832290530204773, "learning_rate": 7.605184609723189e-07, "loss": 0.0253, "step": 1860 }, { "epoch": 0.01529214539804555, "grad_norm": 1.6003111600875854, "learning_rate": 7.646072699022775e-07, "loss": 0.0295, "step": 1870 }, { "epoch": 0.015373921576644724, "grad_norm": 1.0995795726776123, "learning_rate": 7.686960788322361e-07, "loss": 0.0266, "step": 1880 }, { "epoch": 0.015455697755243898, "grad_norm": 0.6890827417373657, "learning_rate": 7.727848877621949e-07, "loss": 0.0292, "step": 1890 }, { "epoch": 0.015537473933843072, "grad_norm": 0.9940330386161804, "learning_rate": 7.768736966921536e-07, "loss": 0.0279, "step": 1900 }, { "epoch": 0.015619250112442246, "grad_norm": 1.055101990699768, "learning_rate": 7.809625056221123e-07, "loss": 0.0192, "step": 1910 }, { "epoch": 0.01570102629104142, "grad_norm": 0.7658411860466003, "learning_rate": 7.850513145520711e-07, "loss": 0.022, "step": 1920 }, { "epoch": 0.015782802469640592, "grad_norm": 1.2398520708084106, "learning_rate": 7.891401234820298e-07, "loss": 0.0235, "step": 1930 }, { "epoch": 0.015864578648239768, "grad_norm": 0.9140836596488953, "learning_rate": 7.932289324119885e-07, "loss": 0.0176, "step": 1940 }, { "epoch": 0.01594635482683894, "grad_norm": 0.751602292060852, "learning_rate": 7.97317741341947e-07, "loss": 0.0236, "step": 1950 }, { "epoch": 0.016028131005438116, "grad_norm": 0.8845899105072021, "learning_rate": 8.014065502719058e-07, "loss": 0.0196, "step": 1960 }, { "epoch": 0.01610990718403729, "grad_norm": 1.0326802730560303, "learning_rate": 8.054953592018645e-07, "loss": 0.0192, "step": 1970 }, { "epoch": 0.016191683362636464, "grad_norm": 0.8487699627876282, "learning_rate": 8.095841681318232e-07, "loss": 0.0211, "step": 1980 }, { "epoch": 0.016273459541235637, "grad_norm": 0.9864015579223633, "learning_rate": 8.13672977061782e-07, "loss": 0.0245, "step": 1990 }, { "epoch": 0.016355235719834813, "grad_norm": 1.1065493822097778, "learning_rate": 8.177617859917407e-07, "loss": 0.0243, "step": 2000 }, { "epoch": 0.016437011898433985, "grad_norm": 0.7035016417503357, "learning_rate": 8.218505949216994e-07, "loss": 0.0234, "step": 2010 }, { "epoch": 0.01651878807703316, "grad_norm": 0.9495044946670532, "learning_rate": 8.259394038516582e-07, "loss": 0.0201, "step": 2020 }, { "epoch": 0.016600564255632333, "grad_norm": 0.7478663325309753, "learning_rate": 8.300282127816168e-07, "loss": 0.0277, "step": 2030 }, { "epoch": 0.01668234043423151, "grad_norm": 0.7824975252151489, "learning_rate": 8.341170217115754e-07, "loss": 0.0211, "step": 2040 }, { "epoch": 0.01676411661283068, "grad_norm": 0.8358989953994751, "learning_rate": 8.382058306415341e-07, "loss": 0.0177, "step": 2050 }, { "epoch": 0.016845892791429857, "grad_norm": 1.248717188835144, "learning_rate": 8.422946395714929e-07, "loss": 0.021, "step": 2060 }, { "epoch": 0.01692766897002903, "grad_norm": 1.302614688873291, "learning_rate": 8.463834485014516e-07, "loss": 0.0194, "step": 2070 }, { "epoch": 0.017009445148628205, "grad_norm": 1.0778701305389404, "learning_rate": 8.504722574314103e-07, "loss": 0.022, "step": 2080 }, { "epoch": 0.017091221327227377, "grad_norm": 0.6054985523223877, "learning_rate": 8.545610663613691e-07, "loss": 0.0216, "step": 2090 }, { "epoch": 0.017172997505826553, "grad_norm": 1.004224419593811, "learning_rate": 8.586498752913278e-07, "loss": 0.0222, "step": 2100 }, { "epoch": 0.017254773684425725, "grad_norm": 0.44088125228881836, "learning_rate": 8.627386842212864e-07, "loss": 0.0182, "step": 2110 }, { "epoch": 0.0173365498630249, "grad_norm": 1.2193385362625122, "learning_rate": 8.66827493151245e-07, "loss": 0.0195, "step": 2120 }, { "epoch": 0.017418326041624074, "grad_norm": 0.8764237761497498, "learning_rate": 8.709163020812038e-07, "loss": 0.0254, "step": 2130 }, { "epoch": 0.01750010222022325, "grad_norm": 1.1546716690063477, "learning_rate": 8.750051110111625e-07, "loss": 0.021, "step": 2140 }, { "epoch": 0.01758187839882242, "grad_norm": 0.7074424028396606, "learning_rate": 8.790939199411212e-07, "loss": 0.0222, "step": 2150 }, { "epoch": 0.017663654577421598, "grad_norm": 0.4366390109062195, "learning_rate": 8.8318272887108e-07, "loss": 0.0169, "step": 2160 }, { "epoch": 0.01774543075602077, "grad_norm": 0.8902955651283264, "learning_rate": 8.872715378010387e-07, "loss": 0.0221, "step": 2170 }, { "epoch": 0.017827206934619946, "grad_norm": 0.959267258644104, "learning_rate": 8.913603467309974e-07, "loss": 0.0215, "step": 2180 }, { "epoch": 0.017908983113219118, "grad_norm": 0.9067825078964233, "learning_rate": 8.95449155660956e-07, "loss": 0.0231, "step": 2190 }, { "epoch": 0.017990759291818294, "grad_norm": 0.8475223779678345, "learning_rate": 8.995379645909147e-07, "loss": 0.0156, "step": 2200 }, { "epoch": 0.018072535470417466, "grad_norm": 0.5644289255142212, "learning_rate": 9.036267735208734e-07, "loss": 0.0195, "step": 2210 }, { "epoch": 0.018154311649016642, "grad_norm": 0.7580407857894897, "learning_rate": 9.077155824508321e-07, "loss": 0.0179, "step": 2220 }, { "epoch": 0.018236087827615814, "grad_norm": 0.7657674551010132, "learning_rate": 9.118043913807909e-07, "loss": 0.0178, "step": 2230 }, { "epoch": 0.01831786400621499, "grad_norm": 0.8305881023406982, "learning_rate": 9.158932003107496e-07, "loss": 0.0184, "step": 2240 }, { "epoch": 0.018399640184814162, "grad_norm": 0.7564247250556946, "learning_rate": 9.199820092407083e-07, "loss": 0.0182, "step": 2250 }, { "epoch": 0.018481416363413338, "grad_norm": 1.2004976272583008, "learning_rate": 9.24070818170667e-07, "loss": 0.0226, "step": 2260 }, { "epoch": 0.01856319254201251, "grad_norm": 1.0904821157455444, "learning_rate": 9.281596271006255e-07, "loss": 0.0244, "step": 2270 }, { "epoch": 0.018644968720611686, "grad_norm": 0.6409932971000671, "learning_rate": 9.322484360305843e-07, "loss": 0.0195, "step": 2280 }, { "epoch": 0.01872674489921086, "grad_norm": 0.5938852429389954, "learning_rate": 9.36337244960543e-07, "loss": 0.0265, "step": 2290 }, { "epoch": 0.018808521077810034, "grad_norm": 0.8427553772926331, "learning_rate": 9.404260538905017e-07, "loss": 0.0244, "step": 2300 }, { "epoch": 0.018890297256409207, "grad_norm": 0.4211156964302063, "learning_rate": 9.445148628204605e-07, "loss": 0.0146, "step": 2310 }, { "epoch": 0.018972073435008383, "grad_norm": 0.6996192932128906, "learning_rate": 9.486036717504192e-07, "loss": 0.0173, "step": 2320 }, { "epoch": 0.019053849613607555, "grad_norm": 1.245827078819275, "learning_rate": 9.526924806803779e-07, "loss": 0.0182, "step": 2330 }, { "epoch": 0.01913562579220673, "grad_norm": 0.8412471413612366, "learning_rate": 9.567812896103367e-07, "loss": 0.019, "step": 2340 }, { "epoch": 0.019217401970805903, "grad_norm": 0.7528647184371948, "learning_rate": 9.608700985402951e-07, "loss": 0.0217, "step": 2350 }, { "epoch": 0.01929917814940508, "grad_norm": 0.5715022683143616, "learning_rate": 9.64958907470254e-07, "loss": 0.0214, "step": 2360 }, { "epoch": 0.01938095432800425, "grad_norm": 0.9240434765815735, "learning_rate": 9.690477164002127e-07, "loss": 0.0174, "step": 2370 }, { "epoch": 0.019462730506603427, "grad_norm": 1.1387667655944824, "learning_rate": 9.731365253301714e-07, "loss": 0.019, "step": 2380 }, { "epoch": 0.0195445066852026, "grad_norm": 1.187327265739441, "learning_rate": 9.7722533426013e-07, "loss": 0.0224, "step": 2390 }, { "epoch": 0.019626282863801775, "grad_norm": 0.8985880017280579, "learning_rate": 9.813141431900888e-07, "loss": 0.0223, "step": 2400 }, { "epoch": 0.019708059042400947, "grad_norm": 0.9941482543945312, "learning_rate": 9.854029521200475e-07, "loss": 0.0169, "step": 2410 }, { "epoch": 0.019789835221000123, "grad_norm": 0.5453265309333801, "learning_rate": 9.894917610500064e-07, "loss": 0.0185, "step": 2420 }, { "epoch": 0.019871611399599295, "grad_norm": 0.9199097156524658, "learning_rate": 9.935805699799648e-07, "loss": 0.0199, "step": 2430 }, { "epoch": 0.01995338757819847, "grad_norm": 0.7095609903335571, "learning_rate": 9.976693789099235e-07, "loss": 0.0269, "step": 2440 }, { "epoch": 0.020035163756797644, "grad_norm": 0.8660944104194641, "learning_rate": 1.0017581878398822e-06, "loss": 0.0163, "step": 2450 }, { "epoch": 0.02011693993539682, "grad_norm": 1.069158673286438, "learning_rate": 1.0058469967698411e-06, "loss": 0.0204, "step": 2460 }, { "epoch": 0.02019871611399599, "grad_norm": 0.8873130083084106, "learning_rate": 1.0099358056997998e-06, "loss": 0.0158, "step": 2470 }, { "epoch": 0.020280492292595168, "grad_norm": 0.8453035950660706, "learning_rate": 1.0140246146297585e-06, "loss": 0.0207, "step": 2480 }, { "epoch": 0.02036226847119434, "grad_norm": 0.7982597947120667, "learning_rate": 1.0181134235597172e-06, "loss": 0.0222, "step": 2490 }, { "epoch": 0.020444044649793516, "grad_norm": 1.164185881614685, "learning_rate": 1.0222022324896759e-06, "loss": 0.0202, "step": 2500 }, { "epoch": 0.020525820828392688, "grad_norm": 0.7843097448348999, "learning_rate": 1.0262910414196346e-06, "loss": 0.0213, "step": 2510 }, { "epoch": 0.020607597006991864, "grad_norm": 1.3766648769378662, "learning_rate": 1.0303798503495932e-06, "loss": 0.0175, "step": 2520 }, { "epoch": 0.020689373185591036, "grad_norm": 1.119235873222351, "learning_rate": 1.034468659279552e-06, "loss": 0.0203, "step": 2530 }, { "epoch": 0.020771149364190212, "grad_norm": 0.8436144590377808, "learning_rate": 1.0385574682095106e-06, "loss": 0.0167, "step": 2540 }, { "epoch": 0.020852925542789384, "grad_norm": 0.8539019823074341, "learning_rate": 1.0426462771394693e-06, "loss": 0.0222, "step": 2550 }, { "epoch": 0.02093470172138856, "grad_norm": 0.5763360857963562, "learning_rate": 1.046735086069428e-06, "loss": 0.0209, "step": 2560 }, { "epoch": 0.021016477899987732, "grad_norm": 0.8403864502906799, "learning_rate": 1.0508238949993869e-06, "loss": 0.0215, "step": 2570 }, { "epoch": 0.021098254078586908, "grad_norm": 0.6292503476142883, "learning_rate": 1.0549127039293456e-06, "loss": 0.0168, "step": 2580 }, { "epoch": 0.02118003025718608, "grad_norm": 0.5675994157791138, "learning_rate": 1.059001512859304e-06, "loss": 0.0171, "step": 2590 }, { "epoch": 0.021261806435785256, "grad_norm": 0.8643420934677124, "learning_rate": 1.0630903217892627e-06, "loss": 0.0166, "step": 2600 }, { "epoch": 0.02134358261438443, "grad_norm": 0.8766602277755737, "learning_rate": 1.0671791307192216e-06, "loss": 0.0166, "step": 2610 }, { "epoch": 0.021425358792983604, "grad_norm": 0.8299167156219482, "learning_rate": 1.0712679396491803e-06, "loss": 0.0265, "step": 2620 }, { "epoch": 0.021507134971582777, "grad_norm": 0.7048602104187012, "learning_rate": 1.075356748579139e-06, "loss": 0.0138, "step": 2630 }, { "epoch": 0.021588911150181953, "grad_norm": 0.6225913763046265, "learning_rate": 1.0794455575090977e-06, "loss": 0.0232, "step": 2640 }, { "epoch": 0.021670687328781125, "grad_norm": 0.5488381385803223, "learning_rate": 1.0835343664390564e-06, "loss": 0.0245, "step": 2650 }, { "epoch": 0.0217524635073803, "grad_norm": 1.350603461265564, "learning_rate": 1.087623175369015e-06, "loss": 0.0149, "step": 2660 }, { "epoch": 0.021834239685979473, "grad_norm": 0.6596383452415466, "learning_rate": 1.0917119842989737e-06, "loss": 0.0188, "step": 2670 }, { "epoch": 0.02191601586457865, "grad_norm": 0.8221743106842041, "learning_rate": 1.0958007932289324e-06, "loss": 0.0156, "step": 2680 }, { "epoch": 0.02199779204317782, "grad_norm": 0.6935690641403198, "learning_rate": 1.0998896021588911e-06, "loss": 0.0183, "step": 2690 }, { "epoch": 0.022079568221776997, "grad_norm": 0.8324612379074097, "learning_rate": 1.1039784110888498e-06, "loss": 0.0116, "step": 2700 }, { "epoch": 0.02216134440037617, "grad_norm": 0.8581863045692444, "learning_rate": 1.1080672200188087e-06, "loss": 0.0172, "step": 2710 }, { "epoch": 0.022243120578975345, "grad_norm": 0.7777926921844482, "learning_rate": 1.1121560289487674e-06, "loss": 0.0186, "step": 2720 }, { "epoch": 0.022324896757574517, "grad_norm": 0.5943484306335449, "learning_rate": 1.116244837878726e-06, "loss": 0.0152, "step": 2730 }, { "epoch": 0.022406672936173693, "grad_norm": 0.7810112237930298, "learning_rate": 1.1203336468086848e-06, "loss": 0.0177, "step": 2740 }, { "epoch": 0.022488449114772865, "grad_norm": 0.791497528553009, "learning_rate": 1.1244224557386435e-06, "loss": 0.0192, "step": 2750 }, { "epoch": 0.02257022529337204, "grad_norm": 0.8567879796028137, "learning_rate": 1.1285112646686021e-06, "loss": 0.0168, "step": 2760 }, { "epoch": 0.022652001471971214, "grad_norm": 0.9758214950561523, "learning_rate": 1.1326000735985608e-06, "loss": 0.0241, "step": 2770 }, { "epoch": 0.02273377765057039, "grad_norm": 0.7046816349029541, "learning_rate": 1.1366888825285195e-06, "loss": 0.0159, "step": 2780 }, { "epoch": 0.02281555382916956, "grad_norm": 0.6212954521179199, "learning_rate": 1.1407776914584782e-06, "loss": 0.0216, "step": 2790 }, { "epoch": 0.022897330007768738, "grad_norm": 0.678287923336029, "learning_rate": 1.1448665003884369e-06, "loss": 0.0221, "step": 2800 }, { "epoch": 0.02297910618636791, "grad_norm": 0.9189453721046448, "learning_rate": 1.1489553093183958e-06, "loss": 0.015, "step": 2810 }, { "epoch": 0.023060882364967086, "grad_norm": 0.4063735604286194, "learning_rate": 1.1530441182483545e-06, "loss": 0.0155, "step": 2820 }, { "epoch": 0.023142658543566258, "grad_norm": 0.8095321655273438, "learning_rate": 1.157132927178313e-06, "loss": 0.013, "step": 2830 }, { "epoch": 0.023224434722165434, "grad_norm": 0.6770833730697632, "learning_rate": 1.1612217361082716e-06, "loss": 0.0185, "step": 2840 }, { "epoch": 0.023306210900764606, "grad_norm": 0.6769402027130127, "learning_rate": 1.1653105450382305e-06, "loss": 0.0143, "step": 2850 }, { "epoch": 0.023387987079363782, "grad_norm": 0.9326784014701843, "learning_rate": 1.1693993539681892e-06, "loss": 0.0168, "step": 2860 }, { "epoch": 0.023469763257962954, "grad_norm": 0.49598228931427, "learning_rate": 1.173488162898148e-06, "loss": 0.0161, "step": 2870 }, { "epoch": 0.02355153943656213, "grad_norm": 0.8045369982719421, "learning_rate": 1.1775769718281066e-06, "loss": 0.0155, "step": 2880 }, { "epoch": 0.023633315615161302, "grad_norm": 0.6725611090660095, "learning_rate": 1.1816657807580653e-06, "loss": 0.0137, "step": 2890 }, { "epoch": 0.023715091793760478, "grad_norm": 0.9340780973434448, "learning_rate": 1.185754589688024e-06, "loss": 0.0195, "step": 2900 }, { "epoch": 0.02379686797235965, "grad_norm": 0.8150255680084229, "learning_rate": 1.1898433986179826e-06, "loss": 0.0168, "step": 2910 }, { "epoch": 0.023878644150958826, "grad_norm": 0.6778362989425659, "learning_rate": 1.1939322075479413e-06, "loss": 0.01, "step": 2920 }, { "epoch": 0.023960420329558, "grad_norm": 0.9786208271980286, "learning_rate": 1.1980210164779e-06, "loss": 0.018, "step": 2930 }, { "epoch": 0.024042196508157174, "grad_norm": 0.424230694770813, "learning_rate": 1.2021098254078587e-06, "loss": 0.0176, "step": 2940 }, { "epoch": 0.024123972686756347, "grad_norm": 0.478906512260437, "learning_rate": 1.2061986343378176e-06, "loss": 0.0099, "step": 2950 }, { "epoch": 0.024205748865355523, "grad_norm": 0.4936705231666565, "learning_rate": 1.2102874432677763e-06, "loss": 0.0163, "step": 2960 }, { "epoch": 0.024287525043954695, "grad_norm": 0.8430413007736206, "learning_rate": 1.214376252197735e-06, "loss": 0.0177, "step": 2970 }, { "epoch": 0.02436930122255387, "grad_norm": 0.9461743235588074, "learning_rate": 1.2184650611276937e-06, "loss": 0.0187, "step": 2980 }, { "epoch": 0.024451077401153043, "grad_norm": 1.028806447982788, "learning_rate": 1.2225538700576524e-06, "loss": 0.0177, "step": 2990 }, { "epoch": 0.02453285357975222, "grad_norm": 0.6077003479003906, "learning_rate": 1.226642678987611e-06, "loss": 0.0153, "step": 3000 }, { "epoch": 0.02461462975835139, "grad_norm": 0.9136999249458313, "learning_rate": 1.2307314879175697e-06, "loss": 0.0179, "step": 3010 }, { "epoch": 0.024696405936950567, "grad_norm": 0.7793986201286316, "learning_rate": 1.2348202968475284e-06, "loss": 0.0136, "step": 3020 }, { "epoch": 0.02477818211554974, "grad_norm": 0.38555195927619934, "learning_rate": 1.238909105777487e-06, "loss": 0.0195, "step": 3030 }, { "epoch": 0.024859958294148915, "grad_norm": 0.7198175191879272, "learning_rate": 1.2429979147074458e-06, "loss": 0.0152, "step": 3040 }, { "epoch": 0.024941734472748087, "grad_norm": 0.8507718443870544, "learning_rate": 1.2470867236374045e-06, "loss": 0.0177, "step": 3050 }, { "epoch": 0.025023510651347263, "grad_norm": 0.6421129703521729, "learning_rate": 1.2511755325673634e-06, "loss": 0.0162, "step": 3060 }, { "epoch": 0.025105286829946435, "grad_norm": 0.680374801158905, "learning_rate": 1.255264341497322e-06, "loss": 0.0167, "step": 3070 }, { "epoch": 0.02518706300854561, "grad_norm": 0.5657944679260254, "learning_rate": 1.2593531504272807e-06, "loss": 0.0143, "step": 3080 }, { "epoch": 0.025268839187144784, "grad_norm": 0.5668798089027405, "learning_rate": 1.2634419593572394e-06, "loss": 0.0181, "step": 3090 }, { "epoch": 0.02535061536574396, "grad_norm": 0.49946075677871704, "learning_rate": 1.2675307682871981e-06, "loss": 0.0124, "step": 3100 }, { "epoch": 0.02543239154434313, "grad_norm": 0.6147026419639587, "learning_rate": 1.2716195772171566e-06, "loss": 0.0145, "step": 3110 }, { "epoch": 0.025514167722942308, "grad_norm": 0.8548004031181335, "learning_rate": 1.2757083861471153e-06, "loss": 0.0148, "step": 3120 }, { "epoch": 0.02559594390154148, "grad_norm": 0.6901496648788452, "learning_rate": 1.279797195077074e-06, "loss": 0.0115, "step": 3130 }, { "epoch": 0.025677720080140656, "grad_norm": 1.1626031398773193, "learning_rate": 1.2838860040070329e-06, "loss": 0.0159, "step": 3140 }, { "epoch": 0.025759496258739828, "grad_norm": 1.1190515756607056, "learning_rate": 1.2879748129369915e-06, "loss": 0.0158, "step": 3150 }, { "epoch": 0.025841272437339004, "grad_norm": 0.924916684627533, "learning_rate": 1.2920636218669502e-06, "loss": 0.0127, "step": 3160 }, { "epoch": 0.025923048615938176, "grad_norm": 0.5516752004623413, "learning_rate": 1.296152430796909e-06, "loss": 0.015, "step": 3170 }, { "epoch": 0.026004824794537352, "grad_norm": 0.7694199085235596, "learning_rate": 1.3002412397268676e-06, "loss": 0.0133, "step": 3180 }, { "epoch": 0.026086600973136524, "grad_norm": 0.7436428070068359, "learning_rate": 1.3043300486568263e-06, "loss": 0.0144, "step": 3190 }, { "epoch": 0.0261683771517357, "grad_norm": 0.726702868938446, "learning_rate": 1.3084188575867852e-06, "loss": 0.0167, "step": 3200 }, { "epoch": 0.026250153330334872, "grad_norm": 0.39614537358283997, "learning_rate": 1.3125076665167439e-06, "loss": 0.0109, "step": 3210 }, { "epoch": 0.026331929508934048, "grad_norm": 0.6014586687088013, "learning_rate": 1.3165964754467026e-06, "loss": 0.0183, "step": 3220 }, { "epoch": 0.02641370568753322, "grad_norm": 0.7098396420478821, "learning_rate": 1.3206852843766613e-06, "loss": 0.0144, "step": 3230 }, { "epoch": 0.026495481866132396, "grad_norm": 0.6827712655067444, "learning_rate": 1.32477409330662e-06, "loss": 0.0203, "step": 3240 }, { "epoch": 0.02657725804473157, "grad_norm": 0.6132046580314636, "learning_rate": 1.3288629022365786e-06, "loss": 0.0142, "step": 3250 }, { "epoch": 0.026659034223330744, "grad_norm": 0.7569664120674133, "learning_rate": 1.3329517111665375e-06, "loss": 0.0155, "step": 3260 }, { "epoch": 0.026740810401929917, "grad_norm": 0.6682250499725342, "learning_rate": 1.3370405200964958e-06, "loss": 0.0158, "step": 3270 }, { "epoch": 0.026822586580529093, "grad_norm": 0.7162862420082092, "learning_rate": 1.3411293290264547e-06, "loss": 0.0157, "step": 3280 }, { "epoch": 0.026904362759128265, "grad_norm": 1.1174174547195435, "learning_rate": 1.3452181379564134e-06, "loss": 0.0186, "step": 3290 }, { "epoch": 0.02698613893772744, "grad_norm": 0.5226957201957703, "learning_rate": 1.349306946886372e-06, "loss": 0.0175, "step": 3300 }, { "epoch": 0.027067915116326613, "grad_norm": 0.9089828729629517, "learning_rate": 1.3533957558163307e-06, "loss": 0.0191, "step": 3310 }, { "epoch": 0.02714969129492579, "grad_norm": 0.657285213470459, "learning_rate": 1.3574845647462894e-06, "loss": 0.0194, "step": 3320 }, { "epoch": 0.02723146747352496, "grad_norm": 0.8843213319778442, "learning_rate": 1.3615733736762481e-06, "loss": 0.0103, "step": 3330 }, { "epoch": 0.027313243652124137, "grad_norm": 0.6670709848403931, "learning_rate": 1.365662182606207e-06, "loss": 0.0162, "step": 3340 }, { "epoch": 0.02739501983072331, "grad_norm": 0.2659953236579895, "learning_rate": 1.3697509915361657e-06, "loss": 0.0152, "step": 3350 }, { "epoch": 0.027476796009322485, "grad_norm": 0.6499205231666565, "learning_rate": 1.3738398004661244e-06, "loss": 0.0127, "step": 3360 }, { "epoch": 0.027558572187921657, "grad_norm": 0.683926522731781, "learning_rate": 1.377928609396083e-06, "loss": 0.0132, "step": 3370 }, { "epoch": 0.027640348366520833, "grad_norm": 1.1246620416641235, "learning_rate": 1.3820174183260418e-06, "loss": 0.0175, "step": 3380 }, { "epoch": 0.027722124545120005, "grad_norm": 0.7255322337150574, "learning_rate": 1.3861062272560004e-06, "loss": 0.0172, "step": 3390 }, { "epoch": 0.02780390072371918, "grad_norm": 0.640313446521759, "learning_rate": 1.3901950361859593e-06, "loss": 0.0126, "step": 3400 }, { "epoch": 0.027885676902318354, "grad_norm": 0.7173632383346558, "learning_rate": 1.394283845115918e-06, "loss": 0.017, "step": 3410 }, { "epoch": 0.02796745308091753, "grad_norm": 0.48858392238616943, "learning_rate": 1.3983726540458767e-06, "loss": 0.0127, "step": 3420 }, { "epoch": 0.0280492292595167, "grad_norm": 0.6261333227157593, "learning_rate": 1.4024614629758352e-06, "loss": 0.0135, "step": 3430 }, { "epoch": 0.028131005438115878, "grad_norm": 0.44763949513435364, "learning_rate": 1.4065502719057939e-06, "loss": 0.0123, "step": 3440 }, { "epoch": 0.02821278161671505, "grad_norm": 1.0369536876678467, "learning_rate": 1.4106390808357526e-06, "loss": 0.0203, "step": 3450 }, { "epoch": 0.028294557795314226, "grad_norm": 0.6758723258972168, "learning_rate": 1.4147278897657113e-06, "loss": 0.0115, "step": 3460 }, { "epoch": 0.028376333973913398, "grad_norm": 0.8049179911613464, "learning_rate": 1.41881669869567e-06, "loss": 0.0172, "step": 3470 }, { "epoch": 0.028458110152512574, "grad_norm": 0.42695003747940063, "learning_rate": 1.4229055076256288e-06, "loss": 0.0152, "step": 3480 }, { "epoch": 0.028539886331111746, "grad_norm": 0.5059463977813721, "learning_rate": 1.4269943165555875e-06, "loss": 0.0125, "step": 3490 }, { "epoch": 0.028621662509710922, "grad_norm": 0.810394287109375, "learning_rate": 1.4310831254855462e-06, "loss": 0.0137, "step": 3500 }, { "epoch": 0.028703438688310094, "grad_norm": 0.5483757257461548, "learning_rate": 1.435171934415505e-06, "loss": 0.0158, "step": 3510 }, { "epoch": 0.02878521486690927, "grad_norm": 1.0205004215240479, "learning_rate": 1.4392607433454636e-06, "loss": 0.0129, "step": 3520 }, { "epoch": 0.028866991045508442, "grad_norm": 1.597946286201477, "learning_rate": 1.4433495522754223e-06, "loss": 0.0143, "step": 3530 }, { "epoch": 0.028948767224107618, "grad_norm": 0.6048858165740967, "learning_rate": 1.447438361205381e-06, "loss": 0.0147, "step": 3540 }, { "epoch": 0.02903054340270679, "grad_norm": 0.48918259143829346, "learning_rate": 1.4515271701353399e-06, "loss": 0.0156, "step": 3550 }, { "epoch": 0.029112319581305966, "grad_norm": 0.43175217509269714, "learning_rate": 1.4556159790652985e-06, "loss": 0.0128, "step": 3560 }, { "epoch": 0.02919409575990514, "grad_norm": 0.5688096880912781, "learning_rate": 1.4597047879952572e-06, "loss": 0.0135, "step": 3570 }, { "epoch": 0.029275871938504314, "grad_norm": 0.47472959756851196, "learning_rate": 1.463793596925216e-06, "loss": 0.0142, "step": 3580 }, { "epoch": 0.029357648117103487, "grad_norm": 1.1332507133483887, "learning_rate": 1.4678824058551744e-06, "loss": 0.0137, "step": 3590 }, { "epoch": 0.029439424295702663, "grad_norm": 0.4645165503025055, "learning_rate": 1.471971214785133e-06, "loss": 0.0108, "step": 3600 }, { "epoch": 0.029521200474301835, "grad_norm": 0.351664662361145, "learning_rate": 1.4760600237150918e-06, "loss": 0.0115, "step": 3610 }, { "epoch": 0.02960297665290101, "grad_norm": 0.6137601137161255, "learning_rate": 1.4801488326450505e-06, "loss": 0.0164, "step": 3620 }, { "epoch": 0.029684752831500183, "grad_norm": 0.5417942404747009, "learning_rate": 1.4842376415750093e-06, "loss": 0.0126, "step": 3630 }, { "epoch": 0.02976652901009936, "grad_norm": 0.7579416632652283, "learning_rate": 1.488326450504968e-06, "loss": 0.012, "step": 3640 }, { "epoch": 0.02984830518869853, "grad_norm": 0.6156266927719116, "learning_rate": 1.4924152594349267e-06, "loss": 0.0093, "step": 3650 }, { "epoch": 0.029930081367297707, "grad_norm": 1.0282368659973145, "learning_rate": 1.4965040683648854e-06, "loss": 0.016, "step": 3660 }, { "epoch": 0.03001185754589688, "grad_norm": 0.5861940979957581, "learning_rate": 1.500592877294844e-06, "loss": 0.0132, "step": 3670 }, { "epoch": 0.030093633724496055, "grad_norm": 0.765267014503479, "learning_rate": 1.5046816862248028e-06, "loss": 0.0144, "step": 3680 }, { "epoch": 0.030175409903095227, "grad_norm": 0.5525199174880981, "learning_rate": 1.5087704951547617e-06, "loss": 0.0125, "step": 3690 }, { "epoch": 0.030257186081694403, "grad_norm": 0.46491435170173645, "learning_rate": 1.5128593040847204e-06, "loss": 0.0103, "step": 3700 }, { "epoch": 0.030338962260293575, "grad_norm": 0.368021696805954, "learning_rate": 1.516948113014679e-06, "loss": 0.0119, "step": 3710 }, { "epoch": 0.03042073843889275, "grad_norm": 0.7681676149368286, "learning_rate": 1.5210369219446377e-06, "loss": 0.0095, "step": 3720 }, { "epoch": 0.030502514617491924, "grad_norm": 0.6889114379882812, "learning_rate": 1.5251257308745964e-06, "loss": 0.012, "step": 3730 }, { "epoch": 0.0305842907960911, "grad_norm": 0.7520129084587097, "learning_rate": 1.529214539804555e-06, "loss": 0.0126, "step": 3740 }, { "epoch": 0.03066606697469027, "grad_norm": 0.46731698513031006, "learning_rate": 1.5333033487345136e-06, "loss": 0.012, "step": 3750 }, { "epoch": 0.030747843153289448, "grad_norm": 0.6616219282150269, "learning_rate": 1.5373921576644723e-06, "loss": 0.0159, "step": 3760 }, { "epoch": 0.03082961933188862, "grad_norm": 0.47107169032096863, "learning_rate": 1.5414809665944312e-06, "loss": 0.0125, "step": 3770 }, { "epoch": 0.030911395510487796, "grad_norm": 0.8340710401535034, "learning_rate": 1.5455697755243899e-06, "loss": 0.0127, "step": 3780 }, { "epoch": 0.030993171689086968, "grad_norm": 0.881138801574707, "learning_rate": 1.5496585844543485e-06, "loss": 0.0144, "step": 3790 }, { "epoch": 0.031074947867686144, "grad_norm": 0.5952323079109192, "learning_rate": 1.5537473933843072e-06, "loss": 0.0117, "step": 3800 }, { "epoch": 0.031156724046285316, "grad_norm": 0.4318182170391083, "learning_rate": 1.557836202314266e-06, "loss": 0.0135, "step": 3810 }, { "epoch": 0.031238500224884492, "grad_norm": 0.8230600953102112, "learning_rate": 1.5619250112442246e-06, "loss": 0.0135, "step": 3820 }, { "epoch": 0.031320276403483664, "grad_norm": 0.8565261960029602, "learning_rate": 1.5660138201741835e-06, "loss": 0.0113, "step": 3830 }, { "epoch": 0.03140205258208284, "grad_norm": 0.555404007434845, "learning_rate": 1.5701026291041422e-06, "loss": 0.0157, "step": 3840 }, { "epoch": 0.031483828760682016, "grad_norm": 0.5525087118148804, "learning_rate": 1.5741914380341009e-06, "loss": 0.0098, "step": 3850 }, { "epoch": 0.031565604939281185, "grad_norm": 0.7917882800102234, "learning_rate": 1.5782802469640596e-06, "loss": 0.0147, "step": 3860 }, { "epoch": 0.03164738111788036, "grad_norm": 0.7172435522079468, "learning_rate": 1.5823690558940182e-06, "loss": 0.016, "step": 3870 }, { "epoch": 0.031729157296479536, "grad_norm": 0.610882043838501, "learning_rate": 1.586457864823977e-06, "loss": 0.0129, "step": 3880 }, { "epoch": 0.03181093347507871, "grad_norm": 0.9148808717727661, "learning_rate": 1.5905466737539358e-06, "loss": 0.0098, "step": 3890 }, { "epoch": 0.03189270965367788, "grad_norm": 0.7708027958869934, "learning_rate": 1.594635482683894e-06, "loss": 0.0143, "step": 3900 }, { "epoch": 0.03197448583227706, "grad_norm": 0.7160828113555908, "learning_rate": 1.598724291613853e-06, "loss": 0.0173, "step": 3910 }, { "epoch": 0.03205626201087623, "grad_norm": 0.901698887348175, "learning_rate": 1.6028131005438117e-06, "loss": 0.0111, "step": 3920 }, { "epoch": 0.03213803818947541, "grad_norm": 1.052769422531128, "learning_rate": 1.6069019094737704e-06, "loss": 0.02, "step": 3930 }, { "epoch": 0.03221981436807458, "grad_norm": 1.178275227546692, "learning_rate": 1.610990718403729e-06, "loss": 0.0139, "step": 3940 }, { "epoch": 0.03230159054667375, "grad_norm": 0.6990852355957031, "learning_rate": 1.6150795273336877e-06, "loss": 0.0127, "step": 3950 }, { "epoch": 0.03238336672527293, "grad_norm": 0.7708835005760193, "learning_rate": 1.6191683362636464e-06, "loss": 0.0088, "step": 3960 }, { "epoch": 0.032465142903872105, "grad_norm": 0.45492079854011536, "learning_rate": 1.6232571451936053e-06, "loss": 0.0142, "step": 3970 }, { "epoch": 0.03254691908247127, "grad_norm": 0.6293238401412964, "learning_rate": 1.627345954123564e-06, "loss": 0.0114, "step": 3980 }, { "epoch": 0.03262869526107045, "grad_norm": 0.6108927130699158, "learning_rate": 1.6314347630535227e-06, "loss": 0.0124, "step": 3990 }, { "epoch": 0.032710471439669625, "grad_norm": 0.5335335731506348, "learning_rate": 1.6355235719834814e-06, "loss": 0.0111, "step": 4000 }, { "epoch": 0.0327922476182688, "grad_norm": 0.5913642644882202, "learning_rate": 1.63961238091344e-06, "loss": 0.0091, "step": 4010 }, { "epoch": 0.03287402379686797, "grad_norm": 0.6201720237731934, "learning_rate": 1.6437011898433988e-06, "loss": 0.0131, "step": 4020 }, { "epoch": 0.032955799975467145, "grad_norm": 0.7870773673057556, "learning_rate": 1.6477899987733574e-06, "loss": 0.0161, "step": 4030 }, { "epoch": 0.03303757615406632, "grad_norm": 0.5950058698654175, "learning_rate": 1.6518788077033163e-06, "loss": 0.013, "step": 4040 }, { "epoch": 0.0331193523326655, "grad_norm": 0.5774102807044983, "learning_rate": 1.655967616633275e-06, "loss": 0.0111, "step": 4050 }, { "epoch": 0.033201128511264666, "grad_norm": 0.6722300052642822, "learning_rate": 1.6600564255632335e-06, "loss": 0.0113, "step": 4060 }, { "epoch": 0.03328290468986384, "grad_norm": 0.599590539932251, "learning_rate": 1.6641452344931922e-06, "loss": 0.0109, "step": 4070 }, { "epoch": 0.03336468086846302, "grad_norm": 0.6564891338348389, "learning_rate": 1.6682340434231509e-06, "loss": 0.0178, "step": 4080 }, { "epoch": 0.03344645704706219, "grad_norm": 0.6660075187683105, "learning_rate": 1.6723228523531096e-06, "loss": 0.0133, "step": 4090 }, { "epoch": 0.03352823322566136, "grad_norm": 0.3452300727367401, "learning_rate": 1.6764116612830683e-06, "loss": 0.0168, "step": 4100 }, { "epoch": 0.03361000940426054, "grad_norm": 1.0351828336715698, "learning_rate": 1.680500470213027e-06, "loss": 0.0108, "step": 4110 }, { "epoch": 0.033691785582859714, "grad_norm": 0.3236365020275116, "learning_rate": 1.6845892791429858e-06, "loss": 0.0132, "step": 4120 }, { "epoch": 0.03377356176145889, "grad_norm": 1.0196924209594727, "learning_rate": 1.6886780880729445e-06, "loss": 0.0109, "step": 4130 }, { "epoch": 0.03385533794005806, "grad_norm": 0.27483275532722473, "learning_rate": 1.6927668970029032e-06, "loss": 0.0117, "step": 4140 }, { "epoch": 0.033937114118657234, "grad_norm": 0.38854026794433594, "learning_rate": 1.696855705932862e-06, "loss": 0.0154, "step": 4150 }, { "epoch": 0.03401889029725641, "grad_norm": 0.490877628326416, "learning_rate": 1.7009445148628206e-06, "loss": 0.0136, "step": 4160 }, { "epoch": 0.034100666475855586, "grad_norm": 0.7204733490943909, "learning_rate": 1.7050333237927793e-06, "loss": 0.0114, "step": 4170 }, { "epoch": 0.034182442654454755, "grad_norm": 0.6644676327705383, "learning_rate": 1.7091221327227382e-06, "loss": 0.011, "step": 4180 }, { "epoch": 0.03426421883305393, "grad_norm": 0.5738486647605896, "learning_rate": 1.7132109416526969e-06, "loss": 0.0097, "step": 4190 }, { "epoch": 0.034345995011653106, "grad_norm": 0.7578938007354736, "learning_rate": 1.7172997505826555e-06, "loss": 0.0123, "step": 4200 }, { "epoch": 0.03442777119025228, "grad_norm": 0.4658215343952179, "learning_rate": 1.7213885595126142e-06, "loss": 0.013, "step": 4210 }, { "epoch": 0.03450954736885145, "grad_norm": 0.6085911393165588, "learning_rate": 1.7254773684425727e-06, "loss": 0.0101, "step": 4220 }, { "epoch": 0.03459132354745063, "grad_norm": 0.7969480156898499, "learning_rate": 1.7295661773725314e-06, "loss": 0.0118, "step": 4230 }, { "epoch": 0.0346730997260498, "grad_norm": 0.28409257531166077, "learning_rate": 1.73365498630249e-06, "loss": 0.0139, "step": 4240 }, { "epoch": 0.03475487590464898, "grad_norm": 0.6369352340698242, "learning_rate": 1.7377437952324488e-06, "loss": 0.0113, "step": 4250 }, { "epoch": 0.03483665208324815, "grad_norm": 0.7766498327255249, "learning_rate": 1.7418326041624077e-06, "loss": 0.0166, "step": 4260 }, { "epoch": 0.03491842826184732, "grad_norm": 0.38910865783691406, "learning_rate": 1.7459214130923663e-06, "loss": 0.0142, "step": 4270 }, { "epoch": 0.0350002044404465, "grad_norm": 0.5756144523620605, "learning_rate": 1.750010222022325e-06, "loss": 0.0091, "step": 4280 }, { "epoch": 0.035081980619045675, "grad_norm": 0.2403801530599594, "learning_rate": 1.7540990309522837e-06, "loss": 0.0115, "step": 4290 }, { "epoch": 0.03516375679764484, "grad_norm": 0.4053070545196533, "learning_rate": 1.7581878398822424e-06, "loss": 0.0116, "step": 4300 }, { "epoch": 0.03524553297624402, "grad_norm": 0.3956272304058075, "learning_rate": 1.762276648812201e-06, "loss": 0.0106, "step": 4310 }, { "epoch": 0.035327309154843195, "grad_norm": 0.6047540903091431, "learning_rate": 1.76636545774216e-06, "loss": 0.0151, "step": 4320 }, { "epoch": 0.03540908533344237, "grad_norm": 0.4543668329715729, "learning_rate": 1.7704542666721187e-06, "loss": 0.0109, "step": 4330 }, { "epoch": 0.03549086151204154, "grad_norm": 0.6661786437034607, "learning_rate": 1.7745430756020774e-06, "loss": 0.0139, "step": 4340 }, { "epoch": 0.035572637690640715, "grad_norm": 0.588858962059021, "learning_rate": 1.778631884532036e-06, "loss": 0.0138, "step": 4350 }, { "epoch": 0.03565441386923989, "grad_norm": 1.9523764848709106, "learning_rate": 1.7827206934619947e-06, "loss": 0.011, "step": 4360 }, { "epoch": 0.03573619004783907, "grad_norm": 0.876654863357544, "learning_rate": 1.7868095023919534e-06, "loss": 0.0134, "step": 4370 }, { "epoch": 0.035817966226438236, "grad_norm": 1.080365777015686, "learning_rate": 1.790898311321912e-06, "loss": 0.0129, "step": 4380 }, { "epoch": 0.03589974240503741, "grad_norm": 0.5491147041320801, "learning_rate": 1.7949871202518706e-06, "loss": 0.0108, "step": 4390 }, { "epoch": 0.03598151858363659, "grad_norm": 0.45625370740890503, "learning_rate": 1.7990759291818295e-06, "loss": 0.0139, "step": 4400 }, { "epoch": 0.03606329476223576, "grad_norm": 0.9616511464118958, "learning_rate": 1.8031647381117882e-06, "loss": 0.0118, "step": 4410 }, { "epoch": 0.03614507094083493, "grad_norm": 0.8957614898681641, "learning_rate": 1.8072535470417469e-06, "loss": 0.0109, "step": 4420 }, { "epoch": 0.03622684711943411, "grad_norm": 0.4979615807533264, "learning_rate": 1.8113423559717055e-06, "loss": 0.0091, "step": 4430 }, { "epoch": 0.036308623298033284, "grad_norm": 0.4960176944732666, "learning_rate": 1.8154311649016642e-06, "loss": 0.0093, "step": 4440 }, { "epoch": 0.03639039947663246, "grad_norm": 0.7180801630020142, "learning_rate": 1.819519973831623e-06, "loss": 0.0135, "step": 4450 }, { "epoch": 0.03647217565523163, "grad_norm": 0.3395709693431854, "learning_rate": 1.8236087827615818e-06, "loss": 0.01, "step": 4460 }, { "epoch": 0.036553951833830804, "grad_norm": 0.4302184581756592, "learning_rate": 1.8276975916915405e-06, "loss": 0.0154, "step": 4470 }, { "epoch": 0.03663572801242998, "grad_norm": 0.5934908986091614, "learning_rate": 1.8317864006214992e-06, "loss": 0.0121, "step": 4480 }, { "epoch": 0.036717504191029156, "grad_norm": 0.4930459260940552, "learning_rate": 1.8358752095514579e-06, "loss": 0.0116, "step": 4490 }, { "epoch": 0.036799280369628325, "grad_norm": 0.2977577745914459, "learning_rate": 1.8399640184814166e-06, "loss": 0.0109, "step": 4500 }, { "epoch": 0.0368810565482275, "grad_norm": 0.5819641947746277, "learning_rate": 1.8440528274113752e-06, "loss": 0.012, "step": 4510 }, { "epoch": 0.036962832726826676, "grad_norm": 0.19429124891757965, "learning_rate": 1.848141636341334e-06, "loss": 0.0094, "step": 4520 }, { "epoch": 0.03704460890542585, "grad_norm": 0.2997596561908722, "learning_rate": 1.8522304452712928e-06, "loss": 0.0136, "step": 4530 }, { "epoch": 0.03712638508402502, "grad_norm": 0.5615129470825195, "learning_rate": 1.856319254201251e-06, "loss": 0.0124, "step": 4540 }, { "epoch": 0.0372081612626242, "grad_norm": 0.5873528718948364, "learning_rate": 1.86040806313121e-06, "loss": 0.0136, "step": 4550 }, { "epoch": 0.03728993744122337, "grad_norm": 0.4241098165512085, "learning_rate": 1.8644968720611687e-06, "loss": 0.0136, "step": 4560 }, { "epoch": 0.03737171361982255, "grad_norm": 0.8561471700668335, "learning_rate": 1.8685856809911274e-06, "loss": 0.0132, "step": 4570 }, { "epoch": 0.03745348979842172, "grad_norm": 0.665401041507721, "learning_rate": 1.872674489921086e-06, "loss": 0.0132, "step": 4580 }, { "epoch": 0.03753526597702089, "grad_norm": 0.4842516779899597, "learning_rate": 1.8767632988510447e-06, "loss": 0.0132, "step": 4590 }, { "epoch": 0.03761704215562007, "grad_norm": 0.9491329789161682, "learning_rate": 1.8808521077810034e-06, "loss": 0.0121, "step": 4600 }, { "epoch": 0.037698818334219245, "grad_norm": 0.34329769015312195, "learning_rate": 1.8849409167109623e-06, "loss": 0.0103, "step": 4610 }, { "epoch": 0.03778059451281841, "grad_norm": 0.38318753242492676, "learning_rate": 1.889029725640921e-06, "loss": 0.0109, "step": 4620 }, { "epoch": 0.03786237069141759, "grad_norm": 0.8690067529678345, "learning_rate": 1.8931185345708797e-06, "loss": 0.0166, "step": 4630 }, { "epoch": 0.037944146870016765, "grad_norm": 0.7746715545654297, "learning_rate": 1.8972073435008384e-06, "loss": 0.0108, "step": 4640 }, { "epoch": 0.03802592304861594, "grad_norm": 0.4402405321598053, "learning_rate": 1.901296152430797e-06, "loss": 0.0072, "step": 4650 }, { "epoch": 0.03810769922721511, "grad_norm": 0.503290057182312, "learning_rate": 1.9053849613607558e-06, "loss": 0.0119, "step": 4660 }, { "epoch": 0.038189475405814285, "grad_norm": 0.5684505105018616, "learning_rate": 1.9094737702907144e-06, "loss": 0.0097, "step": 4670 }, { "epoch": 0.03827125158441346, "grad_norm": 0.5448082685470581, "learning_rate": 1.9135625792206733e-06, "loss": 0.0105, "step": 4680 }, { "epoch": 0.03835302776301264, "grad_norm": 0.6527565717697144, "learning_rate": 1.917651388150632e-06, "loss": 0.0141, "step": 4690 }, { "epoch": 0.038434803941611806, "grad_norm": 0.527949333190918, "learning_rate": 1.9217401970805903e-06, "loss": 0.0132, "step": 4700 }, { "epoch": 0.03851658012021098, "grad_norm": 0.5864323377609253, "learning_rate": 1.925829006010549e-06, "loss": 0.01, "step": 4710 }, { "epoch": 0.03859835629881016, "grad_norm": 0.8070444464683533, "learning_rate": 1.929917814940508e-06, "loss": 0.0121, "step": 4720 }, { "epoch": 0.03868013247740933, "grad_norm": 0.22216126322746277, "learning_rate": 1.9340066238704666e-06, "loss": 0.0119, "step": 4730 }, { "epoch": 0.0387619086560085, "grad_norm": 0.3387186825275421, "learning_rate": 1.9380954328004255e-06, "loss": 0.0117, "step": 4740 }, { "epoch": 0.03884368483460768, "grad_norm": 0.8486486077308655, "learning_rate": 1.942184241730384e-06, "loss": 0.0123, "step": 4750 }, { "epoch": 0.038925461013206854, "grad_norm": 0.3959420621395111, "learning_rate": 1.946273050660343e-06, "loss": 0.0106, "step": 4760 }, { "epoch": 0.03900723719180603, "grad_norm": 0.7306724190711975, "learning_rate": 1.9503618595903013e-06, "loss": 0.0108, "step": 4770 }, { "epoch": 0.0390890133704052, "grad_norm": 0.40827372670173645, "learning_rate": 1.95445066852026e-06, "loss": 0.0119, "step": 4780 }, { "epoch": 0.039170789549004374, "grad_norm": 0.49872416257858276, "learning_rate": 1.958539477450219e-06, "loss": 0.0092, "step": 4790 }, { "epoch": 0.03925256572760355, "grad_norm": 0.7687469124794006, "learning_rate": 1.9626282863801776e-06, "loss": 0.0084, "step": 4800 }, { "epoch": 0.039334341906202726, "grad_norm": 0.5276715755462646, "learning_rate": 1.9667170953101365e-06, "loss": 0.0102, "step": 4810 }, { "epoch": 0.039416118084801895, "grad_norm": 0.27500495314598083, "learning_rate": 1.970805904240095e-06, "loss": 0.0146, "step": 4820 }, { "epoch": 0.03949789426340107, "grad_norm": 0.5596851706504822, "learning_rate": 1.974894713170054e-06, "loss": 0.0077, "step": 4830 }, { "epoch": 0.039579670442000246, "grad_norm": 0.634928822517395, "learning_rate": 1.9789835221000127e-06, "loss": 0.0077, "step": 4840 }, { "epoch": 0.03966144662059942, "grad_norm": 0.4987649917602539, "learning_rate": 1.9830723310299712e-06, "loss": 0.01, "step": 4850 }, { "epoch": 0.03974322279919859, "grad_norm": 0.5003216862678528, "learning_rate": 1.9871611399599297e-06, "loss": 0.0131, "step": 4860 }, { "epoch": 0.03982499897779777, "grad_norm": 1.0306885242462158, "learning_rate": 1.9912499488898886e-06, "loss": 0.0167, "step": 4870 }, { "epoch": 0.03990677515639694, "grad_norm": 0.6028053760528564, "learning_rate": 1.995338757819847e-06, "loss": 0.0145, "step": 4880 }, { "epoch": 0.03998855133499612, "grad_norm": 1.185516357421875, "learning_rate": 1.999427566749806e-06, "loss": 0.0109, "step": 4890 }, { "epoch": 0.04007032751359529, "grad_norm": 1.012634038925171, "learning_rate": 2.0035163756797644e-06, "loss": 0.0109, "step": 4900 }, { "epoch": 0.04015210369219446, "grad_norm": 0.6581801772117615, "learning_rate": 2.0076051846097233e-06, "loss": 0.0111, "step": 4910 }, { "epoch": 0.04023387987079364, "grad_norm": 0.3307479918003082, "learning_rate": 2.0116939935396822e-06, "loss": 0.009, "step": 4920 }, { "epoch": 0.040315656049392815, "grad_norm": 0.5561138987541199, "learning_rate": 2.0157828024696407e-06, "loss": 0.0128, "step": 4930 }, { "epoch": 0.04039743222799198, "grad_norm": 0.2793656885623932, "learning_rate": 2.0198716113995996e-06, "loss": 0.0113, "step": 4940 }, { "epoch": 0.04047920840659116, "grad_norm": 0.4720984697341919, "learning_rate": 2.023960420329558e-06, "loss": 0.0076, "step": 4950 }, { "epoch": 0.040560984585190335, "grad_norm": 0.6401547789573669, "learning_rate": 2.028049229259517e-06, "loss": 0.0112, "step": 4960 }, { "epoch": 0.04064276076378951, "grad_norm": 0.4821650981903076, "learning_rate": 2.0321380381894755e-06, "loss": 0.0109, "step": 4970 }, { "epoch": 0.04072453694238868, "grad_norm": 0.7943426966667175, "learning_rate": 2.0362268471194344e-06, "loss": 0.0089, "step": 4980 }, { "epoch": 0.040806313120987855, "grad_norm": 0.3268415331840515, "learning_rate": 2.0403156560493933e-06, "loss": 0.0103, "step": 4990 }, { "epoch": 0.04088808929958703, "grad_norm": 0.2765938937664032, "learning_rate": 2.0444044649793517e-06, "loss": 0.0117, "step": 5000 }, { "epoch": 0.04096986547818621, "grad_norm": 0.5612201690673828, "learning_rate": 2.0484932739093106e-06, "loss": 0.0089, "step": 5010 }, { "epoch": 0.041051641656785376, "grad_norm": 0.6689507365226746, "learning_rate": 2.052582082839269e-06, "loss": 0.0114, "step": 5020 }, { "epoch": 0.04113341783538455, "grad_norm": 0.30818042159080505, "learning_rate": 2.0566708917692276e-06, "loss": 0.0115, "step": 5030 }, { "epoch": 0.04121519401398373, "grad_norm": 0.7414902448654175, "learning_rate": 2.0607597006991865e-06, "loss": 0.0144, "step": 5040 }, { "epoch": 0.0412969701925829, "grad_norm": 0.5182631015777588, "learning_rate": 2.064848509629145e-06, "loss": 0.0088, "step": 5050 }, { "epoch": 0.04137874637118207, "grad_norm": 0.45450520515441895, "learning_rate": 2.068937318559104e-06, "loss": 0.0108, "step": 5060 }, { "epoch": 0.04146052254978125, "grad_norm": 0.27424412965774536, "learning_rate": 2.0730261274890627e-06, "loss": 0.0103, "step": 5070 }, { "epoch": 0.041542298728380424, "grad_norm": 0.6853080987930298, "learning_rate": 2.0771149364190212e-06, "loss": 0.0065, "step": 5080 }, { "epoch": 0.0416240749069796, "grad_norm": 0.3488280773162842, "learning_rate": 2.08120374534898e-06, "loss": 0.0137, "step": 5090 }, { "epoch": 0.04170585108557877, "grad_norm": 1.020790457725525, "learning_rate": 2.0852925542789386e-06, "loss": 0.0163, "step": 5100 }, { "epoch": 0.041787627264177944, "grad_norm": 0.5051382184028625, "learning_rate": 2.0893813632088975e-06, "loss": 0.0133, "step": 5110 }, { "epoch": 0.04186940344277712, "grad_norm": 0.5222627520561218, "learning_rate": 2.093470172138856e-06, "loss": 0.0147, "step": 5120 }, { "epoch": 0.041951179621376296, "grad_norm": 0.36509257555007935, "learning_rate": 2.097558981068815e-06, "loss": 0.0121, "step": 5130 }, { "epoch": 0.042032955799975465, "grad_norm": 0.565642237663269, "learning_rate": 2.1016477899987738e-06, "loss": 0.0112, "step": 5140 }, { "epoch": 0.04211473197857464, "grad_norm": 0.7105297446250916, "learning_rate": 2.1057365989287322e-06, "loss": 0.0143, "step": 5150 }, { "epoch": 0.042196508157173816, "grad_norm": 0.8569501638412476, "learning_rate": 2.109825407858691e-06, "loss": 0.0109, "step": 5160 }, { "epoch": 0.04227828433577299, "grad_norm": 0.5921562910079956, "learning_rate": 2.1139142167886496e-06, "loss": 0.0106, "step": 5170 }, { "epoch": 0.04236006051437216, "grad_norm": 0.6121943593025208, "learning_rate": 2.118003025718608e-06, "loss": 0.0086, "step": 5180 }, { "epoch": 0.04244183669297134, "grad_norm": 0.6652101874351501, "learning_rate": 2.122091834648567e-06, "loss": 0.0137, "step": 5190 }, { "epoch": 0.04252361287157051, "grad_norm": 0.5299475789070129, "learning_rate": 2.1261806435785255e-06, "loss": 0.0078, "step": 5200 }, { "epoch": 0.04260538905016969, "grad_norm": 0.32207491993904114, "learning_rate": 2.1302694525084844e-06, "loss": 0.0111, "step": 5210 }, { "epoch": 0.04268716522876886, "grad_norm": 0.46233540773391724, "learning_rate": 2.1343582614384433e-06, "loss": 0.0108, "step": 5220 }, { "epoch": 0.04276894140736803, "grad_norm": 0.3714620769023895, "learning_rate": 2.1384470703684017e-06, "loss": 0.0104, "step": 5230 }, { "epoch": 0.04285071758596721, "grad_norm": 0.43780356645584106, "learning_rate": 2.1425358792983606e-06, "loss": 0.0099, "step": 5240 }, { "epoch": 0.042932493764566385, "grad_norm": 0.8709565997123718, "learning_rate": 2.146624688228319e-06, "loss": 0.0109, "step": 5250 }, { "epoch": 0.04301426994316555, "grad_norm": 0.5543777346611023, "learning_rate": 2.150713497158278e-06, "loss": 0.0093, "step": 5260 }, { "epoch": 0.04309604612176473, "grad_norm": 0.5606587529182434, "learning_rate": 2.154802306088237e-06, "loss": 0.0105, "step": 5270 }, { "epoch": 0.043177822300363905, "grad_norm": 0.4445473253726959, "learning_rate": 2.1588911150181954e-06, "loss": 0.01, "step": 5280 }, { "epoch": 0.04325959847896308, "grad_norm": 0.6214092373847961, "learning_rate": 2.1629799239481543e-06, "loss": 0.0112, "step": 5290 }, { "epoch": 0.04334137465756225, "grad_norm": 0.5576469898223877, "learning_rate": 2.1670687328781128e-06, "loss": 0.0109, "step": 5300 }, { "epoch": 0.043423150836161425, "grad_norm": 0.42635807394981384, "learning_rate": 2.1711575418080716e-06, "loss": 0.0108, "step": 5310 }, { "epoch": 0.0435049270147606, "grad_norm": 0.4954546093940735, "learning_rate": 2.17524635073803e-06, "loss": 0.0084, "step": 5320 }, { "epoch": 0.04358670319335978, "grad_norm": 0.6241279244422913, "learning_rate": 2.179335159667989e-06, "loss": 0.0119, "step": 5330 }, { "epoch": 0.043668479371958946, "grad_norm": 0.6994395852088928, "learning_rate": 2.1834239685979475e-06, "loss": 0.0103, "step": 5340 }, { "epoch": 0.04375025555055812, "grad_norm": 0.26346078515052795, "learning_rate": 2.1875127775279064e-06, "loss": 0.0114, "step": 5350 }, { "epoch": 0.0438320317291573, "grad_norm": 0.5773823261260986, "learning_rate": 2.191601586457865e-06, "loss": 0.008, "step": 5360 }, { "epoch": 0.04391380790775647, "grad_norm": 0.4464243948459625, "learning_rate": 2.1956903953878238e-06, "loss": 0.0094, "step": 5370 }, { "epoch": 0.04399558408635564, "grad_norm": 0.3336261808872223, "learning_rate": 2.1997792043177822e-06, "loss": 0.0081, "step": 5380 }, { "epoch": 0.04407736026495482, "grad_norm": 0.2952643036842346, "learning_rate": 2.203868013247741e-06, "loss": 0.0091, "step": 5390 }, { "epoch": 0.044159136443553994, "grad_norm": 0.5185619592666626, "learning_rate": 2.2079568221776996e-06, "loss": 0.0096, "step": 5400 }, { "epoch": 0.04424091262215317, "grad_norm": 0.410940557718277, "learning_rate": 2.2120456311076585e-06, "loss": 0.0116, "step": 5410 }, { "epoch": 0.04432268880075234, "grad_norm": 0.7112141847610474, "learning_rate": 2.2161344400376174e-06, "loss": 0.0121, "step": 5420 }, { "epoch": 0.044404464979351514, "grad_norm": 0.6917464733123779, "learning_rate": 2.220223248967576e-06, "loss": 0.0096, "step": 5430 }, { "epoch": 0.04448624115795069, "grad_norm": 1.2700241804122925, "learning_rate": 2.2243120578975348e-06, "loss": 0.0087, "step": 5440 }, { "epoch": 0.044568017336549866, "grad_norm": 0.3247332274913788, "learning_rate": 2.2284008668274933e-06, "loss": 0.0086, "step": 5450 }, { "epoch": 0.044649793515149035, "grad_norm": 0.25401604175567627, "learning_rate": 2.232489675757452e-06, "loss": 0.0103, "step": 5460 }, { "epoch": 0.04473156969374821, "grad_norm": 0.7275440096855164, "learning_rate": 2.2365784846874106e-06, "loss": 0.009, "step": 5470 }, { "epoch": 0.044813345872347386, "grad_norm": 0.6449062824249268, "learning_rate": 2.2406672936173695e-06, "loss": 0.0092, "step": 5480 }, { "epoch": 0.04489512205094656, "grad_norm": 0.6834784746170044, "learning_rate": 2.244756102547328e-06, "loss": 0.0083, "step": 5490 }, { "epoch": 0.04497689822954573, "grad_norm": 0.468504399061203, "learning_rate": 2.248844911477287e-06, "loss": 0.0102, "step": 5500 }, { "epoch": 0.04505867440814491, "grad_norm": 0.5805609226226807, "learning_rate": 2.2529337204072454e-06, "loss": 0.0105, "step": 5510 }, { "epoch": 0.04514045058674408, "grad_norm": 0.463903546333313, "learning_rate": 2.2570225293372043e-06, "loss": 0.0073, "step": 5520 }, { "epoch": 0.04522222676534326, "grad_norm": 0.32623010873794556, "learning_rate": 2.2611113382671628e-06, "loss": 0.0102, "step": 5530 }, { "epoch": 0.04530400294394243, "grad_norm": 0.7982626557350159, "learning_rate": 2.2652001471971217e-06, "loss": 0.0069, "step": 5540 }, { "epoch": 0.0453857791225416, "grad_norm": 0.6010729074478149, "learning_rate": 2.26928895612708e-06, "loss": 0.0097, "step": 5550 }, { "epoch": 0.04546755530114078, "grad_norm": 0.967785120010376, "learning_rate": 2.273377765057039e-06, "loss": 0.0102, "step": 5560 }, { "epoch": 0.045549331479739955, "grad_norm": 0.33727172017097473, "learning_rate": 2.277466573986998e-06, "loss": 0.017, "step": 5570 }, { "epoch": 0.04563110765833912, "grad_norm": 0.5835988521575928, "learning_rate": 2.2815553829169564e-06, "loss": 0.0088, "step": 5580 }, { "epoch": 0.0457128838369383, "grad_norm": 0.2702619433403015, "learning_rate": 2.2856441918469153e-06, "loss": 0.0092, "step": 5590 }, { "epoch": 0.045794660015537475, "grad_norm": 0.39039990305900574, "learning_rate": 2.2897330007768738e-06, "loss": 0.0079, "step": 5600 }, { "epoch": 0.04587643619413665, "grad_norm": 0.7185168266296387, "learning_rate": 2.2938218097068327e-06, "loss": 0.0087, "step": 5610 }, { "epoch": 0.04595821237273582, "grad_norm": 0.1481962949037552, "learning_rate": 2.2979106186367916e-06, "loss": 0.0122, "step": 5620 }, { "epoch": 0.046039988551334995, "grad_norm": 0.36130425333976746, "learning_rate": 2.30199942756675e-06, "loss": 0.0082, "step": 5630 }, { "epoch": 0.04612176472993417, "grad_norm": 0.1933794617652893, "learning_rate": 2.306088236496709e-06, "loss": 0.0115, "step": 5640 }, { "epoch": 0.04620354090853335, "grad_norm": 0.3672092854976654, "learning_rate": 2.3101770454266674e-06, "loss": 0.0095, "step": 5650 }, { "epoch": 0.046285317087132516, "grad_norm": 0.44979819655418396, "learning_rate": 2.314265854356626e-06, "loss": 0.0078, "step": 5660 }, { "epoch": 0.04636709326573169, "grad_norm": 0.3968663811683655, "learning_rate": 2.3183546632865848e-06, "loss": 0.0119, "step": 5670 }, { "epoch": 0.04644886944433087, "grad_norm": 0.3982337713241577, "learning_rate": 2.3224434722165433e-06, "loss": 0.0088, "step": 5680 }, { "epoch": 0.04653064562293004, "grad_norm": 0.38680607080459595, "learning_rate": 2.326532281146502e-06, "loss": 0.0094, "step": 5690 }, { "epoch": 0.04661242180152921, "grad_norm": 0.30489885807037354, "learning_rate": 2.330621090076461e-06, "loss": 0.0105, "step": 5700 }, { "epoch": 0.04669419798012839, "grad_norm": 0.3467826247215271, "learning_rate": 2.3347098990064195e-06, "loss": 0.0084, "step": 5710 }, { "epoch": 0.046775974158727564, "grad_norm": 0.3731320798397064, "learning_rate": 2.3387987079363784e-06, "loss": 0.0114, "step": 5720 }, { "epoch": 0.04685775033732674, "grad_norm": 0.5634636282920837, "learning_rate": 2.342887516866337e-06, "loss": 0.0112, "step": 5730 }, { "epoch": 0.04693952651592591, "grad_norm": 1.0205252170562744, "learning_rate": 2.346976325796296e-06, "loss": 0.0142, "step": 5740 }, { "epoch": 0.047021302694525084, "grad_norm": 0.6698241829872131, "learning_rate": 2.3510651347262543e-06, "loss": 0.0075, "step": 5750 }, { "epoch": 0.04710307887312426, "grad_norm": 0.6831504106521606, "learning_rate": 2.355153943656213e-06, "loss": 0.0106, "step": 5760 }, { "epoch": 0.047184855051723436, "grad_norm": 0.7955835461616516, "learning_rate": 2.359242752586172e-06, "loss": 0.0091, "step": 5770 }, { "epoch": 0.047266631230322605, "grad_norm": 0.5376361608505249, "learning_rate": 2.3633315615161306e-06, "loss": 0.0078, "step": 5780 }, { "epoch": 0.04734840740892178, "grad_norm": 0.8883642554283142, "learning_rate": 2.3674203704460894e-06, "loss": 0.009, "step": 5790 }, { "epoch": 0.047430183587520956, "grad_norm": 0.5348543524742126, "learning_rate": 2.371509179376048e-06, "loss": 0.0072, "step": 5800 }, { "epoch": 0.04751195976612013, "grad_norm": 0.8444530367851257, "learning_rate": 2.3755979883060064e-06, "loss": 0.0117, "step": 5810 }, { "epoch": 0.0475937359447193, "grad_norm": 0.2698040306568146, "learning_rate": 2.3796867972359653e-06, "loss": 0.0082, "step": 5820 }, { "epoch": 0.04767551212331848, "grad_norm": 0.5120652318000793, "learning_rate": 2.3837756061659238e-06, "loss": 0.0085, "step": 5830 }, { "epoch": 0.04775728830191765, "grad_norm": 0.32402828335762024, "learning_rate": 2.3878644150958827e-06, "loss": 0.0094, "step": 5840 }, { "epoch": 0.04783906448051683, "grad_norm": 0.7247368693351746, "learning_rate": 2.3919532240258416e-06, "loss": 0.0114, "step": 5850 }, { "epoch": 0.047920840659116, "grad_norm": 0.31977081298828125, "learning_rate": 2.3960420329558e-06, "loss": 0.0093, "step": 5860 }, { "epoch": 0.04800261683771517, "grad_norm": 0.37693729996681213, "learning_rate": 2.400130841885759e-06, "loss": 0.0124, "step": 5870 }, { "epoch": 0.04808439301631435, "grad_norm": 0.1337776780128479, "learning_rate": 2.4042196508157174e-06, "loss": 0.007, "step": 5880 }, { "epoch": 0.048166169194913525, "grad_norm": 0.2491583377122879, "learning_rate": 2.4083084597456763e-06, "loss": 0.0106, "step": 5890 }, { "epoch": 0.04824794537351269, "grad_norm": 0.29856422543525696, "learning_rate": 2.4123972686756352e-06, "loss": 0.0094, "step": 5900 }, { "epoch": 0.04832972155211187, "grad_norm": 0.7346547245979309, "learning_rate": 2.4164860776055937e-06, "loss": 0.0105, "step": 5910 }, { "epoch": 0.048411497730711045, "grad_norm": 0.2014632523059845, "learning_rate": 2.4205748865355526e-06, "loss": 0.0102, "step": 5920 }, { "epoch": 0.04849327390931022, "grad_norm": 0.5461695790290833, "learning_rate": 2.424663695465511e-06, "loss": 0.0096, "step": 5930 }, { "epoch": 0.04857505008790939, "grad_norm": 0.3115442395210266, "learning_rate": 2.42875250439547e-06, "loss": 0.0089, "step": 5940 }, { "epoch": 0.048656826266508565, "grad_norm": 0.5467522740364075, "learning_rate": 2.4328413133254284e-06, "loss": 0.014, "step": 5950 }, { "epoch": 0.04873860244510774, "grad_norm": 0.7176483273506165, "learning_rate": 2.4369301222553873e-06, "loss": 0.0087, "step": 5960 }, { "epoch": 0.04882037862370692, "grad_norm": 0.3998804986476898, "learning_rate": 2.441018931185346e-06, "loss": 0.0095, "step": 5970 }, { "epoch": 0.048902154802306086, "grad_norm": 0.3213229179382324, "learning_rate": 2.4451077401153047e-06, "loss": 0.009, "step": 5980 }, { "epoch": 0.04898393098090526, "grad_norm": 0.2595870792865753, "learning_rate": 2.449196549045263e-06, "loss": 0.0057, "step": 5990 }, { "epoch": 0.04906570715950444, "grad_norm": 0.39242804050445557, "learning_rate": 2.453285357975222e-06, "loss": 0.0089, "step": 6000 }, { "epoch": 0.04914748333810361, "grad_norm": 0.4261815845966339, "learning_rate": 2.4573741669051806e-06, "loss": 0.0066, "step": 6010 }, { "epoch": 0.04922925951670278, "grad_norm": 0.2756611704826355, "learning_rate": 2.4614629758351395e-06, "loss": 0.0098, "step": 6020 }, { "epoch": 0.04931103569530196, "grad_norm": 0.633546769618988, "learning_rate": 2.465551784765098e-06, "loss": 0.0086, "step": 6030 }, { "epoch": 0.049392811873901134, "grad_norm": 0.5894165635108948, "learning_rate": 2.469640593695057e-06, "loss": 0.0108, "step": 6040 }, { "epoch": 0.04947458805250031, "grad_norm": 0.23623603582382202, "learning_rate": 2.4737294026250157e-06, "loss": 0.0087, "step": 6050 }, { "epoch": 0.04955636423109948, "grad_norm": 0.4837205708026886, "learning_rate": 2.477818211554974e-06, "loss": 0.0061, "step": 6060 }, { "epoch": 0.049638140409698654, "grad_norm": 0.8155025839805603, "learning_rate": 2.481907020484933e-06, "loss": 0.009, "step": 6070 }, { "epoch": 0.04971991658829783, "grad_norm": 0.35893261432647705, "learning_rate": 2.4859958294148916e-06, "loss": 0.0061, "step": 6080 }, { "epoch": 0.049801692766897006, "grad_norm": 0.36244210600852966, "learning_rate": 2.4900846383448505e-06, "loss": 0.0104, "step": 6090 }, { "epoch": 0.049883468945496175, "grad_norm": 0.43849048018455505, "learning_rate": 2.494173447274809e-06, "loss": 0.0106, "step": 6100 }, { "epoch": 0.04996524512409535, "grad_norm": 0.34247490763664246, "learning_rate": 2.498262256204768e-06, "loss": 0.0088, "step": 6110 }, { "epoch": 0.050047021302694526, "grad_norm": 0.4423629343509674, "learning_rate": 2.5023510651347267e-06, "loss": 0.0091, "step": 6120 }, { "epoch": 0.0501287974812937, "grad_norm": 0.37672853469848633, "learning_rate": 2.5064398740646852e-06, "loss": 0.0088, "step": 6130 }, { "epoch": 0.05021057365989287, "grad_norm": 0.8803871273994446, "learning_rate": 2.510528682994644e-06, "loss": 0.0096, "step": 6140 }, { "epoch": 0.05029234983849205, "grad_norm": 0.1945023089647293, "learning_rate": 2.5146174919246026e-06, "loss": 0.0076, "step": 6150 }, { "epoch": 0.05037412601709122, "grad_norm": 0.7034628987312317, "learning_rate": 2.5187063008545615e-06, "loss": 0.0101, "step": 6160 }, { "epoch": 0.0504559021956904, "grad_norm": 0.3625240921974182, "learning_rate": 2.52279510978452e-06, "loss": 0.0072, "step": 6170 }, { "epoch": 0.05053767837428957, "grad_norm": 0.34521573781967163, "learning_rate": 2.526883918714479e-06, "loss": 0.0119, "step": 6180 }, { "epoch": 0.05061945455288874, "grad_norm": 0.5287312269210815, "learning_rate": 2.5309727276444373e-06, "loss": 0.0096, "step": 6190 }, { "epoch": 0.05070123073148792, "grad_norm": 0.22815854847431183, "learning_rate": 2.5350615365743962e-06, "loss": 0.0073, "step": 6200 }, { "epoch": 0.050783006910087095, "grad_norm": 0.42801740765571594, "learning_rate": 2.5391503455043547e-06, "loss": 0.0131, "step": 6210 }, { "epoch": 0.05086478308868626, "grad_norm": 0.5187172293663025, "learning_rate": 2.543239154434313e-06, "loss": 0.0079, "step": 6220 }, { "epoch": 0.05094655926728544, "grad_norm": 0.40832510590553284, "learning_rate": 2.547327963364272e-06, "loss": 0.0124, "step": 6230 }, { "epoch": 0.051028335445884615, "grad_norm": 0.5107972621917725, "learning_rate": 2.5514167722942306e-06, "loss": 0.0102, "step": 6240 }, { "epoch": 0.05111011162448379, "grad_norm": 0.7692256569862366, "learning_rate": 2.55550558122419e-06, "loss": 0.0126, "step": 6250 }, { "epoch": 0.05119188780308296, "grad_norm": 0.5329268574714661, "learning_rate": 2.559594390154148e-06, "loss": 0.01, "step": 6260 }, { "epoch": 0.051273663981682135, "grad_norm": 0.41874822974205017, "learning_rate": 2.5636831990841073e-06, "loss": 0.006, "step": 6270 }, { "epoch": 0.05135544016028131, "grad_norm": 0.23143897950649261, "learning_rate": 2.5677720080140657e-06, "loss": 0.0069, "step": 6280 }, { "epoch": 0.05143721633888049, "grad_norm": 0.2891366183757782, "learning_rate": 2.5718608169440246e-06, "loss": 0.0084, "step": 6290 }, { "epoch": 0.051518992517479656, "grad_norm": 0.47341209650039673, "learning_rate": 2.575949625873983e-06, "loss": 0.0132, "step": 6300 }, { "epoch": 0.05160076869607883, "grad_norm": 0.42190879583358765, "learning_rate": 2.580038434803942e-06, "loss": 0.0083, "step": 6310 }, { "epoch": 0.05168254487467801, "grad_norm": 0.19326333701610565, "learning_rate": 2.5841272437339005e-06, "loss": 0.007, "step": 6320 }, { "epoch": 0.05176432105327718, "grad_norm": 0.21987292170524597, "learning_rate": 2.5882160526638594e-06, "loss": 0.0092, "step": 6330 }, { "epoch": 0.05184609723187635, "grad_norm": 0.4455094039440155, "learning_rate": 2.592304861593818e-06, "loss": 0.0103, "step": 6340 }, { "epoch": 0.05192787341047553, "grad_norm": 0.6536917090415955, "learning_rate": 2.5963936705237767e-06, "loss": 0.0083, "step": 6350 }, { "epoch": 0.052009649589074704, "grad_norm": 0.4780283570289612, "learning_rate": 2.6004824794537352e-06, "loss": 0.01, "step": 6360 }, { "epoch": 0.05209142576767388, "grad_norm": 0.38714879751205444, "learning_rate": 2.6045712883836937e-06, "loss": 0.0105, "step": 6370 }, { "epoch": 0.05217320194627305, "grad_norm": 0.4376566708087921, "learning_rate": 2.6086600973136526e-06, "loss": 0.0121, "step": 6380 }, { "epoch": 0.052254978124872224, "grad_norm": 0.7861015796661377, "learning_rate": 2.612748906243611e-06, "loss": 0.0119, "step": 6390 }, { "epoch": 0.0523367543034714, "grad_norm": 0.696694552898407, "learning_rate": 2.6168377151735704e-06, "loss": 0.0099, "step": 6400 }, { "epoch": 0.052418530482070576, "grad_norm": 0.485705703496933, "learning_rate": 2.620926524103529e-06, "loss": 0.0086, "step": 6410 }, { "epoch": 0.052500306660669745, "grad_norm": 0.26931214332580566, "learning_rate": 2.6250153330334878e-06, "loss": 0.0072, "step": 6420 }, { "epoch": 0.05258208283926892, "grad_norm": 0.9748420715332031, "learning_rate": 2.6291041419634462e-06, "loss": 0.0093, "step": 6430 }, { "epoch": 0.052663859017868096, "grad_norm": 0.5134990215301514, "learning_rate": 2.633192950893405e-06, "loss": 0.0067, "step": 6440 }, { "epoch": 0.05274563519646727, "grad_norm": 0.2783730626106262, "learning_rate": 2.6372817598233636e-06, "loss": 0.0076, "step": 6450 }, { "epoch": 0.05282741137506644, "grad_norm": 0.6506968140602112, "learning_rate": 2.6413705687533225e-06, "loss": 0.0101, "step": 6460 }, { "epoch": 0.05290918755366562, "grad_norm": 0.33195382356643677, "learning_rate": 2.645459377683281e-06, "loss": 0.011, "step": 6470 }, { "epoch": 0.05299096373226479, "grad_norm": 0.35806503891944885, "learning_rate": 2.64954818661324e-06, "loss": 0.0096, "step": 6480 }, { "epoch": 0.05307273991086397, "grad_norm": 0.43844369053840637, "learning_rate": 2.6536369955431984e-06, "loss": 0.0089, "step": 6490 }, { "epoch": 0.05315451608946314, "grad_norm": 0.3241475522518158, "learning_rate": 2.6577258044731573e-06, "loss": 0.0105, "step": 6500 }, { "epoch": 0.05323629226806231, "grad_norm": 0.37202122807502747, "learning_rate": 2.6618146134031157e-06, "loss": 0.0071, "step": 6510 }, { "epoch": 0.05331806844666149, "grad_norm": 0.721437394618988, "learning_rate": 2.665903422333075e-06, "loss": 0.0076, "step": 6520 }, { "epoch": 0.053399844625260665, "grad_norm": 0.5464370846748352, "learning_rate": 2.669992231263033e-06, "loss": 0.0073, "step": 6530 }, { "epoch": 0.05348162080385983, "grad_norm": 0.37740015983581543, "learning_rate": 2.6740810401929916e-06, "loss": 0.0092, "step": 6540 }, { "epoch": 0.05356339698245901, "grad_norm": 0.31767183542251587, "learning_rate": 2.678169849122951e-06, "loss": 0.0097, "step": 6550 }, { "epoch": 0.053645173161058185, "grad_norm": 0.12536996603012085, "learning_rate": 2.6822586580529094e-06, "loss": 0.0103, "step": 6560 }, { "epoch": 0.05372694933965736, "grad_norm": 0.391972154378891, "learning_rate": 2.6863474669828683e-06, "loss": 0.0064, "step": 6570 }, { "epoch": 0.05380872551825653, "grad_norm": 0.39502251148223877, "learning_rate": 2.6904362759128267e-06, "loss": 0.0103, "step": 6580 }, { "epoch": 0.053890501696855705, "grad_norm": 0.28295785188674927, "learning_rate": 2.6945250848427856e-06, "loss": 0.0071, "step": 6590 }, { "epoch": 0.05397227787545488, "grad_norm": 0.5455918312072754, "learning_rate": 2.698613893772744e-06, "loss": 0.0101, "step": 6600 }, { "epoch": 0.05405405405405406, "grad_norm": 0.4729023873806, "learning_rate": 2.702702702702703e-06, "loss": 0.0095, "step": 6610 }, { "epoch": 0.054135830232653226, "grad_norm": 0.37769538164138794, "learning_rate": 2.7067915116326615e-06, "loss": 0.0085, "step": 6620 }, { "epoch": 0.0542176064112524, "grad_norm": 0.5047798156738281, "learning_rate": 2.7108803205626204e-06, "loss": 0.008, "step": 6630 }, { "epoch": 0.05429938258985158, "grad_norm": 0.46113482117652893, "learning_rate": 2.714969129492579e-06, "loss": 0.0074, "step": 6640 }, { "epoch": 0.05438115876845075, "grad_norm": 0.2947273850440979, "learning_rate": 2.7190579384225378e-06, "loss": 0.0082, "step": 6650 }, { "epoch": 0.05446293494704992, "grad_norm": 0.18630701303482056, "learning_rate": 2.7231467473524962e-06, "loss": 0.0074, "step": 6660 }, { "epoch": 0.0545447111256491, "grad_norm": 0.5059335827827454, "learning_rate": 2.7272355562824556e-06, "loss": 0.0069, "step": 6670 }, { "epoch": 0.054626487304248274, "grad_norm": 0.6512726545333862, "learning_rate": 2.731324365212414e-06, "loss": 0.0086, "step": 6680 }, { "epoch": 0.05470826348284745, "grad_norm": 0.414931982755661, "learning_rate": 2.735413174142372e-06, "loss": 0.0102, "step": 6690 }, { "epoch": 0.05479003966144662, "grad_norm": 0.29168495535850525, "learning_rate": 2.7395019830723314e-06, "loss": 0.0094, "step": 6700 }, { "epoch": 0.054871815840045794, "grad_norm": 0.31788870692253113, "learning_rate": 2.74359079200229e-06, "loss": 0.0073, "step": 6710 }, { "epoch": 0.05495359201864497, "grad_norm": 0.32468169927597046, "learning_rate": 2.7476796009322488e-06, "loss": 0.008, "step": 6720 }, { "epoch": 0.055035368197244146, "grad_norm": 0.8096317052841187, "learning_rate": 2.7517684098622073e-06, "loss": 0.0064, "step": 6730 }, { "epoch": 0.055117144375843315, "grad_norm": 0.41052326560020447, "learning_rate": 2.755857218792166e-06, "loss": 0.0069, "step": 6740 }, { "epoch": 0.05519892055444249, "grad_norm": 0.44185999035835266, "learning_rate": 2.7599460277221246e-06, "loss": 0.0124, "step": 6750 }, { "epoch": 0.055280696733041666, "grad_norm": 0.10696009546518326, "learning_rate": 2.7640348366520835e-06, "loss": 0.0086, "step": 6760 }, { "epoch": 0.05536247291164084, "grad_norm": 0.45813170075416565, "learning_rate": 2.768123645582042e-06, "loss": 0.0086, "step": 6770 }, { "epoch": 0.05544424909024001, "grad_norm": 0.3673521876335144, "learning_rate": 2.772212454512001e-06, "loss": 0.0073, "step": 6780 }, { "epoch": 0.05552602526883919, "grad_norm": 0.20073318481445312, "learning_rate": 2.7763012634419594e-06, "loss": 0.007, "step": 6790 }, { "epoch": 0.05560780144743836, "grad_norm": 0.536151647567749, "learning_rate": 2.7803900723719187e-06, "loss": 0.0085, "step": 6800 }, { "epoch": 0.05568957762603754, "grad_norm": 0.43947750329971313, "learning_rate": 2.7844788813018767e-06, "loss": 0.0094, "step": 6810 }, { "epoch": 0.05577135380463671, "grad_norm": 0.44766151905059814, "learning_rate": 2.788567690231836e-06, "loss": 0.0112, "step": 6820 }, { "epoch": 0.05585312998323588, "grad_norm": 0.4710805416107178, "learning_rate": 2.7926564991617945e-06, "loss": 0.0083, "step": 6830 }, { "epoch": 0.05593490616183506, "grad_norm": 0.30909034609794617, "learning_rate": 2.7967453080917534e-06, "loss": 0.011, "step": 6840 }, { "epoch": 0.056016682340434235, "grad_norm": 0.3210555613040924, "learning_rate": 2.800834117021712e-06, "loss": 0.0115, "step": 6850 }, { "epoch": 0.0560984585190334, "grad_norm": 0.47284042835235596, "learning_rate": 2.8049229259516704e-06, "loss": 0.0099, "step": 6860 }, { "epoch": 0.05618023469763258, "grad_norm": 0.4959762394428253, "learning_rate": 2.8090117348816293e-06, "loss": 0.0096, "step": 6870 }, { "epoch": 0.056262010876231755, "grad_norm": 0.3304935395717621, "learning_rate": 2.8131005438115878e-06, "loss": 0.0077, "step": 6880 }, { "epoch": 0.05634378705483093, "grad_norm": 0.43599432706832886, "learning_rate": 2.8171893527415467e-06, "loss": 0.0087, "step": 6890 }, { "epoch": 0.0564255632334301, "grad_norm": 0.6967390179634094, "learning_rate": 2.821278161671505e-06, "loss": 0.0079, "step": 6900 }, { "epoch": 0.056507339412029275, "grad_norm": 0.34818020462989807, "learning_rate": 2.825366970601464e-06, "loss": 0.0083, "step": 6910 }, { "epoch": 0.05658911559062845, "grad_norm": 0.38428330421447754, "learning_rate": 2.8294557795314225e-06, "loss": 0.0076, "step": 6920 }, { "epoch": 0.05667089176922763, "grad_norm": 0.4542878270149231, "learning_rate": 2.8335445884613814e-06, "loss": 0.0109, "step": 6930 }, { "epoch": 0.056752667947826796, "grad_norm": 0.20611336827278137, "learning_rate": 2.83763339739134e-06, "loss": 0.0064, "step": 6940 }, { "epoch": 0.05683444412642597, "grad_norm": 0.3574419319629669, "learning_rate": 2.841722206321299e-06, "loss": 0.0102, "step": 6950 }, { "epoch": 0.05691622030502515, "grad_norm": 0.49413689970970154, "learning_rate": 2.8458110152512577e-06, "loss": 0.009, "step": 6960 }, { "epoch": 0.05699799648362432, "grad_norm": 0.18027357757091522, "learning_rate": 2.8498998241812166e-06, "loss": 0.0067, "step": 6970 }, { "epoch": 0.05707977266222349, "grad_norm": 0.6209383606910706, "learning_rate": 2.853988633111175e-06, "loss": 0.0084, "step": 6980 }, { "epoch": 0.05716154884082267, "grad_norm": 0.5790071487426758, "learning_rate": 2.858077442041134e-06, "loss": 0.0079, "step": 6990 }, { "epoch": 0.057243325019421844, "grad_norm": 0.4012437164783478, "learning_rate": 2.8621662509710924e-06, "loss": 0.0075, "step": 7000 }, { "epoch": 0.05732510119802102, "grad_norm": 0.3888300061225891, "learning_rate": 2.866255059901051e-06, "loss": 0.006, "step": 7010 }, { "epoch": 0.05740687737662019, "grad_norm": 0.4325159192085266, "learning_rate": 2.87034386883101e-06, "loss": 0.0084, "step": 7020 }, { "epoch": 0.057488653555219364, "grad_norm": 0.5549134612083435, "learning_rate": 2.8744326777609683e-06, "loss": 0.0081, "step": 7030 }, { "epoch": 0.05757042973381854, "grad_norm": 0.46372202038764954, "learning_rate": 2.878521486690927e-06, "loss": 0.0071, "step": 7040 }, { "epoch": 0.057652205912417716, "grad_norm": 0.363163560628891, "learning_rate": 2.8826102956208856e-06, "loss": 0.0083, "step": 7050 }, { "epoch": 0.057733982091016885, "grad_norm": 0.4084949493408203, "learning_rate": 2.8866991045508445e-06, "loss": 0.0088, "step": 7060 }, { "epoch": 0.05781575826961606, "grad_norm": 0.17369338870048523, "learning_rate": 2.890787913480803e-06, "loss": 0.0058, "step": 7070 }, { "epoch": 0.057897534448215236, "grad_norm": 0.5388824343681335, "learning_rate": 2.894876722410762e-06, "loss": 0.0085, "step": 7080 }, { "epoch": 0.05797931062681441, "grad_norm": 0.3341132402420044, "learning_rate": 2.8989655313407204e-06, "loss": 0.0071, "step": 7090 }, { "epoch": 0.05806108680541358, "grad_norm": 0.3266710340976715, "learning_rate": 2.9030543402706797e-06, "loss": 0.0066, "step": 7100 }, { "epoch": 0.05814286298401276, "grad_norm": 0.4573591947555542, "learning_rate": 2.907143149200638e-06, "loss": 0.0113, "step": 7110 }, { "epoch": 0.05822463916261193, "grad_norm": 0.5610418319702148, "learning_rate": 2.911231958130597e-06, "loss": 0.0102, "step": 7120 }, { "epoch": 0.05830641534121111, "grad_norm": 0.30347105860710144, "learning_rate": 2.9153207670605556e-06, "loss": 0.0062, "step": 7130 }, { "epoch": 0.05838819151981028, "grad_norm": 0.43632569909095764, "learning_rate": 2.9194095759905145e-06, "loss": 0.0065, "step": 7140 }, { "epoch": 0.05846996769840945, "grad_norm": 0.5060490369796753, "learning_rate": 2.923498384920473e-06, "loss": 0.0069, "step": 7150 }, { "epoch": 0.05855174387700863, "grad_norm": 0.3052797317504883, "learning_rate": 2.927587193850432e-06, "loss": 0.009, "step": 7160 }, { "epoch": 0.058633520055607805, "grad_norm": 1.7008483409881592, "learning_rate": 2.9316760027803903e-06, "loss": 0.0072, "step": 7170 }, { "epoch": 0.05871529623420697, "grad_norm": 1.165497064590454, "learning_rate": 2.9357648117103488e-06, "loss": 0.0076, "step": 7180 }, { "epoch": 0.05879707241280615, "grad_norm": 1.1416255235671997, "learning_rate": 2.9398536206403077e-06, "loss": 0.0057, "step": 7190 }, { "epoch": 0.058878848591405325, "grad_norm": 0.6544772386550903, "learning_rate": 2.943942429570266e-06, "loss": 0.0074, "step": 7200 }, { "epoch": 0.0589606247700045, "grad_norm": 0.3071068525314331, "learning_rate": 2.948031238500225e-06, "loss": 0.0088, "step": 7210 }, { "epoch": 0.05904240094860367, "grad_norm": 0.3527463972568512, "learning_rate": 2.9521200474301835e-06, "loss": 0.0083, "step": 7220 }, { "epoch": 0.059124177127202845, "grad_norm": 0.24901141226291656, "learning_rate": 2.956208856360143e-06, "loss": 0.0063, "step": 7230 }, { "epoch": 0.05920595330580202, "grad_norm": 0.28327909111976624, "learning_rate": 2.960297665290101e-06, "loss": 0.0056, "step": 7240 }, { "epoch": 0.0592877294844012, "grad_norm": 0.8020561933517456, "learning_rate": 2.9643864742200602e-06, "loss": 0.0086, "step": 7250 }, { "epoch": 0.059369505663000366, "grad_norm": 0.3904261291027069, "learning_rate": 2.9684752831500187e-06, "loss": 0.0058, "step": 7260 }, { "epoch": 0.05945128184159954, "grad_norm": 0.30926957726478577, "learning_rate": 2.9725640920799776e-06, "loss": 0.008, "step": 7270 }, { "epoch": 0.05953305802019872, "grad_norm": 0.35348865389823914, "learning_rate": 2.976652901009936e-06, "loss": 0.0058, "step": 7280 }, { "epoch": 0.05961483419879789, "grad_norm": 0.1757941097021103, "learning_rate": 2.980741709939895e-06, "loss": 0.0056, "step": 7290 }, { "epoch": 0.05969661037739706, "grad_norm": 0.4952080249786377, "learning_rate": 2.9848305188698534e-06, "loss": 0.011, "step": 7300 }, { "epoch": 0.05977838655599624, "grad_norm": 0.43434077501296997, "learning_rate": 2.9889193277998123e-06, "loss": 0.0112, "step": 7310 }, { "epoch": 0.059860162734595414, "grad_norm": 0.502842366695404, "learning_rate": 2.993008136729771e-06, "loss": 0.0082, "step": 7320 }, { "epoch": 0.05994193891319459, "grad_norm": 0.48228830099105835, "learning_rate": 2.9970969456597293e-06, "loss": 0.0123, "step": 7330 }, { "epoch": 0.06002371509179376, "grad_norm": 0.6749780774116516, "learning_rate": 3.001185754589688e-06, "loss": 0.0077, "step": 7340 }, { "epoch": 0.060105491270392934, "grad_norm": 0.2720852494239807, "learning_rate": 3.0052745635196467e-06, "loss": 0.0086, "step": 7350 }, { "epoch": 0.06018726744899211, "grad_norm": 0.4877711534500122, "learning_rate": 3.0093633724496056e-06, "loss": 0.0095, "step": 7360 }, { "epoch": 0.060269043627591286, "grad_norm": 0.2720319330692291, "learning_rate": 3.013452181379564e-06, "loss": 0.0093, "step": 7370 }, { "epoch": 0.060350819806190455, "grad_norm": 0.31180432438850403, "learning_rate": 3.0175409903095234e-06, "loss": 0.01, "step": 7380 }, { "epoch": 0.06043259598478963, "grad_norm": 0.46297842264175415, "learning_rate": 3.021629799239482e-06, "loss": 0.0064, "step": 7390 }, { "epoch": 0.060514372163388806, "grad_norm": 0.6761751770973206, "learning_rate": 3.0257186081694407e-06, "loss": 0.009, "step": 7400 }, { "epoch": 0.06059614834198798, "grad_norm": 0.18391092121601105, "learning_rate": 3.029807417099399e-06, "loss": 0.0079, "step": 7410 }, { "epoch": 0.06067792452058715, "grad_norm": 0.5230231881141663, "learning_rate": 3.033896226029358e-06, "loss": 0.0066, "step": 7420 }, { "epoch": 0.06075970069918633, "grad_norm": 0.30409184098243713, "learning_rate": 3.0379850349593166e-06, "loss": 0.0064, "step": 7430 }, { "epoch": 0.0608414768777855, "grad_norm": 0.5191689729690552, "learning_rate": 3.0420738438892755e-06, "loss": 0.0106, "step": 7440 }, { "epoch": 0.06092325305638468, "grad_norm": 0.5508574843406677, "learning_rate": 3.046162652819234e-06, "loss": 0.009, "step": 7450 }, { "epoch": 0.06100502923498385, "grad_norm": 0.22000062465667725, "learning_rate": 3.050251461749193e-06, "loss": 0.0073, "step": 7460 }, { "epoch": 0.06108680541358302, "grad_norm": 0.3203989565372467, "learning_rate": 3.0543402706791513e-06, "loss": 0.0062, "step": 7470 }, { "epoch": 0.0611685815921822, "grad_norm": 0.3950120806694031, "learning_rate": 3.05842907960911e-06, "loss": 0.0071, "step": 7480 }, { "epoch": 0.061250357770781375, "grad_norm": 0.5276485085487366, "learning_rate": 3.0625178885390687e-06, "loss": 0.0047, "step": 7490 }, { "epoch": 0.06133213394938054, "grad_norm": 0.2949937880039215, "learning_rate": 3.066606697469027e-06, "loss": 0.0082, "step": 7500 }, { "epoch": 0.06141391012797972, "grad_norm": 0.46173223853111267, "learning_rate": 3.070695506398986e-06, "loss": 0.0067, "step": 7510 }, { "epoch": 0.061495686306578895, "grad_norm": 0.4428367614746094, "learning_rate": 3.0747843153289445e-06, "loss": 0.0086, "step": 7520 }, { "epoch": 0.06157746248517807, "grad_norm": 0.546877920627594, "learning_rate": 3.078873124258904e-06, "loss": 0.0094, "step": 7530 }, { "epoch": 0.06165923866377724, "grad_norm": 0.2604999542236328, "learning_rate": 3.0829619331888623e-06, "loss": 0.0072, "step": 7540 }, { "epoch": 0.061741014842376415, "grad_norm": 0.41582193970680237, "learning_rate": 3.0870507421188212e-06, "loss": 0.0061, "step": 7550 }, { "epoch": 0.06182279102097559, "grad_norm": 0.4189705550670624, "learning_rate": 3.0911395510487797e-06, "loss": 0.0105, "step": 7560 }, { "epoch": 0.06190456719957477, "grad_norm": 0.6673094630241394, "learning_rate": 3.0952283599787386e-06, "loss": 0.0094, "step": 7570 }, { "epoch": 0.061986343378173936, "grad_norm": 0.4438168406486511, "learning_rate": 3.099317168908697e-06, "loss": 0.0068, "step": 7580 }, { "epoch": 0.06206811955677311, "grad_norm": 0.36975860595703125, "learning_rate": 3.103405977838656e-06, "loss": 0.0059, "step": 7590 }, { "epoch": 0.06214989573537229, "grad_norm": 0.2618210017681122, "learning_rate": 3.1074947867686145e-06, "loss": 0.0062, "step": 7600 }, { "epoch": 0.06223167191397146, "grad_norm": 0.2813078761100769, "learning_rate": 3.1115835956985734e-06, "loss": 0.0073, "step": 7610 }, { "epoch": 0.06231344809257063, "grad_norm": 0.32427841424942017, "learning_rate": 3.115672404628532e-06, "loss": 0.007, "step": 7620 }, { "epoch": 0.06239522427116981, "grad_norm": 0.18865042924880981, "learning_rate": 3.1197612135584907e-06, "loss": 0.0078, "step": 7630 }, { "epoch": 0.062477000449768984, "grad_norm": 0.09106726944446564, "learning_rate": 3.123850022488449e-06, "loss": 0.0052, "step": 7640 }, { "epoch": 0.06255877662836816, "grad_norm": 0.4402522146701813, "learning_rate": 3.1279388314184077e-06, "loss": 0.0064, "step": 7650 }, { "epoch": 0.06264055280696733, "grad_norm": 0.5164087414741516, "learning_rate": 3.132027640348367e-06, "loss": 0.0072, "step": 7660 }, { "epoch": 0.06272232898556651, "grad_norm": 0.28929027915000916, "learning_rate": 3.136116449278325e-06, "loss": 0.0071, "step": 7670 }, { "epoch": 0.06280410516416568, "grad_norm": 0.3441929817199707, "learning_rate": 3.1402052582082844e-06, "loss": 0.0056, "step": 7680 }, { "epoch": 0.06288588134276485, "grad_norm": 0.45967450737953186, "learning_rate": 3.144294067138243e-06, "loss": 0.0069, "step": 7690 }, { "epoch": 0.06296765752136403, "grad_norm": 0.37486031651496887, "learning_rate": 3.1483828760682018e-06, "loss": 0.0058, "step": 7700 }, { "epoch": 0.0630494336999632, "grad_norm": 0.17313653230667114, "learning_rate": 3.1524716849981602e-06, "loss": 0.0065, "step": 7710 }, { "epoch": 0.06313120987856237, "grad_norm": 0.26948627829551697, "learning_rate": 3.156560493928119e-06, "loss": 0.0087, "step": 7720 }, { "epoch": 0.06321298605716155, "grad_norm": 0.4362739622592926, "learning_rate": 3.1606493028580776e-06, "loss": 0.0051, "step": 7730 }, { "epoch": 0.06329476223576072, "grad_norm": 0.39098379015922546, "learning_rate": 3.1647381117880365e-06, "loss": 0.0083, "step": 7740 }, { "epoch": 0.0633765384143599, "grad_norm": 0.40326863527297974, "learning_rate": 3.168826920717995e-06, "loss": 0.0064, "step": 7750 }, { "epoch": 0.06345831459295907, "grad_norm": 0.2515845000743866, "learning_rate": 3.172915729647954e-06, "loss": 0.0046, "step": 7760 }, { "epoch": 0.06354009077155824, "grad_norm": 0.37105268239974976, "learning_rate": 3.1770045385779123e-06, "loss": 0.0071, "step": 7770 }, { "epoch": 0.06362186695015742, "grad_norm": 0.3093467652797699, "learning_rate": 3.1810933475078717e-06, "loss": 0.0058, "step": 7780 }, { "epoch": 0.06370364312875659, "grad_norm": 0.08499680459499359, "learning_rate": 3.1851821564378297e-06, "loss": 0.0097, "step": 7790 }, { "epoch": 0.06378541930735576, "grad_norm": 0.22086836397647858, "learning_rate": 3.189270965367788e-06, "loss": 0.0077, "step": 7800 }, { "epoch": 0.06386719548595494, "grad_norm": 0.3603876531124115, "learning_rate": 3.1933597742977475e-06, "loss": 0.0086, "step": 7810 }, { "epoch": 0.06394897166455411, "grad_norm": 0.33825188875198364, "learning_rate": 3.197448583227706e-06, "loss": 0.0086, "step": 7820 }, { "epoch": 0.0640307478431533, "grad_norm": 0.6677052974700928, "learning_rate": 3.201537392157665e-06, "loss": 0.0088, "step": 7830 }, { "epoch": 0.06411252402175247, "grad_norm": 0.40389710664749146, "learning_rate": 3.2056262010876234e-06, "loss": 0.008, "step": 7840 }, { "epoch": 0.06419430020035163, "grad_norm": 0.27625641226768494, "learning_rate": 3.2097150100175823e-06, "loss": 0.0079, "step": 7850 }, { "epoch": 0.06427607637895082, "grad_norm": 0.24675193428993225, "learning_rate": 3.2138038189475407e-06, "loss": 0.0095, "step": 7860 }, { "epoch": 0.06435785255754999, "grad_norm": 0.18372662365436554, "learning_rate": 3.2178926278774996e-06, "loss": 0.0051, "step": 7870 }, { "epoch": 0.06443962873614915, "grad_norm": 0.14238952100276947, "learning_rate": 3.221981436807458e-06, "loss": 0.0095, "step": 7880 }, { "epoch": 0.06452140491474834, "grad_norm": 0.37580201029777527, "learning_rate": 3.226070245737417e-06, "loss": 0.0094, "step": 7890 }, { "epoch": 0.0646031810933475, "grad_norm": 0.4543515145778656, "learning_rate": 3.2301590546673755e-06, "loss": 0.0091, "step": 7900 }, { "epoch": 0.06468495727194669, "grad_norm": 0.3509499132633209, "learning_rate": 3.2342478635973344e-06, "loss": 0.0072, "step": 7910 }, { "epoch": 0.06476673345054586, "grad_norm": 0.38209235668182373, "learning_rate": 3.238336672527293e-06, "loss": 0.0073, "step": 7920 }, { "epoch": 0.06484850962914503, "grad_norm": 0.4726744294166565, "learning_rate": 3.242425481457252e-06, "loss": 0.0074, "step": 7930 }, { "epoch": 0.06493028580774421, "grad_norm": 0.7583603858947754, "learning_rate": 3.2465142903872107e-06, "loss": 0.0088, "step": 7940 }, { "epoch": 0.06501206198634338, "grad_norm": 0.41415172815322876, "learning_rate": 3.2506030993171696e-06, "loss": 0.0081, "step": 7950 }, { "epoch": 0.06509383816494255, "grad_norm": 0.36930006742477417, "learning_rate": 3.254691908247128e-06, "loss": 0.0104, "step": 7960 }, { "epoch": 0.06517561434354173, "grad_norm": 0.475744366645813, "learning_rate": 3.2587807171770865e-06, "loss": 0.0067, "step": 7970 }, { "epoch": 0.0652573905221409, "grad_norm": 0.2080039381980896, "learning_rate": 3.2628695261070454e-06, "loss": 0.0071, "step": 7980 }, { "epoch": 0.06533916670074008, "grad_norm": 0.43300026655197144, "learning_rate": 3.266958335037004e-06, "loss": 0.0069, "step": 7990 }, { "epoch": 0.06542094287933925, "grad_norm": 0.3130123019218445, "learning_rate": 3.2710471439669628e-06, "loss": 0.0098, "step": 8000 }, { "epoch": 0.06550271905793842, "grad_norm": 0.3841807246208191, "learning_rate": 3.2751359528969212e-06, "loss": 0.0063, "step": 8010 }, { "epoch": 0.0655844952365376, "grad_norm": 0.23478813469409943, "learning_rate": 3.27922476182688e-06, "loss": 0.0065, "step": 8020 }, { "epoch": 0.06566627141513677, "grad_norm": 0.2702580690383911, "learning_rate": 3.2833135707568386e-06, "loss": 0.0067, "step": 8030 }, { "epoch": 0.06574804759373594, "grad_norm": 0.45004475116729736, "learning_rate": 3.2874023796867975e-06, "loss": 0.0104, "step": 8040 }, { "epoch": 0.06582982377233512, "grad_norm": 0.4973240792751312, "learning_rate": 3.291491188616756e-06, "loss": 0.0075, "step": 8050 }, { "epoch": 0.06591159995093429, "grad_norm": 0.5740577578544617, "learning_rate": 3.295579997546715e-06, "loss": 0.0087, "step": 8060 }, { "epoch": 0.06599337612953347, "grad_norm": 0.30603641271591187, "learning_rate": 3.2996688064766734e-06, "loss": 0.0054, "step": 8070 }, { "epoch": 0.06607515230813264, "grad_norm": 0.3434327244758606, "learning_rate": 3.3037576154066327e-06, "loss": 0.0079, "step": 8080 }, { "epoch": 0.06615692848673181, "grad_norm": 0.36041420698165894, "learning_rate": 3.307846424336591e-06, "loss": 0.0062, "step": 8090 }, { "epoch": 0.066238704665331, "grad_norm": 0.6578243374824524, "learning_rate": 3.31193523326655e-06, "loss": 0.0121, "step": 8100 }, { "epoch": 0.06632048084393016, "grad_norm": 0.44313809275627136, "learning_rate": 3.3160240421965085e-06, "loss": 0.0087, "step": 8110 }, { "epoch": 0.06640225702252933, "grad_norm": 0.7440814971923828, "learning_rate": 3.320112851126467e-06, "loss": 0.0067, "step": 8120 }, { "epoch": 0.06648403320112851, "grad_norm": 0.40714552998542786, "learning_rate": 3.324201660056426e-06, "loss": 0.0088, "step": 8130 }, { "epoch": 0.06656580937972768, "grad_norm": 0.3434862196445465, "learning_rate": 3.3282904689863844e-06, "loss": 0.0099, "step": 8140 }, { "epoch": 0.06664758555832687, "grad_norm": 0.3192213177680969, "learning_rate": 3.3323792779163433e-06, "loss": 0.009, "step": 8150 }, { "epoch": 0.06672936173692604, "grad_norm": 0.23757612705230713, "learning_rate": 3.3364680868463018e-06, "loss": 0.0088, "step": 8160 }, { "epoch": 0.0668111379155252, "grad_norm": 0.21129213273525238, "learning_rate": 3.3405568957762607e-06, "loss": 0.0082, "step": 8170 }, { "epoch": 0.06689291409412439, "grad_norm": 0.5580547451972961, "learning_rate": 3.344645704706219e-06, "loss": 0.0108, "step": 8180 }, { "epoch": 0.06697469027272356, "grad_norm": 0.4760306775569916, "learning_rate": 3.348734513636178e-06, "loss": 0.0107, "step": 8190 }, { "epoch": 0.06705646645132272, "grad_norm": 0.49628832936286926, "learning_rate": 3.3528233225661365e-06, "loss": 0.0081, "step": 8200 }, { "epoch": 0.06713824262992191, "grad_norm": 0.2689409554004669, "learning_rate": 3.356912131496096e-06, "loss": 0.009, "step": 8210 }, { "epoch": 0.06722001880852108, "grad_norm": 0.5267099142074585, "learning_rate": 3.361000940426054e-06, "loss": 0.0078, "step": 8220 }, { "epoch": 0.06730179498712026, "grad_norm": 0.3202621340751648, "learning_rate": 3.365089749356013e-06, "loss": 0.0081, "step": 8230 }, { "epoch": 0.06738357116571943, "grad_norm": 0.40278369188308716, "learning_rate": 3.3691785582859717e-06, "loss": 0.0082, "step": 8240 }, { "epoch": 0.0674653473443186, "grad_norm": 0.2667425274848938, "learning_rate": 3.3732673672159306e-06, "loss": 0.0055, "step": 8250 }, { "epoch": 0.06754712352291778, "grad_norm": 0.34195414185523987, "learning_rate": 3.377356176145889e-06, "loss": 0.0053, "step": 8260 }, { "epoch": 0.06762889970151695, "grad_norm": 0.374248743057251, "learning_rate": 3.381444985075848e-06, "loss": 0.007, "step": 8270 }, { "epoch": 0.06771067588011612, "grad_norm": 0.33511850237846375, "learning_rate": 3.3855337940058064e-06, "loss": 0.006, "step": 8280 }, { "epoch": 0.0677924520587153, "grad_norm": 0.27361389994621277, "learning_rate": 3.389622602935765e-06, "loss": 0.0061, "step": 8290 }, { "epoch": 0.06787422823731447, "grad_norm": 0.2840229272842407, "learning_rate": 3.393711411865724e-06, "loss": 0.0058, "step": 8300 }, { "epoch": 0.06795600441591365, "grad_norm": 0.33233922719955444, "learning_rate": 3.3978002207956823e-06, "loss": 0.0059, "step": 8310 }, { "epoch": 0.06803778059451282, "grad_norm": 0.4570990204811096, "learning_rate": 3.401889029725641e-06, "loss": 0.0068, "step": 8320 }, { "epoch": 0.06811955677311199, "grad_norm": 0.2846108675003052, "learning_rate": 3.4059778386555996e-06, "loss": 0.0061, "step": 8330 }, { "epoch": 0.06820133295171117, "grad_norm": 0.2802140414714813, "learning_rate": 3.4100666475855585e-06, "loss": 0.0064, "step": 8340 }, { "epoch": 0.06828310913031034, "grad_norm": 0.6089891195297241, "learning_rate": 3.414155456515517e-06, "loss": 0.008, "step": 8350 }, { "epoch": 0.06836488530890951, "grad_norm": 0.3797265589237213, "learning_rate": 3.4182442654454763e-06, "loss": 0.0094, "step": 8360 }, { "epoch": 0.06844666148750869, "grad_norm": 0.22226926684379578, "learning_rate": 3.422333074375435e-06, "loss": 0.0063, "step": 8370 }, { "epoch": 0.06852843766610786, "grad_norm": 1.2454370260238647, "learning_rate": 3.4264218833053937e-06, "loss": 0.0091, "step": 8380 }, { "epoch": 0.06861021384470704, "grad_norm": 0.27699014544487, "learning_rate": 3.430510692235352e-06, "loss": 0.0084, "step": 8390 }, { "epoch": 0.06869199002330621, "grad_norm": 0.42073386907577515, "learning_rate": 3.434599501165311e-06, "loss": 0.007, "step": 8400 }, { "epoch": 0.06877376620190538, "grad_norm": 0.3646271228790283, "learning_rate": 3.4386883100952696e-06, "loss": 0.0059, "step": 8410 }, { "epoch": 0.06885554238050456, "grad_norm": 0.4448525905609131, "learning_rate": 3.4427771190252285e-06, "loss": 0.0054, "step": 8420 }, { "epoch": 0.06893731855910373, "grad_norm": 0.578381359577179, "learning_rate": 3.446865927955187e-06, "loss": 0.0073, "step": 8430 }, { "epoch": 0.0690190947377029, "grad_norm": 0.2098456174135208, "learning_rate": 3.4509547368851454e-06, "loss": 0.0071, "step": 8440 }, { "epoch": 0.06910087091630208, "grad_norm": 0.3506998121738434, "learning_rate": 3.4550435458151043e-06, "loss": 0.0067, "step": 8450 }, { "epoch": 0.06918264709490125, "grad_norm": 0.17126134037971497, "learning_rate": 3.4591323547450628e-06, "loss": 0.0062, "step": 8460 }, { "epoch": 0.06926442327350044, "grad_norm": 0.7571012377738953, "learning_rate": 3.4632211636750217e-06, "loss": 0.0074, "step": 8470 }, { "epoch": 0.0693461994520996, "grad_norm": 0.5076506733894348, "learning_rate": 3.46730997260498e-06, "loss": 0.007, "step": 8480 }, { "epoch": 0.06942797563069877, "grad_norm": 0.5406510829925537, "learning_rate": 3.471398781534939e-06, "loss": 0.0089, "step": 8490 }, { "epoch": 0.06950975180929796, "grad_norm": 0.2018844187259674, "learning_rate": 3.4754875904648975e-06, "loss": 0.0079, "step": 8500 }, { "epoch": 0.06959152798789713, "grad_norm": 0.38283655047416687, "learning_rate": 3.479576399394857e-06, "loss": 0.0122, "step": 8510 }, { "epoch": 0.0696733041664963, "grad_norm": 0.4553569257259369, "learning_rate": 3.4836652083248153e-06, "loss": 0.0075, "step": 8520 }, { "epoch": 0.06975508034509548, "grad_norm": 0.3532845079898834, "learning_rate": 3.4877540172547742e-06, "loss": 0.0061, "step": 8530 }, { "epoch": 0.06983685652369465, "grad_norm": 0.24850064516067505, "learning_rate": 3.4918428261847327e-06, "loss": 0.0058, "step": 8540 }, { "epoch": 0.06991863270229383, "grad_norm": 0.42161446809768677, "learning_rate": 3.4959316351146916e-06, "loss": 0.0068, "step": 8550 }, { "epoch": 0.070000408880893, "grad_norm": 0.5017529726028442, "learning_rate": 3.50002044404465e-06, "loss": 0.0052, "step": 8560 }, { "epoch": 0.07008218505949217, "grad_norm": 0.3295353651046753, "learning_rate": 3.504109252974609e-06, "loss": 0.0067, "step": 8570 }, { "epoch": 0.07016396123809135, "grad_norm": 0.5104038119316101, "learning_rate": 3.5081980619045674e-06, "loss": 0.0072, "step": 8580 }, { "epoch": 0.07024573741669052, "grad_norm": 0.31049469113349915, "learning_rate": 3.5122868708345263e-06, "loss": 0.0055, "step": 8590 }, { "epoch": 0.07032751359528969, "grad_norm": 0.4387781620025635, "learning_rate": 3.516375679764485e-06, "loss": 0.0074, "step": 8600 }, { "epoch": 0.07040928977388887, "grad_norm": 0.27587011456489563, "learning_rate": 3.5204644886944433e-06, "loss": 0.0075, "step": 8610 }, { "epoch": 0.07049106595248804, "grad_norm": 0.6406636238098145, "learning_rate": 3.524553297624402e-06, "loss": 0.0071, "step": 8620 }, { "epoch": 0.07057284213108722, "grad_norm": 0.21419444680213928, "learning_rate": 3.5286421065543607e-06, "loss": 0.0066, "step": 8630 }, { "epoch": 0.07065461830968639, "grad_norm": 0.27413904666900635, "learning_rate": 3.53273091548432e-06, "loss": 0.0064, "step": 8640 }, { "epoch": 0.07073639448828556, "grad_norm": 0.4421122968196869, "learning_rate": 3.536819724414278e-06, "loss": 0.0096, "step": 8650 }, { "epoch": 0.07081817066688474, "grad_norm": 0.24912598729133606, "learning_rate": 3.5409085333442374e-06, "loss": 0.0085, "step": 8660 }, { "epoch": 0.07089994684548391, "grad_norm": 0.38731104135513306, "learning_rate": 3.544997342274196e-06, "loss": 0.005, "step": 8670 }, { "epoch": 0.07098172302408308, "grad_norm": 0.16080866754055023, "learning_rate": 3.5490861512041547e-06, "loss": 0.0103, "step": 8680 }, { "epoch": 0.07106349920268226, "grad_norm": 0.168672576546669, "learning_rate": 3.553174960134113e-06, "loss": 0.0073, "step": 8690 }, { "epoch": 0.07114527538128143, "grad_norm": 0.33472809195518494, "learning_rate": 3.557263769064072e-06, "loss": 0.0082, "step": 8700 }, { "epoch": 0.07122705155988061, "grad_norm": 0.5694189667701721, "learning_rate": 3.5613525779940306e-06, "loss": 0.0039, "step": 8710 }, { "epoch": 0.07130882773847978, "grad_norm": 0.4743298292160034, "learning_rate": 3.5654413869239895e-06, "loss": 0.0084, "step": 8720 }, { "epoch": 0.07139060391707895, "grad_norm": 0.33446916937828064, "learning_rate": 3.569530195853948e-06, "loss": 0.0047, "step": 8730 }, { "epoch": 0.07147238009567813, "grad_norm": 0.2522331178188324, "learning_rate": 3.573619004783907e-06, "loss": 0.008, "step": 8740 }, { "epoch": 0.0715541562742773, "grad_norm": 0.2880892753601074, "learning_rate": 3.5777078137138653e-06, "loss": 0.0077, "step": 8750 }, { "epoch": 0.07163593245287647, "grad_norm": 0.581580400466919, "learning_rate": 3.581796622643824e-06, "loss": 0.0074, "step": 8760 }, { "epoch": 0.07171770863147565, "grad_norm": 0.6333312392234802, "learning_rate": 3.5858854315737827e-06, "loss": 0.0054, "step": 8770 }, { "epoch": 0.07179948481007482, "grad_norm": 0.33174601197242737, "learning_rate": 3.589974240503741e-06, "loss": 0.0092, "step": 8780 }, { "epoch": 0.071881260988674, "grad_norm": 0.5023044347763062, "learning_rate": 3.5940630494337005e-06, "loss": 0.0045, "step": 8790 }, { "epoch": 0.07196303716727318, "grad_norm": 0.25727012753486633, "learning_rate": 3.598151858363659e-06, "loss": 0.0049, "step": 8800 }, { "epoch": 0.07204481334587234, "grad_norm": 0.4763897955417633, "learning_rate": 3.602240667293618e-06, "loss": 0.0076, "step": 8810 }, { "epoch": 0.07212658952447153, "grad_norm": 0.35906583070755005, "learning_rate": 3.6063294762235763e-06, "loss": 0.0062, "step": 8820 }, { "epoch": 0.0722083657030707, "grad_norm": 0.48753055930137634, "learning_rate": 3.6104182851535352e-06, "loss": 0.007, "step": 8830 }, { "epoch": 0.07229014188166986, "grad_norm": 0.31612202525138855, "learning_rate": 3.6145070940834937e-06, "loss": 0.0066, "step": 8840 }, { "epoch": 0.07237191806026905, "grad_norm": 0.12028723955154419, "learning_rate": 3.6185959030134526e-06, "loss": 0.0065, "step": 8850 }, { "epoch": 0.07245369423886822, "grad_norm": 0.34159448742866516, "learning_rate": 3.622684711943411e-06, "loss": 0.006, "step": 8860 }, { "epoch": 0.0725354704174674, "grad_norm": 0.4141539931297302, "learning_rate": 3.62677352087337e-06, "loss": 0.0077, "step": 8870 }, { "epoch": 0.07261724659606657, "grad_norm": 0.40803852677345276, "learning_rate": 3.6308623298033285e-06, "loss": 0.0078, "step": 8880 }, { "epoch": 0.07269902277466574, "grad_norm": 0.44807103276252747, "learning_rate": 3.6349511387332874e-06, "loss": 0.0102, "step": 8890 }, { "epoch": 0.07278079895326492, "grad_norm": 0.18245036900043488, "learning_rate": 3.639039947663246e-06, "loss": 0.008, "step": 8900 }, { "epoch": 0.07286257513186409, "grad_norm": 0.3583453893661499, "learning_rate": 3.643128756593205e-06, "loss": 0.0067, "step": 8910 }, { "epoch": 0.07294435131046326, "grad_norm": 0.20543281733989716, "learning_rate": 3.6472175655231636e-06, "loss": 0.0062, "step": 8920 }, { "epoch": 0.07302612748906244, "grad_norm": 0.3898959457874298, "learning_rate": 3.6513063744531217e-06, "loss": 0.0081, "step": 8930 }, { "epoch": 0.07310790366766161, "grad_norm": 0.2614564895629883, "learning_rate": 3.655395183383081e-06, "loss": 0.0072, "step": 8940 }, { "epoch": 0.07318967984626079, "grad_norm": 0.14598466455936432, "learning_rate": 3.6594839923130395e-06, "loss": 0.0053, "step": 8950 }, { "epoch": 0.07327145602485996, "grad_norm": 0.35553064942359924, "learning_rate": 3.6635728012429984e-06, "loss": 0.0062, "step": 8960 }, { "epoch": 0.07335323220345913, "grad_norm": 0.38124433159828186, "learning_rate": 3.667661610172957e-06, "loss": 0.0066, "step": 8970 }, { "epoch": 0.07343500838205831, "grad_norm": 0.09580385684967041, "learning_rate": 3.6717504191029157e-06, "loss": 0.0091, "step": 8980 }, { "epoch": 0.07351678456065748, "grad_norm": 0.5590872168540955, "learning_rate": 3.6758392280328742e-06, "loss": 0.0082, "step": 8990 }, { "epoch": 0.07359856073925665, "grad_norm": 0.22312355041503906, "learning_rate": 3.679928036962833e-06, "loss": 0.0074, "step": 9000 }, { "epoch": 0.07368033691785583, "grad_norm": 0.7020902037620544, "learning_rate": 3.6840168458927916e-06, "loss": 0.0104, "step": 9010 }, { "epoch": 0.073762113096455, "grad_norm": 0.3490425646305084, "learning_rate": 3.6881056548227505e-06, "loss": 0.0072, "step": 9020 }, { "epoch": 0.07384388927505418, "grad_norm": 0.3989957273006439, "learning_rate": 3.692194463752709e-06, "loss": 0.0108, "step": 9030 }, { "epoch": 0.07392566545365335, "grad_norm": 0.47711673378944397, "learning_rate": 3.696283272682668e-06, "loss": 0.0066, "step": 9040 }, { "epoch": 0.07400744163225252, "grad_norm": 0.5390136241912842, "learning_rate": 3.7003720816126263e-06, "loss": 0.0078, "step": 9050 }, { "epoch": 0.0740892178108517, "grad_norm": 0.45338964462280273, "learning_rate": 3.7044608905425857e-06, "loss": 0.007, "step": 9060 }, { "epoch": 0.07417099398945087, "grad_norm": 0.5489073991775513, "learning_rate": 3.708549699472544e-06, "loss": 0.0089, "step": 9070 }, { "epoch": 0.07425277016805004, "grad_norm": 0.8101269006729126, "learning_rate": 3.712638508402502e-06, "loss": 0.0068, "step": 9080 }, { "epoch": 0.07433454634664922, "grad_norm": 0.3475898206233978, "learning_rate": 3.7167273173324615e-06, "loss": 0.0084, "step": 9090 }, { "epoch": 0.0744163225252484, "grad_norm": 0.22456538677215576, "learning_rate": 3.72081612626242e-06, "loss": 0.0071, "step": 9100 }, { "epoch": 0.07449809870384756, "grad_norm": 0.4106576442718506, "learning_rate": 3.724904935192379e-06, "loss": 0.0063, "step": 9110 }, { "epoch": 0.07457987488244675, "grad_norm": 0.31747907400131226, "learning_rate": 3.7289937441223374e-06, "loss": 0.0057, "step": 9120 }, { "epoch": 0.07466165106104591, "grad_norm": 0.24723702669143677, "learning_rate": 3.7330825530522963e-06, "loss": 0.0087, "step": 9130 }, { "epoch": 0.0747434272396451, "grad_norm": 0.36508312821388245, "learning_rate": 3.7371713619822547e-06, "loss": 0.0048, "step": 9140 }, { "epoch": 0.07482520341824427, "grad_norm": 0.1606563925743103, "learning_rate": 3.7412601709122136e-06, "loss": 0.0045, "step": 9150 }, { "epoch": 0.07490697959684343, "grad_norm": 0.42194700241088867, "learning_rate": 3.745348979842172e-06, "loss": 0.0092, "step": 9160 }, { "epoch": 0.07498875577544262, "grad_norm": 0.3217294216156006, "learning_rate": 3.749437788772131e-06, "loss": 0.0056, "step": 9170 }, { "epoch": 0.07507053195404179, "grad_norm": 0.45729872584342957, "learning_rate": 3.7535265977020895e-06, "loss": 0.008, "step": 9180 }, { "epoch": 0.07515230813264095, "grad_norm": 0.3740416169166565, "learning_rate": 3.757615406632049e-06, "loss": 0.005, "step": 9190 }, { "epoch": 0.07523408431124014, "grad_norm": 0.3507230281829834, "learning_rate": 3.761704215562007e-06, "loss": 0.006, "step": 9200 }, { "epoch": 0.0753158604898393, "grad_norm": 0.351917028427124, "learning_rate": 3.765793024491966e-06, "loss": 0.006, "step": 9210 }, { "epoch": 0.07539763666843849, "grad_norm": 0.24644185602664948, "learning_rate": 3.7698818334219246e-06, "loss": 0.0058, "step": 9220 }, { "epoch": 0.07547941284703766, "grad_norm": 0.0687708780169487, "learning_rate": 3.773970642351883e-06, "loss": 0.0064, "step": 9230 }, { "epoch": 0.07556118902563683, "grad_norm": 0.32236355543136597, "learning_rate": 3.778059451281842e-06, "loss": 0.0074, "step": 9240 }, { "epoch": 0.07564296520423601, "grad_norm": 0.3070613443851471, "learning_rate": 3.7821482602118005e-06, "loss": 0.0065, "step": 9250 }, { "epoch": 0.07572474138283518, "grad_norm": 0.2714635729789734, "learning_rate": 3.7862370691417594e-06, "loss": 0.0087, "step": 9260 }, { "epoch": 0.07580651756143435, "grad_norm": 0.4121258556842804, "learning_rate": 3.790325878071718e-06, "loss": 0.009, "step": 9270 }, { "epoch": 0.07588829374003353, "grad_norm": 0.30750930309295654, "learning_rate": 3.7944146870016768e-06, "loss": 0.0071, "step": 9280 }, { "epoch": 0.0759700699186327, "grad_norm": 0.5951226949691772, "learning_rate": 3.7985034959316352e-06, "loss": 0.0061, "step": 9290 }, { "epoch": 0.07605184609723188, "grad_norm": 0.2760522961616516, "learning_rate": 3.802592304861594e-06, "loss": 0.0102, "step": 9300 }, { "epoch": 0.07613362227583105, "grad_norm": 0.41103675961494446, "learning_rate": 3.8066811137915526e-06, "loss": 0.0071, "step": 9310 }, { "epoch": 0.07621539845443022, "grad_norm": 0.4903619587421417, "learning_rate": 3.8107699227215115e-06, "loss": 0.0101, "step": 9320 }, { "epoch": 0.0762971746330294, "grad_norm": 0.1608354151248932, "learning_rate": 3.81485873165147e-06, "loss": 0.0061, "step": 9330 }, { "epoch": 0.07637895081162857, "grad_norm": 0.4494560658931732, "learning_rate": 3.818947540581429e-06, "loss": 0.0068, "step": 9340 }, { "epoch": 0.07646072699022774, "grad_norm": 0.32435235381126404, "learning_rate": 3.823036349511388e-06, "loss": 0.0059, "step": 9350 }, { "epoch": 0.07654250316882692, "grad_norm": 0.27319347858428955, "learning_rate": 3.827125158441347e-06, "loss": 0.0049, "step": 9360 }, { "epoch": 0.07662427934742609, "grad_norm": 0.24009540677070618, "learning_rate": 3.831213967371305e-06, "loss": 0.0068, "step": 9370 }, { "epoch": 0.07670605552602527, "grad_norm": 0.315594345331192, "learning_rate": 3.835302776301264e-06, "loss": 0.006, "step": 9380 }, { "epoch": 0.07678783170462444, "grad_norm": 0.65758216381073, "learning_rate": 3.8393915852312225e-06, "loss": 0.0055, "step": 9390 }, { "epoch": 0.07686960788322361, "grad_norm": 0.4198679029941559, "learning_rate": 3.843480394161181e-06, "loss": 0.0074, "step": 9400 }, { "epoch": 0.0769513840618228, "grad_norm": 0.3569006323814392, "learning_rate": 3.84756920309114e-06, "loss": 0.0059, "step": 9410 }, { "epoch": 0.07703316024042196, "grad_norm": 0.21409223973751068, "learning_rate": 3.851658012021098e-06, "loss": 0.0051, "step": 9420 }, { "epoch": 0.07711493641902113, "grad_norm": 0.4838927388191223, "learning_rate": 3.855746820951057e-06, "loss": 0.0059, "step": 9430 }, { "epoch": 0.07719671259762032, "grad_norm": 0.3891927897930145, "learning_rate": 3.859835629881016e-06, "loss": 0.0054, "step": 9440 }, { "epoch": 0.07727848877621948, "grad_norm": 0.3458938002586365, "learning_rate": 3.863924438810975e-06, "loss": 0.0065, "step": 9450 }, { "epoch": 0.07736026495481867, "grad_norm": 0.48210445046424866, "learning_rate": 3.868013247740933e-06, "loss": 0.0064, "step": 9460 }, { "epoch": 0.07744204113341784, "grad_norm": 0.35087573528289795, "learning_rate": 3.872102056670892e-06, "loss": 0.006, "step": 9470 }, { "epoch": 0.077523817312017, "grad_norm": 0.45964300632476807, "learning_rate": 3.876190865600851e-06, "loss": 0.0053, "step": 9480 }, { "epoch": 0.07760559349061619, "grad_norm": 0.2601996958255768, "learning_rate": 3.88027967453081e-06, "loss": 0.006, "step": 9490 }, { "epoch": 0.07768736966921536, "grad_norm": 0.35369905829429626, "learning_rate": 3.884368483460768e-06, "loss": 0.0049, "step": 9500 }, { "epoch": 0.07776914584781452, "grad_norm": 0.4632114768028259, "learning_rate": 3.888457292390727e-06, "loss": 0.0064, "step": 9510 }, { "epoch": 0.07785092202641371, "grad_norm": 0.31354448199272156, "learning_rate": 3.892546101320686e-06, "loss": 0.009, "step": 9520 }, { "epoch": 0.07793269820501288, "grad_norm": 0.5062047243118286, "learning_rate": 3.8966349102506446e-06, "loss": 0.008, "step": 9530 }, { "epoch": 0.07801447438361206, "grad_norm": 0.42435339093208313, "learning_rate": 3.900723719180603e-06, "loss": 0.0057, "step": 9540 }, { "epoch": 0.07809625056221123, "grad_norm": 0.22783958911895752, "learning_rate": 3.9048125281105615e-06, "loss": 0.0054, "step": 9550 }, { "epoch": 0.0781780267408104, "grad_norm": 0.18129149079322815, "learning_rate": 3.90890133704052e-06, "loss": 0.0049, "step": 9560 }, { "epoch": 0.07825980291940958, "grad_norm": 0.3654685616493225, "learning_rate": 3.9129901459704785e-06, "loss": 0.0095, "step": 9570 }, { "epoch": 0.07834157909800875, "grad_norm": 0.3140919804573059, "learning_rate": 3.917078954900438e-06, "loss": 0.0069, "step": 9580 }, { "epoch": 0.07842335527660792, "grad_norm": 0.32768863439559937, "learning_rate": 3.921167763830396e-06, "loss": 0.0053, "step": 9590 }, { "epoch": 0.0785051314552071, "grad_norm": 0.20444175601005554, "learning_rate": 3.925256572760355e-06, "loss": 0.0084, "step": 9600 }, { "epoch": 0.07858690763380627, "grad_norm": 0.7807380557060242, "learning_rate": 3.929345381690314e-06, "loss": 0.0098, "step": 9610 }, { "epoch": 0.07866868381240545, "grad_norm": 0.40706127882003784, "learning_rate": 3.933434190620273e-06, "loss": 0.0072, "step": 9620 }, { "epoch": 0.07875045999100462, "grad_norm": 0.3183347284793854, "learning_rate": 3.937522999550231e-06, "loss": 0.0082, "step": 9630 }, { "epoch": 0.07883223616960379, "grad_norm": 0.1867201328277588, "learning_rate": 3.94161180848019e-06, "loss": 0.0062, "step": 9640 }, { "epoch": 0.07891401234820297, "grad_norm": 0.6310930848121643, "learning_rate": 3.945700617410149e-06, "loss": 0.0059, "step": 9650 }, { "epoch": 0.07899578852680214, "grad_norm": 0.282291054725647, "learning_rate": 3.949789426340108e-06, "loss": 0.0053, "step": 9660 }, { "epoch": 0.07907756470540131, "grad_norm": 0.4659254848957062, "learning_rate": 3.953878235270066e-06, "loss": 0.0061, "step": 9670 }, { "epoch": 0.07915934088400049, "grad_norm": 0.43040481209754944, "learning_rate": 3.9579670442000255e-06, "loss": 0.0071, "step": 9680 }, { "epoch": 0.07924111706259966, "grad_norm": 0.21548302471637726, "learning_rate": 3.9620558531299835e-06, "loss": 0.007, "step": 9690 }, { "epoch": 0.07932289324119884, "grad_norm": 0.2450828105211258, "learning_rate": 3.9661446620599424e-06, "loss": 0.0049, "step": 9700 }, { "epoch": 0.07940466941979801, "grad_norm": 0.27978450059890747, "learning_rate": 3.970233470989901e-06, "loss": 0.0061, "step": 9710 }, { "epoch": 0.07948644559839718, "grad_norm": 0.28118300437927246, "learning_rate": 3.974322279919859e-06, "loss": 0.0079, "step": 9720 }, { "epoch": 0.07956822177699636, "grad_norm": 0.25546443462371826, "learning_rate": 3.978411088849818e-06, "loss": 0.0059, "step": 9730 }, { "epoch": 0.07964999795559553, "grad_norm": 0.21359534561634064, "learning_rate": 3.982499897779777e-06, "loss": 0.0057, "step": 9740 }, { "epoch": 0.0797317741341947, "grad_norm": 0.3084525465965271, "learning_rate": 3.986588706709736e-06, "loss": 0.0054, "step": 9750 }, { "epoch": 0.07981355031279389, "grad_norm": 0.25539010763168335, "learning_rate": 3.990677515639694e-06, "loss": 0.0041, "step": 9760 }, { "epoch": 0.07989532649139305, "grad_norm": 0.4585506319999695, "learning_rate": 3.994766324569653e-06, "loss": 0.0051, "step": 9770 }, { "epoch": 0.07997710266999224, "grad_norm": 0.24536791443824768, "learning_rate": 3.998855133499612e-06, "loss": 0.0054, "step": 9780 }, { "epoch": 0.0800588788485914, "grad_norm": 0.3169955313205719, "learning_rate": 4.002943942429571e-06, "loss": 0.0088, "step": 9790 }, { "epoch": 0.08014065502719057, "grad_norm": 0.12479060143232346, "learning_rate": 4.007032751359529e-06, "loss": 0.0056, "step": 9800 }, { "epoch": 0.08022243120578976, "grad_norm": 0.2103637158870697, "learning_rate": 4.011121560289488e-06, "loss": 0.0069, "step": 9810 }, { "epoch": 0.08030420738438893, "grad_norm": 0.2349805235862732, "learning_rate": 4.015210369219447e-06, "loss": 0.0061, "step": 9820 }, { "epoch": 0.0803859835629881, "grad_norm": 0.1681896299123764, "learning_rate": 4.019299178149406e-06, "loss": 0.0046, "step": 9830 }, { "epoch": 0.08046775974158728, "grad_norm": 0.04846082255244255, "learning_rate": 4.0233879870793645e-06, "loss": 0.0053, "step": 9840 }, { "epoch": 0.08054953592018645, "grad_norm": 0.2909950017929077, "learning_rate": 4.027476796009323e-06, "loss": 0.0072, "step": 9850 }, { "epoch": 0.08063131209878563, "grad_norm": 0.12380955368280411, "learning_rate": 4.0315656049392814e-06, "loss": 0.0076, "step": 9860 }, { "epoch": 0.0807130882773848, "grad_norm": 0.16662953794002533, "learning_rate": 4.03565441386924e-06, "loss": 0.0082, "step": 9870 }, { "epoch": 0.08079486445598397, "grad_norm": 0.24904552102088928, "learning_rate": 4.039743222799199e-06, "loss": 0.0097, "step": 9880 }, { "epoch": 0.08087664063458315, "grad_norm": 0.2110363394021988, "learning_rate": 4.043832031729157e-06, "loss": 0.0042, "step": 9890 }, { "epoch": 0.08095841681318232, "grad_norm": 0.25393468141555786, "learning_rate": 4.047920840659116e-06, "loss": 0.0074, "step": 9900 }, { "epoch": 0.08104019299178149, "grad_norm": 0.21383805572986603, "learning_rate": 4.052009649589075e-06, "loss": 0.0041, "step": 9910 }, { "epoch": 0.08112196917038067, "grad_norm": 0.5934288501739502, "learning_rate": 4.056098458519034e-06, "loss": 0.0071, "step": 9920 }, { "epoch": 0.08120374534897984, "grad_norm": 0.265231192111969, "learning_rate": 4.060187267448992e-06, "loss": 0.0041, "step": 9930 }, { "epoch": 0.08128552152757902, "grad_norm": 0.49324658513069153, "learning_rate": 4.064276076378951e-06, "loss": 0.0065, "step": 9940 }, { "epoch": 0.08136729770617819, "grad_norm": 0.18881502747535706, "learning_rate": 4.06836488530891e-06, "loss": 0.0067, "step": 9950 }, { "epoch": 0.08144907388477736, "grad_norm": 0.05667192488908768, "learning_rate": 4.072453694238869e-06, "loss": 0.0063, "step": 9960 }, { "epoch": 0.08153085006337654, "grad_norm": 0.38809624314308167, "learning_rate": 4.076542503168827e-06, "loss": 0.0047, "step": 9970 }, { "epoch": 0.08161262624197571, "grad_norm": 0.30594801902770996, "learning_rate": 4.0806313120987865e-06, "loss": 0.0063, "step": 9980 }, { "epoch": 0.08169440242057488, "grad_norm": 0.2737598717212677, "learning_rate": 4.0847201210287446e-06, "loss": 0.0053, "step": 9990 }, { "epoch": 0.08177617859917406, "grad_norm": 0.26453208923339844, "learning_rate": 4.0888089299587035e-06, "loss": 0.0048, "step": 10000 }, { "epoch": 0.08185795477777323, "grad_norm": 0.22281545400619507, "learning_rate": 4.092897738888662e-06, "loss": 0.0051, "step": 10010 }, { "epoch": 0.08193973095637241, "grad_norm": 0.3754483461380005, "learning_rate": 4.096986547818621e-06, "loss": 0.0098, "step": 10020 }, { "epoch": 0.08202150713497158, "grad_norm": 0.1422722488641739, "learning_rate": 4.101075356748579e-06, "loss": 0.0054, "step": 10030 }, { "epoch": 0.08210328331357075, "grad_norm": 0.3164348602294922, "learning_rate": 4.105164165678538e-06, "loss": 0.0059, "step": 10040 }, { "epoch": 0.08218505949216993, "grad_norm": 0.25441068410873413, "learning_rate": 4.109252974608497e-06, "loss": 0.0061, "step": 10050 }, { "epoch": 0.0822668356707691, "grad_norm": 0.18866175413131714, "learning_rate": 4.113341783538455e-06, "loss": 0.0065, "step": 10060 }, { "epoch": 0.08234861184936827, "grad_norm": 0.20276595652103424, "learning_rate": 4.117430592468414e-06, "loss": 0.0075, "step": 10070 }, { "epoch": 0.08243038802796746, "grad_norm": 0.4447649121284485, "learning_rate": 4.121519401398373e-06, "loss": 0.0049, "step": 10080 }, { "epoch": 0.08251216420656662, "grad_norm": 0.15174956619739532, "learning_rate": 4.125608210328332e-06, "loss": 0.0056, "step": 10090 }, { "epoch": 0.0825939403851658, "grad_norm": 0.23484547436237335, "learning_rate": 4.12969701925829e-06, "loss": 0.0104, "step": 10100 }, { "epoch": 0.08267571656376498, "grad_norm": 0.5247977375984192, "learning_rate": 4.13378582818825e-06, "loss": 0.0055, "step": 10110 }, { "epoch": 0.08275749274236414, "grad_norm": 0.3042147755622864, "learning_rate": 4.137874637118208e-06, "loss": 0.0091, "step": 10120 }, { "epoch": 0.08283926892096333, "grad_norm": 0.17443284392356873, "learning_rate": 4.141963446048167e-06, "loss": 0.0055, "step": 10130 }, { "epoch": 0.0829210450995625, "grad_norm": 0.2105005979537964, "learning_rate": 4.1460522549781255e-06, "loss": 0.009, "step": 10140 }, { "epoch": 0.08300282127816166, "grad_norm": 0.32286304235458374, "learning_rate": 4.150141063908084e-06, "loss": 0.0055, "step": 10150 }, { "epoch": 0.08308459745676085, "grad_norm": 0.11341719329357147, "learning_rate": 4.1542298728380424e-06, "loss": 0.0056, "step": 10160 }, { "epoch": 0.08316637363536002, "grad_norm": 0.2263059914112091, "learning_rate": 4.158318681768001e-06, "loss": 0.0049, "step": 10170 }, { "epoch": 0.0832481498139592, "grad_norm": 0.35127365589141846, "learning_rate": 4.16240749069796e-06, "loss": 0.0041, "step": 10180 }, { "epoch": 0.08332992599255837, "grad_norm": 0.3060101270675659, "learning_rate": 4.166496299627918e-06, "loss": 0.0077, "step": 10190 }, { "epoch": 0.08341170217115754, "grad_norm": 0.6958140134811401, "learning_rate": 4.170585108557877e-06, "loss": 0.005, "step": 10200 }, { "epoch": 0.08349347834975672, "grad_norm": 0.3303450047969818, "learning_rate": 4.174673917487836e-06, "loss": 0.0062, "step": 10210 }, { "epoch": 0.08357525452835589, "grad_norm": 0.22563916444778442, "learning_rate": 4.178762726417795e-06, "loss": 0.0058, "step": 10220 }, { "epoch": 0.08365703070695506, "grad_norm": 0.7362670302391052, "learning_rate": 4.182851535347753e-06, "loss": 0.0097, "step": 10230 }, { "epoch": 0.08373880688555424, "grad_norm": 0.4914403259754181, "learning_rate": 4.186940344277712e-06, "loss": 0.0059, "step": 10240 }, { "epoch": 0.08382058306415341, "grad_norm": 0.4505293667316437, "learning_rate": 4.191029153207671e-06, "loss": 0.005, "step": 10250 }, { "epoch": 0.08390235924275259, "grad_norm": 0.6118637323379517, "learning_rate": 4.19511796213763e-06, "loss": 0.0055, "step": 10260 }, { "epoch": 0.08398413542135176, "grad_norm": 0.1689242571592331, "learning_rate": 4.199206771067589e-06, "loss": 0.003, "step": 10270 }, { "epoch": 0.08406591159995093, "grad_norm": 0.3534964621067047, "learning_rate": 4.2032955799975475e-06, "loss": 0.0062, "step": 10280 }, { "epoch": 0.08414768777855011, "grad_norm": 0.2610909342765808, "learning_rate": 4.207384388927506e-06, "loss": 0.0102, "step": 10290 }, { "epoch": 0.08422946395714928, "grad_norm": 0.1365964710712433, "learning_rate": 4.2114731978574645e-06, "loss": 0.0076, "step": 10300 }, { "epoch": 0.08431124013574845, "grad_norm": 0.4042908251285553, "learning_rate": 4.215562006787423e-06, "loss": 0.0091, "step": 10310 }, { "epoch": 0.08439301631434763, "grad_norm": 0.413969486951828, "learning_rate": 4.219650815717382e-06, "loss": 0.0078, "step": 10320 }, { "epoch": 0.0844747924929468, "grad_norm": 0.322640597820282, "learning_rate": 4.22373962464734e-06, "loss": 0.0046, "step": 10330 }, { "epoch": 0.08455656867154598, "grad_norm": 0.3190023899078369, "learning_rate": 4.227828433577299e-06, "loss": 0.0061, "step": 10340 }, { "epoch": 0.08463834485014515, "grad_norm": 0.6387690305709839, "learning_rate": 4.231917242507258e-06, "loss": 0.007, "step": 10350 }, { "epoch": 0.08472012102874432, "grad_norm": 0.3363858461380005, "learning_rate": 4.236006051437216e-06, "loss": 0.0084, "step": 10360 }, { "epoch": 0.0848018972073435, "grad_norm": 0.3830738663673401, "learning_rate": 4.240094860367175e-06, "loss": 0.0064, "step": 10370 }, { "epoch": 0.08488367338594267, "grad_norm": 0.1858435571193695, "learning_rate": 4.244183669297134e-06, "loss": 0.007, "step": 10380 }, { "epoch": 0.08496544956454184, "grad_norm": 0.24974986910820007, "learning_rate": 4.248272478227093e-06, "loss": 0.0075, "step": 10390 }, { "epoch": 0.08504722574314103, "grad_norm": 0.17417992651462555, "learning_rate": 4.252361287157051e-06, "loss": 0.0045, "step": 10400 }, { "epoch": 0.0851290019217402, "grad_norm": 0.43284666538238525, "learning_rate": 4.256450096087011e-06, "loss": 0.005, "step": 10410 }, { "epoch": 0.08521077810033938, "grad_norm": 0.2814231216907501, "learning_rate": 4.260538905016969e-06, "loss": 0.0053, "step": 10420 }, { "epoch": 0.08529255427893855, "grad_norm": 0.5598049163818359, "learning_rate": 4.264627713946928e-06, "loss": 0.0077, "step": 10430 }, { "epoch": 0.08537433045753771, "grad_norm": 0.3293662667274475, "learning_rate": 4.2687165228768865e-06, "loss": 0.0045, "step": 10440 }, { "epoch": 0.0854561066361369, "grad_norm": 0.6687154173851013, "learning_rate": 4.272805331806845e-06, "loss": 0.0062, "step": 10450 }, { "epoch": 0.08553788281473607, "grad_norm": 0.3934236168861389, "learning_rate": 4.2768941407368035e-06, "loss": 0.0094, "step": 10460 }, { "epoch": 0.08561965899333523, "grad_norm": 0.26494479179382324, "learning_rate": 4.280982949666762e-06, "loss": 0.0079, "step": 10470 }, { "epoch": 0.08570143517193442, "grad_norm": 0.07163529098033905, "learning_rate": 4.285071758596721e-06, "loss": 0.006, "step": 10480 }, { "epoch": 0.08578321135053359, "grad_norm": 0.24065445363521576, "learning_rate": 4.28916056752668e-06, "loss": 0.0062, "step": 10490 }, { "epoch": 0.08586498752913277, "grad_norm": 0.16450531780719757, "learning_rate": 4.293249376456638e-06, "loss": 0.0054, "step": 10500 }, { "epoch": 0.08594676370773194, "grad_norm": 0.32424938678741455, "learning_rate": 4.297338185386597e-06, "loss": 0.0075, "step": 10510 }, { "epoch": 0.0860285398863311, "grad_norm": 0.33380547165870667, "learning_rate": 4.301426994316556e-06, "loss": 0.0074, "step": 10520 }, { "epoch": 0.08611031606493029, "grad_norm": 0.48931387066841125, "learning_rate": 4.305515803246514e-06, "loss": 0.0079, "step": 10530 }, { "epoch": 0.08619209224352946, "grad_norm": 0.3341701924800873, "learning_rate": 4.309604612176474e-06, "loss": 0.0086, "step": 10540 }, { "epoch": 0.08627386842212863, "grad_norm": 0.7538759112358093, "learning_rate": 4.313693421106432e-06, "loss": 0.0056, "step": 10550 }, { "epoch": 0.08635564460072781, "grad_norm": 0.37267905473709106, "learning_rate": 4.317782230036391e-06, "loss": 0.0057, "step": 10560 }, { "epoch": 0.08643742077932698, "grad_norm": 0.33258256316185, "learning_rate": 4.32187103896635e-06, "loss": 0.0059, "step": 10570 }, { "epoch": 0.08651919695792616, "grad_norm": 0.14358733594417572, "learning_rate": 4.3259598478963086e-06, "loss": 0.0052, "step": 10580 }, { "epoch": 0.08660097313652533, "grad_norm": 0.29341351985931396, "learning_rate": 4.330048656826267e-06, "loss": 0.005, "step": 10590 }, { "epoch": 0.0866827493151245, "grad_norm": 0.37835660576820374, "learning_rate": 4.3341374657562255e-06, "loss": 0.0075, "step": 10600 }, { "epoch": 0.08676452549372368, "grad_norm": 0.35013076663017273, "learning_rate": 4.338226274686184e-06, "loss": 0.0071, "step": 10610 }, { "epoch": 0.08684630167232285, "grad_norm": 0.45420441031455994, "learning_rate": 4.342315083616143e-06, "loss": 0.007, "step": 10620 }, { "epoch": 0.08692807785092202, "grad_norm": 0.5008588433265686, "learning_rate": 4.346403892546101e-06, "loss": 0.01, "step": 10630 }, { "epoch": 0.0870098540295212, "grad_norm": 0.2317742109298706, "learning_rate": 4.35049270147606e-06, "loss": 0.0074, "step": 10640 }, { "epoch": 0.08709163020812037, "grad_norm": 0.6369801759719849, "learning_rate": 4.354581510406019e-06, "loss": 0.0072, "step": 10650 }, { "epoch": 0.08717340638671955, "grad_norm": 0.2746371924877167, "learning_rate": 4.358670319335978e-06, "loss": 0.0049, "step": 10660 }, { "epoch": 0.08725518256531872, "grad_norm": 0.19675789773464203, "learning_rate": 4.362759128265936e-06, "loss": 0.0067, "step": 10670 }, { "epoch": 0.08733695874391789, "grad_norm": 0.26836180686950684, "learning_rate": 4.366847937195895e-06, "loss": 0.0049, "step": 10680 }, { "epoch": 0.08741873492251707, "grad_norm": 0.29743149876594543, "learning_rate": 4.370936746125854e-06, "loss": 0.0062, "step": 10690 }, { "epoch": 0.08750051110111624, "grad_norm": 0.19135704636573792, "learning_rate": 4.375025555055813e-06, "loss": 0.0046, "step": 10700 }, { "epoch": 0.08758228727971541, "grad_norm": 0.2278808057308197, "learning_rate": 4.379114363985772e-06, "loss": 0.0067, "step": 10710 }, { "epoch": 0.0876640634583146, "grad_norm": 0.2645963728427887, "learning_rate": 4.38320317291573e-06, "loss": 0.0049, "step": 10720 }, { "epoch": 0.08774583963691376, "grad_norm": 0.2975017726421356, "learning_rate": 4.387291981845689e-06, "loss": 0.0054, "step": 10730 }, { "epoch": 0.08782761581551295, "grad_norm": 0.3936592638492584, "learning_rate": 4.3913807907756475e-06, "loss": 0.0046, "step": 10740 }, { "epoch": 0.08790939199411212, "grad_norm": 0.3450113832950592, "learning_rate": 4.3954695997056064e-06, "loss": 0.0071, "step": 10750 }, { "epoch": 0.08799116817271128, "grad_norm": 0.09059746563434601, "learning_rate": 4.3995584086355645e-06, "loss": 0.0062, "step": 10760 }, { "epoch": 0.08807294435131047, "grad_norm": 0.28337231278419495, "learning_rate": 4.403647217565523e-06, "loss": 0.0086, "step": 10770 }, { "epoch": 0.08815472052990964, "grad_norm": 0.2913172245025635, "learning_rate": 4.407736026495482e-06, "loss": 0.0043, "step": 10780 }, { "epoch": 0.0882364967085088, "grad_norm": 0.2908845543861389, "learning_rate": 4.411824835425441e-06, "loss": 0.0066, "step": 10790 }, { "epoch": 0.08831827288710799, "grad_norm": 0.1553611159324646, "learning_rate": 4.415913644355399e-06, "loss": 0.0055, "step": 10800 }, { "epoch": 0.08840004906570716, "grad_norm": 0.1739351600408554, "learning_rate": 4.420002453285359e-06, "loss": 0.0063, "step": 10810 }, { "epoch": 0.08848182524430634, "grad_norm": 0.5525997281074524, "learning_rate": 4.424091262215317e-06, "loss": 0.0061, "step": 10820 }, { "epoch": 0.08856360142290551, "grad_norm": 0.25720110535621643, "learning_rate": 4.428180071145275e-06, "loss": 0.0046, "step": 10830 }, { "epoch": 0.08864537760150468, "grad_norm": 0.5605131387710571, "learning_rate": 4.432268880075235e-06, "loss": 0.0082, "step": 10840 }, { "epoch": 0.08872715378010386, "grad_norm": 0.27624645829200745, "learning_rate": 4.436357689005193e-06, "loss": 0.0073, "step": 10850 }, { "epoch": 0.08880892995870303, "grad_norm": 0.2783728539943695, "learning_rate": 4.440446497935152e-06, "loss": 0.0072, "step": 10860 }, { "epoch": 0.0888907061373022, "grad_norm": 0.25143250823020935, "learning_rate": 4.444535306865111e-06, "loss": 0.0042, "step": 10870 }, { "epoch": 0.08897248231590138, "grad_norm": 0.2957516014575958, "learning_rate": 4.4486241157950696e-06, "loss": 0.0077, "step": 10880 }, { "epoch": 0.08905425849450055, "grad_norm": 0.34241795539855957, "learning_rate": 4.452712924725028e-06, "loss": 0.0065, "step": 10890 }, { "epoch": 0.08913603467309973, "grad_norm": 0.3540817201137543, "learning_rate": 4.4568017336549865e-06, "loss": 0.0058, "step": 10900 }, { "epoch": 0.0892178108516989, "grad_norm": 0.32001590728759766, "learning_rate": 4.460890542584945e-06, "loss": 0.0055, "step": 10910 }, { "epoch": 0.08929958703029807, "grad_norm": 0.1929943859577179, "learning_rate": 4.464979351514904e-06, "loss": 0.0054, "step": 10920 }, { "epoch": 0.08938136320889725, "grad_norm": 0.28171244263648987, "learning_rate": 4.469068160444862e-06, "loss": 0.007, "step": 10930 }, { "epoch": 0.08946313938749642, "grad_norm": 0.3855818510055542, "learning_rate": 4.473156969374821e-06, "loss": 0.0073, "step": 10940 }, { "epoch": 0.08954491556609559, "grad_norm": 0.3206920623779297, "learning_rate": 4.47724577830478e-06, "loss": 0.007, "step": 10950 }, { "epoch": 0.08962669174469477, "grad_norm": 0.20464156568050385, "learning_rate": 4.481334587234739e-06, "loss": 0.0071, "step": 10960 }, { "epoch": 0.08970846792329394, "grad_norm": 0.4152478575706482, "learning_rate": 4.485423396164698e-06, "loss": 0.0048, "step": 10970 }, { "epoch": 0.08979024410189312, "grad_norm": 1.003226399421692, "learning_rate": 4.489512205094656e-06, "loss": 0.0083, "step": 10980 }, { "epoch": 0.08987202028049229, "grad_norm": 0.5919243693351746, "learning_rate": 4.493601014024615e-06, "loss": 0.0058, "step": 10990 }, { "epoch": 0.08995379645909146, "grad_norm": 0.22121219336986542, "learning_rate": 4.497689822954574e-06, "loss": 0.0075, "step": 11000 }, { "epoch": 0.09003557263769064, "grad_norm": 0.37251603603363037, "learning_rate": 4.501778631884533e-06, "loss": 0.0075, "step": 11010 }, { "epoch": 0.09011734881628981, "grad_norm": 0.3410487473011017, "learning_rate": 4.505867440814491e-06, "loss": 0.007, "step": 11020 }, { "epoch": 0.09019912499488898, "grad_norm": 0.16706398129463196, "learning_rate": 4.50995624974445e-06, "loss": 0.0053, "step": 11030 }, { "epoch": 0.09028090117348817, "grad_norm": 0.20489798486232758, "learning_rate": 4.5140450586744086e-06, "loss": 0.0042, "step": 11040 }, { "epoch": 0.09036267735208733, "grad_norm": 0.48074066638946533, "learning_rate": 4.5181338676043675e-06, "loss": 0.0043, "step": 11050 }, { "epoch": 0.09044445353068652, "grad_norm": 0.2956502139568329, "learning_rate": 4.5222226765343255e-06, "loss": 0.0071, "step": 11060 }, { "epoch": 0.09052622970928569, "grad_norm": 0.2620284855365753, "learning_rate": 4.526311485464284e-06, "loss": 0.0108, "step": 11070 }, { "epoch": 0.09060800588788485, "grad_norm": 0.4393211305141449, "learning_rate": 4.530400294394243e-06, "loss": 0.0092, "step": 11080 }, { "epoch": 0.09068978206648404, "grad_norm": 0.24213793873786926, "learning_rate": 4.534489103324202e-06, "loss": 0.0052, "step": 11090 }, { "epoch": 0.0907715582450832, "grad_norm": 0.16947466135025024, "learning_rate": 4.53857791225416e-06, "loss": 0.0081, "step": 11100 }, { "epoch": 0.09085333442368237, "grad_norm": 0.4350884258747101, "learning_rate": 4.54266672118412e-06, "loss": 0.0084, "step": 11110 }, { "epoch": 0.09093511060228156, "grad_norm": 0.10278057307004929, "learning_rate": 4.546755530114078e-06, "loss": 0.0048, "step": 11120 }, { "epoch": 0.09101688678088073, "grad_norm": 0.3721492886543274, "learning_rate": 4.550844339044037e-06, "loss": 0.0067, "step": 11130 }, { "epoch": 0.09109866295947991, "grad_norm": 0.23111101984977722, "learning_rate": 4.554933147973996e-06, "loss": 0.0064, "step": 11140 }, { "epoch": 0.09118043913807908, "grad_norm": 0.17665012180805206, "learning_rate": 4.559021956903954e-06, "loss": 0.0065, "step": 11150 }, { "epoch": 0.09126221531667825, "grad_norm": 0.22879019379615784, "learning_rate": 4.563110765833913e-06, "loss": 0.0056, "step": 11160 }, { "epoch": 0.09134399149527743, "grad_norm": 0.3635673522949219, "learning_rate": 4.567199574763872e-06, "loss": 0.008, "step": 11170 }, { "epoch": 0.0914257676738766, "grad_norm": 0.3861032724380493, "learning_rate": 4.571288383693831e-06, "loss": 0.0085, "step": 11180 }, { "epoch": 0.09150754385247577, "grad_norm": 0.1987600177526474, "learning_rate": 4.575377192623789e-06, "loss": 0.004, "step": 11190 }, { "epoch": 0.09158932003107495, "grad_norm": 0.26110029220581055, "learning_rate": 4.5794660015537475e-06, "loss": 0.0049, "step": 11200 }, { "epoch": 0.09167109620967412, "grad_norm": 0.383747935295105, "learning_rate": 4.5835548104837064e-06, "loss": 0.0051, "step": 11210 }, { "epoch": 0.0917528723882733, "grad_norm": 0.3661295771598816, "learning_rate": 4.587643619413665e-06, "loss": 0.0061, "step": 11220 }, { "epoch": 0.09183464856687247, "grad_norm": 0.49778735637664795, "learning_rate": 4.591732428343623e-06, "loss": 0.0048, "step": 11230 }, { "epoch": 0.09191642474547164, "grad_norm": 0.37393373250961304, "learning_rate": 4.595821237273583e-06, "loss": 0.007, "step": 11240 }, { "epoch": 0.09199820092407082, "grad_norm": 0.2465086579322815, "learning_rate": 4.599910046203541e-06, "loss": 0.0072, "step": 11250 }, { "epoch": 0.09207997710266999, "grad_norm": 0.20759816467761993, "learning_rate": 4.6039988551335e-06, "loss": 0.0052, "step": 11260 }, { "epoch": 0.09216175328126916, "grad_norm": 0.3007579743862152, "learning_rate": 4.608087664063459e-06, "loss": 0.0054, "step": 11270 }, { "epoch": 0.09224352945986834, "grad_norm": 0.3887057900428772, "learning_rate": 4.612176472993418e-06, "loss": 0.0062, "step": 11280 }, { "epoch": 0.09232530563846751, "grad_norm": 0.2999528646469116, "learning_rate": 4.616265281923376e-06, "loss": 0.0068, "step": 11290 }, { "epoch": 0.0924070818170667, "grad_norm": 0.351728618144989, "learning_rate": 4.620354090853335e-06, "loss": 0.0071, "step": 11300 }, { "epoch": 0.09248885799566586, "grad_norm": 0.46770623326301575, "learning_rate": 4.624442899783294e-06, "loss": 0.0071, "step": 11310 }, { "epoch": 0.09257063417426503, "grad_norm": 0.374315083026886, "learning_rate": 4.628531708713252e-06, "loss": 0.0071, "step": 11320 }, { "epoch": 0.09265241035286421, "grad_norm": 0.503532350063324, "learning_rate": 4.632620517643211e-06, "loss": 0.0072, "step": 11330 }, { "epoch": 0.09273418653146338, "grad_norm": 0.22252234816551208, "learning_rate": 4.6367093265731696e-06, "loss": 0.0056, "step": 11340 }, { "epoch": 0.09281596271006255, "grad_norm": 0.12247475981712341, "learning_rate": 4.6407981355031285e-06, "loss": 0.0048, "step": 11350 }, { "epoch": 0.09289773888866174, "grad_norm": 0.19860035181045532, "learning_rate": 4.6448869444330865e-06, "loss": 0.006, "step": 11360 }, { "epoch": 0.0929795150672609, "grad_norm": 0.23276017606258392, "learning_rate": 4.648975753363046e-06, "loss": 0.0053, "step": 11370 }, { "epoch": 0.09306129124586009, "grad_norm": 0.25798603892326355, "learning_rate": 4.653064562293004e-06, "loss": 0.0051, "step": 11380 }, { "epoch": 0.09314306742445926, "grad_norm": 0.7404858469963074, "learning_rate": 4.657153371222963e-06, "loss": 0.0054, "step": 11390 }, { "epoch": 0.09322484360305842, "grad_norm": 0.9267423152923584, "learning_rate": 4.661242180152922e-06, "loss": 0.0049, "step": 11400 }, { "epoch": 0.09330661978165761, "grad_norm": 0.26692819595336914, "learning_rate": 4.665330989082881e-06, "loss": 0.0055, "step": 11410 }, { "epoch": 0.09338839596025678, "grad_norm": 0.04801531881093979, "learning_rate": 4.669419798012839e-06, "loss": 0.0052, "step": 11420 }, { "epoch": 0.09347017213885594, "grad_norm": 0.19331564009189606, "learning_rate": 4.673508606942798e-06, "loss": 0.0071, "step": 11430 }, { "epoch": 0.09355194831745513, "grad_norm": 0.19296297430992126, "learning_rate": 4.677597415872757e-06, "loss": 0.0082, "step": 11440 }, { "epoch": 0.0936337244960543, "grad_norm": 0.020449718460440636, "learning_rate": 4.681686224802716e-06, "loss": 0.0089, "step": 11450 }, { "epoch": 0.09371550067465348, "grad_norm": 0.24021543562412262, "learning_rate": 4.685775033732674e-06, "loss": 0.0071, "step": 11460 }, { "epoch": 0.09379727685325265, "grad_norm": 0.4115442633628845, "learning_rate": 4.689863842662633e-06, "loss": 0.0076, "step": 11470 }, { "epoch": 0.09387905303185182, "grad_norm": 0.4514647126197815, "learning_rate": 4.693952651592592e-06, "loss": 0.0078, "step": 11480 }, { "epoch": 0.093960829210451, "grad_norm": 0.22985608875751495, "learning_rate": 4.69804146052255e-06, "loss": 0.006, "step": 11490 }, { "epoch": 0.09404260538905017, "grad_norm": 0.4835754334926605, "learning_rate": 4.7021302694525086e-06, "loss": 0.0076, "step": 11500 }, { "epoch": 0.09412438156764934, "grad_norm": 0.7283335328102112, "learning_rate": 4.7062190783824675e-06, "loss": 0.0067, "step": 11510 }, { "epoch": 0.09420615774624852, "grad_norm": 0.3569016456604004, "learning_rate": 4.710307887312426e-06, "loss": 0.0079, "step": 11520 }, { "epoch": 0.09428793392484769, "grad_norm": 0.29056259989738464, "learning_rate": 4.714396696242384e-06, "loss": 0.0066, "step": 11530 }, { "epoch": 0.09436971010344687, "grad_norm": 0.40302157402038574, "learning_rate": 4.718485505172344e-06, "loss": 0.0087, "step": 11540 }, { "epoch": 0.09445148628204604, "grad_norm": 0.22508463263511658, "learning_rate": 4.722574314102302e-06, "loss": 0.009, "step": 11550 }, { "epoch": 0.09453326246064521, "grad_norm": 0.08979017287492752, "learning_rate": 4.726663123032261e-06, "loss": 0.0065, "step": 11560 }, { "epoch": 0.09461503863924439, "grad_norm": 0.4193160831928253, "learning_rate": 4.73075193196222e-06, "loss": 0.007, "step": 11570 }, { "epoch": 0.09469681481784356, "grad_norm": 0.18131503462791443, "learning_rate": 4.734840740892179e-06, "loss": 0.0044, "step": 11580 }, { "epoch": 0.09477859099644273, "grad_norm": 0.21392978727817535, "learning_rate": 4.738929549822137e-06, "loss": 0.0041, "step": 11590 }, { "epoch": 0.09486036717504191, "grad_norm": 0.2623828053474426, "learning_rate": 4.743018358752096e-06, "loss": 0.0038, "step": 11600 }, { "epoch": 0.09494214335364108, "grad_norm": 0.16508550941944122, "learning_rate": 4.747107167682055e-06, "loss": 0.0048, "step": 11610 }, { "epoch": 0.09502391953224026, "grad_norm": 0.15890435874462128, "learning_rate": 4.751195976612013e-06, "loss": 0.005, "step": 11620 }, { "epoch": 0.09510569571083943, "grad_norm": 0.21249724924564362, "learning_rate": 4.755284785541972e-06, "loss": 0.0096, "step": 11630 }, { "epoch": 0.0951874718894386, "grad_norm": 0.5112670660018921, "learning_rate": 4.759373594471931e-06, "loss": 0.0065, "step": 11640 }, { "epoch": 0.09526924806803778, "grad_norm": 0.24624350666999817, "learning_rate": 4.7634624034018895e-06, "loss": 0.0031, "step": 11650 }, { "epoch": 0.09535102424663695, "grad_norm": 0.42952993512153625, "learning_rate": 4.7675512123318475e-06, "loss": 0.0073, "step": 11660 }, { "epoch": 0.09543280042523612, "grad_norm": 0.3334479033946991, "learning_rate": 4.771640021261807e-06, "loss": 0.0093, "step": 11670 }, { "epoch": 0.0955145766038353, "grad_norm": 0.28839412331581116, "learning_rate": 4.775728830191765e-06, "loss": 0.0064, "step": 11680 }, { "epoch": 0.09559635278243447, "grad_norm": 0.6868475675582886, "learning_rate": 4.779817639121724e-06, "loss": 0.0062, "step": 11690 }, { "epoch": 0.09567812896103366, "grad_norm": 0.22327622771263123, "learning_rate": 4.783906448051683e-06, "loss": 0.0062, "step": 11700 }, { "epoch": 0.09575990513963283, "grad_norm": 0.13777567446231842, "learning_rate": 4.787995256981642e-06, "loss": 0.0061, "step": 11710 }, { "epoch": 0.095841681318232, "grad_norm": 0.23634982109069824, "learning_rate": 4.7920840659116e-06, "loss": 0.006, "step": 11720 }, { "epoch": 0.09592345749683118, "grad_norm": 0.20276091992855072, "learning_rate": 4.796172874841559e-06, "loss": 0.0069, "step": 11730 }, { "epoch": 0.09600523367543035, "grad_norm": 0.4315340518951416, "learning_rate": 4.800261683771518e-06, "loss": 0.0053, "step": 11740 }, { "epoch": 0.09608700985402951, "grad_norm": 0.3487113416194916, "learning_rate": 4.804350492701477e-06, "loss": 0.0064, "step": 11750 }, { "epoch": 0.0961687860326287, "grad_norm": 0.16679076850414276, "learning_rate": 4.808439301631435e-06, "loss": 0.0052, "step": 11760 }, { "epoch": 0.09625056221122787, "grad_norm": 0.1691240817308426, "learning_rate": 4.812528110561394e-06, "loss": 0.0039, "step": 11770 }, { "epoch": 0.09633233838982705, "grad_norm": 0.5068387389183044, "learning_rate": 4.816616919491353e-06, "loss": 0.0049, "step": 11780 }, { "epoch": 0.09641411456842622, "grad_norm": 0.24514199793338776, "learning_rate": 4.820705728421311e-06, "loss": 0.0062, "step": 11790 }, { "epoch": 0.09649589074702539, "grad_norm": 0.2412988394498825, "learning_rate": 4.8247945373512704e-06, "loss": 0.0073, "step": 11800 }, { "epoch": 0.09657766692562457, "grad_norm": 0.22931227087974548, "learning_rate": 4.8288833462812285e-06, "loss": 0.0051, "step": 11810 }, { "epoch": 0.09665944310422374, "grad_norm": 0.2864583730697632, "learning_rate": 4.832972155211187e-06, "loss": 0.005, "step": 11820 }, { "epoch": 0.09674121928282291, "grad_norm": 0.3612000048160553, "learning_rate": 4.837060964141146e-06, "loss": 0.0067, "step": 11830 }, { "epoch": 0.09682299546142209, "grad_norm": 0.1383812576532364, "learning_rate": 4.841149773071105e-06, "loss": 0.0042, "step": 11840 }, { "epoch": 0.09690477164002126, "grad_norm": 0.5104392766952515, "learning_rate": 4.845238582001063e-06, "loss": 0.0103, "step": 11850 }, { "epoch": 0.09698654781862044, "grad_norm": 0.3259025812149048, "learning_rate": 4.849327390931022e-06, "loss": 0.0039, "step": 11860 }, { "epoch": 0.09706832399721961, "grad_norm": 0.4635397493839264, "learning_rate": 4.853416199860981e-06, "loss": 0.0043, "step": 11870 }, { "epoch": 0.09715010017581878, "grad_norm": 0.38159385323524475, "learning_rate": 4.85750500879094e-06, "loss": 0.0068, "step": 11880 }, { "epoch": 0.09723187635441796, "grad_norm": 0.20505471527576447, "learning_rate": 4.861593817720898e-06, "loss": 0.0045, "step": 11890 }, { "epoch": 0.09731365253301713, "grad_norm": 0.3736940622329712, "learning_rate": 4.865682626650857e-06, "loss": 0.0063, "step": 11900 }, { "epoch": 0.0973954287116163, "grad_norm": 0.4769817888736725, "learning_rate": 4.869771435580816e-06, "loss": 0.0053, "step": 11910 }, { "epoch": 0.09747720489021548, "grad_norm": 0.35351458191871643, "learning_rate": 4.873860244510775e-06, "loss": 0.0045, "step": 11920 }, { "epoch": 0.09755898106881465, "grad_norm": 0.34504464268684387, "learning_rate": 4.877949053440733e-06, "loss": 0.0065, "step": 11930 }, { "epoch": 0.09764075724741383, "grad_norm": 0.2533998191356659, "learning_rate": 4.882037862370692e-06, "loss": 0.0065, "step": 11940 }, { "epoch": 0.097722533426013, "grad_norm": 0.2835679054260254, "learning_rate": 4.8861266713006505e-06, "loss": 0.0082, "step": 11950 }, { "epoch": 0.09780430960461217, "grad_norm": 0.32273584604263306, "learning_rate": 4.890215480230609e-06, "loss": 0.0071, "step": 11960 }, { "epoch": 0.09788608578321135, "grad_norm": 0.11887706816196442, "learning_rate": 4.894304289160568e-06, "loss": 0.0061, "step": 11970 }, { "epoch": 0.09796786196181052, "grad_norm": 0.1685151755809784, "learning_rate": 4.898393098090526e-06, "loss": 0.0071, "step": 11980 }, { "epoch": 0.09804963814040969, "grad_norm": 0.5990136861801147, "learning_rate": 4.902481907020485e-06, "loss": 0.008, "step": 11990 }, { "epoch": 0.09813141431900888, "grad_norm": 0.24593879282474518, "learning_rate": 4.906570715950444e-06, "loss": 0.0081, "step": 12000 }, { "epoch": 0.09821319049760804, "grad_norm": 0.369905948638916, "learning_rate": 4.910659524880403e-06, "loss": 0.008, "step": 12010 }, { "epoch": 0.09829496667620723, "grad_norm": 0.37332749366760254, "learning_rate": 4.914748333810361e-06, "loss": 0.0055, "step": 12020 }, { "epoch": 0.0983767428548064, "grad_norm": 0.29450783133506775, "learning_rate": 4.91883714274032e-06, "loss": 0.0081, "step": 12030 }, { "epoch": 0.09845851903340556, "grad_norm": 0.5235955715179443, "learning_rate": 4.922925951670279e-06, "loss": 0.01, "step": 12040 }, { "epoch": 0.09854029521200475, "grad_norm": 0.37664419412612915, "learning_rate": 4.927014760600238e-06, "loss": 0.0062, "step": 12050 }, { "epoch": 0.09862207139060392, "grad_norm": 0.19504287838935852, "learning_rate": 4.931103569530196e-06, "loss": 0.0075, "step": 12060 }, { "epoch": 0.09870384756920308, "grad_norm": 0.19349701702594757, "learning_rate": 4.935192378460156e-06, "loss": 0.0043, "step": 12070 }, { "epoch": 0.09878562374780227, "grad_norm": 0.20090457797050476, "learning_rate": 4.939281187390114e-06, "loss": 0.0081, "step": 12080 }, { "epoch": 0.09886739992640144, "grad_norm": 0.279163658618927, "learning_rate": 4.9433699963200725e-06, "loss": 0.0049, "step": 12090 }, { "epoch": 0.09894917610500062, "grad_norm": 0.19866052269935608, "learning_rate": 4.9474588052500314e-06, "loss": 0.0054, "step": 12100 }, { "epoch": 0.09903095228359979, "grad_norm": 0.21595343947410583, "learning_rate": 4.9515476141799895e-06, "loss": 0.0063, "step": 12110 }, { "epoch": 0.09911272846219896, "grad_norm": 0.4887649118900299, "learning_rate": 4.955636423109948e-06, "loss": 0.0067, "step": 12120 }, { "epoch": 0.09919450464079814, "grad_norm": 0.18383482098579407, "learning_rate": 4.959725232039907e-06, "loss": 0.0071, "step": 12130 }, { "epoch": 0.09927628081939731, "grad_norm": 0.1654735803604126, "learning_rate": 4.963814040969866e-06, "loss": 0.0053, "step": 12140 }, { "epoch": 0.09935805699799648, "grad_norm": 0.5138298869132996, "learning_rate": 4.967902849899824e-06, "loss": 0.0084, "step": 12150 }, { "epoch": 0.09943983317659566, "grad_norm": 0.2991342842578888, "learning_rate": 4.971991658829783e-06, "loss": 0.0031, "step": 12160 }, { "epoch": 0.09952160935519483, "grad_norm": 0.22731924057006836, "learning_rate": 4.976080467759742e-06, "loss": 0.0062, "step": 12170 }, { "epoch": 0.09960338553379401, "grad_norm": 0.23162879049777985, "learning_rate": 4.980169276689701e-06, "loss": 0.008, "step": 12180 }, { "epoch": 0.09968516171239318, "grad_norm": 0.11379845440387726, "learning_rate": 4.984258085619659e-06, "loss": 0.0054, "step": 12190 }, { "epoch": 0.09976693789099235, "grad_norm": 0.16307222843170166, "learning_rate": 4.988346894549618e-06, "loss": 0.0057, "step": 12200 }, { "epoch": 0.09984871406959153, "grad_norm": 0.2612988352775574, "learning_rate": 4.992435703479577e-06, "loss": 0.0074, "step": 12210 }, { "epoch": 0.0999304902481907, "grad_norm": 0.15351417660713196, "learning_rate": 4.996524512409536e-06, "loss": 0.0046, "step": 12220 }, { "epoch": 0.10001226642678987, "grad_norm": 0.38906365633010864, "learning_rate": 5.000613321339495e-06, "loss": 0.0066, "step": 12230 }, { "epoch": 0.10009404260538905, "grad_norm": 0.1408500075340271, "learning_rate": 5.0047021302694535e-06, "loss": 0.0068, "step": 12240 }, { "epoch": 0.10017581878398822, "grad_norm": 0.17896650731563568, "learning_rate": 5.0087909391994115e-06, "loss": 0.0051, "step": 12250 }, { "epoch": 0.1002575949625874, "grad_norm": 0.3743566572666168, "learning_rate": 5.0128797481293704e-06, "loss": 0.0039, "step": 12260 }, { "epoch": 0.10033937114118657, "grad_norm": 0.19379180669784546, "learning_rate": 5.0169685570593285e-06, "loss": 0.0066, "step": 12270 }, { "epoch": 0.10042114731978574, "grad_norm": 0.2258613556623459, "learning_rate": 5.021057365989288e-06, "loss": 0.0067, "step": 12280 }, { "epoch": 0.10050292349838492, "grad_norm": 0.2958235442638397, "learning_rate": 5.025146174919246e-06, "loss": 0.0066, "step": 12290 }, { "epoch": 0.1005846996769841, "grad_norm": 0.21631379425525665, "learning_rate": 5.029234983849205e-06, "loss": 0.0061, "step": 12300 }, { "epoch": 0.10066647585558326, "grad_norm": 0.2175014764070511, "learning_rate": 5.033323792779163e-06, "loss": 0.004, "step": 12310 }, { "epoch": 0.10074825203418245, "grad_norm": 0.420070618391037, "learning_rate": 5.037412601709123e-06, "loss": 0.0051, "step": 12320 }, { "epoch": 0.10083002821278161, "grad_norm": 0.19950683414936066, "learning_rate": 5.041501410639081e-06, "loss": 0.0054, "step": 12330 }, { "epoch": 0.1009118043913808, "grad_norm": 0.39730173349380493, "learning_rate": 5.04559021956904e-06, "loss": 0.0054, "step": 12340 }, { "epoch": 0.10099358056997997, "grad_norm": 0.3557604253292084, "learning_rate": 5.049679028498998e-06, "loss": 0.0057, "step": 12350 }, { "epoch": 0.10107535674857913, "grad_norm": 0.1822202056646347, "learning_rate": 5.053767837428958e-06, "loss": 0.0056, "step": 12360 }, { "epoch": 0.10115713292717832, "grad_norm": 0.19628939032554626, "learning_rate": 5.057856646358917e-06, "loss": 0.0069, "step": 12370 }, { "epoch": 0.10123890910577749, "grad_norm": 0.11980808526277542, "learning_rate": 5.061945455288875e-06, "loss": 0.0031, "step": 12380 }, { "epoch": 0.10132068528437665, "grad_norm": 0.19024015963077545, "learning_rate": 5.0660342642188336e-06, "loss": 0.0036, "step": 12390 }, { "epoch": 0.10140246146297584, "grad_norm": 0.6813333034515381, "learning_rate": 5.0701230731487925e-06, "loss": 0.0064, "step": 12400 }, { "epoch": 0.101484237641575, "grad_norm": 0.4492645859718323, "learning_rate": 5.074211882078751e-06, "loss": 0.0058, "step": 12410 }, { "epoch": 0.10156601382017419, "grad_norm": 0.20754310488700867, "learning_rate": 5.078300691008709e-06, "loss": 0.0055, "step": 12420 }, { "epoch": 0.10164778999877336, "grad_norm": 0.3192213773727417, "learning_rate": 5.082389499938668e-06, "loss": 0.0088, "step": 12430 }, { "epoch": 0.10172956617737253, "grad_norm": 0.11921313405036926, "learning_rate": 5.086478308868626e-06, "loss": 0.0038, "step": 12440 }, { "epoch": 0.10181134235597171, "grad_norm": 0.47654613852500916, "learning_rate": 5.090567117798586e-06, "loss": 0.0045, "step": 12450 }, { "epoch": 0.10189311853457088, "grad_norm": 0.31924083828926086, "learning_rate": 5.094655926728544e-06, "loss": 0.0066, "step": 12460 }, { "epoch": 0.10197489471317005, "grad_norm": 0.07139851897954941, "learning_rate": 5.098744735658503e-06, "loss": 0.0068, "step": 12470 }, { "epoch": 0.10205667089176923, "grad_norm": 0.5400155782699585, "learning_rate": 5.102833544588461e-06, "loss": 0.0071, "step": 12480 }, { "epoch": 0.1021384470703684, "grad_norm": 0.2310018688440323, "learning_rate": 5.106922353518421e-06, "loss": 0.0055, "step": 12490 }, { "epoch": 0.10222022324896758, "grad_norm": 0.17953123152256012, "learning_rate": 5.11101116244838e-06, "loss": 0.0053, "step": 12500 }, { "epoch": 0.10230199942756675, "grad_norm": 0.32012367248535156, "learning_rate": 5.115099971378338e-06, "loss": 0.0062, "step": 12510 }, { "epoch": 0.10238377560616592, "grad_norm": 0.33476465940475464, "learning_rate": 5.119188780308296e-06, "loss": 0.006, "step": 12520 }, { "epoch": 0.1024655517847651, "grad_norm": 0.17103061079978943, "learning_rate": 5.123277589238256e-06, "loss": 0.0056, "step": 12530 }, { "epoch": 0.10254732796336427, "grad_norm": 0.34817495942115784, "learning_rate": 5.1273663981682145e-06, "loss": 0.0048, "step": 12540 }, { "epoch": 0.10262910414196344, "grad_norm": 0.30009469389915466, "learning_rate": 5.1314552070981726e-06, "loss": 0.0057, "step": 12550 }, { "epoch": 0.10271088032056262, "grad_norm": 0.11684536188840866, "learning_rate": 5.1355440160281314e-06, "loss": 0.0043, "step": 12560 }, { "epoch": 0.10279265649916179, "grad_norm": 0.2330314666032791, "learning_rate": 5.13963282495809e-06, "loss": 0.008, "step": 12570 }, { "epoch": 0.10287443267776097, "grad_norm": 0.2974810004234314, "learning_rate": 5.143721633888049e-06, "loss": 0.0046, "step": 12580 }, { "epoch": 0.10295620885636014, "grad_norm": 0.05953258275985718, "learning_rate": 5.147810442818007e-06, "loss": 0.0046, "step": 12590 }, { "epoch": 0.10303798503495931, "grad_norm": 0.3685937225818634, "learning_rate": 5.151899251747966e-06, "loss": 0.0055, "step": 12600 }, { "epoch": 0.1031197612135585, "grad_norm": 0.29219943284988403, "learning_rate": 5.155988060677924e-06, "loss": 0.0065, "step": 12610 }, { "epoch": 0.10320153739215766, "grad_norm": 0.23109813034534454, "learning_rate": 5.160076869607884e-06, "loss": 0.0076, "step": 12620 }, { "epoch": 0.10328331357075683, "grad_norm": 0.14733853936195374, "learning_rate": 5.164165678537842e-06, "loss": 0.0036, "step": 12630 }, { "epoch": 0.10336508974935602, "grad_norm": 0.13692009449005127, "learning_rate": 5.168254487467801e-06, "loss": 0.005, "step": 12640 }, { "epoch": 0.10344686592795518, "grad_norm": 0.18726709485054016, "learning_rate": 5.172343296397759e-06, "loss": 0.005, "step": 12650 }, { "epoch": 0.10352864210655437, "grad_norm": 0.11397925019264221, "learning_rate": 5.176432105327719e-06, "loss": 0.0056, "step": 12660 }, { "epoch": 0.10361041828515354, "grad_norm": 0.4388279616832733, "learning_rate": 5.180520914257678e-06, "loss": 0.0044, "step": 12670 }, { "epoch": 0.1036921944637527, "grad_norm": 0.24643804132938385, "learning_rate": 5.184609723187636e-06, "loss": 0.0062, "step": 12680 }, { "epoch": 0.10377397064235189, "grad_norm": 0.21087653934955597, "learning_rate": 5.188698532117595e-06, "loss": 0.0057, "step": 12690 }, { "epoch": 0.10385574682095106, "grad_norm": 0.26524049043655396, "learning_rate": 5.1927873410475535e-06, "loss": 0.0078, "step": 12700 }, { "epoch": 0.10393752299955022, "grad_norm": 0.4163035750389099, "learning_rate": 5.196876149977512e-06, "loss": 0.0063, "step": 12710 }, { "epoch": 0.10401929917814941, "grad_norm": 0.15011613070964813, "learning_rate": 5.2009649589074704e-06, "loss": 0.004, "step": 12720 }, { "epoch": 0.10410107535674858, "grad_norm": 0.24131453037261963, "learning_rate": 5.205053767837429e-06, "loss": 0.0054, "step": 12730 }, { "epoch": 0.10418285153534776, "grad_norm": 0.15249007940292358, "learning_rate": 5.209142576767387e-06, "loss": 0.005, "step": 12740 }, { "epoch": 0.10426462771394693, "grad_norm": 0.17965981364250183, "learning_rate": 5.213231385697347e-06, "loss": 0.007, "step": 12750 }, { "epoch": 0.1043464038925461, "grad_norm": 0.14027029275894165, "learning_rate": 5.217320194627305e-06, "loss": 0.0047, "step": 12760 }, { "epoch": 0.10442818007114528, "grad_norm": 0.2707688808441162, "learning_rate": 5.221409003557264e-06, "loss": 0.0071, "step": 12770 }, { "epoch": 0.10450995624974445, "grad_norm": 0.0828278511762619, "learning_rate": 5.225497812487222e-06, "loss": 0.0072, "step": 12780 }, { "epoch": 0.10459173242834362, "grad_norm": 0.16468550264835358, "learning_rate": 5.229586621417182e-06, "loss": 0.0053, "step": 12790 }, { "epoch": 0.1046735086069428, "grad_norm": 0.27211061120033264, "learning_rate": 5.233675430347141e-06, "loss": 0.0054, "step": 12800 }, { "epoch": 0.10475528478554197, "grad_norm": 0.49203020334243774, "learning_rate": 5.237764239277099e-06, "loss": 0.0053, "step": 12810 }, { "epoch": 0.10483706096414115, "grad_norm": 0.21157513558864594, "learning_rate": 5.241853048207058e-06, "loss": 0.0027, "step": 12820 }, { "epoch": 0.10491883714274032, "grad_norm": 0.12230520695447922, "learning_rate": 5.245941857137017e-06, "loss": 0.0052, "step": 12830 }, { "epoch": 0.10500061332133949, "grad_norm": 0.183057963848114, "learning_rate": 5.2500306660669755e-06, "loss": 0.0082, "step": 12840 }, { "epoch": 0.10508238949993867, "grad_norm": 0.21471023559570312, "learning_rate": 5.2541194749969336e-06, "loss": 0.0047, "step": 12850 }, { "epoch": 0.10516416567853784, "grad_norm": 0.208570659160614, "learning_rate": 5.2582082839268925e-06, "loss": 0.0074, "step": 12860 }, { "epoch": 0.10524594185713701, "grad_norm": 0.19605427980422974, "learning_rate": 5.262297092856851e-06, "loss": 0.0065, "step": 12870 }, { "epoch": 0.10532771803573619, "grad_norm": 0.5276281833648682, "learning_rate": 5.26638590178681e-06, "loss": 0.0064, "step": 12880 }, { "epoch": 0.10540949421433536, "grad_norm": 0.19056051969528198, "learning_rate": 5.270474710716768e-06, "loss": 0.0043, "step": 12890 }, { "epoch": 0.10549127039293454, "grad_norm": 0.17859677970409393, "learning_rate": 5.274563519646727e-06, "loss": 0.0047, "step": 12900 }, { "epoch": 0.10557304657153371, "grad_norm": 0.20403020083904266, "learning_rate": 5.278652328576685e-06, "loss": 0.0037, "step": 12910 }, { "epoch": 0.10565482275013288, "grad_norm": 0.3248593509197235, "learning_rate": 5.282741137506645e-06, "loss": 0.004, "step": 12920 }, { "epoch": 0.10573659892873206, "grad_norm": 0.22429780662059784, "learning_rate": 5.286829946436604e-06, "loss": 0.0034, "step": 12930 }, { "epoch": 0.10581837510733123, "grad_norm": 0.2560414969921112, "learning_rate": 5.290918755366562e-06, "loss": 0.0085, "step": 12940 }, { "epoch": 0.1059001512859304, "grad_norm": 0.29515719413757324, "learning_rate": 5.29500756429652e-06, "loss": 0.0059, "step": 12950 }, { "epoch": 0.10598192746452959, "grad_norm": 0.22170490026474, "learning_rate": 5.29909637322648e-06, "loss": 0.0048, "step": 12960 }, { "epoch": 0.10606370364312875, "grad_norm": 0.3948929011821747, "learning_rate": 5.303185182156439e-06, "loss": 0.0047, "step": 12970 }, { "epoch": 0.10614547982172794, "grad_norm": 0.42535147070884705, "learning_rate": 5.307273991086397e-06, "loss": 0.0045, "step": 12980 }, { "epoch": 0.1062272560003271, "grad_norm": 0.08654831349849701, "learning_rate": 5.311362800016356e-06, "loss": 0.0042, "step": 12990 }, { "epoch": 0.10630903217892627, "grad_norm": 0.18588215112686157, "learning_rate": 5.3154516089463145e-06, "loss": 0.0069, "step": 13000 }, { "epoch": 0.10639080835752546, "grad_norm": 0.2844898998737335, "learning_rate": 5.319540417876273e-06, "loss": 0.0057, "step": 13010 }, { "epoch": 0.10647258453612463, "grad_norm": 0.5792611241340637, "learning_rate": 5.3236292268062315e-06, "loss": 0.0067, "step": 13020 }, { "epoch": 0.1065543607147238, "grad_norm": 0.2975863516330719, "learning_rate": 5.32771803573619e-06, "loss": 0.0048, "step": 13030 }, { "epoch": 0.10663613689332298, "grad_norm": 0.2297876924276352, "learning_rate": 5.33180684466615e-06, "loss": 0.0063, "step": 13040 }, { "epoch": 0.10671791307192215, "grad_norm": 0.18013343214988708, "learning_rate": 5.335895653596108e-06, "loss": 0.0054, "step": 13050 }, { "epoch": 0.10679968925052133, "grad_norm": 0.10431109368801117, "learning_rate": 5.339984462526066e-06, "loss": 0.0074, "step": 13060 }, { "epoch": 0.1068814654291205, "grad_norm": 0.42335572838783264, "learning_rate": 5.344073271456025e-06, "loss": 0.0058, "step": 13070 }, { "epoch": 0.10696324160771967, "grad_norm": 0.31978997588157654, "learning_rate": 5.348162080385983e-06, "loss": 0.0047, "step": 13080 }, { "epoch": 0.10704501778631885, "grad_norm": 0.18415072560310364, "learning_rate": 5.352250889315943e-06, "loss": 0.0037, "step": 13090 }, { "epoch": 0.10712679396491802, "grad_norm": 0.40185117721557617, "learning_rate": 5.356339698245902e-06, "loss": 0.0057, "step": 13100 }, { "epoch": 0.10720857014351719, "grad_norm": 0.12582595646381378, "learning_rate": 5.36042850717586e-06, "loss": 0.0054, "step": 13110 }, { "epoch": 0.10729034632211637, "grad_norm": 0.18733462691307068, "learning_rate": 5.364517316105819e-06, "loss": 0.0048, "step": 13120 }, { "epoch": 0.10737212250071554, "grad_norm": 0.13133035600185394, "learning_rate": 5.368606125035778e-06, "loss": 0.0061, "step": 13130 }, { "epoch": 0.10745389867931472, "grad_norm": 0.23115289211273193, "learning_rate": 5.3726949339657365e-06, "loss": 0.0047, "step": 13140 }, { "epoch": 0.10753567485791389, "grad_norm": 0.264527291059494, "learning_rate": 5.376783742895695e-06, "loss": 0.005, "step": 13150 }, { "epoch": 0.10761745103651306, "grad_norm": 0.20404213666915894, "learning_rate": 5.3808725518256535e-06, "loss": 0.0046, "step": 13160 }, { "epoch": 0.10769922721511224, "grad_norm": 0.20650121569633484, "learning_rate": 5.384961360755613e-06, "loss": 0.006, "step": 13170 }, { "epoch": 0.10778100339371141, "grad_norm": 0.0807342603802681, "learning_rate": 5.389050169685571e-06, "loss": 0.0056, "step": 13180 }, { "epoch": 0.10786277957231058, "grad_norm": 0.3422909677028656, "learning_rate": 5.393138978615529e-06, "loss": 0.004, "step": 13190 }, { "epoch": 0.10794455575090976, "grad_norm": 0.3891322910785675, "learning_rate": 5.397227787545488e-06, "loss": 0.006, "step": 13200 }, { "epoch": 0.10802633192950893, "grad_norm": 0.19013053178787231, "learning_rate": 5.401316596475446e-06, "loss": 0.0089, "step": 13210 }, { "epoch": 0.10810810810810811, "grad_norm": 0.3437032997608185, "learning_rate": 5.405405405405406e-06, "loss": 0.0064, "step": 13220 }, { "epoch": 0.10818988428670728, "grad_norm": 0.22450971603393555, "learning_rate": 5.409494214335365e-06, "loss": 0.0049, "step": 13230 }, { "epoch": 0.10827166046530645, "grad_norm": 0.42465850710868835, "learning_rate": 5.413583023265323e-06, "loss": 0.0039, "step": 13240 }, { "epoch": 0.10835343664390563, "grad_norm": 0.14723894000053406, "learning_rate": 5.417671832195282e-06, "loss": 0.0074, "step": 13250 }, { "epoch": 0.1084352128225048, "grad_norm": 0.12248914688825607, "learning_rate": 5.421760641125241e-06, "loss": 0.0074, "step": 13260 }, { "epoch": 0.10851698900110397, "grad_norm": 0.19625546038150787, "learning_rate": 5.4258494500552e-06, "loss": 0.0053, "step": 13270 }, { "epoch": 0.10859876517970316, "grad_norm": 0.11208714544773102, "learning_rate": 5.429938258985158e-06, "loss": 0.004, "step": 13280 }, { "epoch": 0.10868054135830232, "grad_norm": 0.18309834599494934, "learning_rate": 5.434027067915117e-06, "loss": 0.0049, "step": 13290 }, { "epoch": 0.1087623175369015, "grad_norm": 0.3972659409046173, "learning_rate": 5.4381158768450755e-06, "loss": 0.0072, "step": 13300 }, { "epoch": 0.10884409371550068, "grad_norm": 0.17099712789058685, "learning_rate": 5.442204685775034e-06, "loss": 0.0029, "step": 13310 }, { "epoch": 0.10892586989409984, "grad_norm": 0.19902847707271576, "learning_rate": 5.4462934947049925e-06, "loss": 0.0065, "step": 13320 }, { "epoch": 0.10900764607269903, "grad_norm": 0.20948822796344757, "learning_rate": 5.450382303634951e-06, "loss": 0.005, "step": 13330 }, { "epoch": 0.1090894222512982, "grad_norm": 0.3119771480560303, "learning_rate": 5.454471112564911e-06, "loss": 0.0075, "step": 13340 }, { "epoch": 0.10917119842989736, "grad_norm": 0.607835590839386, "learning_rate": 5.458559921494869e-06, "loss": 0.0064, "step": 13350 }, { "epoch": 0.10925297460849655, "grad_norm": 0.18808308243751526, "learning_rate": 5.462648730424828e-06, "loss": 0.0067, "step": 13360 }, { "epoch": 0.10933475078709572, "grad_norm": 0.2268318384885788, "learning_rate": 5.466737539354786e-06, "loss": 0.0043, "step": 13370 }, { "epoch": 0.1094165269656949, "grad_norm": 0.3127067983150482, "learning_rate": 5.470826348284744e-06, "loss": 0.0045, "step": 13380 }, { "epoch": 0.10949830314429407, "grad_norm": 0.353005975484848, "learning_rate": 5.474915157214704e-06, "loss": 0.0078, "step": 13390 }, { "epoch": 0.10958007932289324, "grad_norm": 0.3389965295791626, "learning_rate": 5.479003966144663e-06, "loss": 0.0041, "step": 13400 }, { "epoch": 0.10966185550149242, "grad_norm": 0.12946949899196625, "learning_rate": 5.483092775074621e-06, "loss": 0.0072, "step": 13410 }, { "epoch": 0.10974363168009159, "grad_norm": 0.18596619367599487, "learning_rate": 5.48718158400458e-06, "loss": 0.0075, "step": 13420 }, { "epoch": 0.10982540785869076, "grad_norm": 0.1712915301322937, "learning_rate": 5.491270392934539e-06, "loss": 0.0055, "step": 13430 }, { "epoch": 0.10990718403728994, "grad_norm": 0.10986873507499695, "learning_rate": 5.4953592018644976e-06, "loss": 0.0072, "step": 13440 }, { "epoch": 0.10998896021588911, "grad_norm": 0.18526148796081543, "learning_rate": 5.499448010794456e-06, "loss": 0.005, "step": 13450 }, { "epoch": 0.11007073639448829, "grad_norm": 0.23552335798740387, "learning_rate": 5.5035368197244145e-06, "loss": 0.0049, "step": 13460 }, { "epoch": 0.11015251257308746, "grad_norm": 0.18874835968017578, "learning_rate": 5.507625628654374e-06, "loss": 0.0103, "step": 13470 }, { "epoch": 0.11023428875168663, "grad_norm": 0.2648612856864929, "learning_rate": 5.511714437584332e-06, "loss": 0.0057, "step": 13480 }, { "epoch": 0.11031606493028581, "grad_norm": 0.17734472453594208, "learning_rate": 5.51580324651429e-06, "loss": 0.0042, "step": 13490 }, { "epoch": 0.11039784110888498, "grad_norm": 0.1472131311893463, "learning_rate": 5.519892055444249e-06, "loss": 0.0053, "step": 13500 }, { "epoch": 0.11047961728748415, "grad_norm": 0.11855324357748032, "learning_rate": 5.523980864374209e-06, "loss": 0.0073, "step": 13510 }, { "epoch": 0.11056139346608333, "grad_norm": 0.22463107109069824, "learning_rate": 5.528069673304167e-06, "loss": 0.0069, "step": 13520 }, { "epoch": 0.1106431696446825, "grad_norm": 0.20247821509838104, "learning_rate": 5.532158482234126e-06, "loss": 0.0054, "step": 13530 }, { "epoch": 0.11072494582328168, "grad_norm": 0.2219718098640442, "learning_rate": 5.536247291164084e-06, "loss": 0.0056, "step": 13540 }, { "epoch": 0.11080672200188085, "grad_norm": 0.4146811366081238, "learning_rate": 5.540336100094043e-06, "loss": 0.0069, "step": 13550 }, { "epoch": 0.11088849818048002, "grad_norm": 0.19439390301704407, "learning_rate": 5.544424909024002e-06, "loss": 0.0077, "step": 13560 }, { "epoch": 0.1109702743590792, "grad_norm": 0.23279179632663727, "learning_rate": 5.548513717953961e-06, "loss": 0.0039, "step": 13570 }, { "epoch": 0.11105205053767837, "grad_norm": 0.3688916563987732, "learning_rate": 5.552602526883919e-06, "loss": 0.0043, "step": 13580 }, { "epoch": 0.11113382671627754, "grad_norm": 0.17315472662448883, "learning_rate": 5.556691335813878e-06, "loss": 0.0053, "step": 13590 }, { "epoch": 0.11121560289487673, "grad_norm": 0.16053897142410278, "learning_rate": 5.560780144743837e-06, "loss": 0.0045, "step": 13600 }, { "epoch": 0.1112973790734759, "grad_norm": 0.10708344727754593, "learning_rate": 5.5648689536737954e-06, "loss": 0.0063, "step": 13610 }, { "epoch": 0.11137915525207508, "grad_norm": 0.23824535310268402, "learning_rate": 5.5689577626037535e-06, "loss": 0.0052, "step": 13620 }, { "epoch": 0.11146093143067425, "grad_norm": 0.1429678350687027, "learning_rate": 5.573046571533712e-06, "loss": 0.0044, "step": 13630 }, { "epoch": 0.11154270760927341, "grad_norm": 0.25579535961151123, "learning_rate": 5.577135380463672e-06, "loss": 0.0043, "step": 13640 }, { "epoch": 0.1116244837878726, "grad_norm": 0.2629616856575012, "learning_rate": 5.58122418939363e-06, "loss": 0.0037, "step": 13650 }, { "epoch": 0.11170625996647177, "grad_norm": 0.21862289309501648, "learning_rate": 5.585312998323589e-06, "loss": 0.0083, "step": 13660 }, { "epoch": 0.11178803614507093, "grad_norm": 0.19621726870536804, "learning_rate": 5.589401807253547e-06, "loss": 0.0054, "step": 13670 }, { "epoch": 0.11186981232367012, "grad_norm": 0.14214102923870087, "learning_rate": 5.593490616183507e-06, "loss": 0.0056, "step": 13680 }, { "epoch": 0.11195158850226929, "grad_norm": 0.21519261598587036, "learning_rate": 5.597579425113465e-06, "loss": 0.0056, "step": 13690 }, { "epoch": 0.11203336468086847, "grad_norm": 0.10825074464082718, "learning_rate": 5.601668234043424e-06, "loss": 0.0064, "step": 13700 }, { "epoch": 0.11211514085946764, "grad_norm": 0.15087924897670746, "learning_rate": 5.605757042973382e-06, "loss": 0.0065, "step": 13710 }, { "epoch": 0.1121969170380668, "grad_norm": 0.3480886220932007, "learning_rate": 5.609845851903341e-06, "loss": 0.0053, "step": 13720 }, { "epoch": 0.11227869321666599, "grad_norm": 0.161983922123909, "learning_rate": 5.6139346608333e-06, "loss": 0.0067, "step": 13730 }, { "epoch": 0.11236046939526516, "grad_norm": 0.29367712140083313, "learning_rate": 5.618023469763259e-06, "loss": 0.0049, "step": 13740 }, { "epoch": 0.11244224557386433, "grad_norm": 0.2134099006652832, "learning_rate": 5.622112278693217e-06, "loss": 0.005, "step": 13750 }, { "epoch": 0.11252402175246351, "grad_norm": 0.07722951471805573, "learning_rate": 5.6262010876231755e-06, "loss": 0.0056, "step": 13760 }, { "epoch": 0.11260579793106268, "grad_norm": 0.21026286482810974, "learning_rate": 5.630289896553135e-06, "loss": 0.0069, "step": 13770 }, { "epoch": 0.11268757410966186, "grad_norm": 0.30654576420783997, "learning_rate": 5.634378705483093e-06, "loss": 0.0084, "step": 13780 }, { "epoch": 0.11276935028826103, "grad_norm": 0.1404421031475067, "learning_rate": 5.638467514413052e-06, "loss": 0.0049, "step": 13790 }, { "epoch": 0.1128511264668602, "grad_norm": 0.057266972959041595, "learning_rate": 5.64255632334301e-06, "loss": 0.0037, "step": 13800 }, { "epoch": 0.11293290264545938, "grad_norm": 0.2995694875717163, "learning_rate": 5.64664513227297e-06, "loss": 0.0062, "step": 13810 }, { "epoch": 0.11301467882405855, "grad_norm": 0.1842023730278015, "learning_rate": 5.650733941202928e-06, "loss": 0.0056, "step": 13820 }, { "epoch": 0.11309645500265772, "grad_norm": 0.4310401380062103, "learning_rate": 5.654822750132887e-06, "loss": 0.0071, "step": 13830 }, { "epoch": 0.1131782311812569, "grad_norm": 0.06496649235486984, "learning_rate": 5.658911559062845e-06, "loss": 0.0054, "step": 13840 }, { "epoch": 0.11326000735985607, "grad_norm": 0.36809587478637695, "learning_rate": 5.663000367992804e-06, "loss": 0.0075, "step": 13850 }, { "epoch": 0.11334178353845525, "grad_norm": 0.10896213352680206, "learning_rate": 5.667089176922763e-06, "loss": 0.0068, "step": 13860 }, { "epoch": 0.11342355971705442, "grad_norm": 0.283379465341568, "learning_rate": 5.671177985852722e-06, "loss": 0.0062, "step": 13870 }, { "epoch": 0.11350533589565359, "grad_norm": 0.3854406774044037, "learning_rate": 5.67526679478268e-06, "loss": 0.0091, "step": 13880 }, { "epoch": 0.11358711207425277, "grad_norm": 0.44156399369239807, "learning_rate": 5.679355603712639e-06, "loss": 0.007, "step": 13890 }, { "epoch": 0.11366888825285194, "grad_norm": 0.27830782532691956, "learning_rate": 5.683444412642598e-06, "loss": 0.0043, "step": 13900 }, { "epoch": 0.11375066443145111, "grad_norm": 0.6328401565551758, "learning_rate": 5.6875332215725565e-06, "loss": 0.0052, "step": 13910 }, { "epoch": 0.1138324406100503, "grad_norm": 0.14272290468215942, "learning_rate": 5.691622030502515e-06, "loss": 0.0027, "step": 13920 }, { "epoch": 0.11391421678864946, "grad_norm": 0.17286694049835205, "learning_rate": 5.695710839432473e-06, "loss": 0.0101, "step": 13930 }, { "epoch": 0.11399599296724865, "grad_norm": 0.07934601604938507, "learning_rate": 5.699799648362433e-06, "loss": 0.0049, "step": 13940 }, { "epoch": 0.11407776914584782, "grad_norm": 0.2530420124530792, "learning_rate": 5.703888457292391e-06, "loss": 0.0043, "step": 13950 }, { "epoch": 0.11415954532444698, "grad_norm": 0.3523297607898712, "learning_rate": 5.70797726622235e-06, "loss": 0.0119, "step": 13960 }, { "epoch": 0.11424132150304617, "grad_norm": 0.10704316943883896, "learning_rate": 5.712066075152308e-06, "loss": 0.0055, "step": 13970 }, { "epoch": 0.11432309768164534, "grad_norm": 0.31180429458618164, "learning_rate": 5.716154884082268e-06, "loss": 0.0058, "step": 13980 }, { "epoch": 0.1144048738602445, "grad_norm": 0.19983051717281342, "learning_rate": 5.720243693012226e-06, "loss": 0.0055, "step": 13990 }, { "epoch": 0.11448665003884369, "grad_norm": 0.3045845925807953, "learning_rate": 5.724332501942185e-06, "loss": 0.0049, "step": 14000 }, { "epoch": 0.11456842621744286, "grad_norm": 0.1689481884241104, "learning_rate": 5.728421310872143e-06, "loss": 0.0067, "step": 14010 }, { "epoch": 0.11465020239604204, "grad_norm": 0.27681463956832886, "learning_rate": 5.732510119802102e-06, "loss": 0.0058, "step": 14020 }, { "epoch": 0.11473197857464121, "grad_norm": 0.2668803036212921, "learning_rate": 5.7365989287320615e-06, "loss": 0.0072, "step": 14030 }, { "epoch": 0.11481375475324038, "grad_norm": 0.07700426876544952, "learning_rate": 5.74068773766202e-06, "loss": 0.005, "step": 14040 }, { "epoch": 0.11489553093183956, "grad_norm": 0.11485130339860916, "learning_rate": 5.744776546591978e-06, "loss": 0.0074, "step": 14050 }, { "epoch": 0.11497730711043873, "grad_norm": 0.25198400020599365, "learning_rate": 5.7488653555219365e-06, "loss": 0.0083, "step": 14060 }, { "epoch": 0.1150590832890379, "grad_norm": 0.17054037749767303, "learning_rate": 5.752954164451896e-06, "loss": 0.0068, "step": 14070 }, { "epoch": 0.11514085946763708, "grad_norm": 0.24447467923164368, "learning_rate": 5.757042973381854e-06, "loss": 0.0051, "step": 14080 }, { "epoch": 0.11522263564623625, "grad_norm": 0.5747367739677429, "learning_rate": 5.761131782311813e-06, "loss": 0.007, "step": 14090 }, { "epoch": 0.11530441182483543, "grad_norm": 0.08610384166240692, "learning_rate": 5.765220591241771e-06, "loss": 0.005, "step": 14100 }, { "epoch": 0.1153861880034346, "grad_norm": 0.15227068960666656, "learning_rate": 5.769309400171731e-06, "loss": 0.0052, "step": 14110 }, { "epoch": 0.11546796418203377, "grad_norm": 0.09673766046762466, "learning_rate": 5.773398209101689e-06, "loss": 0.0041, "step": 14120 }, { "epoch": 0.11554974036063295, "grad_norm": 0.15528464317321777, "learning_rate": 5.777487018031648e-06, "loss": 0.0068, "step": 14130 }, { "epoch": 0.11563151653923212, "grad_norm": 0.4201797842979431, "learning_rate": 5.781575826961606e-06, "loss": 0.0057, "step": 14140 }, { "epoch": 0.11571329271783129, "grad_norm": 0.31351009011268616, "learning_rate": 5.785664635891566e-06, "loss": 0.0063, "step": 14150 }, { "epoch": 0.11579506889643047, "grad_norm": 0.17532511055469513, "learning_rate": 5.789753444821524e-06, "loss": 0.0073, "step": 14160 }, { "epoch": 0.11587684507502964, "grad_norm": 0.13101983070373535, "learning_rate": 5.793842253751483e-06, "loss": 0.0058, "step": 14170 }, { "epoch": 0.11595862125362882, "grad_norm": 0.2664119005203247, "learning_rate": 5.797931062681441e-06, "loss": 0.0058, "step": 14180 }, { "epoch": 0.11604039743222799, "grad_norm": 0.34731975197792053, "learning_rate": 5.8020198716114e-06, "loss": 0.0042, "step": 14190 }, { "epoch": 0.11612217361082716, "grad_norm": 0.20340891182422638, "learning_rate": 5.8061086805413594e-06, "loss": 0.0064, "step": 14200 }, { "epoch": 0.11620394978942634, "grad_norm": 0.1585230529308319, "learning_rate": 5.8101974894713175e-06, "loss": 0.0041, "step": 14210 }, { "epoch": 0.11628572596802551, "grad_norm": 0.40366673469543457, "learning_rate": 5.814286298401276e-06, "loss": 0.007, "step": 14220 }, { "epoch": 0.11636750214662468, "grad_norm": 0.43718191981315613, "learning_rate": 5.8183751073312344e-06, "loss": 0.0078, "step": 14230 }, { "epoch": 0.11644927832522387, "grad_norm": 0.5021625757217407, "learning_rate": 5.822463916261194e-06, "loss": 0.0069, "step": 14240 }, { "epoch": 0.11653105450382303, "grad_norm": 0.07654736936092377, "learning_rate": 5.826552725191152e-06, "loss": 0.0048, "step": 14250 }, { "epoch": 0.11661283068242222, "grad_norm": 0.23758991062641144, "learning_rate": 5.830641534121111e-06, "loss": 0.0076, "step": 14260 }, { "epoch": 0.11669460686102139, "grad_norm": 0.19833868741989136, "learning_rate": 5.834730343051069e-06, "loss": 0.0063, "step": 14270 }, { "epoch": 0.11677638303962055, "grad_norm": 0.312337189912796, "learning_rate": 5.838819151981029e-06, "loss": 0.0056, "step": 14280 }, { "epoch": 0.11685815921821974, "grad_norm": 0.44800353050231934, "learning_rate": 5.842907960910987e-06, "loss": 0.0031, "step": 14290 }, { "epoch": 0.1169399353968189, "grad_norm": 0.2554435729980469, "learning_rate": 5.846996769840946e-06, "loss": 0.0054, "step": 14300 }, { "epoch": 0.11702171157541807, "grad_norm": 0.14716589450836182, "learning_rate": 5.851085578770904e-06, "loss": 0.0073, "step": 14310 }, { "epoch": 0.11710348775401726, "grad_norm": 0.17068973183631897, "learning_rate": 5.855174387700864e-06, "loss": 0.0031, "step": 14320 }, { "epoch": 0.11718526393261643, "grad_norm": 0.17807281017303467, "learning_rate": 5.8592631966308226e-06, "loss": 0.0054, "step": 14330 }, { "epoch": 0.11726704011121561, "grad_norm": 0.18744798004627228, "learning_rate": 5.863352005560781e-06, "loss": 0.0044, "step": 14340 }, { "epoch": 0.11734881628981478, "grad_norm": 0.20035940408706665, "learning_rate": 5.8674408144907395e-06, "loss": 0.0072, "step": 14350 }, { "epoch": 0.11743059246841395, "grad_norm": 0.2793084681034088, "learning_rate": 5.8715296234206976e-06, "loss": 0.0064, "step": 14360 }, { "epoch": 0.11751236864701313, "grad_norm": 0.23685705661773682, "learning_rate": 5.875618432350657e-06, "loss": 0.006, "step": 14370 }, { "epoch": 0.1175941448256123, "grad_norm": 0.2040739357471466, "learning_rate": 5.879707241280615e-06, "loss": 0.0071, "step": 14380 }, { "epoch": 0.11767592100421147, "grad_norm": 0.35830578207969666, "learning_rate": 5.883796050210574e-06, "loss": 0.0042, "step": 14390 }, { "epoch": 0.11775769718281065, "grad_norm": 0.15059874951839447, "learning_rate": 5.887884859140532e-06, "loss": 0.005, "step": 14400 }, { "epoch": 0.11783947336140982, "grad_norm": 0.21858780086040497, "learning_rate": 5.891973668070492e-06, "loss": 0.0059, "step": 14410 }, { "epoch": 0.117921249540009, "grad_norm": 0.2111825942993164, "learning_rate": 5.89606247700045e-06, "loss": 0.0039, "step": 14420 }, { "epoch": 0.11800302571860817, "grad_norm": 0.131398543715477, "learning_rate": 5.900151285930409e-06, "loss": 0.0059, "step": 14430 }, { "epoch": 0.11808480189720734, "grad_norm": 0.10813804715871811, "learning_rate": 5.904240094860367e-06, "loss": 0.011, "step": 14440 }, { "epoch": 0.11816657807580652, "grad_norm": 0.15639729797840118, "learning_rate": 5.908328903790327e-06, "loss": 0.0047, "step": 14450 }, { "epoch": 0.11824835425440569, "grad_norm": 0.12067373096942902, "learning_rate": 5.912417712720286e-06, "loss": 0.0034, "step": 14460 }, { "epoch": 0.11833013043300486, "grad_norm": 0.2131441831588745, "learning_rate": 5.916506521650244e-06, "loss": 0.0055, "step": 14470 }, { "epoch": 0.11841190661160404, "grad_norm": 0.11781197041273117, "learning_rate": 5.920595330580202e-06, "loss": 0.0039, "step": 14480 }, { "epoch": 0.11849368279020321, "grad_norm": 0.5107464790344238, "learning_rate": 5.924684139510161e-06, "loss": 0.0072, "step": 14490 }, { "epoch": 0.1185754589688024, "grad_norm": 0.33869677782058716, "learning_rate": 5.9287729484401204e-06, "loss": 0.0041, "step": 14500 }, { "epoch": 0.11865723514740156, "grad_norm": 0.19069169461727142, "learning_rate": 5.9328617573700785e-06, "loss": 0.0063, "step": 14510 }, { "epoch": 0.11873901132600073, "grad_norm": 0.5146272778511047, "learning_rate": 5.936950566300037e-06, "loss": 0.0058, "step": 14520 }, { "epoch": 0.11882078750459991, "grad_norm": 0.12695127725601196, "learning_rate": 5.9410393752299954e-06, "loss": 0.0058, "step": 14530 }, { "epoch": 0.11890256368319908, "grad_norm": 0.16362471878528595, "learning_rate": 5.945128184159955e-06, "loss": 0.0055, "step": 14540 }, { "epoch": 0.11898433986179825, "grad_norm": 0.2826056182384491, "learning_rate": 5.949216993089913e-06, "loss": 0.0061, "step": 14550 }, { "epoch": 0.11906611604039744, "grad_norm": 0.17224527895450592, "learning_rate": 5.953305802019872e-06, "loss": 0.0053, "step": 14560 }, { "epoch": 0.1191478922189966, "grad_norm": 0.16941645741462708, "learning_rate": 5.95739461094983e-06, "loss": 0.0047, "step": 14570 }, { "epoch": 0.11922966839759579, "grad_norm": 0.1893022656440735, "learning_rate": 5.96148341987979e-06, "loss": 0.004, "step": 14580 }, { "epoch": 0.11931144457619496, "grad_norm": 0.24261721968650818, "learning_rate": 5.965572228809748e-06, "loss": 0.0043, "step": 14590 }, { "epoch": 0.11939322075479412, "grad_norm": 0.2647351920604706, "learning_rate": 5.969661037739707e-06, "loss": 0.0053, "step": 14600 }, { "epoch": 0.11947499693339331, "grad_norm": 0.44967120885849, "learning_rate": 5.973749846669665e-06, "loss": 0.0053, "step": 14610 }, { "epoch": 0.11955677311199248, "grad_norm": 0.3263256549835205, "learning_rate": 5.977838655599625e-06, "loss": 0.0063, "step": 14620 }, { "epoch": 0.11963854929059164, "grad_norm": 0.5708566904067993, "learning_rate": 5.981927464529584e-06, "loss": 0.0055, "step": 14630 }, { "epoch": 0.11972032546919083, "grad_norm": 0.3113032579421997, "learning_rate": 5.986016273459542e-06, "loss": 0.0054, "step": 14640 }, { "epoch": 0.11980210164779, "grad_norm": 0.31934434175491333, "learning_rate": 5.9901050823895005e-06, "loss": 0.0051, "step": 14650 }, { "epoch": 0.11988387782638918, "grad_norm": 0.31739798188209534, "learning_rate": 5.994193891319459e-06, "loss": 0.0052, "step": 14660 }, { "epoch": 0.11996565400498835, "grad_norm": 0.141579732298851, "learning_rate": 5.998282700249418e-06, "loss": 0.0045, "step": 14670 }, { "epoch": 0.12004743018358752, "grad_norm": 0.5202257633209229, "learning_rate": 6.002371509179376e-06, "loss": 0.0067, "step": 14680 }, { "epoch": 0.1201292063621867, "grad_norm": 0.16416800022125244, "learning_rate": 6.006460318109335e-06, "loss": 0.0075, "step": 14690 }, { "epoch": 0.12021098254078587, "grad_norm": 0.1756303608417511, "learning_rate": 6.010549127039293e-06, "loss": 0.0067, "step": 14700 }, { "epoch": 0.12029275871938504, "grad_norm": 0.2776372730731964, "learning_rate": 6.014637935969253e-06, "loss": 0.005, "step": 14710 }, { "epoch": 0.12037453489798422, "grad_norm": 0.16806560754776, "learning_rate": 6.018726744899211e-06, "loss": 0.0046, "step": 14720 }, { "epoch": 0.12045631107658339, "grad_norm": 0.2246423214673996, "learning_rate": 6.02281555382917e-06, "loss": 0.0067, "step": 14730 }, { "epoch": 0.12053808725518257, "grad_norm": 0.07488560676574707, "learning_rate": 6.026904362759128e-06, "loss": 0.0056, "step": 14740 }, { "epoch": 0.12061986343378174, "grad_norm": 0.28169965744018555, "learning_rate": 6.030993171689088e-06, "loss": 0.0087, "step": 14750 }, { "epoch": 0.12070163961238091, "grad_norm": 0.5213766098022461, "learning_rate": 6.035081980619047e-06, "loss": 0.0095, "step": 14760 }, { "epoch": 0.12078341579098009, "grad_norm": 0.22602306306362152, "learning_rate": 6.039170789549005e-06, "loss": 0.0053, "step": 14770 }, { "epoch": 0.12086519196957926, "grad_norm": 0.25672414898872375, "learning_rate": 6.043259598478964e-06, "loss": 0.0039, "step": 14780 }, { "epoch": 0.12094696814817843, "grad_norm": 0.19049425423145294, "learning_rate": 6.0473484074089226e-06, "loss": 0.005, "step": 14790 }, { "epoch": 0.12102874432677761, "grad_norm": 0.15215468406677246, "learning_rate": 6.0514372163388815e-06, "loss": 0.0044, "step": 14800 }, { "epoch": 0.12111052050537678, "grad_norm": 0.15832844376564026, "learning_rate": 6.0555260252688395e-06, "loss": 0.0041, "step": 14810 }, { "epoch": 0.12119229668397596, "grad_norm": 0.20601429045200348, "learning_rate": 6.059614834198798e-06, "loss": 0.0044, "step": 14820 }, { "epoch": 0.12127407286257513, "grad_norm": 0.22647975385189056, "learning_rate": 6.0637036431287565e-06, "loss": 0.0043, "step": 14830 }, { "epoch": 0.1213558490411743, "grad_norm": 0.31584152579307556, "learning_rate": 6.067792452058716e-06, "loss": 0.0053, "step": 14840 }, { "epoch": 0.12143762521977348, "grad_norm": 0.4695416986942291, "learning_rate": 6.071881260988674e-06, "loss": 0.0066, "step": 14850 }, { "epoch": 0.12151940139837265, "grad_norm": 0.09496989101171494, "learning_rate": 6.075970069918633e-06, "loss": 0.0046, "step": 14860 }, { "epoch": 0.12160117757697182, "grad_norm": 0.12892475724220276, "learning_rate": 6.080058878848591e-06, "loss": 0.0065, "step": 14870 }, { "epoch": 0.121682953755571, "grad_norm": 0.09496725350618362, "learning_rate": 6.084147687778551e-06, "loss": 0.0057, "step": 14880 }, { "epoch": 0.12176472993417017, "grad_norm": 0.35298392176628113, "learning_rate": 6.08823649670851e-06, "loss": 0.006, "step": 14890 }, { "epoch": 0.12184650611276936, "grad_norm": 0.4878143072128296, "learning_rate": 6.092325305638468e-06, "loss": 0.0039, "step": 14900 }, { "epoch": 0.12192828229136853, "grad_norm": 0.3184422254562378, "learning_rate": 6.096414114568426e-06, "loss": 0.0055, "step": 14910 }, { "epoch": 0.1220100584699677, "grad_norm": 0.19222013652324677, "learning_rate": 6.100502923498386e-06, "loss": 0.0045, "step": 14920 }, { "epoch": 0.12209183464856688, "grad_norm": 0.13992895185947418, "learning_rate": 6.104591732428345e-06, "loss": 0.0064, "step": 14930 }, { "epoch": 0.12217361082716605, "grad_norm": 0.23135222494602203, "learning_rate": 6.108680541358303e-06, "loss": 0.0112, "step": 14940 }, { "epoch": 0.12225538700576521, "grad_norm": 0.10293523967266083, "learning_rate": 6.1127693502882616e-06, "loss": 0.0035, "step": 14950 }, { "epoch": 0.1223371631843644, "grad_norm": 0.2737216353416443, "learning_rate": 6.11685815921822e-06, "loss": 0.0068, "step": 14960 }, { "epoch": 0.12241893936296357, "grad_norm": 0.32154932618141174, "learning_rate": 6.120946968148179e-06, "loss": 0.0084, "step": 14970 }, { "epoch": 0.12250071554156275, "grad_norm": 0.3131200671195984, "learning_rate": 6.125035777078137e-06, "loss": 0.0056, "step": 14980 }, { "epoch": 0.12258249172016192, "grad_norm": 0.22667616605758667, "learning_rate": 6.129124586008096e-06, "loss": 0.0071, "step": 14990 }, { "epoch": 0.12266426789876109, "grad_norm": 0.20983771979808807, "learning_rate": 6.133213394938054e-06, "loss": 0.0061, "step": 15000 }, { "epoch": 0.12274604407736027, "grad_norm": 0.2911241948604584, "learning_rate": 6.137302203868014e-06, "loss": 0.0035, "step": 15010 }, { "epoch": 0.12282782025595944, "grad_norm": 0.39526137709617615, "learning_rate": 6.141391012797972e-06, "loss": 0.0063, "step": 15020 }, { "epoch": 0.12290959643455861, "grad_norm": 0.29554495215415955, "learning_rate": 6.145479821727931e-06, "loss": 0.0047, "step": 15030 }, { "epoch": 0.12299137261315779, "grad_norm": 0.27871131896972656, "learning_rate": 6.149568630657889e-06, "loss": 0.0059, "step": 15040 }, { "epoch": 0.12307314879175696, "grad_norm": 0.29101797938346863, "learning_rate": 6.153657439587849e-06, "loss": 0.0045, "step": 15050 }, { "epoch": 0.12315492497035614, "grad_norm": 0.31039267778396606, "learning_rate": 6.157746248517808e-06, "loss": 0.0045, "step": 15060 }, { "epoch": 0.12323670114895531, "grad_norm": 0.32320457696914673, "learning_rate": 6.161835057447766e-06, "loss": 0.0044, "step": 15070 }, { "epoch": 0.12331847732755448, "grad_norm": 0.36189723014831543, "learning_rate": 6.165923866377725e-06, "loss": 0.0069, "step": 15080 }, { "epoch": 0.12340025350615366, "grad_norm": 0.26530617475509644, "learning_rate": 6.170012675307684e-06, "loss": 0.0037, "step": 15090 }, { "epoch": 0.12348202968475283, "grad_norm": 0.32490772008895874, "learning_rate": 6.1741014842376425e-06, "loss": 0.0052, "step": 15100 }, { "epoch": 0.123563805863352, "grad_norm": 0.19107389450073242, "learning_rate": 6.1781902931676005e-06, "loss": 0.0065, "step": 15110 }, { "epoch": 0.12364558204195118, "grad_norm": 0.10495898127555847, "learning_rate": 6.1822791020975594e-06, "loss": 0.0043, "step": 15120 }, { "epoch": 0.12372735822055035, "grad_norm": 0.30764585733413696, "learning_rate": 6.1863679110275175e-06, "loss": 0.0051, "step": 15130 }, { "epoch": 0.12380913439914953, "grad_norm": 0.25797998905181885, "learning_rate": 6.190456719957477e-06, "loss": 0.0058, "step": 15140 }, { "epoch": 0.1238909105777487, "grad_norm": 0.22082342207431793, "learning_rate": 6.194545528887435e-06, "loss": 0.0059, "step": 15150 }, { "epoch": 0.12397268675634787, "grad_norm": 0.31710097193717957, "learning_rate": 6.198634337817394e-06, "loss": 0.0036, "step": 15160 }, { "epoch": 0.12405446293494705, "grad_norm": 0.2687012553215027, "learning_rate": 6.202723146747352e-06, "loss": 0.0045, "step": 15170 }, { "epoch": 0.12413623911354622, "grad_norm": 0.2212640792131424, "learning_rate": 6.206811955677312e-06, "loss": 0.0035, "step": 15180 }, { "epoch": 0.12421801529214539, "grad_norm": 0.1456516534090042, "learning_rate": 6.210900764607271e-06, "loss": 0.0041, "step": 15190 }, { "epoch": 0.12429979147074458, "grad_norm": 0.17782005667686462, "learning_rate": 6.214989573537229e-06, "loss": 0.0061, "step": 15200 }, { "epoch": 0.12438156764934374, "grad_norm": 0.16266986727714539, "learning_rate": 6.219078382467188e-06, "loss": 0.0053, "step": 15210 }, { "epoch": 0.12446334382794293, "grad_norm": 0.39479896426200867, "learning_rate": 6.223167191397147e-06, "loss": 0.006, "step": 15220 }, { "epoch": 0.1245451200065421, "grad_norm": 0.1568063646554947, "learning_rate": 6.227256000327106e-06, "loss": 0.005, "step": 15230 }, { "epoch": 0.12462689618514126, "grad_norm": 0.323108047246933, "learning_rate": 6.231344809257064e-06, "loss": 0.0049, "step": 15240 }, { "epoch": 0.12470867236374045, "grad_norm": 0.32096177339553833, "learning_rate": 6.2354336181870226e-06, "loss": 0.0046, "step": 15250 }, { "epoch": 0.12479044854233962, "grad_norm": 0.2693531811237335, "learning_rate": 6.2395224271169815e-06, "loss": 0.0047, "step": 15260 }, { "epoch": 0.12487222472093878, "grad_norm": 0.17118436098098755, "learning_rate": 6.24361123604694e-06, "loss": 0.0033, "step": 15270 }, { "epoch": 0.12495400089953797, "grad_norm": 0.12037146091461182, "learning_rate": 6.247700044976898e-06, "loss": 0.0061, "step": 15280 }, { "epoch": 0.12503577707813715, "grad_norm": 0.19543711841106415, "learning_rate": 6.251788853906857e-06, "loss": 0.0059, "step": 15290 }, { "epoch": 0.12511755325673632, "grad_norm": 0.3153015077114105, "learning_rate": 6.255877662836815e-06, "loss": 0.0058, "step": 15300 }, { "epoch": 0.1251993294353355, "grad_norm": 0.3164662718772888, "learning_rate": 6.259966471766775e-06, "loss": 0.0062, "step": 15310 }, { "epoch": 0.12528110561393466, "grad_norm": 0.25789809226989746, "learning_rate": 6.264055280696734e-06, "loss": 0.0103, "step": 15320 }, { "epoch": 0.12536288179253383, "grad_norm": 0.227155864238739, "learning_rate": 6.268144089626692e-06, "loss": 0.0049, "step": 15330 }, { "epoch": 0.12544465797113302, "grad_norm": 0.6849868297576904, "learning_rate": 6.27223289855665e-06, "loss": 0.0051, "step": 15340 }, { "epoch": 0.1255264341497322, "grad_norm": 0.37071844935417175, "learning_rate": 6.27632170748661e-06, "loss": 0.0086, "step": 15350 }, { "epoch": 0.12560821032833136, "grad_norm": 0.09949173778295517, "learning_rate": 6.280410516416569e-06, "loss": 0.005, "step": 15360 }, { "epoch": 0.12568998650693053, "grad_norm": 0.41425055265426636, "learning_rate": 6.284499325346527e-06, "loss": 0.0077, "step": 15370 }, { "epoch": 0.1257717626855297, "grad_norm": 0.20890943706035614, "learning_rate": 6.288588134276486e-06, "loss": 0.0043, "step": 15380 }, { "epoch": 0.12585353886412887, "grad_norm": 0.19169174134731293, "learning_rate": 6.292676943206445e-06, "loss": 0.0044, "step": 15390 }, { "epoch": 0.12593531504272806, "grad_norm": 0.29353731870651245, "learning_rate": 6.2967657521364035e-06, "loss": 0.0077, "step": 15400 }, { "epoch": 0.12601709122132723, "grad_norm": 0.5257724523544312, "learning_rate": 6.3008545610663616e-06, "loss": 0.0065, "step": 15410 }, { "epoch": 0.1260988673999264, "grad_norm": 0.2570820748806, "learning_rate": 6.3049433699963205e-06, "loss": 0.0234, "step": 15420 }, { "epoch": 0.12618064357852557, "grad_norm": 0.27895087003707886, "learning_rate": 6.30903217892628e-06, "loss": 0.0052, "step": 15430 }, { "epoch": 0.12626241975712474, "grad_norm": 0.13926808536052704, "learning_rate": 6.313120987856238e-06, "loss": 0.0051, "step": 15440 }, { "epoch": 0.12634419593572394, "grad_norm": 0.09308274835348129, "learning_rate": 6.317209796786196e-06, "loss": 0.005, "step": 15450 }, { "epoch": 0.1264259721143231, "grad_norm": 0.35286930203437805, "learning_rate": 6.321298605716155e-06, "loss": 0.0058, "step": 15460 }, { "epoch": 0.12650774829292227, "grad_norm": 0.1811080276966095, "learning_rate": 6.325387414646113e-06, "loss": 0.0054, "step": 15470 }, { "epoch": 0.12658952447152144, "grad_norm": 0.2807413637638092, "learning_rate": 6.329476223576073e-06, "loss": 0.005, "step": 15480 }, { "epoch": 0.1266713006501206, "grad_norm": 0.23387163877487183, "learning_rate": 6.333565032506032e-06, "loss": 0.0058, "step": 15490 }, { "epoch": 0.1267530768287198, "grad_norm": 0.11933474987745285, "learning_rate": 6.33765384143599e-06, "loss": 0.0064, "step": 15500 }, { "epoch": 0.12683485300731898, "grad_norm": 0.18880772590637207, "learning_rate": 6.341742650365949e-06, "loss": 0.0046, "step": 15510 }, { "epoch": 0.12691662918591815, "grad_norm": 0.5586189031600952, "learning_rate": 6.345831459295908e-06, "loss": 0.0043, "step": 15520 }, { "epoch": 0.1269984053645173, "grad_norm": 0.20360350608825684, "learning_rate": 6.349920268225867e-06, "loss": 0.0063, "step": 15530 }, { "epoch": 0.12708018154311648, "grad_norm": 0.056378837674856186, "learning_rate": 6.354009077155825e-06, "loss": 0.0043, "step": 15540 }, { "epoch": 0.12716195772171565, "grad_norm": 0.14223867654800415, "learning_rate": 6.358097886085784e-06, "loss": 0.0034, "step": 15550 }, { "epoch": 0.12724373390031485, "grad_norm": 0.19059637188911438, "learning_rate": 6.362186695015743e-06, "loss": 0.004, "step": 15560 }, { "epoch": 0.12732551007891402, "grad_norm": 0.12924644351005554, "learning_rate": 6.366275503945701e-06, "loss": 0.005, "step": 15570 }, { "epoch": 0.12740728625751319, "grad_norm": 0.1347794085741043, "learning_rate": 6.3703643128756594e-06, "loss": 0.0036, "step": 15580 }, { "epoch": 0.12748906243611235, "grad_norm": 0.3254605233669281, "learning_rate": 6.374453121805618e-06, "loss": 0.004, "step": 15590 }, { "epoch": 0.12757083861471152, "grad_norm": 0.228053480386734, "learning_rate": 6.378541930735576e-06, "loss": 0.0044, "step": 15600 }, { "epoch": 0.12765261479331072, "grad_norm": 0.13909867405891418, "learning_rate": 6.382630739665536e-06, "loss": 0.0039, "step": 15610 }, { "epoch": 0.1277343909719099, "grad_norm": 0.23050273954868317, "learning_rate": 6.386719548595495e-06, "loss": 0.0058, "step": 15620 }, { "epoch": 0.12781616715050906, "grad_norm": 0.3728727102279663, "learning_rate": 6.390808357525453e-06, "loss": 0.0055, "step": 15630 }, { "epoch": 0.12789794332910823, "grad_norm": 0.1857287436723709, "learning_rate": 6.394897166455412e-06, "loss": 0.0067, "step": 15640 }, { "epoch": 0.1279797195077074, "grad_norm": 0.4852793514728546, "learning_rate": 6.398985975385371e-06, "loss": 0.0056, "step": 15650 }, { "epoch": 0.1280614956863066, "grad_norm": 0.7973262071609497, "learning_rate": 6.40307478431533e-06, "loss": 0.0045, "step": 15660 }, { "epoch": 0.12814327186490576, "grad_norm": 0.1626209318637848, "learning_rate": 6.407163593245288e-06, "loss": 0.0045, "step": 15670 }, { "epoch": 0.12822504804350493, "grad_norm": 0.10874348878860474, "learning_rate": 6.411252402175247e-06, "loss": 0.0034, "step": 15680 }, { "epoch": 0.1283068242221041, "grad_norm": 0.27937522530555725, "learning_rate": 6.415341211105206e-06, "loss": 0.0065, "step": 15690 }, { "epoch": 0.12838860040070327, "grad_norm": 0.1221650168299675, "learning_rate": 6.4194300200351645e-06, "loss": 0.0075, "step": 15700 }, { "epoch": 0.12847037657930244, "grad_norm": 0.3614450991153717, "learning_rate": 6.4235188289651226e-06, "loss": 0.0049, "step": 15710 }, { "epoch": 0.12855215275790163, "grad_norm": 0.0947810560464859, "learning_rate": 6.4276076378950815e-06, "loss": 0.0059, "step": 15720 }, { "epoch": 0.1286339289365008, "grad_norm": 0.17921961843967438, "learning_rate": 6.431696446825041e-06, "loss": 0.0058, "step": 15730 }, { "epoch": 0.12871570511509997, "grad_norm": 0.34366098046302795, "learning_rate": 6.435785255754999e-06, "loss": 0.0058, "step": 15740 }, { "epoch": 0.12879748129369914, "grad_norm": 0.10747997462749481, "learning_rate": 6.439874064684958e-06, "loss": 0.0063, "step": 15750 }, { "epoch": 0.1288792574722983, "grad_norm": 0.2062256783246994, "learning_rate": 6.443962873614916e-06, "loss": 0.0051, "step": 15760 }, { "epoch": 0.1289610336508975, "grad_norm": 0.27007272839546204, "learning_rate": 6.448051682544874e-06, "loss": 0.0049, "step": 15770 }, { "epoch": 0.12904280982949667, "grad_norm": 0.21534717082977295, "learning_rate": 6.452140491474834e-06, "loss": 0.0046, "step": 15780 }, { "epoch": 0.12912458600809584, "grad_norm": 0.05208032578229904, "learning_rate": 6.456229300404793e-06, "loss": 0.0043, "step": 15790 }, { "epoch": 0.129206362186695, "grad_norm": 0.10411056131124496, "learning_rate": 6.460318109334751e-06, "loss": 0.0032, "step": 15800 }, { "epoch": 0.12928813836529418, "grad_norm": 0.09656088054180145, "learning_rate": 6.46440691826471e-06, "loss": 0.0048, "step": 15810 }, { "epoch": 0.12936991454389338, "grad_norm": 0.09856244921684265, "learning_rate": 6.468495727194669e-06, "loss": 0.0036, "step": 15820 }, { "epoch": 0.12945169072249255, "grad_norm": 0.1301838755607605, "learning_rate": 6.472584536124628e-06, "loss": 0.005, "step": 15830 }, { "epoch": 0.12953346690109172, "grad_norm": 0.10883533954620361, "learning_rate": 6.476673345054586e-06, "loss": 0.0042, "step": 15840 }, { "epoch": 0.12961524307969088, "grad_norm": 0.14838257431983948, "learning_rate": 6.480762153984545e-06, "loss": 0.0043, "step": 15850 }, { "epoch": 0.12969701925829005, "grad_norm": 0.2606872618198395, "learning_rate": 6.484850962914504e-06, "loss": 0.0054, "step": 15860 }, { "epoch": 0.12977879543688922, "grad_norm": 0.161780446767807, "learning_rate": 6.488939771844462e-06, "loss": 0.0047, "step": 15870 }, { "epoch": 0.12986057161548842, "grad_norm": 0.35576218366622925, "learning_rate": 6.493028580774421e-06, "loss": 0.0055, "step": 15880 }, { "epoch": 0.1299423477940876, "grad_norm": 0.2814883589744568, "learning_rate": 6.497117389704379e-06, "loss": 0.0043, "step": 15890 }, { "epoch": 0.13002412397268676, "grad_norm": 0.19825562834739685, "learning_rate": 6.501206198634339e-06, "loss": 0.0043, "step": 15900 }, { "epoch": 0.13010590015128592, "grad_norm": 0.10150870680809021, "learning_rate": 6.505295007564297e-06, "loss": 0.0065, "step": 15910 }, { "epoch": 0.1301876763298851, "grad_norm": 0.04743969067931175, "learning_rate": 6.509383816494256e-06, "loss": 0.007, "step": 15920 }, { "epoch": 0.1302694525084843, "grad_norm": 0.21460624039173126, "learning_rate": 6.513472625424214e-06, "loss": 0.0041, "step": 15930 }, { "epoch": 0.13035122868708346, "grad_norm": 0.18271300196647644, "learning_rate": 6.517561434354173e-06, "loss": 0.0049, "step": 15940 }, { "epoch": 0.13043300486568263, "grad_norm": 0.08699099719524384, "learning_rate": 6.521650243284132e-06, "loss": 0.0031, "step": 15950 }, { "epoch": 0.1305147810442818, "grad_norm": 0.21614940464496613, "learning_rate": 6.525739052214091e-06, "loss": 0.0029, "step": 15960 }, { "epoch": 0.13059655722288097, "grad_norm": 0.1358349621295929, "learning_rate": 6.529827861144049e-06, "loss": 0.0046, "step": 15970 }, { "epoch": 0.13067833340148016, "grad_norm": 0.12951192259788513, "learning_rate": 6.533916670074008e-06, "loss": 0.0058, "step": 15980 }, { "epoch": 0.13076010958007933, "grad_norm": 0.2494615763425827, "learning_rate": 6.5380054790039675e-06, "loss": 0.0054, "step": 15990 }, { "epoch": 0.1308418857586785, "grad_norm": 0.1532038003206253, "learning_rate": 6.5420942879339255e-06, "loss": 0.0054, "step": 16000 }, { "epoch": 0.13092366193727767, "grad_norm": 0.08720885217189789, "learning_rate": 6.546183096863884e-06, "loss": 0.0054, "step": 16010 }, { "epoch": 0.13100543811587684, "grad_norm": 0.2677474915981293, "learning_rate": 6.5502719057938425e-06, "loss": 0.005, "step": 16020 }, { "epoch": 0.131087214294476, "grad_norm": 0.44395777583122253, "learning_rate": 6.554360714723802e-06, "loss": 0.0046, "step": 16030 }, { "epoch": 0.1311689904730752, "grad_norm": 0.12744516134262085, "learning_rate": 6.55844952365376e-06, "loss": 0.0049, "step": 16040 }, { "epoch": 0.13125076665167437, "grad_norm": 0.2575877010822296, "learning_rate": 6.562538332583719e-06, "loss": 0.0059, "step": 16050 }, { "epoch": 0.13133254283027354, "grad_norm": 0.12525327503681183, "learning_rate": 6.566627141513677e-06, "loss": 0.005, "step": 16060 }, { "epoch": 0.1314143190088727, "grad_norm": 0.13535749912261963, "learning_rate": 6.570715950443637e-06, "loss": 0.0065, "step": 16070 }, { "epoch": 0.13149609518747188, "grad_norm": 0.1117658019065857, "learning_rate": 6.574804759373595e-06, "loss": 0.0051, "step": 16080 }, { "epoch": 0.13157787136607108, "grad_norm": 0.14786580204963684, "learning_rate": 6.578893568303554e-06, "loss": 0.0039, "step": 16090 }, { "epoch": 0.13165964754467024, "grad_norm": 0.34689486026763916, "learning_rate": 6.582982377233512e-06, "loss": 0.0072, "step": 16100 }, { "epoch": 0.1317414237232694, "grad_norm": 0.2312483787536621, "learning_rate": 6.587071186163471e-06, "loss": 0.0044, "step": 16110 }, { "epoch": 0.13182319990186858, "grad_norm": 0.10664594918489456, "learning_rate": 6.59115999509343e-06, "loss": 0.0067, "step": 16120 }, { "epoch": 0.13190497608046775, "grad_norm": 0.41217240691185, "learning_rate": 6.595248804023389e-06, "loss": 0.006, "step": 16130 }, { "epoch": 0.13198675225906695, "grad_norm": 0.12591952085494995, "learning_rate": 6.599337612953347e-06, "loss": 0.0034, "step": 16140 }, { "epoch": 0.13206852843766612, "grad_norm": 0.16214632987976074, "learning_rate": 6.603426421883306e-06, "loss": 0.0056, "step": 16150 }, { "epoch": 0.13215030461626529, "grad_norm": 0.33843374252319336, "learning_rate": 6.607515230813265e-06, "loss": 0.0045, "step": 16160 }, { "epoch": 0.13223208079486445, "grad_norm": 0.1618744432926178, "learning_rate": 6.6116040397432234e-06, "loss": 0.009, "step": 16170 }, { "epoch": 0.13231385697346362, "grad_norm": 0.47615909576416016, "learning_rate": 6.615692848673182e-06, "loss": 0.0065, "step": 16180 }, { "epoch": 0.1323956331520628, "grad_norm": 0.40272027254104614, "learning_rate": 6.61978165760314e-06, "loss": 0.0074, "step": 16190 }, { "epoch": 0.132477409330662, "grad_norm": 0.21779373288154602, "learning_rate": 6.6238704665331e-06, "loss": 0.009, "step": 16200 }, { "epoch": 0.13255918550926116, "grad_norm": 0.10304751992225647, "learning_rate": 6.627959275463058e-06, "loss": 0.005, "step": 16210 }, { "epoch": 0.13264096168786033, "grad_norm": 0.3830726742744446, "learning_rate": 6.632048084393017e-06, "loss": 0.0047, "step": 16220 }, { "epoch": 0.1327227378664595, "grad_norm": 0.14720392227172852, "learning_rate": 6.636136893322975e-06, "loss": 0.0069, "step": 16230 }, { "epoch": 0.13280451404505866, "grad_norm": 0.2945970594882965, "learning_rate": 6.640225702252934e-06, "loss": 0.0047, "step": 16240 }, { "epoch": 0.13288629022365786, "grad_norm": 4.76801061630249, "learning_rate": 6.644314511182893e-06, "loss": 0.0147, "step": 16250 }, { "epoch": 0.13296806640225703, "grad_norm": 0.29694870114326477, "learning_rate": 6.648403320112852e-06, "loss": 0.0056, "step": 16260 }, { "epoch": 0.1330498425808562, "grad_norm": 0.5355541110038757, "learning_rate": 6.65249212904281e-06, "loss": 0.0061, "step": 16270 }, { "epoch": 0.13313161875945537, "grad_norm": 0.2633523643016815, "learning_rate": 6.656580937972769e-06, "loss": 0.0039, "step": 16280 }, { "epoch": 0.13321339493805454, "grad_norm": 0.2639087736606598, "learning_rate": 6.6606697469027285e-06, "loss": 0.004, "step": 16290 }, { "epoch": 0.13329517111665373, "grad_norm": 0.21949529647827148, "learning_rate": 6.6647585558326866e-06, "loss": 0.0071, "step": 16300 }, { "epoch": 0.1333769472952529, "grad_norm": 0.06641790270805359, "learning_rate": 6.6688473647626455e-06, "loss": 0.0087, "step": 16310 }, { "epoch": 0.13345872347385207, "grad_norm": 0.3006872534751892, "learning_rate": 6.6729361736926035e-06, "loss": 0.0083, "step": 16320 }, { "epoch": 0.13354049965245124, "grad_norm": 0.2140251100063324, "learning_rate": 6.677024982622563e-06, "loss": 0.0108, "step": 16330 }, { "epoch": 0.1336222758310504, "grad_norm": 0.31582367420196533, "learning_rate": 6.681113791552521e-06, "loss": 0.0061, "step": 16340 }, { "epoch": 0.13370405200964958, "grad_norm": 0.05467894300818443, "learning_rate": 6.68520260048248e-06, "loss": 0.0075, "step": 16350 }, { "epoch": 0.13378582818824877, "grad_norm": 0.11154737323522568, "learning_rate": 6.689291409412438e-06, "loss": 0.0043, "step": 16360 }, { "epoch": 0.13386760436684794, "grad_norm": 0.11223200708627701, "learning_rate": 6.693380218342398e-06, "loss": 0.0053, "step": 16370 }, { "epoch": 0.1339493805454471, "grad_norm": 0.44585326313972473, "learning_rate": 6.697469027272356e-06, "loss": 0.0043, "step": 16380 }, { "epoch": 0.13403115672404628, "grad_norm": 0.14033377170562744, "learning_rate": 6.701557836202315e-06, "loss": 0.003, "step": 16390 }, { "epoch": 0.13411293290264545, "grad_norm": 0.15140846371650696, "learning_rate": 6.705646645132273e-06, "loss": 0.0051, "step": 16400 }, { "epoch": 0.13419470908124465, "grad_norm": 0.23293618857860565, "learning_rate": 6.709735454062232e-06, "loss": 0.0046, "step": 16410 }, { "epoch": 0.13427648525984381, "grad_norm": 0.5422565340995789, "learning_rate": 6.713824262992192e-06, "loss": 0.0036, "step": 16420 }, { "epoch": 0.13435826143844298, "grad_norm": 0.4038158655166626, "learning_rate": 6.71791307192215e-06, "loss": 0.009, "step": 16430 }, { "epoch": 0.13444003761704215, "grad_norm": 0.26224812865257263, "learning_rate": 6.722001880852108e-06, "loss": 0.0056, "step": 16440 }, { "epoch": 0.13452181379564132, "grad_norm": 0.20782949030399323, "learning_rate": 6.726090689782067e-06, "loss": 0.0062, "step": 16450 }, { "epoch": 0.13460358997424052, "grad_norm": 0.07992202788591385, "learning_rate": 6.730179498712026e-06, "loss": 0.0037, "step": 16460 }, { "epoch": 0.1346853661528397, "grad_norm": 0.11419398337602615, "learning_rate": 6.7342683076419844e-06, "loss": 0.005, "step": 16470 }, { "epoch": 0.13476714233143886, "grad_norm": 0.25973233580589294, "learning_rate": 6.738357116571943e-06, "loss": 0.0028, "step": 16480 }, { "epoch": 0.13484891851003802, "grad_norm": 0.0935162901878357, "learning_rate": 6.742445925501901e-06, "loss": 0.0055, "step": 16490 }, { "epoch": 0.1349306946886372, "grad_norm": 0.22112372517585754, "learning_rate": 6.746534734431861e-06, "loss": 0.0037, "step": 16500 }, { "epoch": 0.13501247086723636, "grad_norm": 0.22395208477973938, "learning_rate": 6.750623543361819e-06, "loss": 0.0039, "step": 16510 }, { "epoch": 0.13509424704583556, "grad_norm": 0.24736681580543518, "learning_rate": 6.754712352291778e-06, "loss": 0.0045, "step": 16520 }, { "epoch": 0.13517602322443473, "grad_norm": 0.3202499449253082, "learning_rate": 6.758801161221736e-06, "loss": 0.0041, "step": 16530 }, { "epoch": 0.1352577994030339, "grad_norm": 0.28328660130500793, "learning_rate": 6.762889970151696e-06, "loss": 0.006, "step": 16540 }, { "epoch": 0.13533957558163306, "grad_norm": 0.3588293790817261, "learning_rate": 6.766978779081654e-06, "loss": 0.0063, "step": 16550 }, { "epoch": 0.13542135176023223, "grad_norm": 0.24739983677864075, "learning_rate": 6.771067588011613e-06, "loss": 0.0031, "step": 16560 }, { "epoch": 0.13550312793883143, "grad_norm": 0.16260047256946564, "learning_rate": 6.775156396941571e-06, "loss": 0.0055, "step": 16570 }, { "epoch": 0.1355849041174306, "grad_norm": 0.16482390463352203, "learning_rate": 6.77924520587153e-06, "loss": 0.0047, "step": 16580 }, { "epoch": 0.13566668029602977, "grad_norm": 0.3506985902786255, "learning_rate": 6.7833340148014895e-06, "loss": 0.0059, "step": 16590 }, { "epoch": 0.13574845647462894, "grad_norm": 0.08531332015991211, "learning_rate": 6.787422823731448e-06, "loss": 0.004, "step": 16600 }, { "epoch": 0.1358302326532281, "grad_norm": 0.2516983151435852, "learning_rate": 6.7915116326614065e-06, "loss": 0.0053, "step": 16610 }, { "epoch": 0.1359120088318273, "grad_norm": 0.13503120839595795, "learning_rate": 6.7956004415913645e-06, "loss": 0.006, "step": 16620 }, { "epoch": 0.13599378501042647, "grad_norm": 0.16604939103126526, "learning_rate": 6.799689250521324e-06, "loss": 0.0044, "step": 16630 }, { "epoch": 0.13607556118902564, "grad_norm": 0.2623099386692047, "learning_rate": 6.803778059451282e-06, "loss": 0.0059, "step": 16640 }, { "epoch": 0.1361573373676248, "grad_norm": 0.28748080134391785, "learning_rate": 6.807866868381241e-06, "loss": 0.0039, "step": 16650 }, { "epoch": 0.13623911354622398, "grad_norm": 0.2768363058567047, "learning_rate": 6.811955677311199e-06, "loss": 0.005, "step": 16660 }, { "epoch": 0.13632088972482315, "grad_norm": 0.1450035125017166, "learning_rate": 6.816044486241159e-06, "loss": 0.005, "step": 16670 }, { "epoch": 0.13640266590342234, "grad_norm": 0.07613387703895569, "learning_rate": 6.820133295171117e-06, "loss": 0.0042, "step": 16680 }, { "epoch": 0.1364844420820215, "grad_norm": 0.2228611707687378, "learning_rate": 6.824222104101076e-06, "loss": 0.0049, "step": 16690 }, { "epoch": 0.13656621826062068, "grad_norm": 0.19250497221946716, "learning_rate": 6.828310913031034e-06, "loss": 0.0073, "step": 16700 }, { "epoch": 0.13664799443921985, "grad_norm": 0.23903635144233704, "learning_rate": 6.832399721960993e-06, "loss": 0.006, "step": 16710 }, { "epoch": 0.13672977061781902, "grad_norm": 0.1358039230108261, "learning_rate": 6.836488530890953e-06, "loss": 0.005, "step": 16720 }, { "epoch": 0.13681154679641822, "grad_norm": 0.18211650848388672, "learning_rate": 6.840577339820911e-06, "loss": 0.0027, "step": 16730 }, { "epoch": 0.13689332297501738, "grad_norm": 0.18912388384342194, "learning_rate": 6.84466614875087e-06, "loss": 0.0056, "step": 16740 }, { "epoch": 0.13697509915361655, "grad_norm": 0.46347776055336, "learning_rate": 6.848754957680828e-06, "loss": 0.0054, "step": 16750 }, { "epoch": 0.13705687533221572, "grad_norm": 0.11613830924034119, "learning_rate": 6.852843766610787e-06, "loss": 0.0083, "step": 16760 }, { "epoch": 0.1371386515108149, "grad_norm": 0.12239941209554672, "learning_rate": 6.8569325755407455e-06, "loss": 0.0041, "step": 16770 }, { "epoch": 0.1372204276894141, "grad_norm": 0.19373060762882233, "learning_rate": 6.861021384470704e-06, "loss": 0.0042, "step": 16780 }, { "epoch": 0.13730220386801326, "grad_norm": 0.3013044595718384, "learning_rate": 6.865110193400662e-06, "loss": 0.0049, "step": 16790 }, { "epoch": 0.13738398004661243, "grad_norm": 0.2266158163547516, "learning_rate": 6.869199002330622e-06, "loss": 0.005, "step": 16800 }, { "epoch": 0.1374657562252116, "grad_norm": 0.1375972181558609, "learning_rate": 6.87328781126058e-06, "loss": 0.0075, "step": 16810 }, { "epoch": 0.13754753240381076, "grad_norm": 0.15924814343452454, "learning_rate": 6.877376620190539e-06, "loss": 0.0079, "step": 16820 }, { "epoch": 0.13762930858240993, "grad_norm": 0.20491310954093933, "learning_rate": 6.881465429120497e-06, "loss": 0.0055, "step": 16830 }, { "epoch": 0.13771108476100913, "grad_norm": 0.18914519250392914, "learning_rate": 6.885554238050457e-06, "loss": 0.0049, "step": 16840 }, { "epoch": 0.1377928609396083, "grad_norm": 0.4146055281162262, "learning_rate": 6.889643046980416e-06, "loss": 0.0056, "step": 16850 }, { "epoch": 0.13787463711820747, "grad_norm": 0.2564540505409241, "learning_rate": 6.893731855910374e-06, "loss": 0.0053, "step": 16860 }, { "epoch": 0.13795641329680663, "grad_norm": 0.2086220681667328, "learning_rate": 6.897820664840332e-06, "loss": 0.0057, "step": 16870 }, { "epoch": 0.1380381894754058, "grad_norm": 0.36354824900627136, "learning_rate": 6.901909473770291e-06, "loss": 0.004, "step": 16880 }, { "epoch": 0.138119965654005, "grad_norm": 0.1398053616285324, "learning_rate": 6.9059982827002505e-06, "loss": 0.0054, "step": 16890 }, { "epoch": 0.13820174183260417, "grad_norm": 0.12587635219097137, "learning_rate": 6.910087091630209e-06, "loss": 0.005, "step": 16900 }, { "epoch": 0.13828351801120334, "grad_norm": 0.1921018660068512, "learning_rate": 6.9141759005601675e-06, "loss": 0.0092, "step": 16910 }, { "epoch": 0.1383652941898025, "grad_norm": 0.1534297913312912, "learning_rate": 6.9182647094901256e-06, "loss": 0.0035, "step": 16920 }, { "epoch": 0.13844707036840168, "grad_norm": 0.08767301589250565, "learning_rate": 6.922353518420085e-06, "loss": 0.0056, "step": 16930 }, { "epoch": 0.13852884654700087, "grad_norm": 0.2564643323421478, "learning_rate": 6.926442327350043e-06, "loss": 0.0063, "step": 16940 }, { "epoch": 0.13861062272560004, "grad_norm": 0.25005051493644714, "learning_rate": 6.930531136280002e-06, "loss": 0.0051, "step": 16950 }, { "epoch": 0.1386923989041992, "grad_norm": 0.08329224586486816, "learning_rate": 6.93461994520996e-06, "loss": 0.004, "step": 16960 }, { "epoch": 0.13877417508279838, "grad_norm": 0.08508851379156113, "learning_rate": 6.93870875413992e-06, "loss": 0.0085, "step": 16970 }, { "epoch": 0.13885595126139755, "grad_norm": 0.08979285508394241, "learning_rate": 6.942797563069878e-06, "loss": 0.0046, "step": 16980 }, { "epoch": 0.13893772743999672, "grad_norm": 0.17110639810562134, "learning_rate": 6.946886371999837e-06, "loss": 0.0084, "step": 16990 }, { "epoch": 0.1390195036185959, "grad_norm": 0.15098294615745544, "learning_rate": 6.950975180929795e-06, "loss": 0.0034, "step": 17000 }, { "epoch": 0.13910127979719508, "grad_norm": 0.3076593279838562, "learning_rate": 6.955063989859755e-06, "loss": 0.0038, "step": 17010 }, { "epoch": 0.13918305597579425, "grad_norm": 0.4413636326789856, "learning_rate": 6.959152798789714e-06, "loss": 0.0061, "step": 17020 }, { "epoch": 0.13926483215439342, "grad_norm": 0.11166162043809891, "learning_rate": 6.963241607719672e-06, "loss": 0.0064, "step": 17030 }, { "epoch": 0.1393466083329926, "grad_norm": 0.21720023453235626, "learning_rate": 6.967330416649631e-06, "loss": 0.0069, "step": 17040 }, { "epoch": 0.13942838451159179, "grad_norm": 0.19197741150856018, "learning_rate": 6.971419225579589e-06, "loss": 0.0063, "step": 17050 }, { "epoch": 0.13951016069019095, "grad_norm": 0.11585713177919388, "learning_rate": 6.9755080345095484e-06, "loss": 0.0067, "step": 17060 }, { "epoch": 0.13959193686879012, "grad_norm": 0.25670740008354187, "learning_rate": 6.9795968434395065e-06, "loss": 0.0042, "step": 17070 }, { "epoch": 0.1396737130473893, "grad_norm": 0.2491241693496704, "learning_rate": 6.983685652369465e-06, "loss": 0.0042, "step": 17080 }, { "epoch": 0.13975548922598846, "grad_norm": 0.5205207467079163, "learning_rate": 6.9877744612994234e-06, "loss": 0.0037, "step": 17090 }, { "epoch": 0.13983726540458766, "grad_norm": 0.5764176249504089, "learning_rate": 6.991863270229383e-06, "loss": 0.0059, "step": 17100 }, { "epoch": 0.13991904158318683, "grad_norm": 0.17587168514728546, "learning_rate": 6.995952079159341e-06, "loss": 0.0057, "step": 17110 }, { "epoch": 0.140000817761786, "grad_norm": 0.24766670167446136, "learning_rate": 7.0000408880893e-06, "loss": 0.0043, "step": 17120 }, { "epoch": 0.14008259394038516, "grad_norm": 0.2236817330121994, "learning_rate": 7.004129697019258e-06, "loss": 0.0055, "step": 17130 }, { "epoch": 0.14016437011898433, "grad_norm": 0.10514704138040543, "learning_rate": 7.008218505949218e-06, "loss": 0.0045, "step": 17140 }, { "epoch": 0.1402461462975835, "grad_norm": 0.49315527081489563, "learning_rate": 7.012307314879177e-06, "loss": 0.0036, "step": 17150 }, { "epoch": 0.1403279224761827, "grad_norm": 0.2788519859313965, "learning_rate": 7.016396123809135e-06, "loss": 0.0041, "step": 17160 }, { "epoch": 0.14040969865478187, "grad_norm": 0.15546683967113495, "learning_rate": 7.020484932739094e-06, "loss": 0.0078, "step": 17170 }, { "epoch": 0.14049147483338104, "grad_norm": 0.15478883683681488, "learning_rate": 7.024573741669053e-06, "loss": 0.0058, "step": 17180 }, { "epoch": 0.1405732510119802, "grad_norm": 0.15966930985450745, "learning_rate": 7.0286625505990116e-06, "loss": 0.0042, "step": 17190 }, { "epoch": 0.14065502719057937, "grad_norm": 0.4088832437992096, "learning_rate": 7.03275135952897e-06, "loss": 0.0042, "step": 17200 }, { "epoch": 0.14073680336917857, "grad_norm": 0.05170368030667305, "learning_rate": 7.0368401684589285e-06, "loss": 0.0045, "step": 17210 }, { "epoch": 0.14081857954777774, "grad_norm": 0.11920278519392014, "learning_rate": 7.0409289773888866e-06, "loss": 0.0049, "step": 17220 }, { "epoch": 0.1409003557263769, "grad_norm": 0.21896450221538544, "learning_rate": 7.045017786318846e-06, "loss": 0.0055, "step": 17230 }, { "epoch": 0.14098213190497608, "grad_norm": 0.309845894575119, "learning_rate": 7.049106595248804e-06, "loss": 0.0087, "step": 17240 }, { "epoch": 0.14106390808357525, "grad_norm": 0.17096151411533356, "learning_rate": 7.053195404178763e-06, "loss": 0.0031, "step": 17250 }, { "epoch": 0.14114568426217444, "grad_norm": 0.15838053822517395, "learning_rate": 7.057284213108721e-06, "loss": 0.0048, "step": 17260 }, { "epoch": 0.1412274604407736, "grad_norm": 0.10957887768745422, "learning_rate": 7.061373022038681e-06, "loss": 0.0045, "step": 17270 }, { "epoch": 0.14130923661937278, "grad_norm": 0.2426484227180481, "learning_rate": 7.06546183096864e-06, "loss": 0.0064, "step": 17280 }, { "epoch": 0.14139101279797195, "grad_norm": 0.19185811281204224, "learning_rate": 7.069550639898598e-06, "loss": 0.0052, "step": 17290 }, { "epoch": 0.14147278897657112, "grad_norm": 0.06960674375295639, "learning_rate": 7.073639448828556e-06, "loss": 0.0053, "step": 17300 }, { "epoch": 0.1415545651551703, "grad_norm": 0.19528236985206604, "learning_rate": 7.077728257758516e-06, "loss": 0.0028, "step": 17310 }, { "epoch": 0.14163634133376948, "grad_norm": 0.2255115509033203, "learning_rate": 7.081817066688475e-06, "loss": 0.0044, "step": 17320 }, { "epoch": 0.14171811751236865, "grad_norm": 0.05182491987943649, "learning_rate": 7.085905875618433e-06, "loss": 0.0051, "step": 17330 }, { "epoch": 0.14179989369096782, "grad_norm": 0.1006755530834198, "learning_rate": 7.089994684548392e-06, "loss": 0.0043, "step": 17340 }, { "epoch": 0.141881669869567, "grad_norm": 0.21791043877601624, "learning_rate": 7.09408349347835e-06, "loss": 0.0032, "step": 17350 }, { "epoch": 0.14196344604816616, "grad_norm": 0.24088440835475922, "learning_rate": 7.0981723024083095e-06, "loss": 0.0055, "step": 17360 }, { "epoch": 0.14204522222676536, "grad_norm": 0.33479222655296326, "learning_rate": 7.1022611113382675e-06, "loss": 0.0035, "step": 17370 }, { "epoch": 0.14212699840536452, "grad_norm": 0.09403961896896362, "learning_rate": 7.106349920268226e-06, "loss": 0.0051, "step": 17380 }, { "epoch": 0.1422087745839637, "grad_norm": 0.15629999339580536, "learning_rate": 7.1104387291981845e-06, "loss": 0.0046, "step": 17390 }, { "epoch": 0.14229055076256286, "grad_norm": 0.3668185770511627, "learning_rate": 7.114527538128144e-06, "loss": 0.0058, "step": 17400 }, { "epoch": 0.14237232694116203, "grad_norm": 0.30165910720825195, "learning_rate": 7.118616347058102e-06, "loss": 0.0041, "step": 17410 }, { "epoch": 0.14245410311976123, "grad_norm": 0.3343330919742584, "learning_rate": 7.122705155988061e-06, "loss": 0.0066, "step": 17420 }, { "epoch": 0.1425358792983604, "grad_norm": 0.1418878734111786, "learning_rate": 7.126793964918019e-06, "loss": 0.0042, "step": 17430 }, { "epoch": 0.14261765547695957, "grad_norm": 0.1526254266500473, "learning_rate": 7.130882773847979e-06, "loss": 0.0048, "step": 17440 }, { "epoch": 0.14269943165555873, "grad_norm": 0.2693462669849396, "learning_rate": 7.134971582777938e-06, "loss": 0.004, "step": 17450 }, { "epoch": 0.1427812078341579, "grad_norm": 0.18928824365139008, "learning_rate": 7.139060391707896e-06, "loss": 0.0041, "step": 17460 }, { "epoch": 0.14286298401275707, "grad_norm": 0.20082233846187592, "learning_rate": 7.143149200637855e-06, "loss": 0.0051, "step": 17470 }, { "epoch": 0.14294476019135627, "grad_norm": 0.2379811555147171, "learning_rate": 7.147238009567814e-06, "loss": 0.0049, "step": 17480 }, { "epoch": 0.14302653636995544, "grad_norm": 0.21056073904037476, "learning_rate": 7.151326818497773e-06, "loss": 0.0045, "step": 17490 }, { "epoch": 0.1431083125485546, "grad_norm": 0.12878338992595673, "learning_rate": 7.155415627427731e-06, "loss": 0.004, "step": 17500 }, { "epoch": 0.14319008872715377, "grad_norm": 0.20865897834300995, "learning_rate": 7.1595044363576895e-06, "loss": 0.0043, "step": 17510 }, { "epoch": 0.14327186490575294, "grad_norm": 0.1703399121761322, "learning_rate": 7.163593245287648e-06, "loss": 0.0055, "step": 17520 }, { "epoch": 0.14335364108435214, "grad_norm": 0.244110107421875, "learning_rate": 7.167682054217607e-06, "loss": 0.0035, "step": 17530 }, { "epoch": 0.1434354172629513, "grad_norm": 0.22447997331619263, "learning_rate": 7.171770863147565e-06, "loss": 0.0126, "step": 17540 }, { "epoch": 0.14351719344155048, "grad_norm": 0.262541264295578, "learning_rate": 7.175859672077524e-06, "loss": 0.0038, "step": 17550 }, { "epoch": 0.14359896962014965, "grad_norm": 0.17216609418392181, "learning_rate": 7.179948481007482e-06, "loss": 0.0062, "step": 17560 }, { "epoch": 0.14368074579874882, "grad_norm": 0.07818068563938141, "learning_rate": 7.184037289937442e-06, "loss": 0.0068, "step": 17570 }, { "epoch": 0.143762521977348, "grad_norm": 0.24035638570785522, "learning_rate": 7.188126098867401e-06, "loss": 0.006, "step": 17580 }, { "epoch": 0.14384429815594718, "grad_norm": 0.3681538999080658, "learning_rate": 7.192214907797359e-06, "loss": 0.0096, "step": 17590 }, { "epoch": 0.14392607433454635, "grad_norm": 0.1348104178905487, "learning_rate": 7.196303716727318e-06, "loss": 0.0028, "step": 17600 }, { "epoch": 0.14400785051314552, "grad_norm": 0.30681830644607544, "learning_rate": 7.200392525657277e-06, "loss": 0.004, "step": 17610 }, { "epoch": 0.1440896266917447, "grad_norm": 0.30624184012413025, "learning_rate": 7.204481334587236e-06, "loss": 0.0084, "step": 17620 }, { "epoch": 0.14417140287034386, "grad_norm": 0.15084171295166016, "learning_rate": 7.208570143517194e-06, "loss": 0.0047, "step": 17630 }, { "epoch": 0.14425317904894305, "grad_norm": 0.244252011179924, "learning_rate": 7.212658952447153e-06, "loss": 0.0052, "step": 17640 }, { "epoch": 0.14433495522754222, "grad_norm": 0.185062438249588, "learning_rate": 7.2167477613771116e-06, "loss": 0.0058, "step": 17650 }, { "epoch": 0.1444167314061414, "grad_norm": 0.46097826957702637, "learning_rate": 7.2208365703070705e-06, "loss": 0.0079, "step": 17660 }, { "epoch": 0.14449850758474056, "grad_norm": 0.23717854917049408, "learning_rate": 7.2249253792370285e-06, "loss": 0.0056, "step": 17670 }, { "epoch": 0.14458028376333973, "grad_norm": 0.12690098583698273, "learning_rate": 7.229014188166987e-06, "loss": 0.0031, "step": 17680 }, { "epoch": 0.14466205994193893, "grad_norm": 0.16000117361545563, "learning_rate": 7.2331029970969455e-06, "loss": 0.0041, "step": 17690 }, { "epoch": 0.1447438361205381, "grad_norm": 0.05219687148928642, "learning_rate": 7.237191806026905e-06, "loss": 0.0043, "step": 17700 }, { "epoch": 0.14482561229913726, "grad_norm": 0.07124803960323334, "learning_rate": 7.241280614956864e-06, "loss": 0.0052, "step": 17710 }, { "epoch": 0.14490738847773643, "grad_norm": 0.2815999984741211, "learning_rate": 7.245369423886822e-06, "loss": 0.0074, "step": 17720 }, { "epoch": 0.1449891646563356, "grad_norm": 0.330587774515152, "learning_rate": 7.24945823281678e-06, "loss": 0.0045, "step": 17730 }, { "epoch": 0.1450709408349348, "grad_norm": 0.22300690412521362, "learning_rate": 7.25354704174674e-06, "loss": 0.0054, "step": 17740 }, { "epoch": 0.14515271701353397, "grad_norm": 0.3731251060962677, "learning_rate": 7.257635850676699e-06, "loss": 0.0056, "step": 17750 }, { "epoch": 0.14523449319213314, "grad_norm": 0.2005418986082077, "learning_rate": 7.261724659606657e-06, "loss": 0.004, "step": 17760 }, { "epoch": 0.1453162693707323, "grad_norm": 0.24295595288276672, "learning_rate": 7.265813468536616e-06, "loss": 0.0042, "step": 17770 }, { "epoch": 0.14539804554933147, "grad_norm": 0.18027350306510925, "learning_rate": 7.269902277466575e-06, "loss": 0.0061, "step": 17780 }, { "epoch": 0.14547982172793064, "grad_norm": 0.07666940242052078, "learning_rate": 7.273991086396534e-06, "loss": 0.004, "step": 17790 }, { "epoch": 0.14556159790652984, "grad_norm": 0.23270602524280548, "learning_rate": 7.278079895326492e-06, "loss": 0.0047, "step": 17800 }, { "epoch": 0.145643374085129, "grad_norm": 0.2227979153394699, "learning_rate": 7.2821687042564506e-06, "loss": 0.0041, "step": 17810 }, { "epoch": 0.14572515026372818, "grad_norm": 0.26671937108039856, "learning_rate": 7.28625751318641e-06, "loss": 0.0055, "step": 17820 }, { "epoch": 0.14580692644232734, "grad_norm": 0.1407056450843811, "learning_rate": 7.290346322116368e-06, "loss": 0.0087, "step": 17830 }, { "epoch": 0.1458887026209265, "grad_norm": 0.022191811352968216, "learning_rate": 7.294435131046327e-06, "loss": 0.0072, "step": 17840 }, { "epoch": 0.1459704787995257, "grad_norm": 0.23254181444644928, "learning_rate": 7.298523939976285e-06, "loss": 0.0049, "step": 17850 }, { "epoch": 0.14605225497812488, "grad_norm": 0.10148415714502335, "learning_rate": 7.302612748906243e-06, "loss": 0.0064, "step": 17860 }, { "epoch": 0.14613403115672405, "grad_norm": 0.24949553608894348, "learning_rate": 7.306701557836203e-06, "loss": 0.0081, "step": 17870 }, { "epoch": 0.14621580733532322, "grad_norm": 0.19731003046035767, "learning_rate": 7.310790366766162e-06, "loss": 0.0047, "step": 17880 }, { "epoch": 0.14629758351392239, "grad_norm": 0.549883246421814, "learning_rate": 7.31487917569612e-06, "loss": 0.0045, "step": 17890 }, { "epoch": 0.14637935969252158, "grad_norm": 0.28217995166778564, "learning_rate": 7.318967984626079e-06, "loss": 0.0043, "step": 17900 }, { "epoch": 0.14646113587112075, "grad_norm": 0.13706783950328827, "learning_rate": 7.323056793556038e-06, "loss": 0.0051, "step": 17910 }, { "epoch": 0.14654291204971992, "grad_norm": 0.08988809585571289, "learning_rate": 7.327145602485997e-06, "loss": 0.0039, "step": 17920 }, { "epoch": 0.1466246882283191, "grad_norm": 0.16210144758224487, "learning_rate": 7.331234411415955e-06, "loss": 0.0041, "step": 17930 }, { "epoch": 0.14670646440691826, "grad_norm": 0.21671056747436523, "learning_rate": 7.335323220345914e-06, "loss": 0.0029, "step": 17940 }, { "epoch": 0.14678824058551743, "grad_norm": 0.14517784118652344, "learning_rate": 7.3394120292758734e-06, "loss": 0.0047, "step": 17950 }, { "epoch": 0.14687001676411662, "grad_norm": 0.2837407886981964, "learning_rate": 7.3435008382058315e-06, "loss": 0.0039, "step": 17960 }, { "epoch": 0.1469517929427158, "grad_norm": 0.0645352378487587, "learning_rate": 7.3475896471357895e-06, "loss": 0.0044, "step": 17970 }, { "epoch": 0.14703356912131496, "grad_norm": 0.07557259500026703, "learning_rate": 7.3516784560657484e-06, "loss": 0.0031, "step": 17980 }, { "epoch": 0.14711534529991413, "grad_norm": 0.48765960335731506, "learning_rate": 7.3557672649957065e-06, "loss": 0.0049, "step": 17990 }, { "epoch": 0.1471971214785133, "grad_norm": 0.24282364547252655, "learning_rate": 7.359856073925666e-06, "loss": 0.0044, "step": 18000 }, { "epoch": 0.1472788976571125, "grad_norm": 0.26618480682373047, "learning_rate": 7.363944882855625e-06, "loss": 0.0042, "step": 18010 }, { "epoch": 0.14736067383571166, "grad_norm": 0.3320904076099396, "learning_rate": 7.368033691785583e-06, "loss": 0.0054, "step": 18020 }, { "epoch": 0.14744245001431083, "grad_norm": 0.13718952238559723, "learning_rate": 7.372122500715542e-06, "loss": 0.007, "step": 18030 }, { "epoch": 0.14752422619291, "grad_norm": 0.2991725504398346, "learning_rate": 7.376211309645501e-06, "loss": 0.0035, "step": 18040 }, { "epoch": 0.14760600237150917, "grad_norm": 0.1141764298081398, "learning_rate": 7.38030011857546e-06, "loss": 0.0034, "step": 18050 }, { "epoch": 0.14768777855010837, "grad_norm": 0.3426600992679596, "learning_rate": 7.384388927505418e-06, "loss": 0.0034, "step": 18060 }, { "epoch": 0.14776955472870754, "grad_norm": 0.17524640262126923, "learning_rate": 7.388477736435377e-06, "loss": 0.0039, "step": 18070 }, { "epoch": 0.1478513309073067, "grad_norm": 0.16296043992042542, "learning_rate": 7.392566545365336e-06, "loss": 0.0041, "step": 18080 }, { "epoch": 0.14793310708590587, "grad_norm": 0.19769176840782166, "learning_rate": 7.396655354295295e-06, "loss": 0.0059, "step": 18090 }, { "epoch": 0.14801488326450504, "grad_norm": 0.09233380854129791, "learning_rate": 7.400744163225253e-06, "loss": 0.0034, "step": 18100 }, { "epoch": 0.1480966594431042, "grad_norm": 0.09010908007621765, "learning_rate": 7.4048329721552116e-06, "loss": 0.0043, "step": 18110 }, { "epoch": 0.1481784356217034, "grad_norm": 0.3089582324028015, "learning_rate": 7.408921781085171e-06, "loss": 0.0046, "step": 18120 }, { "epoch": 0.14826021180030258, "grad_norm": 0.13700313866138458, "learning_rate": 7.413010590015129e-06, "loss": 0.0096, "step": 18130 }, { "epoch": 0.14834198797890175, "grad_norm": 0.07530989497900009, "learning_rate": 7.417099398945088e-06, "loss": 0.0045, "step": 18140 }, { "epoch": 0.14842376415750091, "grad_norm": 0.08702608197927475, "learning_rate": 7.421188207875046e-06, "loss": 0.0056, "step": 18150 }, { "epoch": 0.14850554033610008, "grad_norm": 0.23847627639770508, "learning_rate": 7.425277016805004e-06, "loss": 0.0045, "step": 18160 }, { "epoch": 0.14858731651469928, "grad_norm": 0.1651868373155594, "learning_rate": 7.429365825734964e-06, "loss": 0.0068, "step": 18170 }, { "epoch": 0.14866909269329845, "grad_norm": 0.23924098908901215, "learning_rate": 7.433454634664923e-06, "loss": 0.0039, "step": 18180 }, { "epoch": 0.14875086887189762, "grad_norm": 0.32965219020843506, "learning_rate": 7.437543443594881e-06, "loss": 0.0063, "step": 18190 }, { "epoch": 0.1488326450504968, "grad_norm": 0.38193488121032715, "learning_rate": 7.44163225252484e-06, "loss": 0.0054, "step": 18200 }, { "epoch": 0.14891442122909596, "grad_norm": 0.08525341004133224, "learning_rate": 7.445721061454799e-06, "loss": 0.0038, "step": 18210 }, { "epoch": 0.14899619740769512, "grad_norm": 0.3822646737098694, "learning_rate": 7.449809870384758e-06, "loss": 0.0037, "step": 18220 }, { "epoch": 0.14907797358629432, "grad_norm": 0.4190632104873657, "learning_rate": 7.453898679314716e-06, "loss": 0.0037, "step": 18230 }, { "epoch": 0.1491597497648935, "grad_norm": 0.3419688940048218, "learning_rate": 7.457987488244675e-06, "loss": 0.0046, "step": 18240 }, { "epoch": 0.14924152594349266, "grad_norm": 0.5608283281326294, "learning_rate": 7.4620762971746345e-06, "loss": 0.0051, "step": 18250 }, { "epoch": 0.14932330212209183, "grad_norm": 0.17178748548030853, "learning_rate": 7.4661651061045925e-06, "loss": 0.0073, "step": 18260 }, { "epoch": 0.149405078300691, "grad_norm": 0.2637186348438263, "learning_rate": 7.470253915034551e-06, "loss": 0.0037, "step": 18270 }, { "epoch": 0.1494868544792902, "grad_norm": 0.14936865866184235, "learning_rate": 7.4743427239645095e-06, "loss": 0.0069, "step": 18280 }, { "epoch": 0.14956863065788936, "grad_norm": 0.18232928216457367, "learning_rate": 7.478431532894469e-06, "loss": 0.0067, "step": 18290 }, { "epoch": 0.14965040683648853, "grad_norm": 0.289858877658844, "learning_rate": 7.482520341824427e-06, "loss": 0.0061, "step": 18300 }, { "epoch": 0.1497321830150877, "grad_norm": 0.03600858896970749, "learning_rate": 7.486609150754386e-06, "loss": 0.0063, "step": 18310 }, { "epoch": 0.14981395919368687, "grad_norm": 0.22516454756259918, "learning_rate": 7.490697959684344e-06, "loss": 0.0061, "step": 18320 }, { "epoch": 0.14989573537228607, "grad_norm": 0.07281911373138428, "learning_rate": 7.494786768614303e-06, "loss": 0.0073, "step": 18330 }, { "epoch": 0.14997751155088523, "grad_norm": 0.2959889769554138, "learning_rate": 7.498875577544262e-06, "loss": 0.0046, "step": 18340 }, { "epoch": 0.1500592877294844, "grad_norm": 0.2058725208044052, "learning_rate": 7.502964386474221e-06, "loss": 0.0053, "step": 18350 }, { "epoch": 0.15014106390808357, "grad_norm": 0.14915204048156738, "learning_rate": 7.507053195404179e-06, "loss": 0.0055, "step": 18360 }, { "epoch": 0.15022284008668274, "grad_norm": 0.05218876525759697, "learning_rate": 7.511142004334138e-06, "loss": 0.0047, "step": 18370 }, { "epoch": 0.1503046162652819, "grad_norm": 0.24945537745952606, "learning_rate": 7.515230813264098e-06, "loss": 0.0042, "step": 18380 }, { "epoch": 0.1503863924438811, "grad_norm": 0.13852132856845856, "learning_rate": 7.519319622194056e-06, "loss": 0.0031, "step": 18390 }, { "epoch": 0.15046816862248028, "grad_norm": 0.20791274309158325, "learning_rate": 7.523408431124014e-06, "loss": 0.0072, "step": 18400 }, { "epoch": 0.15054994480107944, "grad_norm": 0.16919417679309845, "learning_rate": 7.527497240053973e-06, "loss": 0.005, "step": 18410 }, { "epoch": 0.1506317209796786, "grad_norm": 0.23250237107276917, "learning_rate": 7.531586048983932e-06, "loss": 0.0049, "step": 18420 }, { "epoch": 0.15071349715827778, "grad_norm": 0.21689057350158691, "learning_rate": 7.53567485791389e-06, "loss": 0.0121, "step": 18430 }, { "epoch": 0.15079527333687698, "grad_norm": 0.1963329315185547, "learning_rate": 7.539763666843849e-06, "loss": 0.0037, "step": 18440 }, { "epoch": 0.15087704951547615, "grad_norm": 0.17678587138652802, "learning_rate": 7.543852475773807e-06, "loss": 0.0035, "step": 18450 }, { "epoch": 0.15095882569407532, "grad_norm": 0.10572315007448196, "learning_rate": 7.547941284703766e-06, "loss": 0.004, "step": 18460 }, { "epoch": 0.15104060187267448, "grad_norm": 0.21387390792369843, "learning_rate": 7.552030093633725e-06, "loss": 0.0034, "step": 18470 }, { "epoch": 0.15112237805127365, "grad_norm": 0.18181411921977997, "learning_rate": 7.556118902563684e-06, "loss": 0.0084, "step": 18480 }, { "epoch": 0.15120415422987285, "grad_norm": 0.10167049616575241, "learning_rate": 7.560207711493642e-06, "loss": 0.0042, "step": 18490 }, { "epoch": 0.15128593040847202, "grad_norm": 0.17485344409942627, "learning_rate": 7.564296520423601e-06, "loss": 0.0043, "step": 18500 }, { "epoch": 0.1513677065870712, "grad_norm": 0.3245217800140381, "learning_rate": 7.56838532935356e-06, "loss": 0.0039, "step": 18510 }, { "epoch": 0.15144948276567036, "grad_norm": 0.31153035163879395, "learning_rate": 7.572474138283519e-06, "loss": 0.0037, "step": 18520 }, { "epoch": 0.15153125894426953, "grad_norm": 0.17120343446731567, "learning_rate": 7.576562947213477e-06, "loss": 0.0033, "step": 18530 }, { "epoch": 0.1516130351228687, "grad_norm": 0.1863209307193756, "learning_rate": 7.580651756143436e-06, "loss": 0.0079, "step": 18540 }, { "epoch": 0.1516948113014679, "grad_norm": 0.06443002074956894, "learning_rate": 7.5847405650733955e-06, "loss": 0.0052, "step": 18550 }, { "epoch": 0.15177658748006706, "grad_norm": 0.12459611892700195, "learning_rate": 7.5888293740033535e-06, "loss": 0.0083, "step": 18560 }, { "epoch": 0.15185836365866623, "grad_norm": 0.11506864428520203, "learning_rate": 7.5929181829333124e-06, "loss": 0.0047, "step": 18570 }, { "epoch": 0.1519401398372654, "grad_norm": 0.12544798851013184, "learning_rate": 7.5970069918632705e-06, "loss": 0.003, "step": 18580 }, { "epoch": 0.15202191601586457, "grad_norm": 0.4441908299922943, "learning_rate": 7.60109580079323e-06, "loss": 0.0063, "step": 18590 }, { "epoch": 0.15210369219446376, "grad_norm": 0.10419362038373947, "learning_rate": 7.605184609723188e-06, "loss": 0.0036, "step": 18600 }, { "epoch": 0.15218546837306293, "grad_norm": 0.08365693688392639, "learning_rate": 7.609273418653147e-06, "loss": 0.0054, "step": 18610 }, { "epoch": 0.1522672445516621, "grad_norm": 0.05138444900512695, "learning_rate": 7.613362227583105e-06, "loss": 0.0031, "step": 18620 }, { "epoch": 0.15234902073026127, "grad_norm": 0.09607900679111481, "learning_rate": 7.617451036513064e-06, "loss": 0.0036, "step": 18630 }, { "epoch": 0.15243079690886044, "grad_norm": 0.165877565741539, "learning_rate": 7.621539845443023e-06, "loss": 0.008, "step": 18640 }, { "epoch": 0.15251257308745964, "grad_norm": 0.18875952064990997, "learning_rate": 7.625628654372982e-06, "loss": 0.0033, "step": 18650 }, { "epoch": 0.1525943492660588, "grad_norm": 0.09977805614471436, "learning_rate": 7.62971746330294e-06, "loss": 0.0038, "step": 18660 }, { "epoch": 0.15267612544465797, "grad_norm": 0.25247135758399963, "learning_rate": 7.633806272232899e-06, "loss": 0.0052, "step": 18670 }, { "epoch": 0.15275790162325714, "grad_norm": 0.3449723422527313, "learning_rate": 7.637895081162858e-06, "loss": 0.0052, "step": 18680 }, { "epoch": 0.1528396778018563, "grad_norm": 0.22108091413974762, "learning_rate": 7.641983890092817e-06, "loss": 0.0055, "step": 18690 }, { "epoch": 0.15292145398045548, "grad_norm": 0.31994593143463135, "learning_rate": 7.646072699022776e-06, "loss": 0.0025, "step": 18700 }, { "epoch": 0.15300323015905468, "grad_norm": 0.11283480376005173, "learning_rate": 7.650161507952733e-06, "loss": 0.0054, "step": 18710 }, { "epoch": 0.15308500633765385, "grad_norm": 0.09545400738716125, "learning_rate": 7.654250316882693e-06, "loss": 0.0053, "step": 18720 }, { "epoch": 0.15316678251625301, "grad_norm": 0.11303277313709259, "learning_rate": 7.658339125812652e-06, "loss": 0.004, "step": 18730 }, { "epoch": 0.15324855869485218, "grad_norm": 0.054876647889614105, "learning_rate": 7.66242793474261e-06, "loss": 0.0054, "step": 18740 }, { "epoch": 0.15333033487345135, "grad_norm": 0.16304846107959747, "learning_rate": 7.666516743672568e-06, "loss": 0.0056, "step": 18750 }, { "epoch": 0.15341211105205055, "grad_norm": 0.0783364400267601, "learning_rate": 7.670605552602527e-06, "loss": 0.0055, "step": 18760 }, { "epoch": 0.15349388723064972, "grad_norm": 0.29017558693885803, "learning_rate": 7.674694361532486e-06, "loss": 0.0033, "step": 18770 }, { "epoch": 0.15357566340924889, "grad_norm": 0.21517062187194824, "learning_rate": 7.678783170462445e-06, "loss": 0.0039, "step": 18780 }, { "epoch": 0.15365743958784805, "grad_norm": 0.35377487540245056, "learning_rate": 7.682871979392404e-06, "loss": 0.0052, "step": 18790 }, { "epoch": 0.15373921576644722, "grad_norm": 0.15338091552257538, "learning_rate": 7.686960788322361e-06, "loss": 0.0056, "step": 18800 }, { "epoch": 0.15382099194504642, "grad_norm": 0.34776678681373596, "learning_rate": 7.691049597252322e-06, "loss": 0.0061, "step": 18810 }, { "epoch": 0.1539027681236456, "grad_norm": 0.047768302261829376, "learning_rate": 7.69513840618228e-06, "loss": 0.0054, "step": 18820 }, { "epoch": 0.15398454430224476, "grad_norm": 0.35610532760620117, "learning_rate": 7.699227215112238e-06, "loss": 0.0055, "step": 18830 }, { "epoch": 0.15406632048084393, "grad_norm": 0.21499855816364288, "learning_rate": 7.703316024042197e-06, "loss": 0.0042, "step": 18840 }, { "epoch": 0.1541480966594431, "grad_norm": 0.09314883500337601, "learning_rate": 7.707404832972156e-06, "loss": 0.0033, "step": 18850 }, { "epoch": 0.15422987283804226, "grad_norm": 0.2096370905637741, "learning_rate": 7.711493641902115e-06, "loss": 0.0047, "step": 18860 }, { "epoch": 0.15431164901664146, "grad_norm": 0.20030994713306427, "learning_rate": 7.715582450832073e-06, "loss": 0.0042, "step": 18870 }, { "epoch": 0.15439342519524063, "grad_norm": 0.06216760724782944, "learning_rate": 7.719671259762032e-06, "loss": 0.0038, "step": 18880 }, { "epoch": 0.1544752013738398, "grad_norm": 0.19236983358860016, "learning_rate": 7.723760068691991e-06, "loss": 0.0061, "step": 18890 }, { "epoch": 0.15455697755243897, "grad_norm": 0.25263652205467224, "learning_rate": 7.72784887762195e-06, "loss": 0.0037, "step": 18900 }, { "epoch": 0.15463875373103814, "grad_norm": 0.03337474167346954, "learning_rate": 7.731937686551907e-06, "loss": 0.0036, "step": 18910 }, { "epoch": 0.15472052990963733, "grad_norm": 0.16289110481739044, "learning_rate": 7.736026495481866e-06, "loss": 0.0045, "step": 18920 }, { "epoch": 0.1548023060882365, "grad_norm": 0.5065156817436218, "learning_rate": 7.740115304411827e-06, "loss": 0.0067, "step": 18930 }, { "epoch": 0.15488408226683567, "grad_norm": 0.2986551821231842, "learning_rate": 7.744204113341784e-06, "loss": 0.0059, "step": 18940 }, { "epoch": 0.15496585844543484, "grad_norm": 0.1328212320804596, "learning_rate": 7.748292922271743e-06, "loss": 0.0055, "step": 18950 }, { "epoch": 0.155047634624034, "grad_norm": 0.10863211750984192, "learning_rate": 7.752381731201702e-06, "loss": 0.0048, "step": 18960 }, { "epoch": 0.1551294108026332, "grad_norm": 0.15440131723880768, "learning_rate": 7.756470540131659e-06, "loss": 0.0032, "step": 18970 }, { "epoch": 0.15521118698123237, "grad_norm": 0.3327789008617401, "learning_rate": 7.76055934906162e-06, "loss": 0.0034, "step": 18980 }, { "epoch": 0.15529296315983154, "grad_norm": 0.25992804765701294, "learning_rate": 7.764648157991579e-06, "loss": 0.0031, "step": 18990 }, { "epoch": 0.1553747393384307, "grad_norm": 0.08857529610395432, "learning_rate": 7.768736966921536e-06, "loss": 0.0049, "step": 19000 }, { "epoch": 0.15545651551702988, "grad_norm": 0.33651015162467957, "learning_rate": 7.772825775851495e-06, "loss": 0.0058, "step": 19010 }, { "epoch": 0.15553829169562905, "grad_norm": 0.25108370184898376, "learning_rate": 7.776914584781454e-06, "loss": 0.0032, "step": 19020 }, { "epoch": 0.15562006787422825, "grad_norm": 0.2763095796108246, "learning_rate": 7.781003393711412e-06, "loss": 0.0043, "step": 19030 }, { "epoch": 0.15570184405282742, "grad_norm": 0.05476851388812065, "learning_rate": 7.785092202641371e-06, "loss": 0.0036, "step": 19040 }, { "epoch": 0.15578362023142658, "grad_norm": 0.10253007709980011, "learning_rate": 7.78918101157133e-06, "loss": 0.0022, "step": 19050 }, { "epoch": 0.15586539641002575, "grad_norm": 0.33307066559791565, "learning_rate": 7.793269820501289e-06, "loss": 0.0046, "step": 19060 }, { "epoch": 0.15594717258862492, "grad_norm": 0.2551349997520447, "learning_rate": 7.797358629431248e-06, "loss": 0.0046, "step": 19070 }, { "epoch": 0.15602894876722412, "grad_norm": 0.26555415987968445, "learning_rate": 7.801447438361205e-06, "loss": 0.0048, "step": 19080 }, { "epoch": 0.1561107249458233, "grad_norm": 0.1124529168009758, "learning_rate": 7.805536247291164e-06, "loss": 0.0038, "step": 19090 }, { "epoch": 0.15619250112442246, "grad_norm": 0.0847996398806572, "learning_rate": 7.809625056221123e-06, "loss": 0.0034, "step": 19100 }, { "epoch": 0.15627427730302162, "grad_norm": 0.2585618495941162, "learning_rate": 7.813713865151082e-06, "loss": 0.0052, "step": 19110 }, { "epoch": 0.1563560534816208, "grad_norm": 0.17448930442333221, "learning_rate": 7.81780267408104e-06, "loss": 0.0061, "step": 19120 }, { "epoch": 0.15643782966022, "grad_norm": 0.30085352063179016, "learning_rate": 7.821891483011e-06, "loss": 0.0038, "step": 19130 }, { "epoch": 0.15651960583881916, "grad_norm": 0.2855682671070099, "learning_rate": 7.825980291940957e-06, "loss": 0.0037, "step": 19140 }, { "epoch": 0.15660138201741833, "grad_norm": 0.26500603556632996, "learning_rate": 7.830069100870918e-06, "loss": 0.0063, "step": 19150 }, { "epoch": 0.1566831581960175, "grad_norm": 0.21686328947544098, "learning_rate": 7.834157909800876e-06, "loss": 0.005, "step": 19160 }, { "epoch": 0.15676493437461667, "grad_norm": 0.2562224864959717, "learning_rate": 7.838246718730834e-06, "loss": 0.0061, "step": 19170 }, { "epoch": 0.15684671055321583, "grad_norm": 0.400933176279068, "learning_rate": 7.842335527660793e-06, "loss": 0.0061, "step": 19180 }, { "epoch": 0.15692848673181503, "grad_norm": 0.10999701917171478, "learning_rate": 7.846424336590751e-06, "loss": 0.0066, "step": 19190 }, { "epoch": 0.1570102629104142, "grad_norm": 0.09772936254739761, "learning_rate": 7.85051314552071e-06, "loss": 0.0039, "step": 19200 }, { "epoch": 0.15709203908901337, "grad_norm": 0.170070618391037, "learning_rate": 7.85460195445067e-06, "loss": 0.0051, "step": 19210 }, { "epoch": 0.15717381526761254, "grad_norm": 0.11665001511573792, "learning_rate": 7.858690763380628e-06, "loss": 0.0074, "step": 19220 }, { "epoch": 0.1572555914462117, "grad_norm": 0.3261495530605316, "learning_rate": 7.862779572310587e-06, "loss": 0.0059, "step": 19230 }, { "epoch": 0.1573373676248109, "grad_norm": 0.16941359639167786, "learning_rate": 7.866868381240546e-06, "loss": 0.0036, "step": 19240 }, { "epoch": 0.15741914380341007, "grad_norm": 0.295209139585495, "learning_rate": 7.870957190170505e-06, "loss": 0.004, "step": 19250 }, { "epoch": 0.15750091998200924, "grad_norm": 0.46378272771835327, "learning_rate": 7.875045999100462e-06, "loss": 0.0037, "step": 19260 }, { "epoch": 0.1575826961606084, "grad_norm": 0.1489955484867096, "learning_rate": 7.879134808030421e-06, "loss": 0.0043, "step": 19270 }, { "epoch": 0.15766447233920758, "grad_norm": 0.23734524846076965, "learning_rate": 7.88322361696038e-06, "loss": 0.0032, "step": 19280 }, { "epoch": 0.15774624851780678, "grad_norm": 0.11441916227340698, "learning_rate": 7.887312425890339e-06, "loss": 0.005, "step": 19290 }, { "epoch": 0.15782802469640594, "grad_norm": 0.2858456075191498, "learning_rate": 7.891401234820298e-06, "loss": 0.0042, "step": 19300 }, { "epoch": 0.1579098008750051, "grad_norm": 0.30242690443992615, "learning_rate": 7.895490043750256e-06, "loss": 0.0051, "step": 19310 }, { "epoch": 0.15799157705360428, "grad_norm": 0.27384084463119507, "learning_rate": 7.899578852680215e-06, "loss": 0.0131, "step": 19320 }, { "epoch": 0.15807335323220345, "grad_norm": 0.2835315465927124, "learning_rate": 7.903667661610174e-06, "loss": 0.0041, "step": 19330 }, { "epoch": 0.15815512941080262, "grad_norm": 0.1634734570980072, "learning_rate": 7.907756470540132e-06, "loss": 0.006, "step": 19340 }, { "epoch": 0.15823690558940182, "grad_norm": 0.2001921534538269, "learning_rate": 7.91184527947009e-06, "loss": 0.005, "step": 19350 }, { "epoch": 0.15831868176800099, "grad_norm": 0.284332811832428, "learning_rate": 7.915934088400051e-06, "loss": 0.0036, "step": 19360 }, { "epoch": 0.15840045794660015, "grad_norm": 0.22458995878696442, "learning_rate": 7.920022897330008e-06, "loss": 0.005, "step": 19370 }, { "epoch": 0.15848223412519932, "grad_norm": 0.23963548243045807, "learning_rate": 7.924111706259967e-06, "loss": 0.0054, "step": 19380 }, { "epoch": 0.1585640103037985, "grad_norm": 0.31205153465270996, "learning_rate": 7.928200515189926e-06, "loss": 0.0034, "step": 19390 }, { "epoch": 0.1586457864823977, "grad_norm": 0.44947367906570435, "learning_rate": 7.932289324119885e-06, "loss": 0.0057, "step": 19400 }, { "epoch": 0.15872756266099686, "grad_norm": 0.4024558663368225, "learning_rate": 7.936378133049844e-06, "loss": 0.0062, "step": 19410 }, { "epoch": 0.15880933883959603, "grad_norm": 0.10380056500434875, "learning_rate": 7.940466941979803e-06, "loss": 0.0046, "step": 19420 }, { "epoch": 0.1588911150181952, "grad_norm": 0.36266446113586426, "learning_rate": 7.94455575090976e-06, "loss": 0.0057, "step": 19430 }, { "epoch": 0.15897289119679436, "grad_norm": 0.6275274157524109, "learning_rate": 7.948644559839719e-06, "loss": 0.0036, "step": 19440 }, { "epoch": 0.15905466737539356, "grad_norm": 0.2918349504470825, "learning_rate": 7.952733368769678e-06, "loss": 0.0034, "step": 19450 }, { "epoch": 0.15913644355399273, "grad_norm": 0.02890961617231369, "learning_rate": 7.956822177699637e-06, "loss": 0.003, "step": 19460 }, { "epoch": 0.1592182197325919, "grad_norm": 0.6761190891265869, "learning_rate": 7.960910986629595e-06, "loss": 0.0052, "step": 19470 }, { "epoch": 0.15929999591119107, "grad_norm": 0.2637091875076294, "learning_rate": 7.964999795559554e-06, "loss": 0.0061, "step": 19480 }, { "epoch": 0.15938177208979024, "grad_norm": 0.18830150365829468, "learning_rate": 7.969088604489513e-06, "loss": 0.0039, "step": 19490 }, { "epoch": 0.1594635482683894, "grad_norm": 0.30482831597328186, "learning_rate": 7.973177413419472e-06, "loss": 0.0047, "step": 19500 }, { "epoch": 0.1595453244469886, "grad_norm": 0.20776337385177612, "learning_rate": 7.97726622234943e-06, "loss": 0.0054, "step": 19510 }, { "epoch": 0.15962710062558777, "grad_norm": 0.16019687056541443, "learning_rate": 7.981355031279388e-06, "loss": 0.0052, "step": 19520 }, { "epoch": 0.15970887680418694, "grad_norm": 0.18984046578407288, "learning_rate": 7.985443840209349e-06, "loss": 0.0061, "step": 19530 }, { "epoch": 0.1597906529827861, "grad_norm": 0.16442431509494781, "learning_rate": 7.989532649139306e-06, "loss": 0.0051, "step": 19540 }, { "epoch": 0.15987242916138528, "grad_norm": 0.15349240601062775, "learning_rate": 7.993621458069265e-06, "loss": 0.0049, "step": 19550 }, { "epoch": 0.15995420533998447, "grad_norm": 0.15649288892745972, "learning_rate": 7.997710266999224e-06, "loss": 0.0036, "step": 19560 }, { "epoch": 0.16003598151858364, "grad_norm": 0.4164904057979584, "learning_rate": 8.001799075929183e-06, "loss": 0.0037, "step": 19570 }, { "epoch": 0.1601177576971828, "grad_norm": 0.3590031564235687, "learning_rate": 8.005887884859142e-06, "loss": 0.0054, "step": 19580 }, { "epoch": 0.16019953387578198, "grad_norm": 0.12972383201122284, "learning_rate": 8.0099766937891e-06, "loss": 0.0058, "step": 19590 }, { "epoch": 0.16028131005438115, "grad_norm": 0.2318953275680542, "learning_rate": 8.014065502719058e-06, "loss": 0.0077, "step": 19600 }, { "epoch": 0.16036308623298035, "grad_norm": 0.14564307034015656, "learning_rate": 8.018154311649017e-06, "loss": 0.009, "step": 19610 }, { "epoch": 0.16044486241157951, "grad_norm": 0.07929883897304535, "learning_rate": 8.022243120578976e-06, "loss": 0.0041, "step": 19620 }, { "epoch": 0.16052663859017868, "grad_norm": 0.33972325921058655, "learning_rate": 8.026331929508934e-06, "loss": 0.0059, "step": 19630 }, { "epoch": 0.16060841476877785, "grad_norm": 0.9412709474563599, "learning_rate": 8.030420738438893e-06, "loss": 0.0042, "step": 19640 }, { "epoch": 0.16069019094737702, "grad_norm": 0.24204713106155396, "learning_rate": 8.034509547368852e-06, "loss": 0.0056, "step": 19650 }, { "epoch": 0.1607719671259762, "grad_norm": 0.10471083968877792, "learning_rate": 8.038598356298811e-06, "loss": 0.0057, "step": 19660 }, { "epoch": 0.1608537433045754, "grad_norm": 0.271036297082901, "learning_rate": 8.04268716522877e-06, "loss": 0.0046, "step": 19670 }, { "epoch": 0.16093551948317456, "grad_norm": 0.10292061418294907, "learning_rate": 8.046775974158729e-06, "loss": 0.0055, "step": 19680 }, { "epoch": 0.16101729566177372, "grad_norm": 0.2430376559495926, "learning_rate": 8.050864783088686e-06, "loss": 0.0059, "step": 19690 }, { "epoch": 0.1610990718403729, "grad_norm": 0.47958847880363464, "learning_rate": 8.054953592018647e-06, "loss": 0.005, "step": 19700 }, { "epoch": 0.16118084801897206, "grad_norm": 0.18213596940040588, "learning_rate": 8.059042400948604e-06, "loss": 0.0044, "step": 19710 }, { "epoch": 0.16126262419757126, "grad_norm": 0.27116119861602783, "learning_rate": 8.063131209878563e-06, "loss": 0.0054, "step": 19720 }, { "epoch": 0.16134440037617043, "grad_norm": 0.3073331415653229, "learning_rate": 8.067220018808522e-06, "loss": 0.0046, "step": 19730 }, { "epoch": 0.1614261765547696, "grad_norm": 0.4865843653678894, "learning_rate": 8.07130882773848e-06, "loss": 0.006, "step": 19740 }, { "epoch": 0.16150795273336876, "grad_norm": 0.31106141209602356, "learning_rate": 8.07539763666844e-06, "loss": 0.0065, "step": 19750 }, { "epoch": 0.16158972891196793, "grad_norm": 0.2124381810426712, "learning_rate": 8.079486445598398e-06, "loss": 0.0045, "step": 19760 }, { "epoch": 0.16167150509056713, "grad_norm": 0.19841545820236206, "learning_rate": 8.083575254528356e-06, "loss": 0.0052, "step": 19770 }, { "epoch": 0.1617532812691663, "grad_norm": 0.3410851061344147, "learning_rate": 8.087664063458315e-06, "loss": 0.0046, "step": 19780 }, { "epoch": 0.16183505744776547, "grad_norm": 0.1871272474527359, "learning_rate": 8.091752872388275e-06, "loss": 0.0054, "step": 19790 }, { "epoch": 0.16191683362636464, "grad_norm": 0.5334616899490356, "learning_rate": 8.095841681318232e-06, "loss": 0.0068, "step": 19800 }, { "epoch": 0.1619986098049638, "grad_norm": 0.15620428323745728, "learning_rate": 8.099930490248191e-06, "loss": 0.0041, "step": 19810 }, { "epoch": 0.16208038598356297, "grad_norm": 0.20223525166511536, "learning_rate": 8.10401929917815e-06, "loss": 0.0034, "step": 19820 }, { "epoch": 0.16216216216216217, "grad_norm": 0.2155512422323227, "learning_rate": 8.108108108108109e-06, "loss": 0.0057, "step": 19830 }, { "epoch": 0.16224393834076134, "grad_norm": 0.4247313439846039, "learning_rate": 8.112196917038068e-06, "loss": 0.0059, "step": 19840 }, { "epoch": 0.1623257145193605, "grad_norm": 0.20754070580005646, "learning_rate": 8.116285725968027e-06, "loss": 0.0038, "step": 19850 }, { "epoch": 0.16240749069795968, "grad_norm": 0.2745894193649292, "learning_rate": 8.120374534897984e-06, "loss": 0.0043, "step": 19860 }, { "epoch": 0.16248926687655885, "grad_norm": 0.13027438521385193, "learning_rate": 8.124463343827945e-06, "loss": 0.0058, "step": 19870 }, { "epoch": 0.16257104305515804, "grad_norm": 0.29569971561431885, "learning_rate": 8.128552152757902e-06, "loss": 0.0068, "step": 19880 }, { "epoch": 0.1626528192337572, "grad_norm": 0.09696952998638153, "learning_rate": 8.13264096168786e-06, "loss": 0.0078, "step": 19890 }, { "epoch": 0.16273459541235638, "grad_norm": 0.23280832171440125, "learning_rate": 8.13672977061782e-06, "loss": 0.0045, "step": 19900 }, { "epoch": 0.16281637159095555, "grad_norm": 0.1693982183933258, "learning_rate": 8.140818579547779e-06, "loss": 0.0044, "step": 19910 }, { "epoch": 0.16289814776955472, "grad_norm": 0.18333950638771057, "learning_rate": 8.144907388477737e-06, "loss": 0.0055, "step": 19920 }, { "epoch": 0.16297992394815392, "grad_norm": 0.3039666414260864, "learning_rate": 8.148996197407696e-06, "loss": 0.0055, "step": 19930 }, { "epoch": 0.16306170012675308, "grad_norm": 0.18958458304405212, "learning_rate": 8.153085006337654e-06, "loss": 0.0055, "step": 19940 }, { "epoch": 0.16314347630535225, "grad_norm": 0.2153690904378891, "learning_rate": 8.157173815267612e-06, "loss": 0.007, "step": 19950 }, { "epoch": 0.16322525248395142, "grad_norm": 0.18230506777763367, "learning_rate": 8.161262624197573e-06, "loss": 0.0037, "step": 19960 }, { "epoch": 0.1633070286625506, "grad_norm": 0.14518201351165771, "learning_rate": 8.16535143312753e-06, "loss": 0.0042, "step": 19970 }, { "epoch": 0.16338880484114976, "grad_norm": 0.1204666942358017, "learning_rate": 8.169440242057489e-06, "loss": 0.0067, "step": 19980 }, { "epoch": 0.16347058101974896, "grad_norm": 0.38456183671951294, "learning_rate": 8.173529050987448e-06, "loss": 0.0069, "step": 19990 }, { "epoch": 0.16355235719834813, "grad_norm": 0.16483978927135468, "learning_rate": 8.177617859917407e-06, "loss": 0.0037, "step": 20000 }, { "epoch": 0.1636341333769473, "grad_norm": 0.2157786637544632, "learning_rate": 8.181706668847366e-06, "loss": 0.0058, "step": 20010 }, { "epoch": 0.16371590955554646, "grad_norm": 0.05149488151073456, "learning_rate": 8.185795477777325e-06, "loss": 0.004, "step": 20020 }, { "epoch": 0.16379768573414563, "grad_norm": 0.17027652263641357, "learning_rate": 8.189884286707282e-06, "loss": 0.0044, "step": 20030 }, { "epoch": 0.16387946191274483, "grad_norm": 0.0945250540971756, "learning_rate": 8.193973095637243e-06, "loss": 0.0055, "step": 20040 }, { "epoch": 0.163961238091344, "grad_norm": 0.11339961737394333, "learning_rate": 8.1980619045672e-06, "loss": 0.0048, "step": 20050 }, { "epoch": 0.16404301426994317, "grad_norm": 0.31979188323020935, "learning_rate": 8.202150713497159e-06, "loss": 0.0048, "step": 20060 }, { "epoch": 0.16412479044854233, "grad_norm": 0.1609037071466446, "learning_rate": 8.206239522427118e-06, "loss": 0.0067, "step": 20070 }, { "epoch": 0.1642065666271415, "grad_norm": 0.18014077842235565, "learning_rate": 8.210328331357076e-06, "loss": 0.0062, "step": 20080 }, { "epoch": 0.1642883428057407, "grad_norm": 0.18625156581401825, "learning_rate": 8.214417140287035e-06, "loss": 0.0062, "step": 20090 }, { "epoch": 0.16437011898433987, "grad_norm": 0.0881158858537674, "learning_rate": 8.218505949216994e-06, "loss": 0.0035, "step": 20100 }, { "epoch": 0.16445189516293904, "grad_norm": 0.23202989995479584, "learning_rate": 8.222594758146953e-06, "loss": 0.0058, "step": 20110 }, { "epoch": 0.1645336713415382, "grad_norm": 0.11487312614917755, "learning_rate": 8.22668356707691e-06, "loss": 0.0035, "step": 20120 }, { "epoch": 0.16461544752013738, "grad_norm": 0.12632177770137787, "learning_rate": 8.230772376006871e-06, "loss": 0.0029, "step": 20130 }, { "epoch": 0.16469722369873654, "grad_norm": 0.3245593011379242, "learning_rate": 8.234861184936828e-06, "loss": 0.0037, "step": 20140 }, { "epoch": 0.16477899987733574, "grad_norm": 0.23649413883686066, "learning_rate": 8.238949993866787e-06, "loss": 0.0074, "step": 20150 }, { "epoch": 0.1648607760559349, "grad_norm": 0.2218043953180313, "learning_rate": 8.243038802796746e-06, "loss": 0.0068, "step": 20160 }, { "epoch": 0.16494255223453408, "grad_norm": 0.19692188501358032, "learning_rate": 8.247127611726705e-06, "loss": 0.0038, "step": 20170 }, { "epoch": 0.16502432841313325, "grad_norm": 0.1277213990688324, "learning_rate": 8.251216420656664e-06, "loss": 0.0045, "step": 20180 }, { "epoch": 0.16510610459173242, "grad_norm": 0.2429456263780594, "learning_rate": 8.255305229586623e-06, "loss": 0.0042, "step": 20190 }, { "epoch": 0.1651878807703316, "grad_norm": 0.10899047553539276, "learning_rate": 8.25939403851658e-06, "loss": 0.0024, "step": 20200 }, { "epoch": 0.16526965694893078, "grad_norm": 0.2750144898891449, "learning_rate": 8.263482847446539e-06, "loss": 0.0045, "step": 20210 }, { "epoch": 0.16535143312752995, "grad_norm": 0.16271273791790009, "learning_rate": 8.2675716563765e-06, "loss": 0.0052, "step": 20220 }, { "epoch": 0.16543320930612912, "grad_norm": 0.08533435314893723, "learning_rate": 8.271660465306457e-06, "loss": 0.0057, "step": 20230 }, { "epoch": 0.1655149854847283, "grad_norm": 0.17352591454982758, "learning_rate": 8.275749274236415e-06, "loss": 0.0067, "step": 20240 }, { "epoch": 0.16559676166332749, "grad_norm": 0.32817763090133667, "learning_rate": 8.279838083166374e-06, "loss": 0.0057, "step": 20250 }, { "epoch": 0.16567853784192665, "grad_norm": 0.2377566546201706, "learning_rate": 8.283926892096333e-06, "loss": 0.0052, "step": 20260 }, { "epoch": 0.16576031402052582, "grad_norm": 0.22385337948799133, "learning_rate": 8.288015701026292e-06, "loss": 0.0068, "step": 20270 }, { "epoch": 0.165842090199125, "grad_norm": 0.2660122513771057, "learning_rate": 8.292104509956251e-06, "loss": 0.0078, "step": 20280 }, { "epoch": 0.16592386637772416, "grad_norm": 0.17497499287128448, "learning_rate": 8.296193318886208e-06, "loss": 0.0036, "step": 20290 }, { "epoch": 0.16600564255632333, "grad_norm": 0.09100627154111862, "learning_rate": 8.300282127816169e-06, "loss": 0.0037, "step": 20300 }, { "epoch": 0.16608741873492253, "grad_norm": 0.2387431412935257, "learning_rate": 8.304370936746126e-06, "loss": 0.0044, "step": 20310 }, { "epoch": 0.1661691949135217, "grad_norm": 0.17150796949863434, "learning_rate": 8.308459745676085e-06, "loss": 0.0059, "step": 20320 }, { "epoch": 0.16625097109212086, "grad_norm": 0.1954578012228012, "learning_rate": 8.312548554606044e-06, "loss": 0.0051, "step": 20330 }, { "epoch": 0.16633274727072003, "grad_norm": 0.29491040110588074, "learning_rate": 8.316637363536003e-06, "loss": 0.004, "step": 20340 }, { "epoch": 0.1664145234493192, "grad_norm": 0.2638390064239502, "learning_rate": 8.320726172465962e-06, "loss": 0.0056, "step": 20350 }, { "epoch": 0.1664962996279184, "grad_norm": 0.11308642476797104, "learning_rate": 8.32481498139592e-06, "loss": 0.005, "step": 20360 }, { "epoch": 0.16657807580651757, "grad_norm": 0.21917563676834106, "learning_rate": 8.328903790325878e-06, "loss": 0.0046, "step": 20370 }, { "epoch": 0.16665985198511674, "grad_norm": 0.07805346697568893, "learning_rate": 8.332992599255837e-06, "loss": 0.0029, "step": 20380 }, { "epoch": 0.1667416281637159, "grad_norm": 0.44953152537345886, "learning_rate": 8.337081408185797e-06, "loss": 0.0034, "step": 20390 }, { "epoch": 0.16682340434231507, "grad_norm": 0.24000652134418488, "learning_rate": 8.341170217115754e-06, "loss": 0.0059, "step": 20400 }, { "epoch": 0.16690518052091427, "grad_norm": 0.19645734131336212, "learning_rate": 8.345259026045713e-06, "loss": 0.0043, "step": 20410 }, { "epoch": 0.16698695669951344, "grad_norm": 0.25517985224723816, "learning_rate": 8.349347834975672e-06, "loss": 0.0072, "step": 20420 }, { "epoch": 0.1670687328781126, "grad_norm": 0.1584816575050354, "learning_rate": 8.353436643905631e-06, "loss": 0.0083, "step": 20430 }, { "epoch": 0.16715050905671178, "grad_norm": 0.22015663981437683, "learning_rate": 8.35752545283559e-06, "loss": 0.0033, "step": 20440 }, { "epoch": 0.16723228523531095, "grad_norm": 0.1487390697002411, "learning_rate": 8.361614261765549e-06, "loss": 0.0032, "step": 20450 }, { "epoch": 0.16731406141391011, "grad_norm": 0.4648723304271698, "learning_rate": 8.365703070695506e-06, "loss": 0.0039, "step": 20460 }, { "epoch": 0.1673958375925093, "grad_norm": 0.4111975431442261, "learning_rate": 8.369791879625467e-06, "loss": 0.0061, "step": 20470 }, { "epoch": 0.16747761377110848, "grad_norm": 0.10539502650499344, "learning_rate": 8.373880688555424e-06, "loss": 0.0059, "step": 20480 }, { "epoch": 0.16755938994970765, "grad_norm": 0.35376474261283875, "learning_rate": 8.377969497485383e-06, "loss": 0.0064, "step": 20490 }, { "epoch": 0.16764116612830682, "grad_norm": 0.23868773877620697, "learning_rate": 8.382058306415342e-06, "loss": 0.0049, "step": 20500 }, { "epoch": 0.167722942306906, "grad_norm": 0.08916737884283066, "learning_rate": 8.3861471153453e-06, "loss": 0.0038, "step": 20510 }, { "epoch": 0.16780471848550518, "grad_norm": 0.25320547819137573, "learning_rate": 8.39023592427526e-06, "loss": 0.003, "step": 20520 }, { "epoch": 0.16788649466410435, "grad_norm": 0.0950888991355896, "learning_rate": 8.394324733205218e-06, "loss": 0.004, "step": 20530 }, { "epoch": 0.16796827084270352, "grad_norm": 0.1741948276758194, "learning_rate": 8.398413542135177e-06, "loss": 0.0033, "step": 20540 }, { "epoch": 0.1680500470213027, "grad_norm": 0.18756790459156036, "learning_rate": 8.402502351065134e-06, "loss": 0.004, "step": 20550 }, { "epoch": 0.16813182319990186, "grad_norm": 0.12715576589107513, "learning_rate": 8.406591159995095e-06, "loss": 0.0066, "step": 20560 }, { "epoch": 0.16821359937850106, "grad_norm": 0.5837341547012329, "learning_rate": 8.410679968925052e-06, "loss": 0.01, "step": 20570 }, { "epoch": 0.16829537555710022, "grad_norm": 0.18380357325077057, "learning_rate": 8.414768777855011e-06, "loss": 0.0053, "step": 20580 }, { "epoch": 0.1683771517356994, "grad_norm": 0.29252517223358154, "learning_rate": 8.41885758678497e-06, "loss": 0.0058, "step": 20590 }, { "epoch": 0.16845892791429856, "grad_norm": 0.3185376524925232, "learning_rate": 8.422946395714929e-06, "loss": 0.0039, "step": 20600 }, { "epoch": 0.16854070409289773, "grad_norm": 0.3760288655757904, "learning_rate": 8.427035204644888e-06, "loss": 0.0052, "step": 20610 }, { "epoch": 0.1686224802714969, "grad_norm": 0.146566241979599, "learning_rate": 8.431124013574847e-06, "loss": 0.0054, "step": 20620 }, { "epoch": 0.1687042564500961, "grad_norm": 0.22395232319831848, "learning_rate": 8.435212822504804e-06, "loss": 0.0049, "step": 20630 }, { "epoch": 0.16878603262869527, "grad_norm": 0.39529168605804443, "learning_rate": 8.439301631434765e-06, "loss": 0.0077, "step": 20640 }, { "epoch": 0.16886780880729443, "grad_norm": 0.278838187456131, "learning_rate": 8.443390440364723e-06, "loss": 0.0047, "step": 20650 }, { "epoch": 0.1689495849858936, "grad_norm": 0.22103138267993927, "learning_rate": 8.44747924929468e-06, "loss": 0.0054, "step": 20660 }, { "epoch": 0.16903136116449277, "grad_norm": 0.26575928926467896, "learning_rate": 8.45156805822464e-06, "loss": 0.0065, "step": 20670 }, { "epoch": 0.16911313734309197, "grad_norm": 0.115019291639328, "learning_rate": 8.455656867154598e-06, "loss": 0.0044, "step": 20680 }, { "epoch": 0.16919491352169114, "grad_norm": 0.08485393226146698, "learning_rate": 8.459745676084557e-06, "loss": 0.0038, "step": 20690 }, { "epoch": 0.1692766897002903, "grad_norm": 0.32058635354042053, "learning_rate": 8.463834485014516e-06, "loss": 0.0038, "step": 20700 }, { "epoch": 0.16935846587888947, "grad_norm": 0.06666643172502518, "learning_rate": 8.467923293944475e-06, "loss": 0.002, "step": 20710 }, { "epoch": 0.16944024205748864, "grad_norm": 0.11610224097967148, "learning_rate": 8.472012102874432e-06, "loss": 0.006, "step": 20720 }, { "epoch": 0.16952201823608784, "grad_norm": 0.26702800393104553, "learning_rate": 8.476100911804393e-06, "loss": 0.0101, "step": 20730 }, { "epoch": 0.169603794414687, "grad_norm": 0.15062494575977325, "learning_rate": 8.48018972073435e-06, "loss": 0.0047, "step": 20740 }, { "epoch": 0.16968557059328618, "grad_norm": 0.21270307898521423, "learning_rate": 8.484278529664309e-06, "loss": 0.0052, "step": 20750 }, { "epoch": 0.16976734677188535, "grad_norm": 0.3559432923793793, "learning_rate": 8.488367338594268e-06, "loss": 0.004, "step": 20760 }, { "epoch": 0.16984912295048452, "grad_norm": 0.14466102421283722, "learning_rate": 8.492456147524227e-06, "loss": 0.0033, "step": 20770 }, { "epoch": 0.16993089912908368, "grad_norm": 0.243708997964859, "learning_rate": 8.496544956454186e-06, "loss": 0.0054, "step": 20780 }, { "epoch": 0.17001267530768288, "grad_norm": 0.058395981788635254, "learning_rate": 8.500633765384145e-06, "loss": 0.0048, "step": 20790 }, { "epoch": 0.17009445148628205, "grad_norm": 0.09158365428447723, "learning_rate": 8.504722574314102e-06, "loss": 0.0036, "step": 20800 }, { "epoch": 0.17017622766488122, "grad_norm": 0.15379595756530762, "learning_rate": 8.508811383244062e-06, "loss": 0.0051, "step": 20810 }, { "epoch": 0.1702580038434804, "grad_norm": 0.29834839701652527, "learning_rate": 8.512900192174021e-06, "loss": 0.0043, "step": 20820 }, { "epoch": 0.17033978002207956, "grad_norm": 0.17290769517421722, "learning_rate": 8.516989001103979e-06, "loss": 0.0033, "step": 20830 }, { "epoch": 0.17042155620067875, "grad_norm": 0.09078173339366913, "learning_rate": 8.521077810033937e-06, "loss": 0.0071, "step": 20840 }, { "epoch": 0.17050333237927792, "grad_norm": 0.23621369898319244, "learning_rate": 8.525166618963896e-06, "loss": 0.0099, "step": 20850 }, { "epoch": 0.1705851085578771, "grad_norm": 0.12209296226501465, "learning_rate": 8.529255427893855e-06, "loss": 0.0034, "step": 20860 }, { "epoch": 0.17066688473647626, "grad_norm": 0.21417361497879028, "learning_rate": 8.533344236823814e-06, "loss": 0.0043, "step": 20870 }, { "epoch": 0.17074866091507543, "grad_norm": 0.4282679557800293, "learning_rate": 8.537433045753773e-06, "loss": 0.0067, "step": 20880 }, { "epoch": 0.17083043709367463, "grad_norm": 0.2853556275367737, "learning_rate": 8.54152185468373e-06, "loss": 0.0078, "step": 20890 }, { "epoch": 0.1709122132722738, "grad_norm": 0.22579628229141235, "learning_rate": 8.54561066361369e-06, "loss": 0.0062, "step": 20900 }, { "epoch": 0.17099398945087296, "grad_norm": 0.08622100949287415, "learning_rate": 8.549699472543648e-06, "loss": 0.0031, "step": 20910 }, { "epoch": 0.17107576562947213, "grad_norm": 0.6095001697540283, "learning_rate": 8.553788281473607e-06, "loss": 0.0041, "step": 20920 }, { "epoch": 0.1711575418080713, "grad_norm": 0.21727490425109863, "learning_rate": 8.557877090403566e-06, "loss": 0.005, "step": 20930 }, { "epoch": 0.17123931798667047, "grad_norm": 0.2528925836086273, "learning_rate": 8.561965899333525e-06, "loss": 0.004, "step": 20940 }, { "epoch": 0.17132109416526967, "grad_norm": 0.19126541912555695, "learning_rate": 8.566054708263484e-06, "loss": 0.0039, "step": 20950 }, { "epoch": 0.17140287034386884, "grad_norm": 0.3120613992214203, "learning_rate": 8.570143517193443e-06, "loss": 0.004, "step": 20960 }, { "epoch": 0.171484646522468, "grad_norm": 0.1596815437078476, "learning_rate": 8.574232326123401e-06, "loss": 0.0068, "step": 20970 }, { "epoch": 0.17156642270106717, "grad_norm": 0.11261751502752304, "learning_rate": 8.57832113505336e-06, "loss": 0.0041, "step": 20980 }, { "epoch": 0.17164819887966634, "grad_norm": 0.34603023529052734, "learning_rate": 8.58240994398332e-06, "loss": 0.0057, "step": 20990 }, { "epoch": 0.17172997505826554, "grad_norm": 0.2724645435810089, "learning_rate": 8.586498752913276e-06, "loss": 0.0049, "step": 21000 }, { "epoch": 0.1718117512368647, "grad_norm": 0.17257694900035858, "learning_rate": 8.590587561843235e-06, "loss": 0.0038, "step": 21010 }, { "epoch": 0.17189352741546388, "grad_norm": 0.217067688703537, "learning_rate": 8.594676370773194e-06, "loss": 0.0052, "step": 21020 }, { "epoch": 0.17197530359406304, "grad_norm": 0.09983930736780167, "learning_rate": 8.598765179703153e-06, "loss": 0.0038, "step": 21030 }, { "epoch": 0.1720570797726622, "grad_norm": 0.15509112179279327, "learning_rate": 8.602853988633112e-06, "loss": 0.0037, "step": 21040 }, { "epoch": 0.1721388559512614, "grad_norm": 0.10837340354919434, "learning_rate": 8.606942797563071e-06, "loss": 0.0051, "step": 21050 }, { "epoch": 0.17222063212986058, "grad_norm": 0.41269031167030334, "learning_rate": 8.611031606493028e-06, "loss": 0.003, "step": 21060 }, { "epoch": 0.17230240830845975, "grad_norm": 0.19344782829284668, "learning_rate": 8.615120415422989e-06, "loss": 0.0051, "step": 21070 }, { "epoch": 0.17238418448705892, "grad_norm": 0.17092730104923248, "learning_rate": 8.619209224352948e-06, "loss": 0.0056, "step": 21080 }, { "epoch": 0.17246596066565809, "grad_norm": 0.2988235652446747, "learning_rate": 8.623298033282905e-06, "loss": 0.0059, "step": 21090 }, { "epoch": 0.17254773684425725, "grad_norm": 0.18716266751289368, "learning_rate": 8.627386842212864e-06, "loss": 0.005, "step": 21100 }, { "epoch": 0.17262951302285645, "grad_norm": 0.08184192329645157, "learning_rate": 8.631475651142823e-06, "loss": 0.0046, "step": 21110 }, { "epoch": 0.17271128920145562, "grad_norm": 0.18342363834381104, "learning_rate": 8.635564460072782e-06, "loss": 0.0043, "step": 21120 }, { "epoch": 0.1727930653800548, "grad_norm": 0.3301747739315033, "learning_rate": 8.63965326900274e-06, "loss": 0.0041, "step": 21130 }, { "epoch": 0.17287484155865396, "grad_norm": 0.22638551890850067, "learning_rate": 8.6437420779327e-06, "loss": 0.006, "step": 21140 }, { "epoch": 0.17295661773725313, "grad_norm": 0.13097137212753296, "learning_rate": 8.647830886862658e-06, "loss": 0.004, "step": 21150 }, { "epoch": 0.17303839391585232, "grad_norm": 0.13873964548110962, "learning_rate": 8.651919695792617e-06, "loss": 0.0046, "step": 21160 }, { "epoch": 0.1731201700944515, "grad_norm": 0.22785156965255737, "learning_rate": 8.656008504722574e-06, "loss": 0.004, "step": 21170 }, { "epoch": 0.17320194627305066, "grad_norm": 0.14422883093357086, "learning_rate": 8.660097313652533e-06, "loss": 0.0035, "step": 21180 }, { "epoch": 0.17328372245164983, "grad_norm": 0.16400480270385742, "learning_rate": 8.664186122582492e-06, "loss": 0.0054, "step": 21190 }, { "epoch": 0.173365498630249, "grad_norm": 0.24342922866344452, "learning_rate": 8.668274931512451e-06, "loss": 0.0063, "step": 21200 }, { "epoch": 0.1734472748088482, "grad_norm": 0.435362845659256, "learning_rate": 8.67236374044241e-06, "loss": 0.0053, "step": 21210 }, { "epoch": 0.17352905098744736, "grad_norm": 0.23907215893268585, "learning_rate": 8.676452549372369e-06, "loss": 0.0054, "step": 21220 }, { "epoch": 0.17361082716604653, "grad_norm": 0.22214925289154053, "learning_rate": 8.680541358302326e-06, "loss": 0.0035, "step": 21230 }, { "epoch": 0.1736926033446457, "grad_norm": 0.15509957075119019, "learning_rate": 8.684630167232287e-06, "loss": 0.0048, "step": 21240 }, { "epoch": 0.17377437952324487, "grad_norm": 0.19669951498508453, "learning_rate": 8.688718976162245e-06, "loss": 0.0033, "step": 21250 }, { "epoch": 0.17385615570184404, "grad_norm": 0.1633608192205429, "learning_rate": 8.692807785092203e-06, "loss": 0.0031, "step": 21260 }, { "epoch": 0.17393793188044324, "grad_norm": 0.09049567580223083, "learning_rate": 8.696896594022162e-06, "loss": 0.003, "step": 21270 }, { "epoch": 0.1740197080590424, "grad_norm": 0.2218427062034607, "learning_rate": 8.70098540295212e-06, "loss": 0.0047, "step": 21280 }, { "epoch": 0.17410148423764157, "grad_norm": 0.37230753898620605, "learning_rate": 8.70507421188208e-06, "loss": 0.0051, "step": 21290 }, { "epoch": 0.17418326041624074, "grad_norm": 0.15197262167930603, "learning_rate": 8.709163020812038e-06, "loss": 0.0043, "step": 21300 }, { "epoch": 0.1742650365948399, "grad_norm": 0.06353926658630371, "learning_rate": 8.713251829741997e-06, "loss": 0.0054, "step": 21310 }, { "epoch": 0.1743468127734391, "grad_norm": 0.18303602933883667, "learning_rate": 8.717340638671956e-06, "loss": 0.0054, "step": 21320 }, { "epoch": 0.17442858895203828, "grad_norm": 0.027402963489294052, "learning_rate": 8.721429447601915e-06, "loss": 0.0045, "step": 21330 }, { "epoch": 0.17451036513063745, "grad_norm": 0.14269280433654785, "learning_rate": 8.725518256531872e-06, "loss": 0.0043, "step": 21340 }, { "epoch": 0.17459214130923661, "grad_norm": 0.17800305783748627, "learning_rate": 8.729607065461831e-06, "loss": 0.0061, "step": 21350 }, { "epoch": 0.17467391748783578, "grad_norm": 0.2032129466533661, "learning_rate": 8.73369587439179e-06, "loss": 0.0042, "step": 21360 }, { "epoch": 0.17475569366643498, "grad_norm": 0.3861616849899292, "learning_rate": 8.737784683321749e-06, "loss": 0.0047, "step": 21370 }, { "epoch": 0.17483746984503415, "grad_norm": 0.09372109919786453, "learning_rate": 8.741873492251708e-06, "loss": 0.004, "step": 21380 }, { "epoch": 0.17491924602363332, "grad_norm": 0.18706314265727997, "learning_rate": 8.745962301181667e-06, "loss": 0.0062, "step": 21390 }, { "epoch": 0.1750010222022325, "grad_norm": 0.05132881924510002, "learning_rate": 8.750051110111626e-06, "loss": 0.0028, "step": 21400 }, { "epoch": 0.17508279838083166, "grad_norm": 0.0480184368789196, "learning_rate": 8.754139919041584e-06, "loss": 0.0025, "step": 21410 }, { "epoch": 0.17516457455943082, "grad_norm": 0.13797570765018463, "learning_rate": 8.758228727971543e-06, "loss": 0.0068, "step": 21420 }, { "epoch": 0.17524635073803002, "grad_norm": 0.06433945149183273, "learning_rate": 8.7623175369015e-06, "loss": 0.0037, "step": 21430 }, { "epoch": 0.1753281269166292, "grad_norm": 0.14310970902442932, "learning_rate": 8.76640634583146e-06, "loss": 0.0035, "step": 21440 }, { "epoch": 0.17540990309522836, "grad_norm": 0.09103439003229141, "learning_rate": 8.770495154761418e-06, "loss": 0.0046, "step": 21450 }, { "epoch": 0.17549167927382753, "grad_norm": 0.1002233475446701, "learning_rate": 8.774583963691377e-06, "loss": 0.0045, "step": 21460 }, { "epoch": 0.1755734554524267, "grad_norm": 0.1154126450419426, "learning_rate": 8.778672772621336e-06, "loss": 0.0054, "step": 21470 }, { "epoch": 0.1756552316310259, "grad_norm": 0.10257745534181595, "learning_rate": 8.782761581551295e-06, "loss": 0.0043, "step": 21480 }, { "epoch": 0.17573700780962506, "grad_norm": 0.21374131739139557, "learning_rate": 8.786850390481252e-06, "loss": 0.0031, "step": 21490 }, { "epoch": 0.17581878398822423, "grad_norm": 0.12286911904811859, "learning_rate": 8.790939199411213e-06, "loss": 0.0061, "step": 21500 }, { "epoch": 0.1759005601668234, "grad_norm": 0.24727846682071686, "learning_rate": 8.795028008341172e-06, "loss": 0.0053, "step": 21510 }, { "epoch": 0.17598233634542257, "grad_norm": 0.133204385638237, "learning_rate": 8.799116817271129e-06, "loss": 0.0084, "step": 21520 }, { "epoch": 0.17606411252402177, "grad_norm": 0.3565009832382202, "learning_rate": 8.803205626201088e-06, "loss": 0.0044, "step": 21530 }, { "epoch": 0.17614588870262093, "grad_norm": 0.17060092091560364, "learning_rate": 8.807294435131047e-06, "loss": 0.0049, "step": 21540 }, { "epoch": 0.1762276648812201, "grad_norm": 0.1282021403312683, "learning_rate": 8.811383244061006e-06, "loss": 0.0045, "step": 21550 }, { "epoch": 0.17630944105981927, "grad_norm": 0.15772804617881775, "learning_rate": 8.815472052990965e-06, "loss": 0.0038, "step": 21560 }, { "epoch": 0.17639121723841844, "grad_norm": 0.3002811074256897, "learning_rate": 8.819560861920923e-06, "loss": 0.0059, "step": 21570 }, { "epoch": 0.1764729934170176, "grad_norm": 0.09492280334234238, "learning_rate": 8.823649670850882e-06, "loss": 0.0049, "step": 21580 }, { "epoch": 0.1765547695956168, "grad_norm": 0.2432500720024109, "learning_rate": 8.827738479780841e-06, "loss": 0.0035, "step": 21590 }, { "epoch": 0.17663654577421598, "grad_norm": 0.25152480602264404, "learning_rate": 8.831827288710798e-06, "loss": 0.0059, "step": 21600 }, { "epoch": 0.17671832195281514, "grad_norm": 0.07266159355640411, "learning_rate": 8.835916097640757e-06, "loss": 0.0031, "step": 21610 }, { "epoch": 0.1768000981314143, "grad_norm": 0.3861648738384247, "learning_rate": 8.840004906570718e-06, "loss": 0.0046, "step": 21620 }, { "epoch": 0.17688187431001348, "grad_norm": 0.1812063306570053, "learning_rate": 8.844093715500675e-06, "loss": 0.0048, "step": 21630 }, { "epoch": 0.17696365048861268, "grad_norm": 0.09948024898767471, "learning_rate": 8.848182524430634e-06, "loss": 0.005, "step": 21640 }, { "epoch": 0.17704542666721185, "grad_norm": 0.10934072732925415, "learning_rate": 8.852271333360593e-06, "loss": 0.0043, "step": 21650 }, { "epoch": 0.17712720284581102, "grad_norm": 0.13211087882518768, "learning_rate": 8.85636014229055e-06, "loss": 0.0045, "step": 21660 }, { "epoch": 0.17720897902441018, "grad_norm": 0.40800729393959045, "learning_rate": 8.86044895122051e-06, "loss": 0.0052, "step": 21670 }, { "epoch": 0.17729075520300935, "grad_norm": 0.19755586981773376, "learning_rate": 8.86453776015047e-06, "loss": 0.0056, "step": 21680 }, { "epoch": 0.17737253138160855, "grad_norm": 0.11860110610723495, "learning_rate": 8.868626569080427e-06, "loss": 0.0031, "step": 21690 }, { "epoch": 0.17745430756020772, "grad_norm": 0.23967128992080688, "learning_rate": 8.872715378010386e-06, "loss": 0.0039, "step": 21700 }, { "epoch": 0.1775360837388069, "grad_norm": 0.252851665019989, "learning_rate": 8.876804186940345e-06, "loss": 0.0115, "step": 21710 }, { "epoch": 0.17761785991740606, "grad_norm": 0.20961354672908783, "learning_rate": 8.880892995870304e-06, "loss": 0.0058, "step": 21720 }, { "epoch": 0.17769963609600523, "grad_norm": 0.47776755690574646, "learning_rate": 8.884981804800262e-06, "loss": 0.0069, "step": 21730 }, { "epoch": 0.1777814122746044, "grad_norm": 0.01590697094798088, "learning_rate": 8.889070613730221e-06, "loss": 0.0043, "step": 21740 }, { "epoch": 0.1778631884532036, "grad_norm": 0.21825586259365082, "learning_rate": 8.89315942266018e-06, "loss": 0.0042, "step": 21750 }, { "epoch": 0.17794496463180276, "grad_norm": 0.13026978075504303, "learning_rate": 8.897248231590139e-06, "loss": 0.0048, "step": 21760 }, { "epoch": 0.17802674081040193, "grad_norm": 0.3744948208332062, "learning_rate": 8.901337040520096e-06, "loss": 0.0066, "step": 21770 }, { "epoch": 0.1781085169890011, "grad_norm": 0.11960723251104355, "learning_rate": 8.905425849450055e-06, "loss": 0.0046, "step": 21780 }, { "epoch": 0.17819029316760027, "grad_norm": 0.15466515719890594, "learning_rate": 8.909514658380016e-06, "loss": 0.0051, "step": 21790 }, { "epoch": 0.17827206934619946, "grad_norm": 0.2243616282939911, "learning_rate": 8.913603467309973e-06, "loss": 0.0038, "step": 21800 }, { "epoch": 0.17835384552479863, "grad_norm": 0.18953204154968262, "learning_rate": 8.917692276239932e-06, "loss": 0.0036, "step": 21810 }, { "epoch": 0.1784356217033978, "grad_norm": 0.3764853775501251, "learning_rate": 8.92178108516989e-06, "loss": 0.0066, "step": 21820 }, { "epoch": 0.17851739788199697, "grad_norm": 0.11388521641492844, "learning_rate": 8.92586989409985e-06, "loss": 0.0068, "step": 21830 }, { "epoch": 0.17859917406059614, "grad_norm": 0.44482067227363586, "learning_rate": 8.929958703029809e-06, "loss": 0.008, "step": 21840 }, { "epoch": 0.17868095023919534, "grad_norm": 0.12021684646606445, "learning_rate": 8.934047511959768e-06, "loss": 0.0053, "step": 21850 }, { "epoch": 0.1787627264177945, "grad_norm": 0.047418199479579926, "learning_rate": 8.938136320889725e-06, "loss": 0.0036, "step": 21860 }, { "epoch": 0.17884450259639367, "grad_norm": 0.5003379583358765, "learning_rate": 8.942225129819684e-06, "loss": 0.0054, "step": 21870 }, { "epoch": 0.17892627877499284, "grad_norm": 0.1831762194633484, "learning_rate": 8.946313938749643e-06, "loss": 0.0061, "step": 21880 }, { "epoch": 0.179008054953592, "grad_norm": 0.21912463009357452, "learning_rate": 8.950402747679601e-06, "loss": 0.0081, "step": 21890 }, { "epoch": 0.17908983113219118, "grad_norm": 0.05205094441771507, "learning_rate": 8.95449155660956e-06, "loss": 0.0058, "step": 21900 }, { "epoch": 0.17917160731079038, "grad_norm": 0.31569328904151917, "learning_rate": 8.95858036553952e-06, "loss": 0.0026, "step": 21910 }, { "epoch": 0.17925338348938955, "grad_norm": 0.18551357090473175, "learning_rate": 8.962669174469478e-06, "loss": 0.008, "step": 21920 }, { "epoch": 0.17933515966798871, "grad_norm": 0.18379977345466614, "learning_rate": 8.966757983399437e-06, "loss": 0.006, "step": 21930 }, { "epoch": 0.17941693584658788, "grad_norm": 0.2538994550704956, "learning_rate": 8.970846792329396e-06, "loss": 0.0045, "step": 21940 }, { "epoch": 0.17949871202518705, "grad_norm": 0.1458214819431305, "learning_rate": 8.974935601259353e-06, "loss": 0.0064, "step": 21950 }, { "epoch": 0.17958048820378625, "grad_norm": 0.2681921720504761, "learning_rate": 8.979024410189312e-06, "loss": 0.0052, "step": 21960 }, { "epoch": 0.17966226438238542, "grad_norm": 0.2717389166355133, "learning_rate": 8.983113219119271e-06, "loss": 0.0057, "step": 21970 }, { "epoch": 0.17974404056098459, "grad_norm": 0.19916951656341553, "learning_rate": 8.98720202804923e-06, "loss": 0.0054, "step": 21980 }, { "epoch": 0.17982581673958375, "grad_norm": 0.3172004818916321, "learning_rate": 8.991290836979189e-06, "loss": 0.0054, "step": 21990 }, { "epoch": 0.17990759291818292, "grad_norm": 0.15881343185901642, "learning_rate": 8.995379645909148e-06, "loss": 0.0041, "step": 22000 }, { "epoch": 0.17998936909678212, "grad_norm": 0.04160510003566742, "learning_rate": 8.999468454839107e-06, "loss": 0.003, "step": 22010 }, { "epoch": 0.1800711452753813, "grad_norm": 0.43700364232063293, "learning_rate": 9.003557263769065e-06, "loss": 0.0056, "step": 22020 }, { "epoch": 0.18015292145398046, "grad_norm": 0.12161879241466522, "learning_rate": 9.007646072699023e-06, "loss": 0.0029, "step": 22030 }, { "epoch": 0.18023469763257963, "grad_norm": 0.10857956856489182, "learning_rate": 9.011734881628982e-06, "loss": 0.0039, "step": 22040 }, { "epoch": 0.1803164738111788, "grad_norm": 0.0822506844997406, "learning_rate": 9.015823690558942e-06, "loss": 0.0031, "step": 22050 }, { "epoch": 0.18039824998977796, "grad_norm": 0.13091041147708893, "learning_rate": 9.0199124994889e-06, "loss": 0.0061, "step": 22060 }, { "epoch": 0.18048002616837716, "grad_norm": 0.26411375403404236, "learning_rate": 9.024001308418858e-06, "loss": 0.0052, "step": 22070 }, { "epoch": 0.18056180234697633, "grad_norm": 0.36357083916664124, "learning_rate": 9.028090117348817e-06, "loss": 0.004, "step": 22080 }, { "epoch": 0.1806435785255755, "grad_norm": 0.10939960926771164, "learning_rate": 9.032178926278776e-06, "loss": 0.0027, "step": 22090 }, { "epoch": 0.18072535470417467, "grad_norm": 0.39553695917129517, "learning_rate": 9.036267735208735e-06, "loss": 0.006, "step": 22100 }, { "epoch": 0.18080713088277384, "grad_norm": 0.19952750205993652, "learning_rate": 9.040356544138694e-06, "loss": 0.0046, "step": 22110 }, { "epoch": 0.18088890706137303, "grad_norm": 0.10919184237718582, "learning_rate": 9.044445353068651e-06, "loss": 0.0054, "step": 22120 }, { "epoch": 0.1809706832399722, "grad_norm": 0.18114513158798218, "learning_rate": 9.04853416199861e-06, "loss": 0.004, "step": 22130 }, { "epoch": 0.18105245941857137, "grad_norm": 0.1270247846841812, "learning_rate": 9.052622970928569e-06, "loss": 0.0042, "step": 22140 }, { "epoch": 0.18113423559717054, "grad_norm": 0.21943987905979156, "learning_rate": 9.056711779858528e-06, "loss": 0.0056, "step": 22150 }, { "epoch": 0.1812160117757697, "grad_norm": 0.2110968679189682, "learning_rate": 9.060800588788487e-06, "loss": 0.0054, "step": 22160 }, { "epoch": 0.1812977879543689, "grad_norm": 0.24032612144947052, "learning_rate": 9.064889397718446e-06, "loss": 0.0054, "step": 22170 }, { "epoch": 0.18137956413296807, "grad_norm": 0.1574472337961197, "learning_rate": 9.068978206648404e-06, "loss": 0.0032, "step": 22180 }, { "epoch": 0.18146134031156724, "grad_norm": 0.05744481086730957, "learning_rate": 9.073067015578363e-06, "loss": 0.0056, "step": 22190 }, { "epoch": 0.1815431164901664, "grad_norm": 0.12504428625106812, "learning_rate": 9.07715582450832e-06, "loss": 0.0041, "step": 22200 }, { "epoch": 0.18162489266876558, "grad_norm": 0.18740099668502808, "learning_rate": 9.08124463343828e-06, "loss": 0.0044, "step": 22210 }, { "epoch": 0.18170666884736475, "grad_norm": 0.0465053915977478, "learning_rate": 9.08533344236824e-06, "loss": 0.0037, "step": 22220 }, { "epoch": 0.18178844502596395, "grad_norm": 0.11156868189573288, "learning_rate": 9.089422251298197e-06, "loss": 0.005, "step": 22230 }, { "epoch": 0.18187022120456312, "grad_norm": 0.18788865208625793, "learning_rate": 9.093511060228156e-06, "loss": 0.0048, "step": 22240 }, { "epoch": 0.18195199738316228, "grad_norm": 0.1704113483428955, "learning_rate": 9.097599869158115e-06, "loss": 0.0052, "step": 22250 }, { "epoch": 0.18203377356176145, "grad_norm": 0.22893257439136505, "learning_rate": 9.101688678088074e-06, "loss": 0.0031, "step": 22260 }, { "epoch": 0.18211554974036062, "grad_norm": 0.17522557079792023, "learning_rate": 9.105777487018033e-06, "loss": 0.005, "step": 22270 }, { "epoch": 0.18219732591895982, "grad_norm": 0.16340084373950958, "learning_rate": 9.109866295947992e-06, "loss": 0.0026, "step": 22280 }, { "epoch": 0.182279102097559, "grad_norm": 0.05488044396042824, "learning_rate": 9.113955104877949e-06, "loss": 0.0054, "step": 22290 }, { "epoch": 0.18236087827615816, "grad_norm": 0.21933820843696594, "learning_rate": 9.118043913807908e-06, "loss": 0.0048, "step": 22300 }, { "epoch": 0.18244265445475732, "grad_norm": 0.06726984679698944, "learning_rate": 9.122132722737868e-06, "loss": 0.0054, "step": 22310 }, { "epoch": 0.1825244306333565, "grad_norm": 0.17054155468940735, "learning_rate": 9.126221531667826e-06, "loss": 0.0052, "step": 22320 }, { "epoch": 0.1826062068119557, "grad_norm": 0.5289710164070129, "learning_rate": 9.130310340597784e-06, "loss": 0.0036, "step": 22330 }, { "epoch": 0.18268798299055486, "grad_norm": 0.1381385177373886, "learning_rate": 9.134399149527743e-06, "loss": 0.0037, "step": 22340 }, { "epoch": 0.18276975916915403, "grad_norm": 0.20107942819595337, "learning_rate": 9.138487958457702e-06, "loss": 0.0049, "step": 22350 }, { "epoch": 0.1828515353477532, "grad_norm": 0.1331956535577774, "learning_rate": 9.142576767387661e-06, "loss": 0.0044, "step": 22360 }, { "epoch": 0.18293331152635237, "grad_norm": 0.34684813022613525, "learning_rate": 9.14666557631762e-06, "loss": 0.0048, "step": 22370 }, { "epoch": 0.18301508770495153, "grad_norm": 0.15491031110286713, "learning_rate": 9.150754385247577e-06, "loss": 0.0044, "step": 22380 }, { "epoch": 0.18309686388355073, "grad_norm": 0.20354653894901276, "learning_rate": 9.154843194177538e-06, "loss": 0.0033, "step": 22390 }, { "epoch": 0.1831786400621499, "grad_norm": 0.29583603143692017, "learning_rate": 9.158932003107495e-06, "loss": 0.0075, "step": 22400 }, { "epoch": 0.18326041624074907, "grad_norm": 0.32742035388946533, "learning_rate": 9.163020812037454e-06, "loss": 0.004, "step": 22410 }, { "epoch": 0.18334219241934824, "grad_norm": 0.1583123654127121, "learning_rate": 9.167109620967413e-06, "loss": 0.0033, "step": 22420 }, { "epoch": 0.1834239685979474, "grad_norm": 0.14461326599121094, "learning_rate": 9.171198429897372e-06, "loss": 0.0081, "step": 22430 }, { "epoch": 0.1835057447765466, "grad_norm": 0.09074704349040985, "learning_rate": 9.17528723882733e-06, "loss": 0.006, "step": 22440 }, { "epoch": 0.18358752095514577, "grad_norm": 0.15762801468372345, "learning_rate": 9.17937604775729e-06, "loss": 0.0027, "step": 22450 }, { "epoch": 0.18366929713374494, "grad_norm": 0.18093834817409515, "learning_rate": 9.183464856687247e-06, "loss": 0.0042, "step": 22460 }, { "epoch": 0.1837510733123441, "grad_norm": 0.11124768853187561, "learning_rate": 9.187553665617206e-06, "loss": 0.004, "step": 22470 }, { "epoch": 0.18383284949094328, "grad_norm": 0.06773650646209717, "learning_rate": 9.191642474547166e-06, "loss": 0.0052, "step": 22480 }, { "epoch": 0.18391462566954248, "grad_norm": 0.26210078597068787, "learning_rate": 9.195731283477123e-06, "loss": 0.0053, "step": 22490 }, { "epoch": 0.18399640184814164, "grad_norm": 0.29083356261253357, "learning_rate": 9.199820092407082e-06, "loss": 0.0061, "step": 22500 }, { "epoch": 0.1840781780267408, "grad_norm": 0.01918901689350605, "learning_rate": 9.203908901337041e-06, "loss": 0.0041, "step": 22510 }, { "epoch": 0.18415995420533998, "grad_norm": 0.054034046828746796, "learning_rate": 9.207997710267e-06, "loss": 0.0046, "step": 22520 }, { "epoch": 0.18424173038393915, "grad_norm": 0.1145373061299324, "learning_rate": 9.212086519196959e-06, "loss": 0.0036, "step": 22530 }, { "epoch": 0.18432350656253832, "grad_norm": 0.1499464064836502, "learning_rate": 9.216175328126918e-06, "loss": 0.0058, "step": 22540 }, { "epoch": 0.18440528274113752, "grad_norm": 0.11107052117586136, "learning_rate": 9.220264137056875e-06, "loss": 0.005, "step": 22550 }, { "epoch": 0.18448705891973669, "grad_norm": 0.1410515308380127, "learning_rate": 9.224352945986836e-06, "loss": 0.0054, "step": 22560 }, { "epoch": 0.18456883509833585, "grad_norm": 0.394979327917099, "learning_rate": 9.228441754916793e-06, "loss": 0.0043, "step": 22570 }, { "epoch": 0.18465061127693502, "grad_norm": 0.2222101390361786, "learning_rate": 9.232530563846752e-06, "loss": 0.0042, "step": 22580 }, { "epoch": 0.1847323874555342, "grad_norm": 0.30096435546875, "learning_rate": 9.23661937277671e-06, "loss": 0.0069, "step": 22590 }, { "epoch": 0.1848141636341334, "grad_norm": 0.2313362956047058, "learning_rate": 9.24070818170667e-06, "loss": 0.0088, "step": 22600 }, { "epoch": 0.18489593981273256, "grad_norm": 0.15868167579174042, "learning_rate": 9.244796990636629e-06, "loss": 0.0048, "step": 22610 }, { "epoch": 0.18497771599133173, "grad_norm": 0.2286916971206665, "learning_rate": 9.248885799566587e-06, "loss": 0.0024, "step": 22620 }, { "epoch": 0.1850594921699309, "grad_norm": 0.18330369889736176, "learning_rate": 9.252974608496545e-06, "loss": 0.0057, "step": 22630 }, { "epoch": 0.18514126834853006, "grad_norm": 0.2872001826763153, "learning_rate": 9.257063417426504e-06, "loss": 0.0051, "step": 22640 }, { "epoch": 0.18522304452712926, "grad_norm": 0.10023009777069092, "learning_rate": 9.261152226356464e-06, "loss": 0.004, "step": 22650 }, { "epoch": 0.18530482070572843, "grad_norm": 0.18265658617019653, "learning_rate": 9.265241035286421e-06, "loss": 0.0058, "step": 22660 }, { "epoch": 0.1853865968843276, "grad_norm": 0.17726299166679382, "learning_rate": 9.26932984421638e-06, "loss": 0.0053, "step": 22670 }, { "epoch": 0.18546837306292677, "grad_norm": 0.37237241864204407, "learning_rate": 9.273418653146339e-06, "loss": 0.0028, "step": 22680 }, { "epoch": 0.18555014924152594, "grad_norm": 0.18548637628555298, "learning_rate": 9.277507462076298e-06, "loss": 0.0065, "step": 22690 }, { "epoch": 0.1856319254201251, "grad_norm": 0.11866837739944458, "learning_rate": 9.281596271006257e-06, "loss": 0.0043, "step": 22700 }, { "epoch": 0.1857137015987243, "grad_norm": 0.14406242966651917, "learning_rate": 9.285685079936216e-06, "loss": 0.0055, "step": 22710 }, { "epoch": 0.18579547777732347, "grad_norm": 0.22784997522830963, "learning_rate": 9.289773888866173e-06, "loss": 0.0034, "step": 22720 }, { "epoch": 0.18587725395592264, "grad_norm": 0.3934492766857147, "learning_rate": 9.293862697796134e-06, "loss": 0.0066, "step": 22730 }, { "epoch": 0.1859590301345218, "grad_norm": 0.09915471822023392, "learning_rate": 9.297951506726093e-06, "loss": 0.003, "step": 22740 }, { "epoch": 0.18604080631312098, "grad_norm": 0.11357192695140839, "learning_rate": 9.30204031565605e-06, "loss": 0.0054, "step": 22750 }, { "epoch": 0.18612258249172017, "grad_norm": 0.11739855259656906, "learning_rate": 9.306129124586009e-06, "loss": 0.0038, "step": 22760 }, { "epoch": 0.18620435867031934, "grad_norm": 0.14104916155338287, "learning_rate": 9.310217933515968e-06, "loss": 0.003, "step": 22770 }, { "epoch": 0.1862861348489185, "grad_norm": 0.1142139807343483, "learning_rate": 9.314306742445926e-06, "loss": 0.0031, "step": 22780 }, { "epoch": 0.18636791102751768, "grad_norm": 0.026543796062469482, "learning_rate": 9.318395551375885e-06, "loss": 0.0034, "step": 22790 }, { "epoch": 0.18644968720611685, "grad_norm": 0.2884041666984558, "learning_rate": 9.322484360305844e-06, "loss": 0.0044, "step": 22800 }, { "epoch": 0.18653146338471605, "grad_norm": 0.07882314175367355, "learning_rate": 9.326573169235801e-06, "loss": 0.0049, "step": 22810 }, { "epoch": 0.18661323956331521, "grad_norm": 0.0777505412697792, "learning_rate": 9.330661978165762e-06, "loss": 0.0051, "step": 22820 }, { "epoch": 0.18669501574191438, "grad_norm": 0.16417519748210907, "learning_rate": 9.33475078709572e-06, "loss": 0.0051, "step": 22830 }, { "epoch": 0.18677679192051355, "grad_norm": 0.07218148559331894, "learning_rate": 9.338839596025678e-06, "loss": 0.0033, "step": 22840 }, { "epoch": 0.18685856809911272, "grad_norm": 0.3982551395893097, "learning_rate": 9.342928404955637e-06, "loss": 0.006, "step": 22850 }, { "epoch": 0.1869403442777119, "grad_norm": 0.34275245666503906, "learning_rate": 9.347017213885596e-06, "loss": 0.0031, "step": 22860 }, { "epoch": 0.1870221204563111, "grad_norm": 0.16655874252319336, "learning_rate": 9.351106022815555e-06, "loss": 0.0076, "step": 22870 }, { "epoch": 0.18710389663491026, "grad_norm": 0.17150282859802246, "learning_rate": 9.355194831745514e-06, "loss": 0.0044, "step": 22880 }, { "epoch": 0.18718567281350942, "grad_norm": 0.1650967001914978, "learning_rate": 9.359283640675471e-06, "loss": 0.0051, "step": 22890 }, { "epoch": 0.1872674489921086, "grad_norm": 0.4368581771850586, "learning_rate": 9.363372449605432e-06, "loss": 0.0049, "step": 22900 }, { "epoch": 0.18734922517070776, "grad_norm": 0.05834474787116051, "learning_rate": 9.36746125853539e-06, "loss": 0.0036, "step": 22910 }, { "epoch": 0.18743100134930696, "grad_norm": 0.043477993458509445, "learning_rate": 9.371550067465348e-06, "loss": 0.0048, "step": 22920 }, { "epoch": 0.18751277752790613, "grad_norm": 0.1369880884885788, "learning_rate": 9.375638876395307e-06, "loss": 0.007, "step": 22930 }, { "epoch": 0.1875945537065053, "grad_norm": 0.10255853086709976, "learning_rate": 9.379727685325265e-06, "loss": 0.0039, "step": 22940 }, { "epoch": 0.18767632988510446, "grad_norm": 0.36648881435394287, "learning_rate": 9.383816494255224e-06, "loss": 0.0051, "step": 22950 }, { "epoch": 0.18775810606370363, "grad_norm": 0.2697908878326416, "learning_rate": 9.387905303185183e-06, "loss": 0.0042, "step": 22960 }, { "epoch": 0.18783988224230283, "grad_norm": 0.2023037225008011, "learning_rate": 9.391994112115142e-06, "loss": 0.0055, "step": 22970 }, { "epoch": 0.187921658420902, "grad_norm": 0.13951578736305237, "learning_rate": 9.3960829210451e-06, "loss": 0.0045, "step": 22980 }, { "epoch": 0.18800343459950117, "grad_norm": 0.09771502017974854, "learning_rate": 9.40017172997506e-06, "loss": 0.0039, "step": 22990 }, { "epoch": 0.18808521077810034, "grad_norm": 0.3340296745300293, "learning_rate": 9.404260538905017e-06, "loss": 0.0044, "step": 23000 }, { "epoch": 0.1881669869566995, "grad_norm": 0.09893479943275452, "learning_rate": 9.408349347834976e-06, "loss": 0.0054, "step": 23010 }, { "epoch": 0.18824876313529867, "grad_norm": 0.10081538558006287, "learning_rate": 9.412438156764935e-06, "loss": 0.0051, "step": 23020 }, { "epoch": 0.18833053931389787, "grad_norm": 0.19024719297885895, "learning_rate": 9.416526965694894e-06, "loss": 0.0025, "step": 23030 }, { "epoch": 0.18841231549249704, "grad_norm": 0.017716780304908752, "learning_rate": 9.420615774624853e-06, "loss": 0.0049, "step": 23040 }, { "epoch": 0.1884940916710962, "grad_norm": 0.16344013810157776, "learning_rate": 9.424704583554812e-06, "loss": 0.0036, "step": 23050 }, { "epoch": 0.18857586784969538, "grad_norm": 0.10274990648031235, "learning_rate": 9.428793392484769e-06, "loss": 0.0048, "step": 23060 }, { "epoch": 0.18865764402829455, "grad_norm": 0.038497596979141235, "learning_rate": 9.43288220141473e-06, "loss": 0.0028, "step": 23070 }, { "epoch": 0.18873942020689374, "grad_norm": 0.0667378231883049, "learning_rate": 9.436971010344688e-06, "loss": 0.0063, "step": 23080 }, { "epoch": 0.1888211963854929, "grad_norm": 0.12540285289287567, "learning_rate": 9.441059819274646e-06, "loss": 0.0039, "step": 23090 }, { "epoch": 0.18890297256409208, "grad_norm": 0.0756973996758461, "learning_rate": 9.445148628204604e-06, "loss": 0.0039, "step": 23100 }, { "epoch": 0.18898474874269125, "grad_norm": 0.11915767192840576, "learning_rate": 9.449237437134563e-06, "loss": 0.0036, "step": 23110 }, { "epoch": 0.18906652492129042, "grad_norm": 0.14434587955474854, "learning_rate": 9.453326246064522e-06, "loss": 0.0043, "step": 23120 }, { "epoch": 0.18914830109988962, "grad_norm": 0.17174789309501648, "learning_rate": 9.457415054994481e-06, "loss": 0.0042, "step": 23130 }, { "epoch": 0.18923007727848878, "grad_norm": 0.26943862438201904, "learning_rate": 9.46150386392444e-06, "loss": 0.0062, "step": 23140 }, { "epoch": 0.18931185345708795, "grad_norm": 0.21345697343349457, "learning_rate": 9.465592672854397e-06, "loss": 0.0047, "step": 23150 }, { "epoch": 0.18939362963568712, "grad_norm": 0.1167960986495018, "learning_rate": 9.469681481784358e-06, "loss": 0.0026, "step": 23160 }, { "epoch": 0.1894754058142863, "grad_norm": 0.11111318320035934, "learning_rate": 9.473770290714317e-06, "loss": 0.0033, "step": 23170 }, { "epoch": 0.18955718199288546, "grad_norm": 0.05499795451760292, "learning_rate": 9.477859099644274e-06, "loss": 0.0029, "step": 23180 }, { "epoch": 0.18963895817148466, "grad_norm": 0.3640056550502777, "learning_rate": 9.481947908574233e-06, "loss": 0.0057, "step": 23190 }, { "epoch": 0.18972073435008383, "grad_norm": 0.11535175144672394, "learning_rate": 9.486036717504192e-06, "loss": 0.0025, "step": 23200 }, { "epoch": 0.189802510528683, "grad_norm": 0.03476523235440254, "learning_rate": 9.49012552643415e-06, "loss": 0.005, "step": 23210 }, { "epoch": 0.18988428670728216, "grad_norm": 0.22002306580543518, "learning_rate": 9.49421433536411e-06, "loss": 0.004, "step": 23220 }, { "epoch": 0.18996606288588133, "grad_norm": 0.20190949738025665, "learning_rate": 9.498303144294068e-06, "loss": 0.0051, "step": 23230 }, { "epoch": 0.19004783906448053, "grad_norm": 0.09671759605407715, "learning_rate": 9.502391953224026e-06, "loss": 0.0066, "step": 23240 }, { "epoch": 0.1901296152430797, "grad_norm": 0.1513526886701584, "learning_rate": 9.506480762153986e-06, "loss": 0.0058, "step": 23250 }, { "epoch": 0.19021139142167887, "grad_norm": 0.13017740845680237, "learning_rate": 9.510569571083943e-06, "loss": 0.0041, "step": 23260 }, { "epoch": 0.19029316760027803, "grad_norm": 0.11894424259662628, "learning_rate": 9.514658380013902e-06, "loss": 0.0063, "step": 23270 }, { "epoch": 0.1903749437788772, "grad_norm": 0.319222629070282, "learning_rate": 9.518747188943861e-06, "loss": 0.0042, "step": 23280 }, { "epoch": 0.1904567199574764, "grad_norm": 0.11017397791147232, "learning_rate": 9.52283599787382e-06, "loss": 0.0057, "step": 23290 }, { "epoch": 0.19053849613607557, "grad_norm": 0.06145309656858444, "learning_rate": 9.526924806803779e-06, "loss": 0.0074, "step": 23300 }, { "epoch": 0.19062027231467474, "grad_norm": 0.32770708203315735, "learning_rate": 9.531013615733738e-06, "loss": 0.0042, "step": 23310 }, { "epoch": 0.1907020484932739, "grad_norm": 0.24194657802581787, "learning_rate": 9.535102424663695e-06, "loss": 0.0061, "step": 23320 }, { "epoch": 0.19078382467187308, "grad_norm": 0.10414236783981323, "learning_rate": 9.539191233593656e-06, "loss": 0.0036, "step": 23330 }, { "epoch": 0.19086560085047224, "grad_norm": 0.07956228405237198, "learning_rate": 9.543280042523615e-06, "loss": 0.0029, "step": 23340 }, { "epoch": 0.19094737702907144, "grad_norm": 0.1406155377626419, "learning_rate": 9.547368851453572e-06, "loss": 0.0037, "step": 23350 }, { "epoch": 0.1910291532076706, "grad_norm": 0.26969465613365173, "learning_rate": 9.55145766038353e-06, "loss": 0.0033, "step": 23360 }, { "epoch": 0.19111092938626978, "grad_norm": 0.13935072720050812, "learning_rate": 9.55554646931349e-06, "loss": 0.0037, "step": 23370 }, { "epoch": 0.19119270556486895, "grad_norm": 0.28404223918914795, "learning_rate": 9.559635278243448e-06, "loss": 0.0062, "step": 23380 }, { "epoch": 0.19127448174346812, "grad_norm": 0.17041969299316406, "learning_rate": 9.563724087173407e-06, "loss": 0.0044, "step": 23390 }, { "epoch": 0.1913562579220673, "grad_norm": 0.16515576839447021, "learning_rate": 9.567812896103366e-06, "loss": 0.0037, "step": 23400 }, { "epoch": 0.19143803410066648, "grad_norm": 0.1980927437543869, "learning_rate": 9.571901705033323e-06, "loss": 0.0069, "step": 23410 }, { "epoch": 0.19151981027926565, "grad_norm": 0.19355541467666626, "learning_rate": 9.575990513963284e-06, "loss": 0.0049, "step": 23420 }, { "epoch": 0.19160158645786482, "grad_norm": 0.09293414652347565, "learning_rate": 9.580079322893241e-06, "loss": 0.0087, "step": 23430 }, { "epoch": 0.191683362636464, "grad_norm": 0.13327504694461823, "learning_rate": 9.5841681318232e-06, "loss": 0.0037, "step": 23440 }, { "epoch": 0.19176513881506319, "grad_norm": 0.2119630128145218, "learning_rate": 9.588256940753159e-06, "loss": 0.0056, "step": 23450 }, { "epoch": 0.19184691499366235, "grad_norm": 0.13592134416103363, "learning_rate": 9.592345749683118e-06, "loss": 0.0039, "step": 23460 }, { "epoch": 0.19192869117226152, "grad_norm": 0.339265912771225, "learning_rate": 9.596434558613077e-06, "loss": 0.0043, "step": 23470 }, { "epoch": 0.1920104673508607, "grad_norm": 0.18204365670681, "learning_rate": 9.600523367543036e-06, "loss": 0.0033, "step": 23480 }, { "epoch": 0.19209224352945986, "grad_norm": 0.07269870489835739, "learning_rate": 9.604612176472995e-06, "loss": 0.0044, "step": 23490 }, { "epoch": 0.19217401970805903, "grad_norm": 0.05488552898168564, "learning_rate": 9.608700985402954e-06, "loss": 0.0049, "step": 23500 }, { "epoch": 0.19225579588665823, "grad_norm": 0.2627377510070801, "learning_rate": 9.612789794332912e-06, "loss": 0.0065, "step": 23510 }, { "epoch": 0.1923375720652574, "grad_norm": 0.17430844902992249, "learning_rate": 9.61687860326287e-06, "loss": 0.004, "step": 23520 }, { "epoch": 0.19241934824385656, "grad_norm": 0.1474934220314026, "learning_rate": 9.620967412192829e-06, "loss": 0.0044, "step": 23530 }, { "epoch": 0.19250112442245573, "grad_norm": 0.21821174025535583, "learning_rate": 9.625056221122787e-06, "loss": 0.0062, "step": 23540 }, { "epoch": 0.1925829006010549, "grad_norm": 0.350473552942276, "learning_rate": 9.629145030052746e-06, "loss": 0.0049, "step": 23550 }, { "epoch": 0.1926646767796541, "grad_norm": 0.21204613149166107, "learning_rate": 9.633233838982705e-06, "loss": 0.0052, "step": 23560 }, { "epoch": 0.19274645295825327, "grad_norm": 0.25352734327316284, "learning_rate": 9.637322647912664e-06, "loss": 0.0039, "step": 23570 }, { "epoch": 0.19282822913685244, "grad_norm": 0.4243892729282379, "learning_rate": 9.641411456842621e-06, "loss": 0.0051, "step": 23580 }, { "epoch": 0.1929100053154516, "grad_norm": 0.1257249265909195, "learning_rate": 9.645500265772582e-06, "loss": 0.0051, "step": 23590 }, { "epoch": 0.19299178149405077, "grad_norm": 0.2374851554632187, "learning_rate": 9.649589074702541e-06, "loss": 0.0034, "step": 23600 }, { "epoch": 0.19307355767264997, "grad_norm": 0.06832875311374664, "learning_rate": 9.653677883632498e-06, "loss": 0.0084, "step": 23610 }, { "epoch": 0.19315533385124914, "grad_norm": 0.18209072947502136, "learning_rate": 9.657766692562457e-06, "loss": 0.0048, "step": 23620 }, { "epoch": 0.1932371100298483, "grad_norm": 0.5038244128227234, "learning_rate": 9.661855501492416e-06, "loss": 0.0043, "step": 23630 }, { "epoch": 0.19331888620844748, "grad_norm": 0.08875353634357452, "learning_rate": 9.665944310422375e-06, "loss": 0.0043, "step": 23640 }, { "epoch": 0.19340066238704665, "grad_norm": 0.2566007673740387, "learning_rate": 9.670033119352334e-06, "loss": 0.0068, "step": 23650 }, { "epoch": 0.19348243856564581, "grad_norm": 0.14120455086231232, "learning_rate": 9.674121928282293e-06, "loss": 0.0059, "step": 23660 }, { "epoch": 0.193564214744245, "grad_norm": 0.11158329248428345, "learning_rate": 9.678210737212251e-06, "loss": 0.0029, "step": 23670 }, { "epoch": 0.19364599092284418, "grad_norm": 0.2189883589744568, "learning_rate": 9.68229954614221e-06, "loss": 0.0062, "step": 23680 }, { "epoch": 0.19372776710144335, "grad_norm": 0.24966271221637726, "learning_rate": 9.686388355072168e-06, "loss": 0.0063, "step": 23690 }, { "epoch": 0.19380954328004252, "grad_norm": 0.16461271047592163, "learning_rate": 9.690477164002126e-06, "loss": 0.0039, "step": 23700 }, { "epoch": 0.1938913194586417, "grad_norm": 0.2342810034751892, "learning_rate": 9.694565972932085e-06, "loss": 0.0036, "step": 23710 }, { "epoch": 0.19397309563724088, "grad_norm": 0.11975792050361633, "learning_rate": 9.698654781862044e-06, "loss": 0.0088, "step": 23720 }, { "epoch": 0.19405487181584005, "grad_norm": 0.18847820162773132, "learning_rate": 9.702743590792003e-06, "loss": 0.0055, "step": 23730 }, { "epoch": 0.19413664799443922, "grad_norm": 0.11524433642625809, "learning_rate": 9.706832399721962e-06, "loss": 0.0027, "step": 23740 }, { "epoch": 0.1942184241730384, "grad_norm": 0.03664948418736458, "learning_rate": 9.71092120865192e-06, "loss": 0.0033, "step": 23750 }, { "epoch": 0.19430020035163756, "grad_norm": 0.25505316257476807, "learning_rate": 9.71501001758188e-06, "loss": 0.0062, "step": 23760 }, { "epoch": 0.19438197653023676, "grad_norm": 0.29935505986213684, "learning_rate": 9.719098826511839e-06, "loss": 0.0047, "step": 23770 }, { "epoch": 0.19446375270883592, "grad_norm": 0.13773119449615479, "learning_rate": 9.723187635441796e-06, "loss": 0.0063, "step": 23780 }, { "epoch": 0.1945455288874351, "grad_norm": 0.06527379155158997, "learning_rate": 9.727276444371755e-06, "loss": 0.0041, "step": 23790 }, { "epoch": 0.19462730506603426, "grad_norm": 0.13424712419509888, "learning_rate": 9.731365253301714e-06, "loss": 0.0046, "step": 23800 }, { "epoch": 0.19470908124463343, "grad_norm": 0.026249054819345474, "learning_rate": 9.735454062231673e-06, "loss": 0.004, "step": 23810 }, { "epoch": 0.1947908574232326, "grad_norm": 0.13350962102413177, "learning_rate": 9.739542871161632e-06, "loss": 0.0041, "step": 23820 }, { "epoch": 0.1948726336018318, "grad_norm": 0.12627089023590088, "learning_rate": 9.74363168009159e-06, "loss": 0.0033, "step": 23830 }, { "epoch": 0.19495440978043097, "grad_norm": 0.3209933340549469, "learning_rate": 9.74772048902155e-06, "loss": 0.0037, "step": 23840 }, { "epoch": 0.19503618595903013, "grad_norm": 0.208025723695755, "learning_rate": 9.751809297951508e-06, "loss": 0.0044, "step": 23850 }, { "epoch": 0.1951179621376293, "grad_norm": 0.0216973926872015, "learning_rate": 9.755898106881465e-06, "loss": 0.005, "step": 23860 }, { "epoch": 0.19519973831622847, "grad_norm": 0.28624624013900757, "learning_rate": 9.759986915811424e-06, "loss": 0.005, "step": 23870 }, { "epoch": 0.19528151449482767, "grad_norm": 0.14213767647743225, "learning_rate": 9.764075724741383e-06, "loss": 0.0066, "step": 23880 }, { "epoch": 0.19536329067342684, "grad_norm": 0.10910258442163467, "learning_rate": 9.768164533671342e-06, "loss": 0.0033, "step": 23890 }, { "epoch": 0.195445066852026, "grad_norm": 0.053645823150873184, "learning_rate": 9.772253342601301e-06, "loss": 0.0027, "step": 23900 }, { "epoch": 0.19552684303062517, "grad_norm": 0.2641639709472656, "learning_rate": 9.77634215153126e-06, "loss": 0.0063, "step": 23910 }, { "epoch": 0.19560861920922434, "grad_norm": 0.054279059171676636, "learning_rate": 9.780430960461219e-06, "loss": 0.0046, "step": 23920 }, { "epoch": 0.19569039538782354, "grad_norm": 0.1664089411497116, "learning_rate": 9.784519769391178e-06, "loss": 0.0031, "step": 23930 }, { "epoch": 0.1957721715664227, "grad_norm": 0.061616986989974976, "learning_rate": 9.788608578321137e-06, "loss": 0.0041, "step": 23940 }, { "epoch": 0.19585394774502188, "grad_norm": 0.2220212072134018, "learning_rate": 9.792697387251094e-06, "loss": 0.0043, "step": 23950 }, { "epoch": 0.19593572392362105, "grad_norm": 0.20512570440769196, "learning_rate": 9.796786196181053e-06, "loss": 0.0059, "step": 23960 }, { "epoch": 0.19601750010222022, "grad_norm": 0.10684715211391449, "learning_rate": 9.800875005111012e-06, "loss": 0.0037, "step": 23970 }, { "epoch": 0.19609927628081938, "grad_norm": 0.1420280486345291, "learning_rate": 9.80496381404097e-06, "loss": 0.0071, "step": 23980 }, { "epoch": 0.19618105245941858, "grad_norm": 0.4645211398601532, "learning_rate": 9.80905262297093e-06, "loss": 0.0065, "step": 23990 }, { "epoch": 0.19626282863801775, "grad_norm": 0.11592792719602585, "learning_rate": 9.813141431900888e-06, "loss": 0.0047, "step": 24000 }, { "epoch": 0.19634460481661692, "grad_norm": 0.156783327460289, "learning_rate": 9.817230240830847e-06, "loss": 0.006, "step": 24010 }, { "epoch": 0.1964263809952161, "grad_norm": 0.31586092710494995, "learning_rate": 9.821319049760806e-06, "loss": 0.0032, "step": 24020 }, { "epoch": 0.19650815717381526, "grad_norm": 0.10768914967775345, "learning_rate": 9.825407858690765e-06, "loss": 0.0036, "step": 24030 }, { "epoch": 0.19658993335241445, "grad_norm": 0.2019178569316864, "learning_rate": 9.829496667620722e-06, "loss": 0.0057, "step": 24040 }, { "epoch": 0.19667170953101362, "grad_norm": 0.2549172043800354, "learning_rate": 9.833585476550681e-06, "loss": 0.003, "step": 24050 }, { "epoch": 0.1967534857096128, "grad_norm": 0.06694336235523224, "learning_rate": 9.83767428548064e-06, "loss": 0.0065, "step": 24060 }, { "epoch": 0.19683526188821196, "grad_norm": 0.14216254651546478, "learning_rate": 9.841763094410599e-06, "loss": 0.0044, "step": 24070 }, { "epoch": 0.19691703806681113, "grad_norm": 0.22907763719558716, "learning_rate": 9.845851903340558e-06, "loss": 0.0092, "step": 24080 }, { "epoch": 0.19699881424541033, "grad_norm": 0.26519349217414856, "learning_rate": 9.849940712270517e-06, "loss": 0.0039, "step": 24090 }, { "epoch": 0.1970805904240095, "grad_norm": 0.31714633107185364, "learning_rate": 9.854029521200476e-06, "loss": 0.0036, "step": 24100 }, { "epoch": 0.19716236660260866, "grad_norm": 0.05026644468307495, "learning_rate": 9.858118330130435e-06, "loss": 0.0064, "step": 24110 }, { "epoch": 0.19724414278120783, "grad_norm": 0.05916180834174156, "learning_rate": 9.862207139060392e-06, "loss": 0.0025, "step": 24120 }, { "epoch": 0.197325918959807, "grad_norm": 0.08118964731693268, "learning_rate": 9.86629594799035e-06, "loss": 0.006, "step": 24130 }, { "epoch": 0.19740769513840617, "grad_norm": 0.4147544801235199, "learning_rate": 9.870384756920311e-06, "loss": 0.003, "step": 24140 }, { "epoch": 0.19748947131700537, "grad_norm": 0.14213348925113678, "learning_rate": 9.874473565850268e-06, "loss": 0.0057, "step": 24150 }, { "epoch": 0.19757124749560454, "grad_norm": 0.14613839983940125, "learning_rate": 9.878562374780227e-06, "loss": 0.0052, "step": 24160 }, { "epoch": 0.1976530236742037, "grad_norm": 0.20462659001350403, "learning_rate": 9.882651183710186e-06, "loss": 0.0084, "step": 24170 }, { "epoch": 0.19773479985280287, "grad_norm": 0.12377005070447922, "learning_rate": 9.886739992640145e-06, "loss": 0.0046, "step": 24180 }, { "epoch": 0.19781657603140204, "grad_norm": 0.0856296494603157, "learning_rate": 9.890828801570104e-06, "loss": 0.0029, "step": 24190 }, { "epoch": 0.19789835221000124, "grad_norm": 0.05638452246785164, "learning_rate": 9.894917610500063e-06, "loss": 0.0044, "step": 24200 }, { "epoch": 0.1979801283886004, "grad_norm": 0.15867425501346588, "learning_rate": 9.89900641943002e-06, "loss": 0.0037, "step": 24210 }, { "epoch": 0.19806190456719958, "grad_norm": 0.2574860751628876, "learning_rate": 9.903095228359979e-06, "loss": 0.0051, "step": 24220 }, { "epoch": 0.19814368074579874, "grad_norm": 0.0847267359495163, "learning_rate": 9.907184037289938e-06, "loss": 0.0028, "step": 24230 }, { "epoch": 0.1982254569243979, "grad_norm": 0.1637551635503769, "learning_rate": 9.911272846219897e-06, "loss": 0.0044, "step": 24240 }, { "epoch": 0.1983072331029971, "grad_norm": 0.12917548418045044, "learning_rate": 9.915361655149856e-06, "loss": 0.0054, "step": 24250 }, { "epoch": 0.19838900928159628, "grad_norm": 0.2563537061214447, "learning_rate": 9.919450464079815e-06, "loss": 0.0054, "step": 24260 }, { "epoch": 0.19847078546019545, "grad_norm": 0.12518063187599182, "learning_rate": 9.923539273009773e-06, "loss": 0.0051, "step": 24270 }, { "epoch": 0.19855256163879462, "grad_norm": 0.339537650346756, "learning_rate": 9.927628081939732e-06, "loss": 0.0061, "step": 24280 }, { "epoch": 0.19863433781739379, "grad_norm": 0.316913366317749, "learning_rate": 9.93171689086969e-06, "loss": 0.0083, "step": 24290 }, { "epoch": 0.19871611399599295, "grad_norm": 0.13852691650390625, "learning_rate": 9.935805699799648e-06, "loss": 0.0051, "step": 24300 }, { "epoch": 0.19879789017459215, "grad_norm": 0.13916143774986267, "learning_rate": 9.939894508729609e-06, "loss": 0.0074, "step": 24310 }, { "epoch": 0.19887966635319132, "grad_norm": 0.44294074177742004, "learning_rate": 9.943983317659566e-06, "loss": 0.0043, "step": 24320 }, { "epoch": 0.1989614425317905, "grad_norm": 0.05468727648258209, "learning_rate": 9.948072126589525e-06, "loss": 0.0041, "step": 24330 }, { "epoch": 0.19904321871038966, "grad_norm": 0.3201383948326111, "learning_rate": 9.952160935519484e-06, "loss": 0.0051, "step": 24340 }, { "epoch": 0.19912499488898883, "grad_norm": 0.5013327598571777, "learning_rate": 9.956249744449443e-06, "loss": 0.0033, "step": 24350 }, { "epoch": 0.19920677106758802, "grad_norm": 0.1966930478811264, "learning_rate": 9.960338553379402e-06, "loss": 0.006, "step": 24360 }, { "epoch": 0.1992885472461872, "grad_norm": 0.4258410632610321, "learning_rate": 9.96442736230936e-06, "loss": 0.0058, "step": 24370 }, { "epoch": 0.19937032342478636, "grad_norm": 0.08101329952478409, "learning_rate": 9.968516171239318e-06, "loss": 0.0059, "step": 24380 }, { "epoch": 0.19945209960338553, "grad_norm": 0.22164949774742126, "learning_rate": 9.972604980169277e-06, "loss": 0.0075, "step": 24390 }, { "epoch": 0.1995338757819847, "grad_norm": 0.33044910430908203, "learning_rate": 9.976693789099236e-06, "loss": 0.0059, "step": 24400 }, { "epoch": 0.1996156519605839, "grad_norm": 0.42057737708091736, "learning_rate": 9.980782598029195e-06, "loss": 0.0048, "step": 24410 }, { "epoch": 0.19969742813918306, "grad_norm": 0.2336568832397461, "learning_rate": 9.984871406959154e-06, "loss": 0.0063, "step": 24420 }, { "epoch": 0.19977920431778223, "grad_norm": 0.11078676581382751, "learning_rate": 9.988960215889112e-06, "loss": 0.0059, "step": 24430 }, { "epoch": 0.1998609804963814, "grad_norm": 0.10466776043176651, "learning_rate": 9.993049024819071e-06, "loss": 0.0045, "step": 24440 }, { "epoch": 0.19994275667498057, "grad_norm": 0.3480346202850342, "learning_rate": 9.99713783374903e-06, "loss": 0.005, "step": 24450 }, { "epoch": 0.20002453285357974, "grad_norm": 0.11216943711042404, "learning_rate": 9.999999995416567e-06, "loss": 0.0038, "step": 24460 }, { "epoch": 0.20010630903217894, "grad_norm": 0.2001187652349472, "learning_rate": 9.999999913933315e-06, "loss": 0.0042, "step": 24470 }, { "epoch": 0.2001880852107781, "grad_norm": 0.17349593341350555, "learning_rate": 9.999999730596003e-06, "loss": 0.0064, "step": 24480 }, { "epoch": 0.20026986138937727, "grad_norm": 0.18185736238956451, "learning_rate": 9.999999445404631e-06, "loss": 0.0034, "step": 24490 }, { "epoch": 0.20035163756797644, "grad_norm": 0.196306049823761, "learning_rate": 9.999999058359204e-06, "loss": 0.003, "step": 24500 }, { "epoch": 0.2004334137465756, "grad_norm": 0.22329393029212952, "learning_rate": 9.999998569459734e-06, "loss": 0.0056, "step": 24510 }, { "epoch": 0.2005151899251748, "grad_norm": 0.09757973998785019, "learning_rate": 9.999997978706229e-06, "loss": 0.0046, "step": 24520 }, { "epoch": 0.20059696610377398, "grad_norm": 0.23010241985321045, "learning_rate": 9.999997286098701e-06, "loss": 0.0043, "step": 24530 }, { "epoch": 0.20067874228237315, "grad_norm": 0.1616666167974472, "learning_rate": 9.999996491637162e-06, "loss": 0.0055, "step": 24540 }, { "epoch": 0.20076051846097231, "grad_norm": 0.07497043907642365, "learning_rate": 9.999995595321634e-06, "loss": 0.0049, "step": 24550 }, { "epoch": 0.20084229463957148, "grad_norm": 0.23776701092720032, "learning_rate": 9.99999459715213e-06, "loss": 0.0033, "step": 24560 }, { "epoch": 0.20092407081817068, "grad_norm": 0.11295383423566818, "learning_rate": 9.99999349712867e-06, "loss": 0.0042, "step": 24570 }, { "epoch": 0.20100584699676985, "grad_norm": 0.09421373903751373, "learning_rate": 9.99999229525128e-06, "loss": 0.0033, "step": 24580 }, { "epoch": 0.20108762317536902, "grad_norm": 0.05632771924138069, "learning_rate": 9.999990991519982e-06, "loss": 0.0035, "step": 24590 }, { "epoch": 0.2011693993539682, "grad_norm": 0.2549257278442383, "learning_rate": 9.999989585934803e-06, "loss": 0.0042, "step": 24600 }, { "epoch": 0.20125117553256736, "grad_norm": 0.11639375239610672, "learning_rate": 9.999988078495773e-06, "loss": 0.004, "step": 24610 }, { "epoch": 0.20133295171116652, "grad_norm": 0.20345517992973328, "learning_rate": 9.999986469202921e-06, "loss": 0.0036, "step": 24620 }, { "epoch": 0.20141472788976572, "grad_norm": 0.12394118309020996, "learning_rate": 9.99998475805628e-06, "loss": 0.0035, "step": 24630 }, { "epoch": 0.2014965040683649, "grad_norm": 0.12135665118694305, "learning_rate": 9.999982945055886e-06, "loss": 0.0035, "step": 24640 }, { "epoch": 0.20157828024696406, "grad_norm": 0.1266607940196991, "learning_rate": 9.999981030201775e-06, "loss": 0.0057, "step": 24650 }, { "epoch": 0.20166005642556323, "grad_norm": 0.13642722368240356, "learning_rate": 9.999979013493987e-06, "loss": 0.0052, "step": 24660 }, { "epoch": 0.2017418326041624, "grad_norm": 0.1563541293144226, "learning_rate": 9.999976894932562e-06, "loss": 0.0064, "step": 24670 }, { "epoch": 0.2018236087827616, "grad_norm": 0.07482452690601349, "learning_rate": 9.999974674517543e-06, "loss": 0.0025, "step": 24680 }, { "epoch": 0.20190538496136076, "grad_norm": 0.11633168160915375, "learning_rate": 9.999972352248976e-06, "loss": 0.0027, "step": 24690 }, { "epoch": 0.20198716113995993, "grad_norm": 0.15416382253170013, "learning_rate": 9.999969928126907e-06, "loss": 0.0041, "step": 24700 }, { "epoch": 0.2020689373185591, "grad_norm": 0.15163551270961761, "learning_rate": 9.999967402151388e-06, "loss": 0.0043, "step": 24710 }, { "epoch": 0.20215071349715827, "grad_norm": 0.2827039659023285, "learning_rate": 9.999964774322467e-06, "loss": 0.0041, "step": 24720 }, { "epoch": 0.20223248967575747, "grad_norm": 0.1538475900888443, "learning_rate": 9.999962044640202e-06, "loss": 0.0045, "step": 24730 }, { "epoch": 0.20231426585435663, "grad_norm": 0.12469428777694702, "learning_rate": 9.999959213104643e-06, "loss": 0.0032, "step": 24740 }, { "epoch": 0.2023960420329558, "grad_norm": 0.16817596554756165, "learning_rate": 9.999956279715852e-06, "loss": 0.0059, "step": 24750 }, { "epoch": 0.20247781821155497, "grad_norm": 0.11742103099822998, "learning_rate": 9.999953244473887e-06, "loss": 0.0058, "step": 24760 }, { "epoch": 0.20255959439015414, "grad_norm": 0.21048349142074585, "learning_rate": 9.999950107378811e-06, "loss": 0.0055, "step": 24770 }, { "epoch": 0.2026413705687533, "grad_norm": 0.09104517102241516, "learning_rate": 9.999946868430688e-06, "loss": 0.0036, "step": 24780 }, { "epoch": 0.2027231467473525, "grad_norm": 0.02607288584113121, "learning_rate": 9.99994352762958e-06, "loss": 0.0027, "step": 24790 }, { "epoch": 0.20280492292595168, "grad_norm": 0.1527785211801529, "learning_rate": 9.999940084975562e-06, "loss": 0.004, "step": 24800 }, { "epoch": 0.20288669910455084, "grad_norm": 0.09740246832370758, "learning_rate": 9.999936540468697e-06, "loss": 0.0046, "step": 24810 }, { "epoch": 0.20296847528315, "grad_norm": 0.2558511197566986, "learning_rate": 9.999932894109063e-06, "loss": 0.0037, "step": 24820 }, { "epoch": 0.20305025146174918, "grad_norm": 0.1360812783241272, "learning_rate": 9.99992914589673e-06, "loss": 0.004, "step": 24830 }, { "epoch": 0.20313202764034838, "grad_norm": 0.24733218550682068, "learning_rate": 9.999925295831778e-06, "loss": 0.0044, "step": 24840 }, { "epoch": 0.20321380381894755, "grad_norm": 0.3159753978252411, "learning_rate": 9.999921343914282e-06, "loss": 0.0068, "step": 24850 }, { "epoch": 0.20329557999754672, "grad_norm": 0.14865712821483612, "learning_rate": 9.999917290144325e-06, "loss": 0.0052, "step": 24860 }, { "epoch": 0.20337735617614588, "grad_norm": 0.20292343199253082, "learning_rate": 9.999913134521988e-06, "loss": 0.0042, "step": 24870 }, { "epoch": 0.20345913235474505, "grad_norm": 0.2825980484485626, "learning_rate": 9.999908877047357e-06, "loss": 0.0037, "step": 24880 }, { "epoch": 0.20354090853334425, "grad_norm": 0.24893732368946075, "learning_rate": 9.999904517720518e-06, "loss": 0.0053, "step": 24890 }, { "epoch": 0.20362268471194342, "grad_norm": 0.260460764169693, "learning_rate": 9.999900056541559e-06, "loss": 0.0058, "step": 24900 }, { "epoch": 0.2037044608905426, "grad_norm": 0.24768337607383728, "learning_rate": 9.999895493510572e-06, "loss": 0.0049, "step": 24910 }, { "epoch": 0.20378623706914176, "grad_norm": 0.2129841297864914, "learning_rate": 9.99989082862765e-06, "loss": 0.0045, "step": 24920 }, { "epoch": 0.20386801324774093, "grad_norm": 0.27604570984840393, "learning_rate": 9.999886061892888e-06, "loss": 0.0025, "step": 24930 }, { "epoch": 0.2039497894263401, "grad_norm": 0.10764433443546295, "learning_rate": 9.999881193306382e-06, "loss": 0.0069, "step": 24940 }, { "epoch": 0.2040315656049393, "grad_norm": 0.0872659906744957, "learning_rate": 9.999876222868233e-06, "loss": 0.0039, "step": 24950 }, { "epoch": 0.20411334178353846, "grad_norm": 0.06601427495479584, "learning_rate": 9.999871150578539e-06, "loss": 0.0053, "step": 24960 }, { "epoch": 0.20419511796213763, "grad_norm": 1.727735161781311, "learning_rate": 9.999865976437408e-06, "loss": 0.006, "step": 24970 }, { "epoch": 0.2042768941407368, "grad_norm": 0.24191702902317047, "learning_rate": 9.99986070044494e-06, "loss": 0.005, "step": 24980 }, { "epoch": 0.20435867031933597, "grad_norm": 0.18877601623535156, "learning_rate": 9.999855322601249e-06, "loss": 0.0058, "step": 24990 }, { "epoch": 0.20444044649793516, "grad_norm": 0.19875256717205048, "learning_rate": 9.99984984290644e-06, "loss": 0.0066, "step": 25000 }, { "epoch": 0.20452222267653433, "grad_norm": 0.10179654508829117, "learning_rate": 9.999844261360626e-06, "loss": 0.0031, "step": 25010 }, { "epoch": 0.2046039988551335, "grad_norm": 0.07441946119070053, "learning_rate": 9.999838577963918e-06, "loss": 0.0057, "step": 25020 }, { "epoch": 0.20468577503373267, "grad_norm": 0.07817834615707397, "learning_rate": 9.999832792716436e-06, "loss": 0.0084, "step": 25030 }, { "epoch": 0.20476755121233184, "grad_norm": 0.1846410036087036, "learning_rate": 9.999826905618294e-06, "loss": 0.0045, "step": 25040 }, { "epoch": 0.20484932739093104, "grad_norm": 0.18036283552646637, "learning_rate": 9.999820916669616e-06, "loss": 0.0045, "step": 25050 }, { "epoch": 0.2049311035695302, "grad_norm": 0.07241106778383255, "learning_rate": 9.999814825870522e-06, "loss": 0.0033, "step": 25060 }, { "epoch": 0.20501287974812937, "grad_norm": 0.03160986676812172, "learning_rate": 9.999808633221136e-06, "loss": 0.0039, "step": 25070 }, { "epoch": 0.20509465592672854, "grad_norm": 0.12926353514194489, "learning_rate": 9.999802338721583e-06, "loss": 0.0035, "step": 25080 }, { "epoch": 0.2051764321053277, "grad_norm": 0.26356762647628784, "learning_rate": 9.999795942371993e-06, "loss": 0.0044, "step": 25090 }, { "epoch": 0.20525820828392688, "grad_norm": 0.23571105301380157, "learning_rate": 9.999789444172494e-06, "loss": 0.0031, "step": 25100 }, { "epoch": 0.20533998446252608, "grad_norm": 0.05419676750898361, "learning_rate": 9.99978284412322e-06, "loss": 0.0027, "step": 25110 }, { "epoch": 0.20542176064112525, "grad_norm": 0.1560227870941162, "learning_rate": 9.99977614222431e-06, "loss": 0.0046, "step": 25120 }, { "epoch": 0.20550353681972441, "grad_norm": 0.21220289170742035, "learning_rate": 9.999769338475891e-06, "loss": 0.0036, "step": 25130 }, { "epoch": 0.20558531299832358, "grad_norm": 0.05940394848585129, "learning_rate": 9.999762432878109e-06, "loss": 0.0025, "step": 25140 }, { "epoch": 0.20566708917692275, "grad_norm": 0.10769255459308624, "learning_rate": 9.9997554254311e-06, "loss": 0.0042, "step": 25150 }, { "epoch": 0.20574886535552195, "grad_norm": 0.18228569626808167, "learning_rate": 9.99974831613501e-06, "loss": 0.0061, "step": 25160 }, { "epoch": 0.20583064153412112, "grad_norm": 0.016010185703635216, "learning_rate": 9.999741104989983e-06, "loss": 0.0045, "step": 25170 }, { "epoch": 0.20591241771272029, "grad_norm": 0.23517338931560516, "learning_rate": 9.999733791996167e-06, "loss": 0.0069, "step": 25180 }, { "epoch": 0.20599419389131945, "grad_norm": 0.18702338635921478, "learning_rate": 9.99972637715371e-06, "loss": 0.0056, "step": 25190 }, { "epoch": 0.20607597006991862, "grad_norm": 0.16894526779651642, "learning_rate": 9.999718860462759e-06, "loss": 0.0044, "step": 25200 }, { "epoch": 0.20615774624851782, "grad_norm": 0.1410539150238037, "learning_rate": 9.999711241923474e-06, "loss": 0.0059, "step": 25210 }, { "epoch": 0.206239522427117, "grad_norm": 0.2059432864189148, "learning_rate": 9.999703521536006e-06, "loss": 0.0039, "step": 25220 }, { "epoch": 0.20632129860571616, "grad_norm": 0.25134310126304626, "learning_rate": 9.999695699300513e-06, "loss": 0.0044, "step": 25230 }, { "epoch": 0.20640307478431533, "grad_norm": 0.04013099521398544, "learning_rate": 9.999687775217155e-06, "loss": 0.004, "step": 25240 }, { "epoch": 0.2064848509629145, "grad_norm": 0.08160625398159027, "learning_rate": 9.999679749286093e-06, "loss": 0.0036, "step": 25250 }, { "epoch": 0.20656662714151366, "grad_norm": 0.3958892226219177, "learning_rate": 9.999671621507492e-06, "loss": 0.0045, "step": 25260 }, { "epoch": 0.20664840332011286, "grad_norm": 0.2972862720489502, "learning_rate": 9.999663391881513e-06, "loss": 0.0029, "step": 25270 }, { "epoch": 0.20673017949871203, "grad_norm": 0.10638076066970825, "learning_rate": 9.999655060408329e-06, "loss": 0.0043, "step": 25280 }, { "epoch": 0.2068119556773112, "grad_norm": 0.10400694608688354, "learning_rate": 9.999646627088107e-06, "loss": 0.0038, "step": 25290 }, { "epoch": 0.20689373185591037, "grad_norm": 0.02990032732486725, "learning_rate": 9.99963809192102e-06, "loss": 0.0039, "step": 25300 }, { "epoch": 0.20697550803450954, "grad_norm": 0.11306479573249817, "learning_rate": 9.999629454907241e-06, "loss": 0.0051, "step": 25310 }, { "epoch": 0.20705728421310873, "grad_norm": 0.2941177487373352, "learning_rate": 9.999620716046945e-06, "loss": 0.0033, "step": 25320 }, { "epoch": 0.2071390603917079, "grad_norm": 0.06802330166101456, "learning_rate": 9.999611875340312e-06, "loss": 0.0028, "step": 25330 }, { "epoch": 0.20722083657030707, "grad_norm": 0.02728535421192646, "learning_rate": 9.99960293278752e-06, "loss": 0.0029, "step": 25340 }, { "epoch": 0.20730261274890624, "grad_norm": 0.23767593502998352, "learning_rate": 9.999593888388754e-06, "loss": 0.0032, "step": 25350 }, { "epoch": 0.2073843889275054, "grad_norm": 0.1893233358860016, "learning_rate": 9.999584742144196e-06, "loss": 0.0057, "step": 25360 }, { "epoch": 0.2074661651061046, "grad_norm": 0.027242092415690422, "learning_rate": 9.999575494054032e-06, "loss": 0.0031, "step": 25370 }, { "epoch": 0.20754794128470377, "grad_norm": 0.20651231706142426, "learning_rate": 9.999566144118453e-06, "loss": 0.0045, "step": 25380 }, { "epoch": 0.20762971746330294, "grad_norm": 0.20161373913288116, "learning_rate": 9.999556692337648e-06, "loss": 0.0054, "step": 25390 }, { "epoch": 0.2077114936419021, "grad_norm": 0.09856097400188446, "learning_rate": 9.999547138711808e-06, "loss": 0.0034, "step": 25400 }, { "epoch": 0.20779326982050128, "grad_norm": 0.06547130644321442, "learning_rate": 9.999537483241129e-06, "loss": 0.0029, "step": 25410 }, { "epoch": 0.20787504599910045, "grad_norm": 0.04940681532025337, "learning_rate": 9.999527725925808e-06, "loss": 0.0028, "step": 25420 }, { "epoch": 0.20795682217769965, "grad_norm": 0.25204983353614807, "learning_rate": 9.999517866766043e-06, "loss": 0.0037, "step": 25430 }, { "epoch": 0.20803859835629882, "grad_norm": 0.18616890907287598, "learning_rate": 9.999507905762036e-06, "loss": 0.0037, "step": 25440 }, { "epoch": 0.20812037453489798, "grad_norm": 0.5810920000076294, "learning_rate": 9.999497842913989e-06, "loss": 0.0042, "step": 25450 }, { "epoch": 0.20820215071349715, "grad_norm": 0.9774960875511169, "learning_rate": 9.999487678222106e-06, "loss": 0.0042, "step": 25460 }, { "epoch": 0.20828392689209632, "grad_norm": 0.27620354294776917, "learning_rate": 9.999477411686596e-06, "loss": 0.0059, "step": 25470 }, { "epoch": 0.20836570307069552, "grad_norm": 0.1460030972957611, "learning_rate": 9.999467043307667e-06, "loss": 0.0047, "step": 25480 }, { "epoch": 0.2084474792492947, "grad_norm": 0.21242617070674896, "learning_rate": 9.999456573085532e-06, "loss": 0.0046, "step": 25490 }, { "epoch": 0.20852925542789386, "grad_norm": 0.05110152065753937, "learning_rate": 9.999446001020402e-06, "loss": 0.0044, "step": 25500 }, { "epoch": 0.20861103160649302, "grad_norm": 0.21206645667552948, "learning_rate": 9.999435327112493e-06, "loss": 0.0051, "step": 25510 }, { "epoch": 0.2086928077850922, "grad_norm": 0.19853484630584717, "learning_rate": 9.999424551362023e-06, "loss": 0.0057, "step": 25520 }, { "epoch": 0.2087745839636914, "grad_norm": 0.12910637259483337, "learning_rate": 9.99941367376921e-06, "loss": 0.0031, "step": 25530 }, { "epoch": 0.20885636014229056, "grad_norm": 0.11970307677984238, "learning_rate": 9.999402694334277e-06, "loss": 0.0034, "step": 25540 }, { "epoch": 0.20893813632088973, "grad_norm": 0.19374902546405792, "learning_rate": 9.99939161305745e-06, "loss": 0.0041, "step": 25550 }, { "epoch": 0.2090199124994889, "grad_norm": 0.30182069540023804, "learning_rate": 9.99938042993895e-06, "loss": 0.0024, "step": 25560 }, { "epoch": 0.20910168867808807, "grad_norm": 0.05753731727600098, "learning_rate": 9.999369144979006e-06, "loss": 0.0029, "step": 25570 }, { "epoch": 0.20918346485668723, "grad_norm": 0.12941673398017883, "learning_rate": 9.99935775817785e-06, "loss": 0.0031, "step": 25580 }, { "epoch": 0.20926524103528643, "grad_norm": 0.269790381193161, "learning_rate": 9.999346269535713e-06, "loss": 0.0036, "step": 25590 }, { "epoch": 0.2093470172138856, "grad_norm": 0.23383447527885437, "learning_rate": 9.999334679052828e-06, "loss": 0.005, "step": 25600 }, { "epoch": 0.20942879339248477, "grad_norm": 0.1723758429288864, "learning_rate": 9.999322986729432e-06, "loss": 0.0063, "step": 25610 }, { "epoch": 0.20951056957108394, "grad_norm": 0.18493221700191498, "learning_rate": 9.999311192565764e-06, "loss": 0.0053, "step": 25620 }, { "epoch": 0.2095923457496831, "grad_norm": 0.21457810699939728, "learning_rate": 9.999299296562062e-06, "loss": 0.0043, "step": 25630 }, { "epoch": 0.2096741219282823, "grad_norm": 0.2242613285779953, "learning_rate": 9.999287298718572e-06, "loss": 0.004, "step": 25640 }, { "epoch": 0.20975589810688147, "grad_norm": 0.09417421370744705, "learning_rate": 9.999275199035534e-06, "loss": 0.0039, "step": 25650 }, { "epoch": 0.20983767428548064, "grad_norm": 0.23177003860473633, "learning_rate": 9.999262997513197e-06, "loss": 0.0036, "step": 25660 }, { "epoch": 0.2099194504640798, "grad_norm": 0.10677061975002289, "learning_rate": 9.99925069415181e-06, "loss": 0.0036, "step": 25670 }, { "epoch": 0.21000122664267898, "grad_norm": 0.06498955935239792, "learning_rate": 9.999238288951622e-06, "loss": 0.0043, "step": 25680 }, { "epoch": 0.21008300282127818, "grad_norm": 0.1608489751815796, "learning_rate": 9.999225781912886e-06, "loss": 0.0036, "step": 25690 }, { "epoch": 0.21016477899987734, "grad_norm": 0.08798223733901978, "learning_rate": 9.999213173035858e-06, "loss": 0.0038, "step": 25700 }, { "epoch": 0.2102465551784765, "grad_norm": 0.09017731249332428, "learning_rate": 9.999200462320794e-06, "loss": 0.0031, "step": 25710 }, { "epoch": 0.21032833135707568, "grad_norm": 0.0999460518360138, "learning_rate": 9.999187649767953e-06, "loss": 0.003, "step": 25720 }, { "epoch": 0.21041010753567485, "grad_norm": 0.156274676322937, "learning_rate": 9.999174735377594e-06, "loss": 0.0029, "step": 25730 }, { "epoch": 0.21049188371427402, "grad_norm": 0.13354989886283875, "learning_rate": 9.999161719149985e-06, "loss": 0.0061, "step": 25740 }, { "epoch": 0.21057365989287322, "grad_norm": 0.1265561729669571, "learning_rate": 9.999148601085388e-06, "loss": 0.0041, "step": 25750 }, { "epoch": 0.21065543607147239, "grad_norm": 0.149301216006279, "learning_rate": 9.99913538118407e-06, "loss": 0.004, "step": 25760 }, { "epoch": 0.21073721225007155, "grad_norm": 0.3480122685432434, "learning_rate": 9.9991220594463e-06, "loss": 0.0042, "step": 25770 }, { "epoch": 0.21081898842867072, "grad_norm": 0.05565018951892853, "learning_rate": 9.99910863587235e-06, "loss": 0.0034, "step": 25780 }, { "epoch": 0.2109007646072699, "grad_norm": 0.2506393790245056, "learning_rate": 9.999095110462495e-06, "loss": 0.0043, "step": 25790 }, { "epoch": 0.2109825407858691, "grad_norm": 0.08162159472703934, "learning_rate": 9.99908148321701e-06, "loss": 0.0021, "step": 25800 }, { "epoch": 0.21106431696446826, "grad_norm": 0.07587271928787231, "learning_rate": 9.99906775413617e-06, "loss": 0.0042, "step": 25810 }, { "epoch": 0.21114609314306743, "grad_norm": 0.19607631862163544, "learning_rate": 9.999053923220256e-06, "loss": 0.0051, "step": 25820 }, { "epoch": 0.2112278693216666, "grad_norm": 0.12055747956037521, "learning_rate": 9.999039990469551e-06, "loss": 0.0043, "step": 25830 }, { "epoch": 0.21130964550026576, "grad_norm": 0.23909319937229156, "learning_rate": 9.999025955884339e-06, "loss": 0.0047, "step": 25840 }, { "epoch": 0.21139142167886496, "grad_norm": 0.07658185809850693, "learning_rate": 9.999011819464902e-06, "loss": 0.0036, "step": 25850 }, { "epoch": 0.21147319785746413, "grad_norm": 0.10943704843521118, "learning_rate": 9.998997581211532e-06, "loss": 0.0047, "step": 25860 }, { "epoch": 0.2115549740360633, "grad_norm": 0.1330379843711853, "learning_rate": 9.998983241124519e-06, "loss": 0.004, "step": 25870 }, { "epoch": 0.21163675021466247, "grad_norm": 0.1867484748363495, "learning_rate": 9.998968799204154e-06, "loss": 0.0048, "step": 25880 }, { "epoch": 0.21171852639326164, "grad_norm": 0.4686058759689331, "learning_rate": 9.99895425545073e-06, "loss": 0.0048, "step": 25890 }, { "epoch": 0.2118003025718608, "grad_norm": 0.1504760980606079, "learning_rate": 9.998939609864544e-06, "loss": 0.0037, "step": 25900 }, { "epoch": 0.21188207875046, "grad_norm": 0.18934857845306396, "learning_rate": 9.998924862445897e-06, "loss": 0.0023, "step": 25910 }, { "epoch": 0.21196385492905917, "grad_norm": 0.28719764947891235, "learning_rate": 9.998910013195085e-06, "loss": 0.0055, "step": 25920 }, { "epoch": 0.21204563110765834, "grad_norm": 0.038868941366672516, "learning_rate": 9.998895062112414e-06, "loss": 0.0031, "step": 25930 }, { "epoch": 0.2121274072862575, "grad_norm": 0.14446188509464264, "learning_rate": 9.998880009198187e-06, "loss": 0.0042, "step": 25940 }, { "epoch": 0.21220918346485668, "grad_norm": 0.0955570861697197, "learning_rate": 9.998864854452712e-06, "loss": 0.0049, "step": 25950 }, { "epoch": 0.21229095964345587, "grad_norm": 0.19873830676078796, "learning_rate": 9.998849597876294e-06, "loss": 0.0055, "step": 25960 }, { "epoch": 0.21237273582205504, "grad_norm": 0.10889868438243866, "learning_rate": 9.998834239469248e-06, "loss": 0.003, "step": 25970 }, { "epoch": 0.2124545120006542, "grad_norm": 0.02670104242861271, "learning_rate": 9.998818779231886e-06, "loss": 0.0043, "step": 25980 }, { "epoch": 0.21253628817925338, "grad_norm": 0.10921280831098557, "learning_rate": 9.998803217164521e-06, "loss": 0.0028, "step": 25990 }, { "epoch": 0.21261806435785255, "grad_norm": 0.791595458984375, "learning_rate": 9.998787553267473e-06, "loss": 0.0065, "step": 26000 }, { "epoch": 0.21269984053645175, "grad_norm": 0.19112138450145721, "learning_rate": 9.998771787541057e-06, "loss": 0.0047, "step": 26010 }, { "epoch": 0.21278161671505091, "grad_norm": 0.19917911291122437, "learning_rate": 9.998755919985598e-06, "loss": 0.0028, "step": 26020 }, { "epoch": 0.21286339289365008, "grad_norm": 0.11321720480918884, "learning_rate": 9.998739950601417e-06, "loss": 0.006, "step": 26030 }, { "epoch": 0.21294516907224925, "grad_norm": 0.09590116143226624, "learning_rate": 9.998723879388841e-06, "loss": 0.0035, "step": 26040 }, { "epoch": 0.21302694525084842, "grad_norm": 0.07672590762376785, "learning_rate": 9.998707706348195e-06, "loss": 0.0065, "step": 26050 }, { "epoch": 0.2131087214294476, "grad_norm": 0.11316049844026566, "learning_rate": 9.99869143147981e-06, "loss": 0.004, "step": 26060 }, { "epoch": 0.2131904976080468, "grad_norm": 0.09057778865098953, "learning_rate": 9.998675054784019e-06, "loss": 0.0038, "step": 26070 }, { "epoch": 0.21327227378664596, "grad_norm": 0.2119559645652771, "learning_rate": 9.998658576261154e-06, "loss": 0.0035, "step": 26080 }, { "epoch": 0.21335404996524512, "grad_norm": 0.08965569734573364, "learning_rate": 9.99864199591155e-06, "loss": 0.0068, "step": 26090 }, { "epoch": 0.2134358261438443, "grad_norm": 0.2942794859409332, "learning_rate": 9.998625313735543e-06, "loss": 0.0046, "step": 26100 }, { "epoch": 0.21351760232244346, "grad_norm": 0.4236144423484802, "learning_rate": 9.998608529733478e-06, "loss": 0.0043, "step": 26110 }, { "epoch": 0.21359937850104266, "grad_norm": 0.2657885253429413, "learning_rate": 9.998591643905693e-06, "loss": 0.0055, "step": 26120 }, { "epoch": 0.21368115467964183, "grad_norm": 0.11586702615022659, "learning_rate": 9.998574656252533e-06, "loss": 0.0038, "step": 26130 }, { "epoch": 0.213762930858241, "grad_norm": 0.018209828063845634, "learning_rate": 9.998557566774345e-06, "loss": 0.004, "step": 26140 }, { "epoch": 0.21384470703684016, "grad_norm": 0.3542623817920685, "learning_rate": 9.998540375471478e-06, "loss": 0.0061, "step": 26150 }, { "epoch": 0.21392648321543933, "grad_norm": 0.0883990079164505, "learning_rate": 9.998523082344278e-06, "loss": 0.0023, "step": 26160 }, { "epoch": 0.21400825939403853, "grad_norm": 0.34832218289375305, "learning_rate": 9.998505687393101e-06, "loss": 0.0055, "step": 26170 }, { "epoch": 0.2140900355726377, "grad_norm": 0.5258496999740601, "learning_rate": 9.998488190618298e-06, "loss": 0.005, "step": 26180 }, { "epoch": 0.21417181175123687, "grad_norm": 0.11151987314224243, "learning_rate": 9.998470592020232e-06, "loss": 0.0041, "step": 26190 }, { "epoch": 0.21425358792983604, "grad_norm": 0.03269367292523384, "learning_rate": 9.998452891599253e-06, "loss": 0.0029, "step": 26200 }, { "epoch": 0.2143353641084352, "grad_norm": 0.09986291825771332, "learning_rate": 9.998435089355726e-06, "loss": 0.0035, "step": 26210 }, { "epoch": 0.21441714028703437, "grad_norm": 0.47696352005004883, "learning_rate": 9.998417185290015e-06, "loss": 0.0051, "step": 26220 }, { "epoch": 0.21449891646563357, "grad_norm": 0.14768236875534058, "learning_rate": 9.998399179402482e-06, "loss": 0.0039, "step": 26230 }, { "epoch": 0.21458069264423274, "grad_norm": 0.1357065886259079, "learning_rate": 9.998381071693494e-06, "loss": 0.0035, "step": 26240 }, { "epoch": 0.2146624688228319, "grad_norm": 0.15848900377750397, "learning_rate": 9.998362862163424e-06, "loss": 0.0063, "step": 26250 }, { "epoch": 0.21474424500143108, "grad_norm": 0.12203533947467804, "learning_rate": 9.998344550812636e-06, "loss": 0.0043, "step": 26260 }, { "epoch": 0.21482602118003025, "grad_norm": 0.09759923070669174, "learning_rate": 9.998326137641509e-06, "loss": 0.0047, "step": 26270 }, { "epoch": 0.21490779735862944, "grad_norm": 0.05104070529341698, "learning_rate": 9.998307622650414e-06, "loss": 0.0035, "step": 26280 }, { "epoch": 0.2149895735372286, "grad_norm": 0.10633070021867752, "learning_rate": 9.998289005839732e-06, "loss": 0.0031, "step": 26290 }, { "epoch": 0.21507134971582778, "grad_norm": 0.24000893533229828, "learning_rate": 9.998270287209838e-06, "loss": 0.0066, "step": 26300 }, { "epoch": 0.21515312589442695, "grad_norm": 0.1550738662481308, "learning_rate": 9.998251466761117e-06, "loss": 0.0027, "step": 26310 }, { "epoch": 0.21523490207302612, "grad_norm": 0.10004504770040512, "learning_rate": 9.998232544493949e-06, "loss": 0.0043, "step": 26320 }, { "epoch": 0.21531667825162532, "grad_norm": 0.1889389604330063, "learning_rate": 9.998213520408722e-06, "loss": 0.0054, "step": 26330 }, { "epoch": 0.21539845443022448, "grad_norm": 0.061994265764951706, "learning_rate": 9.998194394505824e-06, "loss": 0.003, "step": 26340 }, { "epoch": 0.21548023060882365, "grad_norm": 0.12562185525894165, "learning_rate": 9.998175166785643e-06, "loss": 0.0052, "step": 26350 }, { "epoch": 0.21556200678742282, "grad_norm": 0.13114719092845917, "learning_rate": 9.99815583724857e-06, "loss": 0.0043, "step": 26360 }, { "epoch": 0.215643782966022, "grad_norm": 0.03377504274249077, "learning_rate": 9.998136405895002e-06, "loss": 0.0061, "step": 26370 }, { "epoch": 0.21572555914462116, "grad_norm": 0.29839906096458435, "learning_rate": 9.99811687272533e-06, "loss": 0.0088, "step": 26380 }, { "epoch": 0.21580733532322036, "grad_norm": 0.1099756583571434, "learning_rate": 9.998097237739956e-06, "loss": 0.0046, "step": 26390 }, { "epoch": 0.21588911150181953, "grad_norm": 0.13857011497020721, "learning_rate": 9.99807750093928e-06, "loss": 0.004, "step": 26400 }, { "epoch": 0.2159708876804187, "grad_norm": 0.21066193282604218, "learning_rate": 9.998057662323702e-06, "loss": 0.0033, "step": 26410 }, { "epoch": 0.21605266385901786, "grad_norm": 0.11427288502454758, "learning_rate": 9.998037721893625e-06, "loss": 0.0022, "step": 26420 }, { "epoch": 0.21613444003761703, "grad_norm": 0.09274422377347946, "learning_rate": 9.998017679649458e-06, "loss": 0.0041, "step": 26430 }, { "epoch": 0.21621621621621623, "grad_norm": 0.09615671634674072, "learning_rate": 9.99799753559161e-06, "loss": 0.0025, "step": 26440 }, { "epoch": 0.2162979923948154, "grad_norm": 0.12862862646579742, "learning_rate": 9.997977289720486e-06, "loss": 0.0045, "step": 26450 }, { "epoch": 0.21637976857341457, "grad_norm": 0.2150634229183197, "learning_rate": 9.997956942036506e-06, "loss": 0.0035, "step": 26460 }, { "epoch": 0.21646154475201373, "grad_norm": 0.15584714710712433, "learning_rate": 9.997936492540079e-06, "loss": 0.0044, "step": 26470 }, { "epoch": 0.2165433209306129, "grad_norm": 0.13002566993236542, "learning_rate": 9.997915941231623e-06, "loss": 0.0029, "step": 26480 }, { "epoch": 0.2166250971092121, "grad_norm": 0.12104858458042145, "learning_rate": 9.997895288111555e-06, "loss": 0.0051, "step": 26490 }, { "epoch": 0.21670687328781127, "grad_norm": 0.09533397108316422, "learning_rate": 9.997874533180298e-06, "loss": 0.0063, "step": 26500 }, { "epoch": 0.21678864946641044, "grad_norm": 0.24861228466033936, "learning_rate": 9.997853676438277e-06, "loss": 0.0061, "step": 26510 }, { "epoch": 0.2168704256450096, "grad_norm": 0.3188518285751343, "learning_rate": 9.997832717885912e-06, "loss": 0.0055, "step": 26520 }, { "epoch": 0.21695220182360878, "grad_norm": 0.12145412713289261, "learning_rate": 9.997811657523633e-06, "loss": 0.0076, "step": 26530 }, { "epoch": 0.21703397800220794, "grad_norm": 0.1584446132183075, "learning_rate": 9.997790495351866e-06, "loss": 0.0051, "step": 26540 }, { "epoch": 0.21711575418080714, "grad_norm": 0.05256201699376106, "learning_rate": 9.997769231371045e-06, "loss": 0.0036, "step": 26550 }, { "epoch": 0.2171975303594063, "grad_norm": 0.08495315909385681, "learning_rate": 9.997747865581604e-06, "loss": 0.0049, "step": 26560 }, { "epoch": 0.21727930653800548, "grad_norm": 0.17052221298217773, "learning_rate": 9.997726397983979e-06, "loss": 0.0024, "step": 26570 }, { "epoch": 0.21736108271660465, "grad_norm": 0.15275448560714722, "learning_rate": 9.9977048285786e-06, "loss": 0.003, "step": 26580 }, { "epoch": 0.21744285889520382, "grad_norm": 0.05316479131579399, "learning_rate": 9.997683157365912e-06, "loss": 0.004, "step": 26590 }, { "epoch": 0.217524635073803, "grad_norm": 0.1075299009680748, "learning_rate": 9.997661384346357e-06, "loss": 0.0045, "step": 26600 }, { "epoch": 0.21760641125240218, "grad_norm": 0.09835457056760788, "learning_rate": 9.997639509520378e-06, "loss": 0.0043, "step": 26610 }, { "epoch": 0.21768818743100135, "grad_norm": 0.0975676104426384, "learning_rate": 9.99761753288842e-06, "loss": 0.0036, "step": 26620 }, { "epoch": 0.21776996360960052, "grad_norm": 0.11534436047077179, "learning_rate": 9.997595454450929e-06, "loss": 0.0056, "step": 26630 }, { "epoch": 0.2178517397881997, "grad_norm": 0.23194283246994019, "learning_rate": 9.997573274208357e-06, "loss": 0.0047, "step": 26640 }, { "epoch": 0.21793351596679889, "grad_norm": 0.05581124126911163, "learning_rate": 9.997550992161154e-06, "loss": 0.006, "step": 26650 }, { "epoch": 0.21801529214539805, "grad_norm": 0.20884492993354797, "learning_rate": 9.997528608309776e-06, "loss": 0.0045, "step": 26660 }, { "epoch": 0.21809706832399722, "grad_norm": 0.2019641399383545, "learning_rate": 9.997506122654679e-06, "loss": 0.0039, "step": 26670 }, { "epoch": 0.2181788445025964, "grad_norm": 0.042783766984939575, "learning_rate": 9.99748353519632e-06, "loss": 0.0026, "step": 26680 }, { "epoch": 0.21826062068119556, "grad_norm": 0.04842277243733406, "learning_rate": 9.997460845935158e-06, "loss": 0.0032, "step": 26690 }, { "epoch": 0.21834239685979473, "grad_norm": 0.07840336859226227, "learning_rate": 9.997438054871656e-06, "loss": 0.0033, "step": 26700 }, { "epoch": 0.21842417303839393, "grad_norm": 0.12334761768579483, "learning_rate": 9.99741516200628e-06, "loss": 0.0032, "step": 26710 }, { "epoch": 0.2185059492169931, "grad_norm": 0.12895642220973969, "learning_rate": 9.997392167339495e-06, "loss": 0.0031, "step": 26720 }, { "epoch": 0.21858772539559226, "grad_norm": 0.09131722897291183, "learning_rate": 9.997369070871769e-06, "loss": 0.006, "step": 26730 }, { "epoch": 0.21866950157419143, "grad_norm": 0.21844249963760376, "learning_rate": 9.997345872603574e-06, "loss": 0.0039, "step": 26740 }, { "epoch": 0.2187512777527906, "grad_norm": 0.0723213404417038, "learning_rate": 9.99732257253538e-06, "loss": 0.004, "step": 26750 }, { "epoch": 0.2188330539313898, "grad_norm": 0.1890975534915924, "learning_rate": 9.997299170667665e-06, "loss": 0.0098, "step": 26760 }, { "epoch": 0.21891483010998897, "grad_norm": 0.05748238414525986, "learning_rate": 9.997275667000902e-06, "loss": 0.0051, "step": 26770 }, { "epoch": 0.21899660628858814, "grad_norm": 0.2363673448562622, "learning_rate": 9.997252061535572e-06, "loss": 0.0047, "step": 26780 }, { "epoch": 0.2190783824671873, "grad_norm": 0.14423547685146332, "learning_rate": 9.997228354272157e-06, "loss": 0.0041, "step": 26790 }, { "epoch": 0.21916015864578647, "grad_norm": 0.12670639157295227, "learning_rate": 9.997204545211136e-06, "loss": 0.003, "step": 26800 }, { "epoch": 0.21924193482438567, "grad_norm": 0.1340371072292328, "learning_rate": 9.997180634352998e-06, "loss": 0.0042, "step": 26810 }, { "epoch": 0.21932371100298484, "grad_norm": 0.10709511488676071, "learning_rate": 9.99715662169823e-06, "loss": 0.0042, "step": 26820 }, { "epoch": 0.219405487181584, "grad_norm": 0.11142963916063309, "learning_rate": 9.997132507247317e-06, "loss": 0.0037, "step": 26830 }, { "epoch": 0.21948726336018318, "grad_norm": 0.18027223646640778, "learning_rate": 9.997108291000754e-06, "loss": 0.0035, "step": 26840 }, { "epoch": 0.21956903953878235, "grad_norm": 0.19606775045394897, "learning_rate": 9.997083972959035e-06, "loss": 0.005, "step": 26850 }, { "epoch": 0.21965081571738151, "grad_norm": 0.23399409651756287, "learning_rate": 9.997059553122651e-06, "loss": 0.0042, "step": 26860 }, { "epoch": 0.2197325918959807, "grad_norm": 0.08195438981056213, "learning_rate": 9.997035031492102e-06, "loss": 0.0052, "step": 26870 }, { "epoch": 0.21981436807457988, "grad_norm": 0.12078644335269928, "learning_rate": 9.99701040806789e-06, "loss": 0.0029, "step": 26880 }, { "epoch": 0.21989614425317905, "grad_norm": 0.06344635784626007, "learning_rate": 9.996985682850513e-06, "loss": 0.0021, "step": 26890 }, { "epoch": 0.21997792043177822, "grad_norm": 0.16077521443367004, "learning_rate": 9.996960855840477e-06, "loss": 0.0049, "step": 26900 }, { "epoch": 0.2200596966103774, "grad_norm": 0.33424919843673706, "learning_rate": 9.996935927038284e-06, "loss": 0.0053, "step": 26910 }, { "epoch": 0.22014147278897658, "grad_norm": 0.25378820300102234, "learning_rate": 9.996910896444446e-06, "loss": 0.0042, "step": 26920 }, { "epoch": 0.22022324896757575, "grad_norm": 0.11565381288528442, "learning_rate": 9.996885764059471e-06, "loss": 0.0049, "step": 26930 }, { "epoch": 0.22030502514617492, "grad_norm": 0.1327304095029831, "learning_rate": 9.996860529883872e-06, "loss": 0.009, "step": 26940 }, { "epoch": 0.2203868013247741, "grad_norm": 0.10457871109247208, "learning_rate": 9.996835193918162e-06, "loss": 0.0039, "step": 26950 }, { "epoch": 0.22046857750337326, "grad_norm": 0.10131983458995819, "learning_rate": 9.996809756162858e-06, "loss": 0.0022, "step": 26960 }, { "epoch": 0.22055035368197246, "grad_norm": 0.44501057267189026, "learning_rate": 9.996784216618476e-06, "loss": 0.0055, "step": 26970 }, { "epoch": 0.22063212986057162, "grad_norm": 0.1523790955543518, "learning_rate": 9.996758575285539e-06, "loss": 0.0041, "step": 26980 }, { "epoch": 0.2207139060391708, "grad_norm": 0.11430211365222931, "learning_rate": 9.99673283216457e-06, "loss": 0.0043, "step": 26990 }, { "epoch": 0.22079568221776996, "grad_norm": 0.12000374495983124, "learning_rate": 9.996706987256089e-06, "loss": 0.0022, "step": 27000 }, { "epoch": 0.22087745839636913, "grad_norm": 0.23967231810092926, "learning_rate": 9.996681040560625e-06, "loss": 0.0042, "step": 27010 }, { "epoch": 0.2209592345749683, "grad_norm": 0.2589830756187439, "learning_rate": 9.996654992078708e-06, "loss": 0.0059, "step": 27020 }, { "epoch": 0.2210410107535675, "grad_norm": 0.17189814150333405, "learning_rate": 9.996628841810867e-06, "loss": 0.0038, "step": 27030 }, { "epoch": 0.22112278693216667, "grad_norm": 0.1838415116071701, "learning_rate": 9.996602589757636e-06, "loss": 0.0042, "step": 27040 }, { "epoch": 0.22120456311076583, "grad_norm": 0.13148367404937744, "learning_rate": 9.996576235919548e-06, "loss": 0.0031, "step": 27050 }, { "epoch": 0.221286339289365, "grad_norm": 0.07697249203920364, "learning_rate": 9.99654978029714e-06, "loss": 0.0096, "step": 27060 }, { "epoch": 0.22136811546796417, "grad_norm": 0.168974831700325, "learning_rate": 9.996523222890951e-06, "loss": 0.0037, "step": 27070 }, { "epoch": 0.22144989164656337, "grad_norm": 0.18846561014652252, "learning_rate": 9.996496563701525e-06, "loss": 0.0041, "step": 27080 }, { "epoch": 0.22153166782516254, "grad_norm": 0.15420836210250854, "learning_rate": 9.9964698027294e-06, "loss": 0.0029, "step": 27090 }, { "epoch": 0.2216134440037617, "grad_norm": 0.18696171045303345, "learning_rate": 9.996442939975126e-06, "loss": 0.0045, "step": 27100 }, { "epoch": 0.22169522018236087, "grad_norm": 0.10267578065395355, "learning_rate": 9.996415975439246e-06, "loss": 0.0039, "step": 27110 }, { "epoch": 0.22177699636096004, "grad_norm": 0.07388083636760712, "learning_rate": 9.996388909122312e-06, "loss": 0.0039, "step": 27120 }, { "epoch": 0.22185877253955924, "grad_norm": 0.1286928653717041, "learning_rate": 9.996361741024876e-06, "loss": 0.0033, "step": 27130 }, { "epoch": 0.2219405487181584, "grad_norm": 0.0833221822977066, "learning_rate": 9.99633447114749e-06, "loss": 0.008, "step": 27140 }, { "epoch": 0.22202232489675758, "grad_norm": 0.1728924661874771, "learning_rate": 9.996307099490708e-06, "loss": 0.004, "step": 27150 }, { "epoch": 0.22210410107535675, "grad_norm": 0.06150677800178528, "learning_rate": 9.99627962605509e-06, "loss": 0.0043, "step": 27160 }, { "epoch": 0.22218587725395592, "grad_norm": 0.03323431313037872, "learning_rate": 9.996252050841196e-06, "loss": 0.003, "step": 27170 }, { "epoch": 0.22226765343255508, "grad_norm": 0.19971254467964172, "learning_rate": 9.996224373849584e-06, "loss": 0.0052, "step": 27180 }, { "epoch": 0.22234942961115428, "grad_norm": 0.06301967799663544, "learning_rate": 9.996196595080822e-06, "loss": 0.0028, "step": 27190 }, { "epoch": 0.22243120578975345, "grad_norm": 0.23832030594348907, "learning_rate": 9.996168714535473e-06, "loss": 0.0067, "step": 27200 }, { "epoch": 0.22251298196835262, "grad_norm": 0.2682593762874603, "learning_rate": 9.996140732214108e-06, "loss": 0.0044, "step": 27210 }, { "epoch": 0.2225947581469518, "grad_norm": 0.20988358557224274, "learning_rate": 9.996112648117294e-06, "loss": 0.0067, "step": 27220 }, { "epoch": 0.22267653432555096, "grad_norm": 0.14495833218097687, "learning_rate": 9.996084462245605e-06, "loss": 0.0029, "step": 27230 }, { "epoch": 0.22275831050415015, "grad_norm": 0.3325144648551941, "learning_rate": 9.996056174599615e-06, "loss": 0.0044, "step": 27240 }, { "epoch": 0.22284008668274932, "grad_norm": 0.10349862277507782, "learning_rate": 9.996027785179899e-06, "loss": 0.0043, "step": 27250 }, { "epoch": 0.2229218628613485, "grad_norm": 0.09746909886598587, "learning_rate": 9.995999293987036e-06, "loss": 0.0059, "step": 27260 }, { "epoch": 0.22300363903994766, "grad_norm": 0.06278159469366074, "learning_rate": 9.995970701021604e-06, "loss": 0.0033, "step": 27270 }, { "epoch": 0.22308541521854683, "grad_norm": 0.08443514257669449, "learning_rate": 9.995942006284192e-06, "loss": 0.0038, "step": 27280 }, { "epoch": 0.223167191397146, "grad_norm": 0.09269502013921738, "learning_rate": 9.99591320977538e-06, "loss": 0.0037, "step": 27290 }, { "epoch": 0.2232489675757452, "grad_norm": 0.17715302109718323, "learning_rate": 9.995884311495751e-06, "loss": 0.0041, "step": 27300 }, { "epoch": 0.22333074375434436, "grad_norm": 0.12906382977962494, "learning_rate": 9.9958553114459e-06, "loss": 0.0059, "step": 27310 }, { "epoch": 0.22341251993294353, "grad_norm": 0.21722300350666046, "learning_rate": 9.995826209626417e-06, "loss": 0.0041, "step": 27320 }, { "epoch": 0.2234942961115427, "grad_norm": 0.11946430057287216, "learning_rate": 9.995797006037893e-06, "loss": 0.004, "step": 27330 }, { "epoch": 0.22357607229014187, "grad_norm": 0.06941325217485428, "learning_rate": 9.99576770068092e-06, "loss": 0.0021, "step": 27340 }, { "epoch": 0.22365784846874107, "grad_norm": 0.362029105424881, "learning_rate": 9.995738293556102e-06, "loss": 0.004, "step": 27350 }, { "epoch": 0.22373962464734024, "grad_norm": 0.06846854835748672, "learning_rate": 9.995708784664033e-06, "loss": 0.0022, "step": 27360 }, { "epoch": 0.2238214008259394, "grad_norm": 0.5110177397727966, "learning_rate": 9.995679174005315e-06, "loss": 0.0044, "step": 27370 }, { "epoch": 0.22390317700453857, "grad_norm": 0.16120545566082, "learning_rate": 9.995649461580552e-06, "loss": 0.0041, "step": 27380 }, { "epoch": 0.22398495318313774, "grad_norm": 0.04934628680348396, "learning_rate": 9.995619647390348e-06, "loss": 0.0035, "step": 27390 }, { "epoch": 0.22406672936173694, "grad_norm": 0.17788122594356537, "learning_rate": 9.995589731435311e-06, "loss": 0.0043, "step": 27400 }, { "epoch": 0.2241485055403361, "grad_norm": 0.13572196662425995, "learning_rate": 9.995559713716052e-06, "loss": 0.0049, "step": 27410 }, { "epoch": 0.22423028171893528, "grad_norm": 0.06443722546100616, "learning_rate": 9.99552959423318e-06, "loss": 0.0067, "step": 27420 }, { "epoch": 0.22431205789753444, "grad_norm": 0.07726231217384338, "learning_rate": 9.99549937298731e-06, "loss": 0.0057, "step": 27430 }, { "epoch": 0.2243938340761336, "grad_norm": 0.1776088923215866, "learning_rate": 9.995469049979057e-06, "loss": 0.0019, "step": 27440 }, { "epoch": 0.22447561025473278, "grad_norm": 0.0985661968588829, "learning_rate": 9.995438625209037e-06, "loss": 0.0029, "step": 27450 }, { "epoch": 0.22455738643333198, "grad_norm": 0.07509531825780869, "learning_rate": 9.995408098677874e-06, "loss": 0.0044, "step": 27460 }, { "epoch": 0.22463916261193115, "grad_norm": 0.33518272638320923, "learning_rate": 9.995377470386188e-06, "loss": 0.0068, "step": 27470 }, { "epoch": 0.22472093879053032, "grad_norm": 0.3683425188064575, "learning_rate": 9.995346740334602e-06, "loss": 0.006, "step": 27480 }, { "epoch": 0.22480271496912949, "grad_norm": 0.04803166165947914, "learning_rate": 9.995315908523741e-06, "loss": 0.0042, "step": 27490 }, { "epoch": 0.22488449114772865, "grad_norm": 0.20454174280166626, "learning_rate": 9.995284974954237e-06, "loss": 0.0044, "step": 27500 }, { "epoch": 0.22496626732632785, "grad_norm": 0.1080632358789444, "learning_rate": 9.995253939626715e-06, "loss": 0.0055, "step": 27510 }, { "epoch": 0.22504804350492702, "grad_norm": 0.15095670521259308, "learning_rate": 9.995222802541812e-06, "loss": 0.0038, "step": 27520 }, { "epoch": 0.2251298196835262, "grad_norm": 0.08109486848115921, "learning_rate": 9.99519156370016e-06, "loss": 0.0044, "step": 27530 }, { "epoch": 0.22521159586212536, "grad_norm": 0.08134519308805466, "learning_rate": 9.995160223102394e-06, "loss": 0.0044, "step": 27540 }, { "epoch": 0.22529337204072453, "grad_norm": 0.05584889277815819, "learning_rate": 9.995128780749155e-06, "loss": 0.0034, "step": 27550 }, { "epoch": 0.22537514821932372, "grad_norm": 0.21033909916877747, "learning_rate": 9.995097236641082e-06, "loss": 0.0036, "step": 27560 }, { "epoch": 0.2254569243979229, "grad_norm": 0.2080957293510437, "learning_rate": 9.995065590778819e-06, "loss": 0.0049, "step": 27570 }, { "epoch": 0.22553870057652206, "grad_norm": 0.08967003971338272, "learning_rate": 9.995033843163008e-06, "loss": 0.0028, "step": 27580 }, { "epoch": 0.22562047675512123, "grad_norm": 0.16218580305576324, "learning_rate": 9.9950019937943e-06, "loss": 0.0048, "step": 27590 }, { "epoch": 0.2257022529337204, "grad_norm": 0.1382511407136917, "learning_rate": 9.99497004267334e-06, "loss": 0.0053, "step": 27600 }, { "epoch": 0.22578402911231957, "grad_norm": 0.009711729362607002, "learning_rate": 9.99493798980078e-06, "loss": 0.004, "step": 27610 }, { "epoch": 0.22586580529091876, "grad_norm": 0.1858227699995041, "learning_rate": 9.994905835177273e-06, "loss": 0.0065, "step": 27620 }, { "epoch": 0.22594758146951793, "grad_norm": 0.06272944808006287, "learning_rate": 9.994873578803474e-06, "loss": 0.0034, "step": 27630 }, { "epoch": 0.2260293576481171, "grad_norm": 0.07918582111597061, "learning_rate": 9.99484122068004e-06, "loss": 0.0037, "step": 27640 }, { "epoch": 0.22611113382671627, "grad_norm": 0.1427178978919983, "learning_rate": 9.994808760807632e-06, "loss": 0.0066, "step": 27650 }, { "epoch": 0.22619291000531544, "grad_norm": 0.14892223477363586, "learning_rate": 9.994776199186908e-06, "loss": 0.0051, "step": 27660 }, { "epoch": 0.22627468618391464, "grad_norm": 0.05928588658571243, "learning_rate": 9.994743535818533e-06, "loss": 0.0056, "step": 27670 }, { "epoch": 0.2263564623625138, "grad_norm": 0.19078990817070007, "learning_rate": 9.994710770703171e-06, "loss": 0.0047, "step": 27680 }, { "epoch": 0.22643823854111297, "grad_norm": 0.1674216240644455, "learning_rate": 9.994677903841494e-06, "loss": 0.0035, "step": 27690 }, { "epoch": 0.22652001471971214, "grad_norm": 0.14529985189437866, "learning_rate": 9.994644935234166e-06, "loss": 0.0028, "step": 27700 }, { "epoch": 0.2266017908983113, "grad_norm": 0.23031646013259888, "learning_rate": 9.99461186488186e-06, "loss": 0.004, "step": 27710 }, { "epoch": 0.2266835670769105, "grad_norm": 0.26371774077415466, "learning_rate": 9.994578692785253e-06, "loss": 0.0048, "step": 27720 }, { "epoch": 0.22676534325550968, "grad_norm": 0.12122060358524323, "learning_rate": 9.994545418945017e-06, "loss": 0.008, "step": 27730 }, { "epoch": 0.22684711943410885, "grad_norm": 0.11512830853462219, "learning_rate": 9.99451204336183e-06, "loss": 0.0042, "step": 27740 }, { "epoch": 0.22692889561270801, "grad_norm": 0.16123901307582855, "learning_rate": 9.994478566036374e-06, "loss": 0.0034, "step": 27750 }, { "epoch": 0.22701067179130718, "grad_norm": 0.22302190959453583, "learning_rate": 9.99444498696933e-06, "loss": 0.0033, "step": 27760 }, { "epoch": 0.22709244796990635, "grad_norm": 0.23307546973228455, "learning_rate": 9.99441130616138e-06, "loss": 0.0036, "step": 27770 }, { "epoch": 0.22717422414850555, "grad_norm": 0.28166651725769043, "learning_rate": 9.994377523613214e-06, "loss": 0.0043, "step": 27780 }, { "epoch": 0.22725600032710472, "grad_norm": 0.12387170642614365, "learning_rate": 9.99434363932552e-06, "loss": 0.0039, "step": 27790 }, { "epoch": 0.2273377765057039, "grad_norm": 0.09428321570158005, "learning_rate": 9.994309653298982e-06, "loss": 0.0039, "step": 27800 }, { "epoch": 0.22741955268430306, "grad_norm": 0.2614209055900574, "learning_rate": 9.994275565534299e-06, "loss": 0.0062, "step": 27810 }, { "epoch": 0.22750132886290222, "grad_norm": 0.1464700996875763, "learning_rate": 9.994241376032163e-06, "loss": 0.0034, "step": 27820 }, { "epoch": 0.22758310504150142, "grad_norm": 0.20180369913578033, "learning_rate": 9.994207084793272e-06, "loss": 0.0042, "step": 27830 }, { "epoch": 0.2276648812201006, "grad_norm": 0.14849331974983215, "learning_rate": 9.994172691818322e-06, "loss": 0.0035, "step": 27840 }, { "epoch": 0.22774665739869976, "grad_norm": 0.09320276230573654, "learning_rate": 9.994138197108015e-06, "loss": 0.004, "step": 27850 }, { "epoch": 0.22782843357729893, "grad_norm": 0.041046060621738434, "learning_rate": 9.994103600663053e-06, "loss": 0.0036, "step": 27860 }, { "epoch": 0.2279102097558981, "grad_norm": 0.19636216759681702, "learning_rate": 9.994068902484143e-06, "loss": 0.0032, "step": 27870 }, { "epoch": 0.2279919859344973, "grad_norm": 0.10731710493564606, "learning_rate": 9.994034102571988e-06, "loss": 0.0023, "step": 27880 }, { "epoch": 0.22807376211309646, "grad_norm": 0.10613927990198135, "learning_rate": 9.9939992009273e-06, "loss": 0.0058, "step": 27890 }, { "epoch": 0.22815553829169563, "grad_norm": 0.04282429441809654, "learning_rate": 9.993964197550787e-06, "loss": 0.0023, "step": 27900 }, { "epoch": 0.2282373144702948, "grad_norm": 0.12141784280538559, "learning_rate": 9.993929092443167e-06, "loss": 0.0034, "step": 27910 }, { "epoch": 0.22831909064889397, "grad_norm": 0.08608150482177734, "learning_rate": 9.99389388560515e-06, "loss": 0.0044, "step": 27920 }, { "epoch": 0.22840086682749314, "grad_norm": 0.14965097606182098, "learning_rate": 9.993858577037456e-06, "loss": 0.0034, "step": 27930 }, { "epoch": 0.22848264300609233, "grad_norm": 0.19394956529140472, "learning_rate": 9.993823166740804e-06, "loss": 0.0041, "step": 27940 }, { "epoch": 0.2285644191846915, "grad_norm": 0.0838976725935936, "learning_rate": 9.993787654715914e-06, "loss": 0.0035, "step": 27950 }, { "epoch": 0.22864619536329067, "grad_norm": 0.09294543415307999, "learning_rate": 9.99375204096351e-06, "loss": 0.0042, "step": 27960 }, { "epoch": 0.22872797154188984, "grad_norm": 0.14983007311820984, "learning_rate": 9.99371632548432e-06, "loss": 0.0053, "step": 27970 }, { "epoch": 0.228809747720489, "grad_norm": 0.21896445751190186, "learning_rate": 9.993680508279068e-06, "loss": 0.0055, "step": 27980 }, { "epoch": 0.2288915238990882, "grad_norm": 0.054218560457229614, "learning_rate": 9.993644589348485e-06, "loss": 0.0027, "step": 27990 }, { "epoch": 0.22897330007768738, "grad_norm": 0.13042226433753967, "learning_rate": 9.993608568693303e-06, "loss": 0.0051, "step": 28000 }, { "epoch": 0.22905507625628654, "grad_norm": 0.050862908363342285, "learning_rate": 9.993572446314254e-06, "loss": 0.0033, "step": 28010 }, { "epoch": 0.2291368524348857, "grad_norm": 0.13584572076797485, "learning_rate": 9.993536222212077e-06, "loss": 0.0041, "step": 28020 }, { "epoch": 0.22921862861348488, "grad_norm": 0.037312060594558716, "learning_rate": 9.993499896387508e-06, "loss": 0.0036, "step": 28030 }, { "epoch": 0.22930040479208408, "grad_norm": 0.17054374516010284, "learning_rate": 9.993463468841285e-06, "loss": 0.0051, "step": 28040 }, { "epoch": 0.22938218097068325, "grad_norm": 0.3735475540161133, "learning_rate": 9.993426939574156e-06, "loss": 0.003, "step": 28050 }, { "epoch": 0.22946395714928242, "grad_norm": 1.8934743404388428, "learning_rate": 9.993390308586858e-06, "loss": 0.0025, "step": 28060 }, { "epoch": 0.22954573332788158, "grad_norm": 0.31130361557006836, "learning_rate": 9.993353575880143e-06, "loss": 0.003, "step": 28070 }, { "epoch": 0.22962750950648075, "grad_norm": 0.0922408327460289, "learning_rate": 9.993316741454754e-06, "loss": 0.005, "step": 28080 }, { "epoch": 0.22970928568507992, "grad_norm": 0.09893661737442017, "learning_rate": 9.993279805311447e-06, "loss": 0.0043, "step": 28090 }, { "epoch": 0.22979106186367912, "grad_norm": 0.03464441001415253, "learning_rate": 9.99324276745097e-06, "loss": 0.0051, "step": 28100 }, { "epoch": 0.2298728380422783, "grad_norm": 0.09619127213954926, "learning_rate": 9.99320562787408e-06, "loss": 0.0037, "step": 28110 }, { "epoch": 0.22995461422087746, "grad_norm": 0.22718435525894165, "learning_rate": 9.993168386581533e-06, "loss": 0.0031, "step": 28120 }, { "epoch": 0.23003639039947663, "grad_norm": 0.04669011011719704, "learning_rate": 9.993131043574088e-06, "loss": 0.0028, "step": 28130 }, { "epoch": 0.2301181665780758, "grad_norm": 0.16700537502765656, "learning_rate": 9.993093598852502e-06, "loss": 0.0083, "step": 28140 }, { "epoch": 0.230199942756675, "grad_norm": 0.1649850606918335, "learning_rate": 9.993056052417542e-06, "loss": 0.004, "step": 28150 }, { "epoch": 0.23028171893527416, "grad_norm": 0.11860954761505127, "learning_rate": 9.993018404269974e-06, "loss": 0.0052, "step": 28160 }, { "epoch": 0.23036349511387333, "grad_norm": 0.3884819447994232, "learning_rate": 9.99298065441056e-06, "loss": 0.0026, "step": 28170 }, { "epoch": 0.2304452712924725, "grad_norm": 0.12627433240413666, "learning_rate": 9.992942802840073e-06, "loss": 0.0017, "step": 28180 }, { "epoch": 0.23052704747107167, "grad_norm": 0.3719067871570587, "learning_rate": 9.992904849559282e-06, "loss": 0.0035, "step": 28190 }, { "epoch": 0.23060882364967086, "grad_norm": 0.08990806341171265, "learning_rate": 9.99286679456896e-06, "loss": 0.0048, "step": 28200 }, { "epoch": 0.23069059982827003, "grad_norm": 0.05906914174556732, "learning_rate": 9.992828637869884e-06, "loss": 0.0061, "step": 28210 }, { "epoch": 0.2307723760068692, "grad_norm": 0.2012234777212143, "learning_rate": 9.99279037946283e-06, "loss": 0.0042, "step": 28220 }, { "epoch": 0.23085415218546837, "grad_norm": 0.15342499315738678, "learning_rate": 9.992752019348578e-06, "loss": 0.0035, "step": 28230 }, { "epoch": 0.23093592836406754, "grad_norm": 0.059853993356227875, "learning_rate": 9.992713557527908e-06, "loss": 0.0037, "step": 28240 }, { "epoch": 0.2310177045426667, "grad_norm": 0.06147846207022667, "learning_rate": 9.992674994001606e-06, "loss": 0.0069, "step": 28250 }, { "epoch": 0.2310994807212659, "grad_norm": 0.06404581665992737, "learning_rate": 9.992636328770455e-06, "loss": 0.008, "step": 28260 }, { "epoch": 0.23118125689986507, "grad_norm": 0.14634117484092712, "learning_rate": 9.992597561835245e-06, "loss": 0.0039, "step": 28270 }, { "epoch": 0.23126303307846424, "grad_norm": 0.12190405279397964, "learning_rate": 9.992558693196763e-06, "loss": 0.0038, "step": 28280 }, { "epoch": 0.2313448092570634, "grad_norm": 0.19122998416423798, "learning_rate": 9.992519722855803e-06, "loss": 0.0028, "step": 28290 }, { "epoch": 0.23142658543566258, "grad_norm": 0.060638658702373505, "learning_rate": 9.992480650813157e-06, "loss": 0.0028, "step": 28300 }, { "epoch": 0.23150836161426178, "grad_norm": 0.11266414821147919, "learning_rate": 9.992441477069623e-06, "loss": 0.0036, "step": 28310 }, { "epoch": 0.23159013779286095, "grad_norm": 0.23661299049854279, "learning_rate": 9.992402201626e-06, "loss": 0.0025, "step": 28320 }, { "epoch": 0.23167191397146011, "grad_norm": 0.16742920875549316, "learning_rate": 9.992362824483084e-06, "loss": 0.005, "step": 28330 }, { "epoch": 0.23175369015005928, "grad_norm": 0.24635230004787445, "learning_rate": 9.99232334564168e-06, "loss": 0.003, "step": 28340 }, { "epoch": 0.23183546632865845, "grad_norm": 0.007706914562731981, "learning_rate": 9.99228376510259e-06, "loss": 0.0031, "step": 28350 }, { "epoch": 0.23191724250725765, "grad_norm": 0.09832600504159927, "learning_rate": 9.992244082866622e-06, "loss": 0.0026, "step": 28360 }, { "epoch": 0.23199901868585682, "grad_norm": 0.12092184275388718, "learning_rate": 9.992204298934585e-06, "loss": 0.0039, "step": 28370 }, { "epoch": 0.23208079486445599, "grad_norm": 0.11748698353767395, "learning_rate": 9.992164413307288e-06, "loss": 0.003, "step": 28380 }, { "epoch": 0.23216257104305515, "grad_norm": 0.11640865355730057, "learning_rate": 9.992124425985545e-06, "loss": 0.0042, "step": 28390 }, { "epoch": 0.23224434722165432, "grad_norm": 0.17283639311790466, "learning_rate": 9.992084336970169e-06, "loss": 0.0035, "step": 28400 }, { "epoch": 0.2323261234002535, "grad_norm": 0.075709268450737, "learning_rate": 9.992044146261979e-06, "loss": 0.0028, "step": 28410 }, { "epoch": 0.2324078995788527, "grad_norm": 0.1042356789112091, "learning_rate": 9.99200385386179e-06, "loss": 0.0028, "step": 28420 }, { "epoch": 0.23248967575745186, "grad_norm": 0.23582357168197632, "learning_rate": 9.991963459770425e-06, "loss": 0.004, "step": 28430 }, { "epoch": 0.23257145193605103, "grad_norm": 0.013656005263328552, "learning_rate": 9.991922963988707e-06, "loss": 0.0048, "step": 28440 }, { "epoch": 0.2326532281146502, "grad_norm": 0.26223647594451904, "learning_rate": 9.99188236651746e-06, "loss": 0.0045, "step": 28450 }, { "epoch": 0.23273500429324936, "grad_norm": 0.10808158665895462, "learning_rate": 9.991841667357512e-06, "loss": 0.003, "step": 28460 }, { "epoch": 0.23281678047184856, "grad_norm": 0.14996294677257538, "learning_rate": 9.991800866509692e-06, "loss": 0.0027, "step": 28470 }, { "epoch": 0.23289855665044773, "grad_norm": 0.17904463410377502, "learning_rate": 9.99175996397483e-06, "loss": 0.0027, "step": 28480 }, { "epoch": 0.2329803328290469, "grad_norm": 0.2947329878807068, "learning_rate": 9.991718959753761e-06, "loss": 0.0065, "step": 28490 }, { "epoch": 0.23306210900764607, "grad_norm": 0.13582000136375427, "learning_rate": 9.991677853847319e-06, "loss": 0.0035, "step": 28500 }, { "epoch": 0.23314388518624524, "grad_norm": 0.14878109097480774, "learning_rate": 9.991636646256341e-06, "loss": 0.0032, "step": 28510 }, { "epoch": 0.23322566136484443, "grad_norm": 0.2275567501783371, "learning_rate": 9.991595336981667e-06, "loss": 0.0054, "step": 28520 }, { "epoch": 0.2333074375434436, "grad_norm": 0.09720832854509354, "learning_rate": 9.99155392602414e-06, "loss": 0.0038, "step": 28530 }, { "epoch": 0.23338921372204277, "grad_norm": 0.05165404453873634, "learning_rate": 9.991512413384601e-06, "loss": 0.0035, "step": 28540 }, { "epoch": 0.23347098990064194, "grad_norm": 0.09722018986940384, "learning_rate": 9.991470799063898e-06, "loss": 0.0037, "step": 28550 }, { "epoch": 0.2335527660792411, "grad_norm": 0.20327886939048767, "learning_rate": 9.991429083062878e-06, "loss": 0.006, "step": 28560 }, { "epoch": 0.23363454225784028, "grad_norm": 0.2051699459552765, "learning_rate": 9.991387265382388e-06, "loss": 0.0043, "step": 28570 }, { "epoch": 0.23371631843643947, "grad_norm": 0.04625643417239189, "learning_rate": 9.991345346023284e-06, "loss": 0.0046, "step": 28580 }, { "epoch": 0.23379809461503864, "grad_norm": 0.14881840348243713, "learning_rate": 9.991303324986417e-06, "loss": 0.0047, "step": 28590 }, { "epoch": 0.2338798707936378, "grad_norm": 0.10650970786809921, "learning_rate": 9.991261202272646e-06, "loss": 0.0046, "step": 28600 }, { "epoch": 0.23396164697223698, "grad_norm": 0.01093385647982359, "learning_rate": 9.991218977882825e-06, "loss": 0.0033, "step": 28610 }, { "epoch": 0.23404342315083615, "grad_norm": 0.19768419861793518, "learning_rate": 9.991176651817817e-06, "loss": 0.0035, "step": 28620 }, { "epoch": 0.23412519932943535, "grad_norm": 0.2576213479042053, "learning_rate": 9.991134224078483e-06, "loss": 0.0045, "step": 28630 }, { "epoch": 0.23420697550803452, "grad_norm": 0.27425163984298706, "learning_rate": 9.99109169466569e-06, "loss": 0.0045, "step": 28640 }, { "epoch": 0.23428875168663368, "grad_norm": 0.1438589245080948, "learning_rate": 9.991049063580301e-06, "loss": 0.0061, "step": 28650 }, { "epoch": 0.23437052786523285, "grad_norm": 0.061088189482688904, "learning_rate": 9.991006330823186e-06, "loss": 0.0047, "step": 28660 }, { "epoch": 0.23445230404383202, "grad_norm": 0.052057553082704544, "learning_rate": 9.990963496395214e-06, "loss": 0.003, "step": 28670 }, { "epoch": 0.23453408022243122, "grad_norm": 0.09168105572462082, "learning_rate": 9.990920560297259e-06, "loss": 0.0037, "step": 28680 }, { "epoch": 0.2346158564010304, "grad_norm": 0.15135633945465088, "learning_rate": 9.990877522530195e-06, "loss": 0.0038, "step": 28690 }, { "epoch": 0.23469763257962956, "grad_norm": 0.05485004559159279, "learning_rate": 9.9908343830949e-06, "loss": 0.0039, "step": 28700 }, { "epoch": 0.23477940875822872, "grad_norm": 0.107793428003788, "learning_rate": 9.990791141992253e-06, "loss": 0.0033, "step": 28710 }, { "epoch": 0.2348611849368279, "grad_norm": 0.06974440813064575, "learning_rate": 9.990747799223132e-06, "loss": 0.0045, "step": 28720 }, { "epoch": 0.23494296111542706, "grad_norm": 0.14574316143989563, "learning_rate": 9.99070435478842e-06, "loss": 0.0054, "step": 28730 }, { "epoch": 0.23502473729402626, "grad_norm": 0.09619714319705963, "learning_rate": 9.990660808689007e-06, "loss": 0.0041, "step": 28740 }, { "epoch": 0.23510651347262543, "grad_norm": 0.11244615912437439, "learning_rate": 9.990617160925776e-06, "loss": 0.0024, "step": 28750 }, { "epoch": 0.2351882896512246, "grad_norm": 0.05635536462068558, "learning_rate": 9.990573411499616e-06, "loss": 0.0041, "step": 28760 }, { "epoch": 0.23527006582982377, "grad_norm": 0.2384524941444397, "learning_rate": 9.990529560411419e-06, "loss": 0.0034, "step": 28770 }, { "epoch": 0.23535184200842293, "grad_norm": 0.04981127008795738, "learning_rate": 9.99048560766208e-06, "loss": 0.003, "step": 28780 }, { "epoch": 0.23543361818702213, "grad_norm": 0.3832872807979584, "learning_rate": 9.99044155325249e-06, "loss": 0.0029, "step": 28790 }, { "epoch": 0.2355153943656213, "grad_norm": 0.04038878157734871, "learning_rate": 9.990397397183551e-06, "loss": 0.0027, "step": 28800 }, { "epoch": 0.23559717054422047, "grad_norm": 0.062038954347372055, "learning_rate": 9.99035313945616e-06, "loss": 0.0042, "step": 28810 }, { "epoch": 0.23567894672281964, "grad_norm": 0.08597471565008163, "learning_rate": 9.990308780071218e-06, "loss": 0.0043, "step": 28820 }, { "epoch": 0.2357607229014188, "grad_norm": 0.16646794974803925, "learning_rate": 9.990264319029631e-06, "loss": 0.0053, "step": 28830 }, { "epoch": 0.235842499080018, "grad_norm": 0.09209680557250977, "learning_rate": 9.990219756332305e-06, "loss": 0.0051, "step": 28840 }, { "epoch": 0.23592427525861717, "grad_norm": 0.05257498100399971, "learning_rate": 9.990175091980146e-06, "loss": 0.0046, "step": 28850 }, { "epoch": 0.23600605143721634, "grad_norm": 0.08301829546689987, "learning_rate": 9.990130325974062e-06, "loss": 0.0029, "step": 28860 }, { "epoch": 0.2360878276158155, "grad_norm": 0.20614002645015717, "learning_rate": 9.990085458314969e-06, "loss": 0.003, "step": 28870 }, { "epoch": 0.23616960379441468, "grad_norm": 0.08034762740135193, "learning_rate": 9.990040489003778e-06, "loss": 0.0027, "step": 28880 }, { "epoch": 0.23625137997301385, "grad_norm": 0.07281726598739624, "learning_rate": 9.989995418041406e-06, "loss": 0.0037, "step": 28890 }, { "epoch": 0.23633315615161304, "grad_norm": 0.158447727560997, "learning_rate": 9.989950245428771e-06, "loss": 0.0051, "step": 28900 }, { "epoch": 0.2364149323302122, "grad_norm": 0.06621906906366348, "learning_rate": 9.989904971166795e-06, "loss": 0.0039, "step": 28910 }, { "epoch": 0.23649670850881138, "grad_norm": 0.036266718059778214, "learning_rate": 9.989859595256397e-06, "loss": 0.0065, "step": 28920 }, { "epoch": 0.23657848468741055, "grad_norm": 0.1690579205751419, "learning_rate": 9.989814117698505e-06, "loss": 0.002, "step": 28930 }, { "epoch": 0.23666026086600972, "grad_norm": 0.09406282007694244, "learning_rate": 9.989768538494042e-06, "loss": 0.004, "step": 28940 }, { "epoch": 0.23674203704460892, "grad_norm": 0.08372776210308075, "learning_rate": 9.989722857643938e-06, "loss": 0.0031, "step": 28950 }, { "epoch": 0.23682381322320809, "grad_norm": 0.22748884558677673, "learning_rate": 9.989677075149125e-06, "loss": 0.0038, "step": 28960 }, { "epoch": 0.23690558940180725, "grad_norm": 0.2110762894153595, "learning_rate": 9.989631191010532e-06, "loss": 0.0054, "step": 28970 }, { "epoch": 0.23698736558040642, "grad_norm": 0.08814159780740738, "learning_rate": 9.989585205229097e-06, "loss": 0.0034, "step": 28980 }, { "epoch": 0.2370691417590056, "grad_norm": 0.16835911571979523, "learning_rate": 9.989539117805755e-06, "loss": 0.0043, "step": 28990 }, { "epoch": 0.2371509179376048, "grad_norm": 0.1371040791273117, "learning_rate": 9.989492928741446e-06, "loss": 0.0074, "step": 29000 }, { "epoch": 0.23723269411620396, "grad_norm": 0.03563931956887245, "learning_rate": 9.98944663803711e-06, "loss": 0.0025, "step": 29010 }, { "epoch": 0.23731447029480313, "grad_norm": 0.06515070050954819, "learning_rate": 9.989400245693692e-06, "loss": 0.0037, "step": 29020 }, { "epoch": 0.2373962464734023, "grad_norm": 0.7657982110977173, "learning_rate": 9.989353751712135e-06, "loss": 0.0032, "step": 29030 }, { "epoch": 0.23747802265200146, "grad_norm": 0.12245962023735046, "learning_rate": 9.989307156093385e-06, "loss": 0.006, "step": 29040 }, { "epoch": 0.23755979883060063, "grad_norm": 0.1619204431772232, "learning_rate": 9.989260458838393e-06, "loss": 0.0041, "step": 29050 }, { "epoch": 0.23764157500919983, "grad_norm": 0.0938524603843689, "learning_rate": 9.98921365994811e-06, "loss": 0.0055, "step": 29060 }, { "epoch": 0.237723351187799, "grad_norm": 0.1412610560655594, "learning_rate": 9.98916675942349e-06, "loss": 0.0049, "step": 29070 }, { "epoch": 0.23780512736639817, "grad_norm": 0.1131434440612793, "learning_rate": 9.989119757265489e-06, "loss": 0.0039, "step": 29080 }, { "epoch": 0.23788690354499734, "grad_norm": 0.06637478619813919, "learning_rate": 9.98907265347506e-06, "loss": 0.0029, "step": 29090 }, { "epoch": 0.2379686797235965, "grad_norm": 0.020183565095067024, "learning_rate": 9.989025448053169e-06, "loss": 0.0051, "step": 29100 }, { "epoch": 0.2380504559021957, "grad_norm": 0.036762308329343796, "learning_rate": 9.98897814100077e-06, "loss": 0.0028, "step": 29110 }, { "epoch": 0.23813223208079487, "grad_norm": 0.03494697064161301, "learning_rate": 9.988930732318835e-06, "loss": 0.0031, "step": 29120 }, { "epoch": 0.23821400825939404, "grad_norm": 0.1576230227947235, "learning_rate": 9.988883222008325e-06, "loss": 0.0038, "step": 29130 }, { "epoch": 0.2382957844379932, "grad_norm": 0.26600077748298645, "learning_rate": 9.988835610070209e-06, "loss": 0.0045, "step": 29140 }, { "epoch": 0.23837756061659238, "grad_norm": 0.1559782773256302, "learning_rate": 9.988787896505454e-06, "loss": 0.0021, "step": 29150 }, { "epoch": 0.23845933679519157, "grad_norm": 0.20257392525672913, "learning_rate": 9.988740081315037e-06, "loss": 0.0029, "step": 29160 }, { "epoch": 0.23854111297379074, "grad_norm": 0.09364306181669235, "learning_rate": 9.988692164499929e-06, "loss": 0.0023, "step": 29170 }, { "epoch": 0.2386228891523899, "grad_norm": 0.218240424990654, "learning_rate": 9.988644146061106e-06, "loss": 0.0034, "step": 29180 }, { "epoch": 0.23870466533098908, "grad_norm": 0.38255342841148376, "learning_rate": 9.988596025999546e-06, "loss": 0.0029, "step": 29190 }, { "epoch": 0.23878644150958825, "grad_norm": 0.19606323540210724, "learning_rate": 9.988547804316232e-06, "loss": 0.0043, "step": 29200 }, { "epoch": 0.23886821768818742, "grad_norm": 0.07035315781831741, "learning_rate": 9.988499481012143e-06, "loss": 0.003, "step": 29210 }, { "epoch": 0.23894999386678661, "grad_norm": 0.26371321082115173, "learning_rate": 9.988451056088264e-06, "loss": 0.0049, "step": 29220 }, { "epoch": 0.23903177004538578, "grad_norm": 0.1954694241285324, "learning_rate": 9.988402529545584e-06, "loss": 0.0037, "step": 29230 }, { "epoch": 0.23911354622398495, "grad_norm": 0.14887675642967224, "learning_rate": 9.988353901385089e-06, "loss": 0.0031, "step": 29240 }, { "epoch": 0.23919532240258412, "grad_norm": 0.07843125611543655, "learning_rate": 9.98830517160777e-06, "loss": 0.0034, "step": 29250 }, { "epoch": 0.2392770985811833, "grad_norm": 0.08566715568304062, "learning_rate": 9.988256340214619e-06, "loss": 0.0041, "step": 29260 }, { "epoch": 0.2393588747597825, "grad_norm": 0.20850975811481476, "learning_rate": 9.988207407206633e-06, "loss": 0.0034, "step": 29270 }, { "epoch": 0.23944065093838166, "grad_norm": 0.053316663950681686, "learning_rate": 9.988158372584808e-06, "loss": 0.003, "step": 29280 }, { "epoch": 0.23952242711698082, "grad_norm": 0.23216582834720612, "learning_rate": 9.98810923635014e-06, "loss": 0.0038, "step": 29290 }, { "epoch": 0.23960420329558, "grad_norm": 0.09379011392593384, "learning_rate": 9.988059998503636e-06, "loss": 0.0039, "step": 29300 }, { "epoch": 0.23968597947417916, "grad_norm": 0.17309585213661194, "learning_rate": 9.988010659046293e-06, "loss": 0.0051, "step": 29310 }, { "epoch": 0.23976775565277836, "grad_norm": 0.3104822635650635, "learning_rate": 9.987961217979118e-06, "loss": 0.0049, "step": 29320 }, { "epoch": 0.23984953183137753, "grad_norm": 0.28537964820861816, "learning_rate": 9.987911675303119e-06, "loss": 0.005, "step": 29330 }, { "epoch": 0.2399313080099767, "grad_norm": 0.045330218970775604, "learning_rate": 9.987862031019306e-06, "loss": 0.004, "step": 29340 }, { "epoch": 0.24001308418857586, "grad_norm": 0.04829338192939758, "learning_rate": 9.987812285128689e-06, "loss": 0.0033, "step": 29350 }, { "epoch": 0.24009486036717503, "grad_norm": 0.15478338301181793, "learning_rate": 9.98776243763228e-06, "loss": 0.0053, "step": 29360 }, { "epoch": 0.2401766365457742, "grad_norm": 0.26988640427589417, "learning_rate": 9.987712488531096e-06, "loss": 0.0028, "step": 29370 }, { "epoch": 0.2402584127243734, "grad_norm": 0.12416134774684906, "learning_rate": 9.987662437826155e-06, "loss": 0.0058, "step": 29380 }, { "epoch": 0.24034018890297257, "grad_norm": 0.1768093705177307, "learning_rate": 9.987612285518476e-06, "loss": 0.0062, "step": 29390 }, { "epoch": 0.24042196508157174, "grad_norm": 0.09757161140441895, "learning_rate": 9.98756203160908e-06, "loss": 0.0034, "step": 29400 }, { "epoch": 0.2405037412601709, "grad_norm": 0.15079809725284576, "learning_rate": 9.98751167609899e-06, "loss": 0.0049, "step": 29410 }, { "epoch": 0.24058551743877007, "grad_norm": 0.06818357110023499, "learning_rate": 9.987461218989236e-06, "loss": 0.0037, "step": 29420 }, { "epoch": 0.24066729361736927, "grad_norm": 0.13310089707374573, "learning_rate": 9.987410660280844e-06, "loss": 0.0033, "step": 29430 }, { "epoch": 0.24074906979596844, "grad_norm": 0.05342699959874153, "learning_rate": 9.987359999974839e-06, "loss": 0.0031, "step": 29440 }, { "epoch": 0.2408308459745676, "grad_norm": 0.2229308784008026, "learning_rate": 9.987309238072259e-06, "loss": 0.0031, "step": 29450 }, { "epoch": 0.24091262215316678, "grad_norm": 0.08739902824163437, "learning_rate": 9.987258374574136e-06, "loss": 0.0024, "step": 29460 }, { "epoch": 0.24099439833176595, "grad_norm": 0.09063857048749924, "learning_rate": 9.987207409481506e-06, "loss": 0.004, "step": 29470 }, { "epoch": 0.24107617451036514, "grad_norm": 0.06429986655712128, "learning_rate": 9.987156342795406e-06, "loss": 0.0031, "step": 29480 }, { "epoch": 0.2411579506889643, "grad_norm": 0.13584253191947937, "learning_rate": 9.987105174516878e-06, "loss": 0.0026, "step": 29490 }, { "epoch": 0.24123972686756348, "grad_norm": 0.09510993957519531, "learning_rate": 9.987053904646964e-06, "loss": 0.0035, "step": 29500 }, { "epoch": 0.24132150304616265, "grad_norm": 0.2565014660358429, "learning_rate": 9.98700253318671e-06, "loss": 0.0038, "step": 29510 }, { "epoch": 0.24140327922476182, "grad_norm": 0.22301076352596283, "learning_rate": 9.986951060137158e-06, "loss": 0.0035, "step": 29520 }, { "epoch": 0.241485055403361, "grad_norm": 0.13276547193527222, "learning_rate": 9.986899485499361e-06, "loss": 0.0037, "step": 29530 }, { "epoch": 0.24156683158196018, "grad_norm": 0.18556179106235504, "learning_rate": 9.986847809274367e-06, "loss": 0.0035, "step": 29540 }, { "epoch": 0.24164860776055935, "grad_norm": 0.11881458014249802, "learning_rate": 9.98679603146323e-06, "loss": 0.0049, "step": 29550 }, { "epoch": 0.24173038393915852, "grad_norm": 0.13921549916267395, "learning_rate": 9.986744152067004e-06, "loss": 0.0034, "step": 29560 }, { "epoch": 0.2418121601177577, "grad_norm": 0.05999980494379997, "learning_rate": 9.986692171086747e-06, "loss": 0.0049, "step": 29570 }, { "epoch": 0.24189393629635686, "grad_norm": 0.019087383523583412, "learning_rate": 9.986640088523518e-06, "loss": 0.0035, "step": 29580 }, { "epoch": 0.24197571247495606, "grad_norm": 0.33481886982917786, "learning_rate": 9.986587904378375e-06, "loss": 0.0038, "step": 29590 }, { "epoch": 0.24205748865355523, "grad_norm": 0.30115416646003723, "learning_rate": 9.986535618652384e-06, "loss": 0.0057, "step": 29600 }, { "epoch": 0.2421392648321544, "grad_norm": 0.09433183073997498, "learning_rate": 9.986483231346609e-06, "loss": 0.0042, "step": 29610 }, { "epoch": 0.24222104101075356, "grad_norm": 0.11107619851827621, "learning_rate": 9.986430742462117e-06, "loss": 0.004, "step": 29620 }, { "epoch": 0.24230281718935273, "grad_norm": 0.19303962588310242, "learning_rate": 9.986378151999977e-06, "loss": 0.004, "step": 29630 }, { "epoch": 0.24238459336795193, "grad_norm": 0.23665472865104675, "learning_rate": 9.986325459961264e-06, "loss": 0.0042, "step": 29640 }, { "epoch": 0.2424663695465511, "grad_norm": 0.1396568864583969, "learning_rate": 9.986272666347045e-06, "loss": 0.0036, "step": 29650 }, { "epoch": 0.24254814572515027, "grad_norm": 0.034823644906282425, "learning_rate": 9.9862197711584e-06, "loss": 0.0032, "step": 29660 }, { "epoch": 0.24262992190374943, "grad_norm": 0.22816702723503113, "learning_rate": 9.986166774396406e-06, "loss": 0.0052, "step": 29670 }, { "epoch": 0.2427116980823486, "grad_norm": 0.06519090384244919, "learning_rate": 9.986113676062142e-06, "loss": 0.0036, "step": 29680 }, { "epoch": 0.24279347426094777, "grad_norm": 0.1048240140080452, "learning_rate": 9.986060476156688e-06, "loss": 0.0045, "step": 29690 }, { "epoch": 0.24287525043954697, "grad_norm": 0.1256895661354065, "learning_rate": 9.98600717468113e-06, "loss": 0.0044, "step": 29700 }, { "epoch": 0.24295702661814614, "grad_norm": 0.07273393124341965, "learning_rate": 9.985953771636551e-06, "loss": 0.0042, "step": 29710 }, { "epoch": 0.2430388027967453, "grad_norm": 0.08080589771270752, "learning_rate": 9.985900267024045e-06, "loss": 0.0028, "step": 29720 }, { "epoch": 0.24312057897534448, "grad_norm": 0.09009871631860733, "learning_rate": 9.985846660844695e-06, "loss": 0.0031, "step": 29730 }, { "epoch": 0.24320235515394364, "grad_norm": 0.08306063711643219, "learning_rate": 9.985792953099595e-06, "loss": 0.0046, "step": 29740 }, { "epoch": 0.24328413133254284, "grad_norm": 0.3469013273715973, "learning_rate": 9.985739143789843e-06, "loss": 0.004, "step": 29750 }, { "epoch": 0.243365907511142, "grad_norm": 0.04614214226603508, "learning_rate": 9.98568523291653e-06, "loss": 0.0043, "step": 29760 }, { "epoch": 0.24344768368974118, "grad_norm": 0.07082819193601608, "learning_rate": 9.985631220480756e-06, "loss": 0.0046, "step": 29770 }, { "epoch": 0.24352945986834035, "grad_norm": 0.18961264193058014, "learning_rate": 9.985577106483622e-06, "loss": 0.0039, "step": 29780 }, { "epoch": 0.24361123604693952, "grad_norm": 0.11713665723800659, "learning_rate": 9.98552289092623e-06, "loss": 0.0036, "step": 29790 }, { "epoch": 0.2436930122255387, "grad_norm": 0.052853550761938095, "learning_rate": 9.985468573809685e-06, "loss": 0.0054, "step": 29800 }, { "epoch": 0.24377478840413788, "grad_norm": 0.04826229810714722, "learning_rate": 9.985414155135092e-06, "loss": 0.0018, "step": 29810 }, { "epoch": 0.24385656458273705, "grad_norm": 0.1690136045217514, "learning_rate": 9.98535963490356e-06, "loss": 0.003, "step": 29820 }, { "epoch": 0.24393834076133622, "grad_norm": 0.24868930876255035, "learning_rate": 9.9853050131162e-06, "loss": 0.0032, "step": 29830 }, { "epoch": 0.2440201169399354, "grad_norm": 0.08858051896095276, "learning_rate": 9.985250289774124e-06, "loss": 0.0035, "step": 29840 }, { "epoch": 0.24410189311853456, "grad_norm": 0.32153573632240295, "learning_rate": 9.98519546487845e-06, "loss": 0.0043, "step": 29850 }, { "epoch": 0.24418366929713375, "grad_norm": 0.1330890953540802, "learning_rate": 9.985140538430289e-06, "loss": 0.0042, "step": 29860 }, { "epoch": 0.24426544547573292, "grad_norm": 0.06971916556358337, "learning_rate": 9.985085510430766e-06, "loss": 0.0035, "step": 29870 }, { "epoch": 0.2443472216543321, "grad_norm": 0.20135541260242462, "learning_rate": 9.985030380880996e-06, "loss": 0.0031, "step": 29880 }, { "epoch": 0.24442899783293126, "grad_norm": 0.10274674743413925, "learning_rate": 9.984975149782107e-06, "loss": 0.0034, "step": 29890 }, { "epoch": 0.24451077401153043, "grad_norm": 0.05276859179139137, "learning_rate": 9.984919817135222e-06, "loss": 0.0032, "step": 29900 }, { "epoch": 0.24459255019012963, "grad_norm": 0.0822422057390213, "learning_rate": 9.984864382941467e-06, "loss": 0.0027, "step": 29910 }, { "epoch": 0.2446743263687288, "grad_norm": 0.4243333637714386, "learning_rate": 9.984808847201974e-06, "loss": 0.0045, "step": 29920 }, { "epoch": 0.24475610254732796, "grad_norm": 0.11909907311201096, "learning_rate": 9.984753209917874e-06, "loss": 0.0046, "step": 29930 }, { "epoch": 0.24483787872592713, "grad_norm": 0.1786869317293167, "learning_rate": 9.984697471090297e-06, "loss": 0.0038, "step": 29940 }, { "epoch": 0.2449196549045263, "grad_norm": 0.12180528789758682, "learning_rate": 9.984641630720382e-06, "loss": 0.0042, "step": 29950 }, { "epoch": 0.2450014310831255, "grad_norm": 0.11211546510457993, "learning_rate": 9.984585688809264e-06, "loss": 0.0039, "step": 29960 }, { "epoch": 0.24508320726172467, "grad_norm": 0.1594473123550415, "learning_rate": 9.984529645358087e-06, "loss": 0.0045, "step": 29970 }, { "epoch": 0.24516498344032384, "grad_norm": 0.08989521116018295, "learning_rate": 9.984473500367987e-06, "loss": 0.0037, "step": 29980 }, { "epoch": 0.245246759618923, "grad_norm": 0.0877685472369194, "learning_rate": 9.984417253840111e-06, "loss": 0.0034, "step": 29990 }, { "epoch": 0.24532853579752217, "grad_norm": 0.20225070416927338, "learning_rate": 9.984360905775603e-06, "loss": 0.0036, "step": 30000 }, { "epoch": 0.24541031197612134, "grad_norm": 0.16536946594715118, "learning_rate": 9.984304456175612e-06, "loss": 0.0037, "step": 30010 }, { "epoch": 0.24549208815472054, "grad_norm": 0.24785934388637543, "learning_rate": 9.984247905041289e-06, "loss": 0.0037, "step": 30020 }, { "epoch": 0.2455738643333197, "grad_norm": 0.08324655890464783, "learning_rate": 9.984191252373786e-06, "loss": 0.0067, "step": 30030 }, { "epoch": 0.24565564051191888, "grad_norm": 0.2706073522567749, "learning_rate": 9.984134498174252e-06, "loss": 0.0037, "step": 30040 }, { "epoch": 0.24573741669051805, "grad_norm": 0.10388575494289398, "learning_rate": 9.984077642443848e-06, "loss": 0.002, "step": 30050 }, { "epoch": 0.24581919286911721, "grad_norm": 0.06507950276136398, "learning_rate": 9.984020685183732e-06, "loss": 0.0031, "step": 30060 }, { "epoch": 0.2459009690477164, "grad_norm": 0.03126433119177818, "learning_rate": 9.983963626395064e-06, "loss": 0.0031, "step": 30070 }, { "epoch": 0.24598274522631558, "grad_norm": 0.08602040261030197, "learning_rate": 9.983906466079006e-06, "loss": 0.0047, "step": 30080 }, { "epoch": 0.24606452140491475, "grad_norm": 0.15132196247577667, "learning_rate": 9.983849204236721e-06, "loss": 0.0031, "step": 30090 }, { "epoch": 0.24614629758351392, "grad_norm": 0.06834075599908829, "learning_rate": 9.983791840869376e-06, "loss": 0.0036, "step": 30100 }, { "epoch": 0.2462280737621131, "grad_norm": 0.12685894966125488, "learning_rate": 9.983734375978142e-06, "loss": 0.0035, "step": 30110 }, { "epoch": 0.24630984994071228, "grad_norm": 0.2033708393573761, "learning_rate": 9.983676809564187e-06, "loss": 0.0038, "step": 30120 }, { "epoch": 0.24639162611931145, "grad_norm": 0.15892840921878815, "learning_rate": 9.983619141628685e-06, "loss": 0.004, "step": 30130 }, { "epoch": 0.24647340229791062, "grad_norm": 0.24892765283584595, "learning_rate": 9.98356137217281e-06, "loss": 0.0038, "step": 30140 }, { "epoch": 0.2465551784765098, "grad_norm": 0.058449018746614456, "learning_rate": 9.983503501197738e-06, "loss": 0.0026, "step": 30150 }, { "epoch": 0.24663695465510896, "grad_norm": 0.05162971839308739, "learning_rate": 9.98344552870465e-06, "loss": 0.0038, "step": 30160 }, { "epoch": 0.24671873083370813, "grad_norm": 0.03749629110097885, "learning_rate": 9.983387454694724e-06, "loss": 0.0064, "step": 30170 }, { "epoch": 0.24680050701230732, "grad_norm": 0.1216573640704155, "learning_rate": 9.983329279169146e-06, "loss": 0.0036, "step": 30180 }, { "epoch": 0.2468822831909065, "grad_norm": 0.19671697914600372, "learning_rate": 9.9832710021291e-06, "loss": 0.0045, "step": 30190 }, { "epoch": 0.24696405936950566, "grad_norm": 0.09353584051132202, "learning_rate": 9.983212623575773e-06, "loss": 0.0039, "step": 30200 }, { "epoch": 0.24704583554810483, "grad_norm": 0.2597227990627289, "learning_rate": 9.983154143510355e-06, "loss": 0.0022, "step": 30210 }, { "epoch": 0.247127611726704, "grad_norm": 0.14042118191719055, "learning_rate": 9.983095561934036e-06, "loss": 0.0036, "step": 30220 }, { "epoch": 0.2472093879053032, "grad_norm": 0.20466691255569458, "learning_rate": 9.98303687884801e-06, "loss": 0.0037, "step": 30230 }, { "epoch": 0.24729116408390237, "grad_norm": 0.3002210259437561, "learning_rate": 9.982978094253473e-06, "loss": 0.0035, "step": 30240 }, { "epoch": 0.24737294026250153, "grad_norm": 0.04824938252568245, "learning_rate": 9.98291920815162e-06, "loss": 0.003, "step": 30250 }, { "epoch": 0.2474547164411007, "grad_norm": 0.12436693161725998, "learning_rate": 9.982860220543655e-06, "loss": 0.0038, "step": 30260 }, { "epoch": 0.24753649261969987, "grad_norm": 0.11006362736225128, "learning_rate": 9.982801131430775e-06, "loss": 0.0036, "step": 30270 }, { "epoch": 0.24761826879829907, "grad_norm": 0.18195903301239014, "learning_rate": 9.982741940814186e-06, "loss": 0.0029, "step": 30280 }, { "epoch": 0.24770004497689824, "grad_norm": 0.1949862390756607, "learning_rate": 9.982682648695095e-06, "loss": 0.0032, "step": 30290 }, { "epoch": 0.2477818211554974, "grad_norm": 0.22049157321453094, "learning_rate": 9.982623255074707e-06, "loss": 0.0055, "step": 30300 }, { "epoch": 0.24786359733409657, "grad_norm": 0.051293060183525085, "learning_rate": 9.982563759954234e-06, "loss": 0.0032, "step": 30310 }, { "epoch": 0.24794537351269574, "grad_norm": 0.10553944110870361, "learning_rate": 9.982504163334885e-06, "loss": 0.0036, "step": 30320 }, { "epoch": 0.2480271496912949, "grad_norm": 0.08058369159698486, "learning_rate": 9.982444465217879e-06, "loss": 0.0035, "step": 30330 }, { "epoch": 0.2481089258698941, "grad_norm": 0.17383034527301788, "learning_rate": 9.982384665604428e-06, "loss": 0.0066, "step": 30340 }, { "epoch": 0.24819070204849328, "grad_norm": 0.08320564776659012, "learning_rate": 9.982324764495751e-06, "loss": 0.0044, "step": 30350 }, { "epoch": 0.24827247822709245, "grad_norm": 0.12716782093048096, "learning_rate": 9.98226476189307e-06, "loss": 0.0031, "step": 30360 }, { "epoch": 0.24835425440569162, "grad_norm": 0.13035482168197632, "learning_rate": 9.982204657797605e-06, "loss": 0.0033, "step": 30370 }, { "epoch": 0.24843603058429078, "grad_norm": 0.14669470489025116, "learning_rate": 9.982144452210581e-06, "loss": 0.0029, "step": 30380 }, { "epoch": 0.24851780676288998, "grad_norm": 0.2219211757183075, "learning_rate": 9.982084145133226e-06, "loss": 0.0056, "step": 30390 }, { "epoch": 0.24859958294148915, "grad_norm": 0.3055313527584076, "learning_rate": 9.982023736566766e-06, "loss": 0.0038, "step": 30400 }, { "epoch": 0.24868135912008832, "grad_norm": 0.15013569593429565, "learning_rate": 9.981963226512435e-06, "loss": 0.0026, "step": 30410 }, { "epoch": 0.2487631352986875, "grad_norm": 0.27115198969841003, "learning_rate": 9.981902614971462e-06, "loss": 0.0037, "step": 30420 }, { "epoch": 0.24884491147728666, "grad_norm": 0.07394866645336151, "learning_rate": 9.981841901945082e-06, "loss": 0.0036, "step": 30430 }, { "epoch": 0.24892668765588585, "grad_norm": 0.23992224037647247, "learning_rate": 9.981781087434535e-06, "loss": 0.0049, "step": 30440 }, { "epoch": 0.24900846383448502, "grad_norm": 0.2942480146884918, "learning_rate": 9.981720171441059e-06, "loss": 0.0025, "step": 30450 }, { "epoch": 0.2490902400130842, "grad_norm": 0.03977743536233902, "learning_rate": 9.981659153965891e-06, "loss": 0.006, "step": 30460 }, { "epoch": 0.24917201619168336, "grad_norm": 0.32771551609039307, "learning_rate": 9.981598035010278e-06, "loss": 0.0034, "step": 30470 }, { "epoch": 0.24925379237028253, "grad_norm": 0.09578067064285278, "learning_rate": 9.981536814575464e-06, "loss": 0.0035, "step": 30480 }, { "epoch": 0.2493355685488817, "grad_norm": 0.10286902636289597, "learning_rate": 9.981475492662696e-06, "loss": 0.0054, "step": 30490 }, { "epoch": 0.2494173447274809, "grad_norm": 0.05297641083598137, "learning_rate": 9.981414069273221e-06, "loss": 0.0035, "step": 30500 }, { "epoch": 0.24949912090608006, "grad_norm": 0.19773074984550476, "learning_rate": 9.981352544408295e-06, "loss": 0.006, "step": 30510 }, { "epoch": 0.24958089708467923, "grad_norm": 0.07078506052494049, "learning_rate": 9.981290918069167e-06, "loss": 0.0025, "step": 30520 }, { "epoch": 0.2496626732632784, "grad_norm": 0.14029629528522491, "learning_rate": 9.981229190257094e-06, "loss": 0.0042, "step": 30530 }, { "epoch": 0.24974444944187757, "grad_norm": 0.09703335911035538, "learning_rate": 9.981167360973334e-06, "loss": 0.0048, "step": 30540 }, { "epoch": 0.24982622562047677, "grad_norm": 0.20194442570209503, "learning_rate": 9.981105430219147e-06, "loss": 0.0035, "step": 30550 }, { "epoch": 0.24990800179907594, "grad_norm": 0.1275997757911682, "learning_rate": 9.981043397995792e-06, "loss": 0.0034, "step": 30560 }, { "epoch": 0.2499897779776751, "grad_norm": 0.1365990936756134, "learning_rate": 9.980981264304535e-06, "loss": 0.0049, "step": 30570 }, { "epoch": 0.2500715541562743, "grad_norm": 0.056551434099674225, "learning_rate": 9.98091902914664e-06, "loss": 0.0046, "step": 30580 }, { "epoch": 0.25015333033487347, "grad_norm": 0.16796128451824188, "learning_rate": 9.980856692523376e-06, "loss": 0.0034, "step": 30590 }, { "epoch": 0.25023510651347264, "grad_norm": 0.089480921626091, "learning_rate": 9.980794254436012e-06, "loss": 0.0057, "step": 30600 }, { "epoch": 0.2503168826920718, "grad_norm": 0.3362720012664795, "learning_rate": 9.980731714885823e-06, "loss": 0.003, "step": 30610 }, { "epoch": 0.250398658870671, "grad_norm": 0.12561851739883423, "learning_rate": 9.980669073874078e-06, "loss": 0.0035, "step": 30620 }, { "epoch": 0.25048043504927014, "grad_norm": 0.26000335812568665, "learning_rate": 9.980606331402056e-06, "loss": 0.005, "step": 30630 }, { "epoch": 0.2505622112278693, "grad_norm": 0.11169230192899704, "learning_rate": 9.980543487471035e-06, "loss": 0.0089, "step": 30640 }, { "epoch": 0.2506439874064685, "grad_norm": 0.15452046692371368, "learning_rate": 9.980480542082293e-06, "loss": 0.0037, "step": 30650 }, { "epoch": 0.25072576358506765, "grad_norm": 0.07955894619226456, "learning_rate": 9.980417495237117e-06, "loss": 0.0078, "step": 30660 }, { "epoch": 0.2508075397636668, "grad_norm": 0.023312313482165337, "learning_rate": 9.980354346936787e-06, "loss": 0.0031, "step": 30670 }, { "epoch": 0.25088931594226604, "grad_norm": 0.20553801953792572, "learning_rate": 9.98029109718259e-06, "loss": 0.0041, "step": 30680 }, { "epoch": 0.2509710921208652, "grad_norm": 0.040519729256629944, "learning_rate": 9.980227745975815e-06, "loss": 0.0037, "step": 30690 }, { "epoch": 0.2510528682994644, "grad_norm": 0.059134867042303085, "learning_rate": 9.980164293317753e-06, "loss": 0.005, "step": 30700 }, { "epoch": 0.25113464447806355, "grad_norm": 0.07376515865325928, "learning_rate": 9.980100739209696e-06, "loss": 0.004, "step": 30710 }, { "epoch": 0.2512164206566627, "grad_norm": 0.20663666725158691, "learning_rate": 9.980037083652942e-06, "loss": 0.0056, "step": 30720 }, { "epoch": 0.2512981968352619, "grad_norm": 0.1021391972899437, "learning_rate": 9.979973326648782e-06, "loss": 0.0042, "step": 30730 }, { "epoch": 0.25137997301386106, "grad_norm": 0.10342566668987274, "learning_rate": 9.979909468198516e-06, "loss": 0.0048, "step": 30740 }, { "epoch": 0.2514617491924602, "grad_norm": 0.14503082633018494, "learning_rate": 9.979845508303449e-06, "loss": 0.0063, "step": 30750 }, { "epoch": 0.2515435253710594, "grad_norm": 0.062236759811639786, "learning_rate": 9.97978144696488e-06, "loss": 0.0036, "step": 30760 }, { "epoch": 0.25162530154965856, "grad_norm": 0.09445244818925858, "learning_rate": 9.979717284184114e-06, "loss": 0.0054, "step": 30770 }, { "epoch": 0.25170707772825773, "grad_norm": 0.23759309947490692, "learning_rate": 9.979653019962461e-06, "loss": 0.0036, "step": 30780 }, { "epoch": 0.25178885390685696, "grad_norm": 0.12357547134160995, "learning_rate": 9.97958865430123e-06, "loss": 0.0031, "step": 30790 }, { "epoch": 0.2518706300854561, "grad_norm": 0.09916208684444427, "learning_rate": 9.979524187201726e-06, "loss": 0.0042, "step": 30800 }, { "epoch": 0.2519524062640553, "grad_norm": 0.08379783481359482, "learning_rate": 9.97945961866527e-06, "loss": 0.0068, "step": 30810 }, { "epoch": 0.25203418244265446, "grad_norm": 0.2028689831495285, "learning_rate": 9.979394948693172e-06, "loss": 0.0036, "step": 30820 }, { "epoch": 0.25211595862125363, "grad_norm": 0.0572599358856678, "learning_rate": 9.979330177286752e-06, "loss": 0.0039, "step": 30830 }, { "epoch": 0.2521977347998528, "grad_norm": 0.28737151622772217, "learning_rate": 9.97926530444733e-06, "loss": 0.0043, "step": 30840 }, { "epoch": 0.25227951097845197, "grad_norm": 0.08587432652711868, "learning_rate": 9.979200330176227e-06, "loss": 0.0032, "step": 30850 }, { "epoch": 0.25236128715705114, "grad_norm": 0.16400913894176483, "learning_rate": 9.979135254474764e-06, "loss": 0.0032, "step": 30860 }, { "epoch": 0.2524430633356503, "grad_norm": 0.11659342795610428, "learning_rate": 9.979070077344268e-06, "loss": 0.0027, "step": 30870 }, { "epoch": 0.2525248395142495, "grad_norm": 0.08452890068292618, "learning_rate": 9.979004798786069e-06, "loss": 0.0028, "step": 30880 }, { "epoch": 0.25260661569284865, "grad_norm": 0.05081219971179962, "learning_rate": 9.978939418801494e-06, "loss": 0.0045, "step": 30890 }, { "epoch": 0.25268839187144787, "grad_norm": 0.07140406966209412, "learning_rate": 9.978873937391877e-06, "loss": 0.0027, "step": 30900 }, { "epoch": 0.25277016805004704, "grad_norm": 0.1975519210100174, "learning_rate": 9.978808354558551e-06, "loss": 0.0024, "step": 30910 }, { "epoch": 0.2528519442286462, "grad_norm": 0.173733189702034, "learning_rate": 9.978742670302853e-06, "loss": 0.0052, "step": 30920 }, { "epoch": 0.2529337204072454, "grad_norm": 0.12556976079940796, "learning_rate": 9.978676884626117e-06, "loss": 0.0037, "step": 30930 }, { "epoch": 0.25301549658584455, "grad_norm": 0.33251017332077026, "learning_rate": 9.978610997529687e-06, "loss": 0.0035, "step": 30940 }, { "epoch": 0.2530972727644437, "grad_norm": 0.12572672963142395, "learning_rate": 9.978545009014905e-06, "loss": 0.0057, "step": 30950 }, { "epoch": 0.2531790489430429, "grad_norm": 0.15636487305164337, "learning_rate": 9.978478919083114e-06, "loss": 0.0037, "step": 30960 }, { "epoch": 0.25326082512164205, "grad_norm": 0.14799916744232178, "learning_rate": 9.97841272773566e-06, "loss": 0.0035, "step": 30970 }, { "epoch": 0.2533426013002412, "grad_norm": 0.10881610959768295, "learning_rate": 9.978346434973893e-06, "loss": 0.0037, "step": 30980 }, { "epoch": 0.2534243774788404, "grad_norm": 0.07530369609594345, "learning_rate": 9.978280040799162e-06, "loss": 0.0041, "step": 30990 }, { "epoch": 0.2535061536574396, "grad_norm": 0.3513501286506653, "learning_rate": 9.97821354521282e-06, "loss": 0.0049, "step": 31000 }, { "epoch": 0.2535879298360388, "grad_norm": 0.10936944931745529, "learning_rate": 9.978146948216221e-06, "loss": 0.003, "step": 31010 }, { "epoch": 0.25366970601463795, "grad_norm": 0.11933876574039459, "learning_rate": 9.978080249810725e-06, "loss": 0.0062, "step": 31020 }, { "epoch": 0.2537514821932371, "grad_norm": 0.15210789442062378, "learning_rate": 9.978013449997684e-06, "loss": 0.0043, "step": 31030 }, { "epoch": 0.2538332583718363, "grad_norm": 0.1547294408082962, "learning_rate": 9.977946548778466e-06, "loss": 0.0039, "step": 31040 }, { "epoch": 0.25391503455043546, "grad_norm": 0.12584559619426727, "learning_rate": 9.977879546154428e-06, "loss": 0.0032, "step": 31050 }, { "epoch": 0.2539968107290346, "grad_norm": 0.07533751428127289, "learning_rate": 9.97781244212694e-06, "loss": 0.0041, "step": 31060 }, { "epoch": 0.2540785869076338, "grad_norm": 0.238404780626297, "learning_rate": 9.977745236697364e-06, "loss": 0.0041, "step": 31070 }, { "epoch": 0.25416036308623297, "grad_norm": 0.26614901423454285, "learning_rate": 9.977677929867072e-06, "loss": 0.0058, "step": 31080 }, { "epoch": 0.25424213926483213, "grad_norm": 0.17876598238945007, "learning_rate": 9.977610521637436e-06, "loss": 0.0039, "step": 31090 }, { "epoch": 0.2543239154434313, "grad_norm": 0.05861027166247368, "learning_rate": 9.977543012009824e-06, "loss": 0.0035, "step": 31100 }, { "epoch": 0.2544056916220305, "grad_norm": 0.15240982174873352, "learning_rate": 9.97747540098562e-06, "loss": 0.0032, "step": 31110 }, { "epoch": 0.2544874678006297, "grad_norm": 0.021809658035635948, "learning_rate": 9.977407688566193e-06, "loss": 0.0031, "step": 31120 }, { "epoch": 0.25456924397922887, "grad_norm": 0.4163854420185089, "learning_rate": 9.977339874752926e-06, "loss": 0.0043, "step": 31130 }, { "epoch": 0.25465102015782803, "grad_norm": 0.17489050328731537, "learning_rate": 9.9772719595472e-06, "loss": 0.0036, "step": 31140 }, { "epoch": 0.2547327963364272, "grad_norm": 0.15910714864730835, "learning_rate": 9.977203942950397e-06, "loss": 0.0047, "step": 31150 }, { "epoch": 0.25481457251502637, "grad_norm": 0.21145451068878174, "learning_rate": 9.977135824963906e-06, "loss": 0.0046, "step": 31160 }, { "epoch": 0.25489634869362554, "grad_norm": 0.18935628235340118, "learning_rate": 9.977067605589112e-06, "loss": 0.0038, "step": 31170 }, { "epoch": 0.2549781248722247, "grad_norm": 0.22063064575195312, "learning_rate": 9.976999284827407e-06, "loss": 0.0034, "step": 31180 }, { "epoch": 0.2550599010508239, "grad_norm": 0.1808961033821106, "learning_rate": 9.976930862680178e-06, "loss": 0.0048, "step": 31190 }, { "epoch": 0.25514167722942305, "grad_norm": 0.1396794617176056, "learning_rate": 9.976862339148825e-06, "loss": 0.0045, "step": 31200 }, { "epoch": 0.2552234534080222, "grad_norm": 0.23738068342208862, "learning_rate": 9.976793714234739e-06, "loss": 0.0041, "step": 31210 }, { "epoch": 0.25530522958662144, "grad_norm": 0.398955374956131, "learning_rate": 9.976724987939319e-06, "loss": 0.004, "step": 31220 }, { "epoch": 0.2553870057652206, "grad_norm": 0.11200984567403793, "learning_rate": 9.976656160263967e-06, "loss": 0.0023, "step": 31230 }, { "epoch": 0.2554687819438198, "grad_norm": 0.20628869533538818, "learning_rate": 9.976587231210084e-06, "loss": 0.004, "step": 31240 }, { "epoch": 0.25555055812241895, "grad_norm": 0.18271492421627045, "learning_rate": 9.976518200779073e-06, "loss": 0.0125, "step": 31250 }, { "epoch": 0.2556323343010181, "grad_norm": 0.1800965815782547, "learning_rate": 9.97644906897234e-06, "loss": 0.0025, "step": 31260 }, { "epoch": 0.2557141104796173, "grad_norm": 0.2695554792881012, "learning_rate": 9.976379835791296e-06, "loss": 0.0047, "step": 31270 }, { "epoch": 0.25579588665821645, "grad_norm": 0.5803450345993042, "learning_rate": 9.97631050123735e-06, "loss": 0.0043, "step": 31280 }, { "epoch": 0.2558776628368156, "grad_norm": 0.13056062161922455, "learning_rate": 9.976241065311914e-06, "loss": 0.003, "step": 31290 }, { "epoch": 0.2559594390154148, "grad_norm": 0.11597202718257904, "learning_rate": 9.976171528016402e-06, "loss": 0.0021, "step": 31300 }, { "epoch": 0.25604121519401396, "grad_norm": 0.3981456756591797, "learning_rate": 9.976101889352232e-06, "loss": 0.0087, "step": 31310 }, { "epoch": 0.2561229913726132, "grad_norm": 0.0684744343161583, "learning_rate": 9.97603214932082e-06, "loss": 0.0027, "step": 31320 }, { "epoch": 0.25620476755121235, "grad_norm": 0.1286219209432602, "learning_rate": 9.97596230792359e-06, "loss": 0.0062, "step": 31330 }, { "epoch": 0.2562865437298115, "grad_norm": 0.08172588050365448, "learning_rate": 9.975892365161964e-06, "loss": 0.003, "step": 31340 }, { "epoch": 0.2563683199084107, "grad_norm": 0.13170598447322845, "learning_rate": 9.975822321037365e-06, "loss": 0.0059, "step": 31350 }, { "epoch": 0.25645009608700986, "grad_norm": 0.12211538851261139, "learning_rate": 9.97575217555122e-06, "loss": 0.0036, "step": 31360 }, { "epoch": 0.25653187226560903, "grad_norm": 0.09863904863595963, "learning_rate": 9.975681928704958e-06, "loss": 0.0054, "step": 31370 }, { "epoch": 0.2566136484442082, "grad_norm": 0.14547249674797058, "learning_rate": 9.975611580500012e-06, "loss": 0.0032, "step": 31380 }, { "epoch": 0.25669542462280737, "grad_norm": 0.12423073500394821, "learning_rate": 9.975541130937814e-06, "loss": 0.003, "step": 31390 }, { "epoch": 0.25677720080140654, "grad_norm": 0.2890661954879761, "learning_rate": 9.975470580019798e-06, "loss": 0.0041, "step": 31400 }, { "epoch": 0.2568589769800057, "grad_norm": 0.12981274724006653, "learning_rate": 9.975399927747403e-06, "loss": 0.0043, "step": 31410 }, { "epoch": 0.2569407531586049, "grad_norm": 0.021742509678006172, "learning_rate": 9.975329174122066e-06, "loss": 0.0038, "step": 31420 }, { "epoch": 0.2570225293372041, "grad_norm": 0.23740732669830322, "learning_rate": 9.975258319145229e-06, "loss": 0.0028, "step": 31430 }, { "epoch": 0.25710430551580327, "grad_norm": 0.12827351689338684, "learning_rate": 9.975187362818337e-06, "loss": 0.0078, "step": 31440 }, { "epoch": 0.25718608169440244, "grad_norm": 0.22302260994911194, "learning_rate": 9.975116305142836e-06, "loss": 0.0047, "step": 31450 }, { "epoch": 0.2572678578730016, "grad_norm": 0.10686307400465012, "learning_rate": 9.97504514612017e-06, "loss": 0.0043, "step": 31460 }, { "epoch": 0.2573496340516008, "grad_norm": 0.10299459099769592, "learning_rate": 9.97497388575179e-06, "loss": 0.0034, "step": 31470 }, { "epoch": 0.25743141023019994, "grad_norm": 0.13655930757522583, "learning_rate": 9.97490252403915e-06, "loss": 0.0047, "step": 31480 }, { "epoch": 0.2575131864087991, "grad_norm": 0.18312397599220276, "learning_rate": 9.9748310609837e-06, "loss": 0.0037, "step": 31490 }, { "epoch": 0.2575949625873983, "grad_norm": 0.007842538878321648, "learning_rate": 9.974759496586898e-06, "loss": 0.0042, "step": 31500 }, { "epoch": 0.25767673876599745, "grad_norm": 0.34257811307907104, "learning_rate": 9.974687830850202e-06, "loss": 0.0035, "step": 31510 }, { "epoch": 0.2577585149445966, "grad_norm": 0.15293219685554504, "learning_rate": 9.97461606377507e-06, "loss": 0.0046, "step": 31520 }, { "epoch": 0.2578402911231958, "grad_norm": 0.2158113569021225, "learning_rate": 9.974544195362967e-06, "loss": 0.0044, "step": 31530 }, { "epoch": 0.257922067301795, "grad_norm": 0.21980537474155426, "learning_rate": 9.974472225615355e-06, "loss": 0.0045, "step": 31540 }, { "epoch": 0.2580038434803942, "grad_norm": 0.14943334460258484, "learning_rate": 9.9744001545337e-06, "loss": 0.0032, "step": 31550 }, { "epoch": 0.25808561965899335, "grad_norm": 0.159418523311615, "learning_rate": 9.97432798211947e-06, "loss": 0.0036, "step": 31560 }, { "epoch": 0.2581673958375925, "grad_norm": 0.12205637991428375, "learning_rate": 9.974255708374134e-06, "loss": 0.0034, "step": 31570 }, { "epoch": 0.2582491720161917, "grad_norm": 0.4005400538444519, "learning_rate": 9.974183333299168e-06, "loss": 0.0059, "step": 31580 }, { "epoch": 0.25833094819479085, "grad_norm": 0.046164244413375854, "learning_rate": 9.974110856896043e-06, "loss": 0.003, "step": 31590 }, { "epoch": 0.25841272437339, "grad_norm": 0.05900970846414566, "learning_rate": 9.974038279166239e-06, "loss": 0.0029, "step": 31600 }, { "epoch": 0.2584945005519892, "grad_norm": 0.2649186849594116, "learning_rate": 9.973965600111231e-06, "loss": 0.0028, "step": 31610 }, { "epoch": 0.25857627673058836, "grad_norm": 0.10769221931695938, "learning_rate": 9.9738928197325e-06, "loss": 0.0042, "step": 31620 }, { "epoch": 0.25865805290918753, "grad_norm": 0.3292199373245239, "learning_rate": 9.973819938031531e-06, "loss": 0.0027, "step": 31630 }, { "epoch": 0.25873982908778675, "grad_norm": 0.15237268805503845, "learning_rate": 9.973746955009804e-06, "loss": 0.0051, "step": 31640 }, { "epoch": 0.2588216052663859, "grad_norm": 0.08698438853025436, "learning_rate": 9.97367387066881e-06, "loss": 0.0045, "step": 31650 }, { "epoch": 0.2589033814449851, "grad_norm": 0.08831162750720978, "learning_rate": 9.973600685010038e-06, "loss": 0.0039, "step": 31660 }, { "epoch": 0.25898515762358426, "grad_norm": 0.3566472828388214, "learning_rate": 9.973527398034976e-06, "loss": 0.0037, "step": 31670 }, { "epoch": 0.25906693380218343, "grad_norm": 0.12365186214447021, "learning_rate": 9.973454009745119e-06, "loss": 0.0028, "step": 31680 }, { "epoch": 0.2591487099807826, "grad_norm": 0.09525739401578903, "learning_rate": 9.973380520141959e-06, "loss": 0.0031, "step": 31690 }, { "epoch": 0.25923048615938177, "grad_norm": 0.13298070430755615, "learning_rate": 9.973306929226998e-06, "loss": 0.0053, "step": 31700 }, { "epoch": 0.25931226233798094, "grad_norm": 0.15795041620731354, "learning_rate": 9.973233237001731e-06, "loss": 0.0053, "step": 31710 }, { "epoch": 0.2593940385165801, "grad_norm": 0.14047011733055115, "learning_rate": 9.97315944346766e-06, "loss": 0.0041, "step": 31720 }, { "epoch": 0.2594758146951793, "grad_norm": 0.3381120562553406, "learning_rate": 9.97308554862629e-06, "loss": 0.0038, "step": 31730 }, { "epoch": 0.25955759087377844, "grad_norm": 0.020603973418474197, "learning_rate": 9.973011552479123e-06, "loss": 0.0025, "step": 31740 }, { "epoch": 0.25963936705237767, "grad_norm": 0.04624088108539581, "learning_rate": 9.97293745502767e-06, "loss": 0.0028, "step": 31750 }, { "epoch": 0.25972114323097684, "grad_norm": 0.09648391604423523, "learning_rate": 9.97286325627344e-06, "loss": 0.0025, "step": 31760 }, { "epoch": 0.259802919409576, "grad_norm": 0.10039570927619934, "learning_rate": 9.97278895621794e-06, "loss": 0.0046, "step": 31770 }, { "epoch": 0.2598846955881752, "grad_norm": 0.04791082814335823, "learning_rate": 9.97271455486269e-06, "loss": 0.0035, "step": 31780 }, { "epoch": 0.25996647176677434, "grad_norm": 0.16118870675563812, "learning_rate": 9.972640052209201e-06, "loss": 0.0033, "step": 31790 }, { "epoch": 0.2600482479453735, "grad_norm": 0.09143838286399841, "learning_rate": 9.972565448258993e-06, "loss": 0.0031, "step": 31800 }, { "epoch": 0.2601300241239727, "grad_norm": 0.17121416330337524, "learning_rate": 9.972490743013584e-06, "loss": 0.0034, "step": 31810 }, { "epoch": 0.26021180030257185, "grad_norm": 0.09726286679506302, "learning_rate": 9.972415936474497e-06, "loss": 0.0054, "step": 31820 }, { "epoch": 0.260293576481171, "grad_norm": 0.0617261603474617, "learning_rate": 9.972341028643256e-06, "loss": 0.0034, "step": 31830 }, { "epoch": 0.2603753526597702, "grad_norm": 0.1403730809688568, "learning_rate": 9.972266019521387e-06, "loss": 0.0043, "step": 31840 }, { "epoch": 0.26045712883836936, "grad_norm": 0.1667250394821167, "learning_rate": 9.972190909110415e-06, "loss": 0.004, "step": 31850 }, { "epoch": 0.2605389050169686, "grad_norm": 0.2784923017024994, "learning_rate": 9.972115697411875e-06, "loss": 0.0042, "step": 31860 }, { "epoch": 0.26062068119556775, "grad_norm": 0.11663715541362762, "learning_rate": 9.972040384427295e-06, "loss": 0.0044, "step": 31870 }, { "epoch": 0.2607024573741669, "grad_norm": 0.06916654109954834, "learning_rate": 9.971964970158214e-06, "loss": 0.0033, "step": 31880 }, { "epoch": 0.2607842335527661, "grad_norm": 0.08498069643974304, "learning_rate": 9.971889454606162e-06, "loss": 0.0051, "step": 31890 }, { "epoch": 0.26086600973136526, "grad_norm": 0.23028314113616943, "learning_rate": 9.971813837772682e-06, "loss": 0.0052, "step": 31900 }, { "epoch": 0.2609477859099644, "grad_norm": 0.0880027636885643, "learning_rate": 9.971738119659311e-06, "loss": 0.0027, "step": 31910 }, { "epoch": 0.2610295620885636, "grad_norm": 0.04768969118595123, "learning_rate": 9.971662300267595e-06, "loss": 0.0034, "step": 31920 }, { "epoch": 0.26111133826716276, "grad_norm": 0.21741335093975067, "learning_rate": 9.971586379599077e-06, "loss": 0.0029, "step": 31930 }, { "epoch": 0.26119311444576193, "grad_norm": 0.15329259634017944, "learning_rate": 9.971510357655303e-06, "loss": 0.0039, "step": 31940 }, { "epoch": 0.2612748906243611, "grad_norm": 0.04385404661297798, "learning_rate": 9.971434234437822e-06, "loss": 0.0061, "step": 31950 }, { "epoch": 0.2613566668029603, "grad_norm": 0.08682530373334885, "learning_rate": 9.971358009948184e-06, "loss": 0.0045, "step": 31960 }, { "epoch": 0.2614384429815595, "grad_norm": 0.19512826204299927, "learning_rate": 9.971281684187943e-06, "loss": 0.0038, "step": 31970 }, { "epoch": 0.26152021916015866, "grad_norm": 0.23161272704601288, "learning_rate": 9.971205257158653e-06, "loss": 0.0022, "step": 31980 }, { "epoch": 0.26160199533875783, "grad_norm": 0.13521145284175873, "learning_rate": 9.971128728861871e-06, "loss": 0.004, "step": 31990 }, { "epoch": 0.261683771517357, "grad_norm": 0.09818986803293228, "learning_rate": 9.971052099299157e-06, "loss": 0.0023, "step": 32000 }, { "epoch": 0.26176554769595617, "grad_norm": 0.253371000289917, "learning_rate": 9.97097536847207e-06, "loss": 0.0033, "step": 32010 }, { "epoch": 0.26184732387455534, "grad_norm": 0.23470553755760193, "learning_rate": 9.970898536382175e-06, "loss": 0.0033, "step": 32020 }, { "epoch": 0.2619291000531545, "grad_norm": 0.09997791796922684, "learning_rate": 9.970821603031033e-06, "loss": 0.0026, "step": 32030 }, { "epoch": 0.2620108762317537, "grad_norm": 0.1320853978395462, "learning_rate": 9.970744568420219e-06, "loss": 0.0026, "step": 32040 }, { "epoch": 0.26209265241035284, "grad_norm": 0.08136046677827835, "learning_rate": 9.970667432551295e-06, "loss": 0.0026, "step": 32050 }, { "epoch": 0.262174428588952, "grad_norm": 0.06531357020139694, "learning_rate": 9.970590195425836e-06, "loss": 0.0024, "step": 32060 }, { "epoch": 0.26225620476755124, "grad_norm": 0.008423998020589352, "learning_rate": 9.970512857045414e-06, "loss": 0.0038, "step": 32070 }, { "epoch": 0.2623379809461504, "grad_norm": 0.08396115899085999, "learning_rate": 9.970435417411605e-06, "loss": 0.0019, "step": 32080 }, { "epoch": 0.2624197571247496, "grad_norm": 0.07367779314517975, "learning_rate": 9.970357876525987e-06, "loss": 0.0057, "step": 32090 }, { "epoch": 0.26250153330334874, "grad_norm": 0.0211067795753479, "learning_rate": 9.970280234390138e-06, "loss": 0.0033, "step": 32100 }, { "epoch": 0.2625833094819479, "grad_norm": 0.07966393977403641, "learning_rate": 9.97020249100564e-06, "loss": 0.0033, "step": 32110 }, { "epoch": 0.2626650856605471, "grad_norm": 0.1112782210111618, "learning_rate": 9.97012464637408e-06, "loss": 0.0043, "step": 32120 }, { "epoch": 0.26274686183914625, "grad_norm": 0.05576811358332634, "learning_rate": 9.970046700497039e-06, "loss": 0.0023, "step": 32130 }, { "epoch": 0.2628286380177454, "grad_norm": 0.12967677414417267, "learning_rate": 9.969968653376108e-06, "loss": 0.0029, "step": 32140 }, { "epoch": 0.2629104141963446, "grad_norm": 0.11173447221517563, "learning_rate": 9.969890505012873e-06, "loss": 0.0031, "step": 32150 }, { "epoch": 0.26299219037494376, "grad_norm": 0.04107612371444702, "learning_rate": 9.969812255408931e-06, "loss": 0.0021, "step": 32160 }, { "epoch": 0.2630739665535429, "grad_norm": 0.1098630279302597, "learning_rate": 9.969733904565875e-06, "loss": 0.0037, "step": 32170 }, { "epoch": 0.26315574273214215, "grad_norm": 1.9995821714401245, "learning_rate": 9.969655452485298e-06, "loss": 0.0027, "step": 32180 }, { "epoch": 0.2632375189107413, "grad_norm": 0.05177875608205795, "learning_rate": 9.969576899168799e-06, "loss": 0.0036, "step": 32190 }, { "epoch": 0.2633192950893405, "grad_norm": 0.058546438813209534, "learning_rate": 9.96949824461798e-06, "loss": 0.0042, "step": 32200 }, { "epoch": 0.26340107126793966, "grad_norm": 0.2758784294128418, "learning_rate": 9.969419488834442e-06, "loss": 0.004, "step": 32210 }, { "epoch": 0.2634828474465388, "grad_norm": 0.06941255927085876, "learning_rate": 9.96934063181979e-06, "loss": 0.0034, "step": 32220 }, { "epoch": 0.263564623625138, "grad_norm": 0.10726679861545563, "learning_rate": 9.969261673575629e-06, "loss": 0.0041, "step": 32230 }, { "epoch": 0.26364639980373716, "grad_norm": 0.20381638407707214, "learning_rate": 9.969182614103573e-06, "loss": 0.0024, "step": 32240 }, { "epoch": 0.26372817598233633, "grad_norm": 0.11536858230829239, "learning_rate": 9.969103453405223e-06, "loss": 0.0037, "step": 32250 }, { "epoch": 0.2638099521609355, "grad_norm": 0.12174735963344574, "learning_rate": 9.969024191482197e-06, "loss": 0.0036, "step": 32260 }, { "epoch": 0.26389172833953467, "grad_norm": 0.10088640451431274, "learning_rate": 9.968944828336112e-06, "loss": 0.0019, "step": 32270 }, { "epoch": 0.2639735045181339, "grad_norm": 0.1400008350610733, "learning_rate": 9.96886536396858e-06, "loss": 0.0043, "step": 32280 }, { "epoch": 0.26405528069673306, "grad_norm": 0.19362688064575195, "learning_rate": 9.968785798381222e-06, "loss": 0.0047, "step": 32290 }, { "epoch": 0.26413705687533223, "grad_norm": 0.04874834045767784, "learning_rate": 9.968706131575658e-06, "loss": 0.0036, "step": 32300 }, { "epoch": 0.2642188330539314, "grad_norm": 0.05023250728845596, "learning_rate": 9.968626363553511e-06, "loss": 0.0037, "step": 32310 }, { "epoch": 0.26430060923253057, "grad_norm": 0.1800687462091446, "learning_rate": 9.968546494316407e-06, "loss": 0.0044, "step": 32320 }, { "epoch": 0.26438238541112974, "grad_norm": 0.10881569236516953, "learning_rate": 9.968466523865971e-06, "loss": 0.0029, "step": 32330 }, { "epoch": 0.2644641615897289, "grad_norm": 0.06384467333555222, "learning_rate": 9.968386452203836e-06, "loss": 0.0048, "step": 32340 }, { "epoch": 0.2645459377683281, "grad_norm": 0.06685605645179749, "learning_rate": 9.968306279331629e-06, "loss": 0.0028, "step": 32350 }, { "epoch": 0.26462771394692725, "grad_norm": 0.15402254462242126, "learning_rate": 9.968226005250984e-06, "loss": 0.0032, "step": 32360 }, { "epoch": 0.2647094901255264, "grad_norm": 0.04915963485836983, "learning_rate": 9.968145629963538e-06, "loss": 0.0028, "step": 32370 }, { "epoch": 0.2647912663041256, "grad_norm": 0.1625746488571167, "learning_rate": 9.968065153470928e-06, "loss": 0.0068, "step": 32380 }, { "epoch": 0.2648730424827248, "grad_norm": 0.15251512825489044, "learning_rate": 9.96798457577479e-06, "loss": 0.0042, "step": 32390 }, { "epoch": 0.264954818661324, "grad_norm": 0.10875159502029419, "learning_rate": 9.96790389687677e-06, "loss": 0.0045, "step": 32400 }, { "epoch": 0.26503659483992315, "grad_norm": 0.3411659896373749, "learning_rate": 9.967823116778507e-06, "loss": 0.0044, "step": 32410 }, { "epoch": 0.2651183710185223, "grad_norm": 0.09862750768661499, "learning_rate": 9.967742235481652e-06, "loss": 0.004, "step": 32420 }, { "epoch": 0.2652001471971215, "grad_norm": 0.19697855412960052, "learning_rate": 9.967661252987848e-06, "loss": 0.0031, "step": 32430 }, { "epoch": 0.26528192337572065, "grad_norm": 0.088028185069561, "learning_rate": 9.967580169298746e-06, "loss": 0.0028, "step": 32440 }, { "epoch": 0.2653636995543198, "grad_norm": 0.20521123707294464, "learning_rate": 9.967498984416001e-06, "loss": 0.0038, "step": 32450 }, { "epoch": 0.265445475732919, "grad_norm": 0.34203436970710754, "learning_rate": 9.967417698341262e-06, "loss": 0.0042, "step": 32460 }, { "epoch": 0.26552725191151816, "grad_norm": 0.21757103502750397, "learning_rate": 9.967336311076186e-06, "loss": 0.0047, "step": 32470 }, { "epoch": 0.2656090280901173, "grad_norm": 0.281819224357605, "learning_rate": 9.967254822622431e-06, "loss": 0.0045, "step": 32480 }, { "epoch": 0.2656908042687165, "grad_norm": 0.08705760538578033, "learning_rate": 9.96717323298166e-06, "loss": 0.0025, "step": 32490 }, { "epoch": 0.2657725804473157, "grad_norm": 0.09532129764556885, "learning_rate": 9.967091542155532e-06, "loss": 0.0052, "step": 32500 }, { "epoch": 0.2658543566259149, "grad_norm": 0.07354583591222763, "learning_rate": 9.967009750145711e-06, "loss": 0.0027, "step": 32510 }, { "epoch": 0.26593613280451406, "grad_norm": 0.09319949150085449, "learning_rate": 9.966927856953865e-06, "loss": 0.0039, "step": 32520 }, { "epoch": 0.2660179089831132, "grad_norm": 0.19730345904827118, "learning_rate": 9.966845862581662e-06, "loss": 0.0035, "step": 32530 }, { "epoch": 0.2660996851617124, "grad_norm": 0.08649861812591553, "learning_rate": 9.96676376703077e-06, "loss": 0.0044, "step": 32540 }, { "epoch": 0.26618146134031156, "grad_norm": 0.266086220741272, "learning_rate": 9.966681570302862e-06, "loss": 0.0026, "step": 32550 }, { "epoch": 0.26626323751891073, "grad_norm": 0.1937081515789032, "learning_rate": 9.966599272399615e-06, "loss": 0.0034, "step": 32560 }, { "epoch": 0.2663450136975099, "grad_norm": 0.3248533010482788, "learning_rate": 9.966516873322704e-06, "loss": 0.0042, "step": 32570 }, { "epoch": 0.26642678987610907, "grad_norm": 0.11896586418151855, "learning_rate": 9.966434373073806e-06, "loss": 0.0047, "step": 32580 }, { "epoch": 0.26650856605470824, "grad_norm": 0.16915659606456757, "learning_rate": 9.966351771654604e-06, "loss": 0.002, "step": 32590 }, { "epoch": 0.26659034223330746, "grad_norm": 0.12224698811769485, "learning_rate": 9.966269069066778e-06, "loss": 0.002, "step": 32600 }, { "epoch": 0.26667211841190663, "grad_norm": 0.30624914169311523, "learning_rate": 9.966186265312016e-06, "loss": 0.0039, "step": 32610 }, { "epoch": 0.2667538945905058, "grad_norm": 0.1079375296831131, "learning_rate": 9.966103360392003e-06, "loss": 0.003, "step": 32620 }, { "epoch": 0.26683567076910497, "grad_norm": 0.08842694014310837, "learning_rate": 9.966020354308427e-06, "loss": 0.0032, "step": 32630 }, { "epoch": 0.26691744694770414, "grad_norm": 0.17042680084705353, "learning_rate": 9.96593724706298e-06, "loss": 0.0035, "step": 32640 }, { "epoch": 0.2669992231263033, "grad_norm": 0.04365761950612068, "learning_rate": 9.965854038657354e-06, "loss": 0.0025, "step": 32650 }, { "epoch": 0.2670809993049025, "grad_norm": 0.16950038075447083, "learning_rate": 9.965770729093247e-06, "loss": 0.0052, "step": 32660 }, { "epoch": 0.26716277548350165, "grad_norm": 0.06035035476088524, "learning_rate": 9.965687318372352e-06, "loss": 0.0032, "step": 32670 }, { "epoch": 0.2672445516621008, "grad_norm": 0.1351367086172104, "learning_rate": 9.965603806496371e-06, "loss": 0.0027, "step": 32680 }, { "epoch": 0.2673263278407, "grad_norm": 0.04062679037451744, "learning_rate": 9.965520193467004e-06, "loss": 0.0022, "step": 32690 }, { "epoch": 0.26740810401929915, "grad_norm": 0.09648934751749039, "learning_rate": 9.965436479285953e-06, "loss": 0.0027, "step": 32700 }, { "epoch": 0.2674898801978984, "grad_norm": 0.04802500829100609, "learning_rate": 9.965352663954926e-06, "loss": 0.0038, "step": 32710 }, { "epoch": 0.26757165637649755, "grad_norm": 0.08292075246572495, "learning_rate": 9.965268747475629e-06, "loss": 0.0035, "step": 32720 }, { "epoch": 0.2676534325550967, "grad_norm": 0.21893388032913208, "learning_rate": 9.965184729849773e-06, "loss": 0.0035, "step": 32730 }, { "epoch": 0.2677352087336959, "grad_norm": 0.09615550190210342, "learning_rate": 9.965100611079066e-06, "loss": 0.003, "step": 32740 }, { "epoch": 0.26781698491229505, "grad_norm": 0.6522821187973022, "learning_rate": 9.965016391165225e-06, "loss": 0.003, "step": 32750 }, { "epoch": 0.2678987610908942, "grad_norm": 0.05718383938074112, "learning_rate": 9.964932070109963e-06, "loss": 0.0034, "step": 32760 }, { "epoch": 0.2679805372694934, "grad_norm": 0.051591675728559494, "learning_rate": 9.964847647915002e-06, "loss": 0.0026, "step": 32770 }, { "epoch": 0.26806231344809256, "grad_norm": 0.19971153140068054, "learning_rate": 9.964763124582055e-06, "loss": 0.0027, "step": 32780 }, { "epoch": 0.26814408962669173, "grad_norm": 0.3483913242816925, "learning_rate": 9.96467850011285e-06, "loss": 0.0057, "step": 32790 }, { "epoch": 0.2682258658052909, "grad_norm": 0.3134402930736542, "learning_rate": 9.964593774509107e-06, "loss": 0.0039, "step": 32800 }, { "epoch": 0.26830764198389007, "grad_norm": 0.24857333302497864, "learning_rate": 9.964508947772555e-06, "loss": 0.0027, "step": 32810 }, { "epoch": 0.2683894181624893, "grad_norm": 0.053831733763217926, "learning_rate": 9.96442401990492e-06, "loss": 0.0044, "step": 32820 }, { "epoch": 0.26847119434108846, "grad_norm": 0.16257941722869873, "learning_rate": 9.964338990907932e-06, "loss": 0.004, "step": 32830 }, { "epoch": 0.26855297051968763, "grad_norm": 0.11950912326574326, "learning_rate": 9.964253860783325e-06, "loss": 0.0033, "step": 32840 }, { "epoch": 0.2686347466982868, "grad_norm": 0.023263389244675636, "learning_rate": 9.96416862953283e-06, "loss": 0.0054, "step": 32850 }, { "epoch": 0.26871652287688597, "grad_norm": 0.0728955939412117, "learning_rate": 9.964083297158184e-06, "loss": 0.0025, "step": 32860 }, { "epoch": 0.26879829905548513, "grad_norm": 0.08383702486753464, "learning_rate": 9.963997863661127e-06, "loss": 0.0041, "step": 32870 }, { "epoch": 0.2688800752340843, "grad_norm": 0.27439644932746887, "learning_rate": 9.9639123290434e-06, "loss": 0.0047, "step": 32880 }, { "epoch": 0.2689618514126835, "grad_norm": 0.17463064193725586, "learning_rate": 9.963826693306742e-06, "loss": 0.0033, "step": 32890 }, { "epoch": 0.26904362759128264, "grad_norm": 0.04171951115131378, "learning_rate": 9.9637409564529e-06, "loss": 0.0024, "step": 32900 }, { "epoch": 0.2691254037698818, "grad_norm": 0.1611527055501938, "learning_rate": 9.963655118483622e-06, "loss": 0.0035, "step": 32910 }, { "epoch": 0.26920717994848103, "grad_norm": 0.1466793268918991, "learning_rate": 9.963569179400654e-06, "loss": 0.0063, "step": 32920 }, { "epoch": 0.2692889561270802, "grad_norm": 0.39432647824287415, "learning_rate": 9.963483139205745e-06, "loss": 0.0037, "step": 32930 }, { "epoch": 0.2693707323056794, "grad_norm": 0.22740153968334198, "learning_rate": 9.963396997900651e-06, "loss": 0.0061, "step": 32940 }, { "epoch": 0.26945250848427854, "grad_norm": 0.03788028657436371, "learning_rate": 9.963310755487126e-06, "loss": 0.0021, "step": 32950 }, { "epoch": 0.2695342846628777, "grad_norm": 0.010257272981107235, "learning_rate": 9.963224411966927e-06, "loss": 0.0035, "step": 32960 }, { "epoch": 0.2696160608414769, "grad_norm": 0.10696396231651306, "learning_rate": 9.963137967341811e-06, "loss": 0.0044, "step": 32970 }, { "epoch": 0.26969783702007605, "grad_norm": 0.11154454201459885, "learning_rate": 9.963051421613542e-06, "loss": 0.0026, "step": 32980 }, { "epoch": 0.2697796131986752, "grad_norm": 0.11842430382966995, "learning_rate": 9.96296477478388e-06, "loss": 0.005, "step": 32990 }, { "epoch": 0.2698613893772744, "grad_norm": 0.14125658571720123, "learning_rate": 9.962878026854592e-06, "loss": 0.0043, "step": 33000 }, { "epoch": 0.26994316555587355, "grad_norm": 0.20971974730491638, "learning_rate": 9.962791177827446e-06, "loss": 0.0031, "step": 33010 }, { "epoch": 0.2700249417344727, "grad_norm": 0.2762783169746399, "learning_rate": 9.962704227704208e-06, "loss": 0.0039, "step": 33020 }, { "epoch": 0.27010671791307195, "grad_norm": 0.11655617505311966, "learning_rate": 9.962617176486652e-06, "loss": 0.0029, "step": 33030 }, { "epoch": 0.2701884940916711, "grad_norm": 0.012912265956401825, "learning_rate": 9.962530024176548e-06, "loss": 0.0032, "step": 33040 }, { "epoch": 0.2702702702702703, "grad_norm": 0.10760357230901718, "learning_rate": 9.962442770775675e-06, "loss": 0.0034, "step": 33050 }, { "epoch": 0.27035204644886945, "grad_norm": 0.07641231268644333, "learning_rate": 9.96235541628581e-06, "loss": 0.0031, "step": 33060 }, { "epoch": 0.2704338226274686, "grad_norm": 0.1356504112482071, "learning_rate": 9.962267960708732e-06, "loss": 0.0065, "step": 33070 }, { "epoch": 0.2705155988060678, "grad_norm": 0.03084523417055607, "learning_rate": 9.96218040404622e-06, "loss": 0.0036, "step": 33080 }, { "epoch": 0.27059737498466696, "grad_norm": 0.019048728048801422, "learning_rate": 9.962092746300061e-06, "loss": 0.0048, "step": 33090 }, { "epoch": 0.27067915116326613, "grad_norm": 0.09358157962560654, "learning_rate": 9.96200498747204e-06, "loss": 0.0027, "step": 33100 }, { "epoch": 0.2707609273418653, "grad_norm": 0.13790787756443024, "learning_rate": 9.961917127563943e-06, "loss": 0.0036, "step": 33110 }, { "epoch": 0.27084270352046447, "grad_norm": 0.10459201037883759, "learning_rate": 9.96182916657756e-06, "loss": 0.0036, "step": 33120 }, { "epoch": 0.27092447969906364, "grad_norm": 0.10663154721260071, "learning_rate": 9.961741104514684e-06, "loss": 0.0039, "step": 33130 }, { "epoch": 0.27100625587766286, "grad_norm": 0.08851724117994308, "learning_rate": 9.961652941377112e-06, "loss": 0.0035, "step": 33140 }, { "epoch": 0.27108803205626203, "grad_norm": 0.05993608012795448, "learning_rate": 9.961564677166634e-06, "loss": 0.0028, "step": 33150 }, { "epoch": 0.2711698082348612, "grad_norm": 0.1299789845943451, "learning_rate": 9.961476311885048e-06, "loss": 0.0049, "step": 33160 }, { "epoch": 0.27125158441346037, "grad_norm": 0.14994816482067108, "learning_rate": 9.96138784553416e-06, "loss": 0.0029, "step": 33170 }, { "epoch": 0.27133336059205954, "grad_norm": 0.1851648986339569, "learning_rate": 9.961299278115768e-06, "loss": 0.0037, "step": 33180 }, { "epoch": 0.2714151367706587, "grad_norm": 0.33854231238365173, "learning_rate": 9.961210609631677e-06, "loss": 0.0086, "step": 33190 }, { "epoch": 0.2714969129492579, "grad_norm": 0.08240925520658493, "learning_rate": 9.961121840083693e-06, "loss": 0.003, "step": 33200 }, { "epoch": 0.27157868912785704, "grad_norm": 0.09341567754745483, "learning_rate": 9.961032969473624e-06, "loss": 0.0021, "step": 33210 }, { "epoch": 0.2716604653064562, "grad_norm": 0.14023061096668243, "learning_rate": 9.960943997803281e-06, "loss": 0.0026, "step": 33220 }, { "epoch": 0.2717422414850554, "grad_norm": 0.07364361733198166, "learning_rate": 9.960854925074478e-06, "loss": 0.0023, "step": 33230 }, { "epoch": 0.2718240176636546, "grad_norm": 0.12792037427425385, "learning_rate": 9.960765751289026e-06, "loss": 0.0026, "step": 33240 }, { "epoch": 0.2719057938422538, "grad_norm": 0.05157122761011124, "learning_rate": 9.960676476448744e-06, "loss": 0.0028, "step": 33250 }, { "epoch": 0.27198757002085294, "grad_norm": 0.08564218878746033, "learning_rate": 9.96058710055545e-06, "loss": 0.002, "step": 33260 }, { "epoch": 0.2720693461994521, "grad_norm": 0.10034143924713135, "learning_rate": 9.960497623610964e-06, "loss": 0.0029, "step": 33270 }, { "epoch": 0.2721511223780513, "grad_norm": 0.07474163919687271, "learning_rate": 9.960408045617109e-06, "loss": 0.003, "step": 33280 }, { "epoch": 0.27223289855665045, "grad_norm": 0.16352827847003937, "learning_rate": 9.960318366575711e-06, "loss": 0.0038, "step": 33290 }, { "epoch": 0.2723146747352496, "grad_norm": 0.062171295285224915, "learning_rate": 9.960228586488596e-06, "loss": 0.0031, "step": 33300 }, { "epoch": 0.2723964509138488, "grad_norm": 0.21188931167125702, "learning_rate": 9.960138705357593e-06, "loss": 0.0023, "step": 33310 }, { "epoch": 0.27247822709244796, "grad_norm": 0.15456923842430115, "learning_rate": 9.960048723184532e-06, "loss": 0.004, "step": 33320 }, { "epoch": 0.2725600032710471, "grad_norm": 0.10473193973302841, "learning_rate": 9.959958639971249e-06, "loss": 0.0029, "step": 33330 }, { "epoch": 0.2726417794496463, "grad_norm": 0.10532142221927643, "learning_rate": 9.959868455719572e-06, "loss": 0.003, "step": 33340 }, { "epoch": 0.2727235556282455, "grad_norm": 0.06440679728984833, "learning_rate": 9.959778170431347e-06, "loss": 0.0026, "step": 33350 }, { "epoch": 0.2728053318068447, "grad_norm": 0.08558234572410583, "learning_rate": 9.959687784108408e-06, "loss": 0.0036, "step": 33360 }, { "epoch": 0.27288710798544386, "grad_norm": 0.048027850687503815, "learning_rate": 9.959597296752596e-06, "loss": 0.0033, "step": 33370 }, { "epoch": 0.272968884164043, "grad_norm": 0.10314057767391205, "learning_rate": 9.959506708365758e-06, "loss": 0.0029, "step": 33380 }, { "epoch": 0.2730506603426422, "grad_norm": 0.07549749314785004, "learning_rate": 9.959416018949737e-06, "loss": 0.003, "step": 33390 }, { "epoch": 0.27313243652124136, "grad_norm": 0.0968359187245369, "learning_rate": 9.95932522850638e-06, "loss": 0.0032, "step": 33400 }, { "epoch": 0.27321421269984053, "grad_norm": 0.19082096219062805, "learning_rate": 9.959234337037536e-06, "loss": 0.0043, "step": 33410 }, { "epoch": 0.2732959888784397, "grad_norm": 0.0942334458231926, "learning_rate": 9.959143344545058e-06, "loss": 0.0046, "step": 33420 }, { "epoch": 0.27337776505703887, "grad_norm": 0.05627203360199928, "learning_rate": 9.9590522510308e-06, "loss": 0.002, "step": 33430 }, { "epoch": 0.27345954123563804, "grad_norm": 0.008753499016165733, "learning_rate": 9.958961056496615e-06, "loss": 0.0024, "step": 33440 }, { "epoch": 0.2735413174142372, "grad_norm": 0.03993803262710571, "learning_rate": 9.958869760944363e-06, "loss": 0.0025, "step": 33450 }, { "epoch": 0.27362309359283643, "grad_norm": 0.12118694186210632, "learning_rate": 9.958778364375905e-06, "loss": 0.0037, "step": 33460 }, { "epoch": 0.2737048697714356, "grad_norm": 0.3035741448402405, "learning_rate": 9.9586868667931e-06, "loss": 0.003, "step": 33470 }, { "epoch": 0.27378664595003477, "grad_norm": 0.2973576486110687, "learning_rate": 9.958595268197812e-06, "loss": 0.0026, "step": 33480 }, { "epoch": 0.27386842212863394, "grad_norm": 0.1584177166223526, "learning_rate": 9.958503568591907e-06, "loss": 0.0049, "step": 33490 }, { "epoch": 0.2739501983072331, "grad_norm": 0.09174946695566177, "learning_rate": 9.958411767977256e-06, "loss": 0.0027, "step": 33500 }, { "epoch": 0.2740319744858323, "grad_norm": 0.12638503313064575, "learning_rate": 9.958319866355725e-06, "loss": 0.0049, "step": 33510 }, { "epoch": 0.27411375066443144, "grad_norm": 0.04960855096578598, "learning_rate": 9.95822786372919e-06, "loss": 0.0047, "step": 33520 }, { "epoch": 0.2741955268430306, "grad_norm": 0.07548042386770248, "learning_rate": 9.958135760099523e-06, "loss": 0.0027, "step": 33530 }, { "epoch": 0.2742773030216298, "grad_norm": 0.1507963389158249, "learning_rate": 9.958043555468599e-06, "loss": 0.0057, "step": 33540 }, { "epoch": 0.27435907920022895, "grad_norm": 0.06546508520841599, "learning_rate": 9.957951249838298e-06, "loss": 0.0036, "step": 33550 }, { "epoch": 0.2744408553788282, "grad_norm": 0.04155796766281128, "learning_rate": 9.9578588432105e-06, "loss": 0.0047, "step": 33560 }, { "epoch": 0.27452263155742734, "grad_norm": 0.2832667827606201, "learning_rate": 9.957766335587088e-06, "loss": 0.0025, "step": 33570 }, { "epoch": 0.2746044077360265, "grad_norm": 0.08852542191743851, "learning_rate": 9.957673726969946e-06, "loss": 0.0085, "step": 33580 }, { "epoch": 0.2746861839146257, "grad_norm": 0.05524018779397011, "learning_rate": 9.95758101736096e-06, "loss": 0.0046, "step": 33590 }, { "epoch": 0.27476796009322485, "grad_norm": 0.047922343015670776, "learning_rate": 9.957488206762018e-06, "loss": 0.0037, "step": 33600 }, { "epoch": 0.274849736271824, "grad_norm": 0.14436553418636322, "learning_rate": 9.957395295175012e-06, "loss": 0.0031, "step": 33610 }, { "epoch": 0.2749315124504232, "grad_norm": 0.21198216080665588, "learning_rate": 9.957302282601836e-06, "loss": 0.0033, "step": 33620 }, { "epoch": 0.27501328862902236, "grad_norm": 0.28908148407936096, "learning_rate": 9.957209169044381e-06, "loss": 0.0037, "step": 33630 }, { "epoch": 0.2750950648076215, "grad_norm": 0.0820135548710823, "learning_rate": 9.957115954504546e-06, "loss": 0.0031, "step": 33640 }, { "epoch": 0.2751768409862207, "grad_norm": 0.04566013440489769, "learning_rate": 9.95702263898423e-06, "loss": 0.0085, "step": 33650 }, { "epoch": 0.27525861716481986, "grad_norm": 0.148323655128479, "learning_rate": 9.956929222485334e-06, "loss": 0.0026, "step": 33660 }, { "epoch": 0.2753403933434191, "grad_norm": 0.038678981363773346, "learning_rate": 9.95683570500976e-06, "loss": 0.0036, "step": 33670 }, { "epoch": 0.27542216952201826, "grad_norm": 0.07343306392431259, "learning_rate": 9.956742086559414e-06, "loss": 0.0034, "step": 33680 }, { "epoch": 0.2755039457006174, "grad_norm": 0.08997385203838348, "learning_rate": 9.956648367136204e-06, "loss": 0.0039, "step": 33690 }, { "epoch": 0.2755857218792166, "grad_norm": 0.08215364813804626, "learning_rate": 9.956554546742035e-06, "loss": 0.0036, "step": 33700 }, { "epoch": 0.27566749805781576, "grad_norm": 0.04084743186831474, "learning_rate": 9.956460625378824e-06, "loss": 0.004, "step": 33710 }, { "epoch": 0.27574927423641493, "grad_norm": 0.2074662744998932, "learning_rate": 9.956366603048478e-06, "loss": 0.0044, "step": 33720 }, { "epoch": 0.2758310504150141, "grad_norm": 0.12763898074626923, "learning_rate": 9.956272479752917e-06, "loss": 0.0034, "step": 33730 }, { "epoch": 0.27591282659361327, "grad_norm": 0.1121160089969635, "learning_rate": 9.956178255494057e-06, "loss": 0.0037, "step": 33740 }, { "epoch": 0.27599460277221244, "grad_norm": 0.12337296456098557, "learning_rate": 9.956083930273819e-06, "loss": 0.0046, "step": 33750 }, { "epoch": 0.2760763789508116, "grad_norm": 0.19559210538864136, "learning_rate": 9.95598950409412e-06, "loss": 0.0026, "step": 33760 }, { "epoch": 0.2761581551294108, "grad_norm": 0.1552579253911972, "learning_rate": 9.955894976956887e-06, "loss": 0.0026, "step": 33770 }, { "epoch": 0.27623993130801, "grad_norm": 0.03521319478750229, "learning_rate": 9.955800348864047e-06, "loss": 0.0028, "step": 33780 }, { "epoch": 0.27632170748660917, "grad_norm": 0.10629460215568542, "learning_rate": 9.955705619817524e-06, "loss": 0.0024, "step": 33790 }, { "epoch": 0.27640348366520834, "grad_norm": 0.04743209108710289, "learning_rate": 9.95561078981925e-06, "loss": 0.0035, "step": 33800 }, { "epoch": 0.2764852598438075, "grad_norm": 0.17749930918216705, "learning_rate": 9.955515858871153e-06, "loss": 0.0023, "step": 33810 }, { "epoch": 0.2765670360224067, "grad_norm": 0.26021572947502136, "learning_rate": 9.955420826975176e-06, "loss": 0.0034, "step": 33820 }, { "epoch": 0.27664881220100584, "grad_norm": 0.14153389632701874, "learning_rate": 9.955325694133244e-06, "loss": 0.0028, "step": 33830 }, { "epoch": 0.276730588379605, "grad_norm": 0.14261937141418457, "learning_rate": 9.9552304603473e-06, "loss": 0.0041, "step": 33840 }, { "epoch": 0.2768123645582042, "grad_norm": 0.05343959107995033, "learning_rate": 9.955135125619285e-06, "loss": 0.0029, "step": 33850 }, { "epoch": 0.27689414073680335, "grad_norm": 0.12473069131374359, "learning_rate": 9.95503968995114e-06, "loss": 0.0021, "step": 33860 }, { "epoch": 0.2769759169154025, "grad_norm": 0.0954812690615654, "learning_rate": 9.954944153344807e-06, "loss": 0.0048, "step": 33870 }, { "epoch": 0.27705769309400174, "grad_norm": 0.2531786262989044, "learning_rate": 9.954848515802236e-06, "loss": 0.005, "step": 33880 }, { "epoch": 0.2771394692726009, "grad_norm": 0.06269699335098267, "learning_rate": 9.954752777325373e-06, "loss": 0.0028, "step": 33890 }, { "epoch": 0.2772212454512001, "grad_norm": 0.11144940555095673, "learning_rate": 9.954656937916168e-06, "loss": 0.0044, "step": 33900 }, { "epoch": 0.27730302162979925, "grad_norm": 0.14265140891075134, "learning_rate": 9.954560997576574e-06, "loss": 0.0055, "step": 33910 }, { "epoch": 0.2773847978083984, "grad_norm": 0.035599514842033386, "learning_rate": 9.954464956308545e-06, "loss": 0.0023, "step": 33920 }, { "epoch": 0.2774665739869976, "grad_norm": 0.13794583082199097, "learning_rate": 9.954368814114037e-06, "loss": 0.0022, "step": 33930 }, { "epoch": 0.27754835016559676, "grad_norm": 0.1066979318857193, "learning_rate": 9.95427257099501e-06, "loss": 0.0039, "step": 33940 }, { "epoch": 0.2776301263441959, "grad_norm": 0.16632050275802612, "learning_rate": 9.954176226953425e-06, "loss": 0.0044, "step": 33950 }, { "epoch": 0.2777119025227951, "grad_norm": 0.1602487713098526, "learning_rate": 9.954079781991242e-06, "loss": 0.0037, "step": 33960 }, { "epoch": 0.27779367870139426, "grad_norm": 0.32579362392425537, "learning_rate": 9.953983236110426e-06, "loss": 0.0053, "step": 33970 }, { "epoch": 0.27787545487999343, "grad_norm": 0.46398407220840454, "learning_rate": 9.953886589312946e-06, "loss": 0.0043, "step": 33980 }, { "epoch": 0.27795723105859266, "grad_norm": 0.11405270546674728, "learning_rate": 9.953789841600768e-06, "loss": 0.0031, "step": 33990 }, { "epoch": 0.2780390072371918, "grad_norm": 0.09231780469417572, "learning_rate": 9.953692992975866e-06, "loss": 0.0044, "step": 34000 }, { "epoch": 0.278120783415791, "grad_norm": 0.11008970439434052, "learning_rate": 9.95359604344021e-06, "loss": 0.0022, "step": 34010 }, { "epoch": 0.27820255959439016, "grad_norm": 0.13387660682201385, "learning_rate": 9.953498992995777e-06, "loss": 0.0023, "step": 34020 }, { "epoch": 0.27828433577298933, "grad_norm": 0.33619263768196106, "learning_rate": 9.953401841644543e-06, "loss": 0.003, "step": 34030 }, { "epoch": 0.2783661119515885, "grad_norm": 0.22621680796146393, "learning_rate": 9.953304589388485e-06, "loss": 0.0033, "step": 34040 }, { "epoch": 0.27844788813018767, "grad_norm": 0.028520388528704643, "learning_rate": 9.95320723622959e-06, "loss": 0.0035, "step": 34050 }, { "epoch": 0.27852966430878684, "grad_norm": 0.059558358043432236, "learning_rate": 9.953109782169835e-06, "loss": 0.0031, "step": 34060 }, { "epoch": 0.278611440487386, "grad_norm": 0.19851958751678467, "learning_rate": 9.953012227211207e-06, "loss": 0.0035, "step": 34070 }, { "epoch": 0.2786932166659852, "grad_norm": 0.2036481350660324, "learning_rate": 9.952914571355696e-06, "loss": 0.0051, "step": 34080 }, { "epoch": 0.27877499284458435, "grad_norm": 0.16633079946041107, "learning_rate": 9.952816814605288e-06, "loss": 0.0075, "step": 34090 }, { "epoch": 0.27885676902318357, "grad_norm": 0.061655644327402115, "learning_rate": 9.952718956961977e-06, "loss": 0.0031, "step": 34100 }, { "epoch": 0.27893854520178274, "grad_norm": 0.07168644666671753, "learning_rate": 9.952620998427754e-06, "loss": 0.0047, "step": 34110 }, { "epoch": 0.2790203213803819, "grad_norm": 0.09985938668251038, "learning_rate": 9.952522939004615e-06, "loss": 0.0025, "step": 34120 }, { "epoch": 0.2791020975589811, "grad_norm": 0.06255382299423218, "learning_rate": 9.952424778694558e-06, "loss": 0.0034, "step": 34130 }, { "epoch": 0.27918387373758025, "grad_norm": 0.06784658133983612, "learning_rate": 9.952326517499584e-06, "loss": 0.0041, "step": 34140 }, { "epoch": 0.2792656499161794, "grad_norm": 0.25541284680366516, "learning_rate": 9.952228155421692e-06, "loss": 0.0031, "step": 34150 }, { "epoch": 0.2793474260947786, "grad_norm": 0.05513586476445198, "learning_rate": 9.952129692462889e-06, "loss": 0.0026, "step": 34160 }, { "epoch": 0.27942920227337775, "grad_norm": 0.13684964179992676, "learning_rate": 9.952031128625177e-06, "loss": 0.0029, "step": 34170 }, { "epoch": 0.2795109784519769, "grad_norm": 0.07004057615995407, "learning_rate": 9.951932463910568e-06, "loss": 0.0038, "step": 34180 }, { "epoch": 0.2795927546305761, "grad_norm": 0.14641064405441284, "learning_rate": 9.951833698321067e-06, "loss": 0.003, "step": 34190 }, { "epoch": 0.2796745308091753, "grad_norm": 0.13560818135738373, "learning_rate": 9.95173483185869e-06, "loss": 0.0026, "step": 34200 }, { "epoch": 0.2797563069877745, "grad_norm": 0.13316123187541962, "learning_rate": 9.951635864525449e-06, "loss": 0.0033, "step": 34210 }, { "epoch": 0.27983808316637365, "grad_norm": 0.17549046874046326, "learning_rate": 9.95153679632336e-06, "loss": 0.0038, "step": 34220 }, { "epoch": 0.2799198593449728, "grad_norm": 0.07964063435792923, "learning_rate": 9.951437627254444e-06, "loss": 0.0045, "step": 34230 }, { "epoch": 0.280001635523572, "grad_norm": 0.12579841911792755, "learning_rate": 9.951338357320716e-06, "loss": 0.0031, "step": 34240 }, { "epoch": 0.28008341170217116, "grad_norm": 0.020733971148729324, "learning_rate": 9.951238986524203e-06, "loss": 0.0025, "step": 34250 }, { "epoch": 0.2801651878807703, "grad_norm": 0.12919433414936066, "learning_rate": 9.951139514866926e-06, "loss": 0.0018, "step": 34260 }, { "epoch": 0.2802469640593695, "grad_norm": 0.07002799212932587, "learning_rate": 9.951039942350913e-06, "loss": 0.0025, "step": 34270 }, { "epoch": 0.28032874023796867, "grad_norm": 0.17844434082508087, "learning_rate": 9.950940268978192e-06, "loss": 0.0031, "step": 34280 }, { "epoch": 0.28041051641656783, "grad_norm": 0.12250684201717377, "learning_rate": 9.950840494750793e-06, "loss": 0.0035, "step": 34290 }, { "epoch": 0.280492292595167, "grad_norm": 0.22549553215503693, "learning_rate": 9.95074061967075e-06, "loss": 0.005, "step": 34300 }, { "epoch": 0.2805740687737662, "grad_norm": 0.12486199289560318, "learning_rate": 9.950640643740095e-06, "loss": 0.0031, "step": 34310 }, { "epoch": 0.2806558449523654, "grad_norm": 0.1207573190331459, "learning_rate": 9.950540566960866e-06, "loss": 0.006, "step": 34320 }, { "epoch": 0.28073762113096457, "grad_norm": 0.10041143745183945, "learning_rate": 9.950440389335103e-06, "loss": 0.0029, "step": 34330 }, { "epoch": 0.28081939730956373, "grad_norm": 0.16333305835723877, "learning_rate": 9.950340110864844e-06, "loss": 0.003, "step": 34340 }, { "epoch": 0.2809011734881629, "grad_norm": 0.12826870381832123, "learning_rate": 9.950239731552134e-06, "loss": 0.0041, "step": 34350 }, { "epoch": 0.28098294966676207, "grad_norm": 0.18176087737083435, "learning_rate": 9.950139251399016e-06, "loss": 0.0041, "step": 34360 }, { "epoch": 0.28106472584536124, "grad_norm": 0.25570979714393616, "learning_rate": 9.950038670407538e-06, "loss": 0.0028, "step": 34370 }, { "epoch": 0.2811465020239604, "grad_norm": 0.20688177645206451, "learning_rate": 9.94993798857975e-06, "loss": 0.0033, "step": 34380 }, { "epoch": 0.2812282782025596, "grad_norm": 0.24692420661449432, "learning_rate": 9.949837205917701e-06, "loss": 0.0038, "step": 34390 }, { "epoch": 0.28131005438115875, "grad_norm": 0.21245309710502625, "learning_rate": 9.949736322423443e-06, "loss": 0.004, "step": 34400 }, { "epoch": 0.2813918305597579, "grad_norm": 0.13843746483325958, "learning_rate": 9.949635338099035e-06, "loss": 0.003, "step": 34410 }, { "epoch": 0.28147360673835714, "grad_norm": 0.04976167902350426, "learning_rate": 9.949534252946532e-06, "loss": 0.0035, "step": 34420 }, { "epoch": 0.2815553829169563, "grad_norm": 0.09776128083467484, "learning_rate": 9.94943306696799e-06, "loss": 0.0032, "step": 34430 }, { "epoch": 0.2816371590955555, "grad_norm": 0.07275468111038208, "learning_rate": 9.949331780165476e-06, "loss": 0.0026, "step": 34440 }, { "epoch": 0.28171893527415465, "grad_norm": 0.19991843402385712, "learning_rate": 9.949230392541051e-06, "loss": 0.0027, "step": 34450 }, { "epoch": 0.2818007114527538, "grad_norm": 0.12919406592845917, "learning_rate": 9.949128904096779e-06, "loss": 0.0024, "step": 34460 }, { "epoch": 0.281882487631353, "grad_norm": 0.13165339827537537, "learning_rate": 9.949027314834728e-06, "loss": 0.0034, "step": 34470 }, { "epoch": 0.28196426380995215, "grad_norm": 0.216047003865242, "learning_rate": 9.948925624756969e-06, "loss": 0.0047, "step": 34480 }, { "epoch": 0.2820460399885513, "grad_norm": 0.07627390325069427, "learning_rate": 9.948823833865571e-06, "loss": 0.0025, "step": 34490 }, { "epoch": 0.2821278161671505, "grad_norm": 0.05886604264378548, "learning_rate": 9.948721942162609e-06, "loss": 0.0067, "step": 34500 }, { "epoch": 0.28220959234574966, "grad_norm": 0.27252912521362305, "learning_rate": 9.94861994965016e-06, "loss": 0.0029, "step": 34510 }, { "epoch": 0.2822913685243489, "grad_norm": 0.1978888362646103, "learning_rate": 9.948517856330299e-06, "loss": 0.0037, "step": 34520 }, { "epoch": 0.28237314470294805, "grad_norm": 0.11863549053668976, "learning_rate": 9.948415662205107e-06, "loss": 0.0041, "step": 34530 }, { "epoch": 0.2824549208815472, "grad_norm": 0.05289582163095474, "learning_rate": 9.948313367276666e-06, "loss": 0.0037, "step": 34540 }, { "epoch": 0.2825366970601464, "grad_norm": 0.08052711933851242, "learning_rate": 9.948210971547058e-06, "loss": 0.0025, "step": 34550 }, { "epoch": 0.28261847323874556, "grad_norm": 0.1791612058877945, "learning_rate": 9.948108475018371e-06, "loss": 0.0046, "step": 34560 }, { "epoch": 0.28270024941734473, "grad_norm": 0.1523466855287552, "learning_rate": 9.948005877692695e-06, "loss": 0.0015, "step": 34570 }, { "epoch": 0.2827820255959439, "grad_norm": 0.07744347304105759, "learning_rate": 9.947903179572115e-06, "loss": 0.0035, "step": 34580 }, { "epoch": 0.28286380177454307, "grad_norm": 0.24804098904132843, "learning_rate": 9.947800380658726e-06, "loss": 0.004, "step": 34590 }, { "epoch": 0.28294557795314224, "grad_norm": 0.19107355177402496, "learning_rate": 9.947697480954621e-06, "loss": 0.0039, "step": 34600 }, { "epoch": 0.2830273541317414, "grad_norm": 0.09097907692193985, "learning_rate": 9.947594480461897e-06, "loss": 0.0046, "step": 34610 }, { "epoch": 0.2831091303103406, "grad_norm": 0.16509497165679932, "learning_rate": 9.947491379182652e-06, "loss": 0.0029, "step": 34620 }, { "epoch": 0.2831909064889398, "grad_norm": 0.05765361338853836, "learning_rate": 9.947388177118985e-06, "loss": 0.0029, "step": 34630 }, { "epoch": 0.28327268266753897, "grad_norm": 0.013963391073048115, "learning_rate": 9.947284874273e-06, "loss": 0.0028, "step": 34640 }, { "epoch": 0.28335445884613814, "grad_norm": 0.023337243124842644, "learning_rate": 9.947181470646801e-06, "loss": 0.0036, "step": 34650 }, { "epoch": 0.2834362350247373, "grad_norm": 0.12005265057086945, "learning_rate": 9.947077966242494e-06, "loss": 0.0036, "step": 34660 }, { "epoch": 0.2835180112033365, "grad_norm": 0.1286427229642868, "learning_rate": 9.946974361062187e-06, "loss": 0.0024, "step": 34670 }, { "epoch": 0.28359978738193564, "grad_norm": 0.7572545409202576, "learning_rate": 9.946870655107993e-06, "loss": 0.0035, "step": 34680 }, { "epoch": 0.2836815635605348, "grad_norm": 0.13720302283763885, "learning_rate": 9.946766848382021e-06, "loss": 0.0037, "step": 34690 }, { "epoch": 0.283763339739134, "grad_norm": 0.09806148707866669, "learning_rate": 9.946662940886388e-06, "loss": 0.0022, "step": 34700 }, { "epoch": 0.28384511591773315, "grad_norm": 0.1461908221244812, "learning_rate": 9.94655893262321e-06, "loss": 0.0038, "step": 34710 }, { "epoch": 0.2839268920963323, "grad_norm": 0.20034447312355042, "learning_rate": 9.946454823594607e-06, "loss": 0.0034, "step": 34720 }, { "epoch": 0.2840086682749315, "grad_norm": 0.09314354509115219, "learning_rate": 9.946350613802696e-06, "loss": 0.0039, "step": 34730 }, { "epoch": 0.2840904444535307, "grad_norm": 0.11215302348136902, "learning_rate": 9.946246303249605e-06, "loss": 0.0041, "step": 34740 }, { "epoch": 0.2841722206321299, "grad_norm": 0.1937732845544815, "learning_rate": 9.946141891937454e-06, "loss": 0.0048, "step": 34750 }, { "epoch": 0.28425399681072905, "grad_norm": 0.31036901473999023, "learning_rate": 9.946037379868373e-06, "loss": 0.0061, "step": 34760 }, { "epoch": 0.2843357729893282, "grad_norm": 0.06669411808252335, "learning_rate": 9.94593276704449e-06, "loss": 0.004, "step": 34770 }, { "epoch": 0.2844175491679274, "grad_norm": 0.04575685039162636, "learning_rate": 9.945828053467939e-06, "loss": 0.003, "step": 34780 }, { "epoch": 0.28449932534652655, "grad_norm": 0.1621055006980896, "learning_rate": 9.945723239140847e-06, "loss": 0.0033, "step": 34790 }, { "epoch": 0.2845811015251257, "grad_norm": 0.14123018085956573, "learning_rate": 9.945618324065354e-06, "loss": 0.0033, "step": 34800 }, { "epoch": 0.2846628777037249, "grad_norm": 0.03923985734581947, "learning_rate": 9.945513308243594e-06, "loss": 0.0021, "step": 34810 }, { "epoch": 0.28474465388232406, "grad_norm": 0.22086627781391144, "learning_rate": 9.94540819167771e-06, "loss": 0.0031, "step": 34820 }, { "epoch": 0.28482643006092323, "grad_norm": 0.10289176553487778, "learning_rate": 9.945302974369841e-06, "loss": 0.0035, "step": 34830 }, { "epoch": 0.28490820623952245, "grad_norm": 0.16090470552444458, "learning_rate": 9.94519765632213e-06, "loss": 0.0048, "step": 34840 }, { "epoch": 0.2849899824181216, "grad_norm": 0.09359372407197952, "learning_rate": 9.945092237536725e-06, "loss": 0.0037, "step": 34850 }, { "epoch": 0.2850717585967208, "grad_norm": 0.057828377932310104, "learning_rate": 9.94498671801577e-06, "loss": 0.0063, "step": 34860 }, { "epoch": 0.28515353477531996, "grad_norm": 0.1746435910463333, "learning_rate": 9.944881097761415e-06, "loss": 0.0034, "step": 34870 }, { "epoch": 0.28523531095391913, "grad_norm": 0.21861109137535095, "learning_rate": 9.944775376775815e-06, "loss": 0.0038, "step": 34880 }, { "epoch": 0.2853170871325183, "grad_norm": 0.34095919132232666, "learning_rate": 9.944669555061119e-06, "loss": 0.0043, "step": 34890 }, { "epoch": 0.28539886331111747, "grad_norm": 0.015201649628579617, "learning_rate": 9.944563632619486e-06, "loss": 0.0022, "step": 34900 }, { "epoch": 0.28548063948971664, "grad_norm": 0.06921397149562836, "learning_rate": 9.944457609453074e-06, "loss": 0.0037, "step": 34910 }, { "epoch": 0.2855624156683158, "grad_norm": 0.04094156250357628, "learning_rate": 9.94435148556404e-06, "loss": 0.0045, "step": 34920 }, { "epoch": 0.285644191846915, "grad_norm": 0.0758284330368042, "learning_rate": 9.944245260954546e-06, "loss": 0.0033, "step": 34930 }, { "epoch": 0.28572596802551414, "grad_norm": 0.11497757583856583, "learning_rate": 9.944138935626758e-06, "loss": 0.0039, "step": 34940 }, { "epoch": 0.28580774420411337, "grad_norm": 0.2230425477027893, "learning_rate": 9.944032509582842e-06, "loss": 0.0037, "step": 34950 }, { "epoch": 0.28588952038271254, "grad_norm": 0.09640620648860931, "learning_rate": 9.943925982824965e-06, "loss": 0.0024, "step": 34960 }, { "epoch": 0.2859712965613117, "grad_norm": 0.040279652923345566, "learning_rate": 9.943819355355295e-06, "loss": 0.0029, "step": 34970 }, { "epoch": 0.2860530727399109, "grad_norm": 0.4472619891166687, "learning_rate": 9.94371262717601e-06, "loss": 0.0024, "step": 34980 }, { "epoch": 0.28613484891851004, "grad_norm": 0.04771401733160019, "learning_rate": 9.943605798289277e-06, "loss": 0.0036, "step": 34990 }, { "epoch": 0.2862166250971092, "grad_norm": 0.09164497256278992, "learning_rate": 9.943498868697277e-06, "loss": 0.0031, "step": 35000 }, { "epoch": 0.2862984012757084, "grad_norm": 0.11985531449317932, "learning_rate": 9.943391838402187e-06, "loss": 0.0034, "step": 35010 }, { "epoch": 0.28638017745430755, "grad_norm": 0.1524299532175064, "learning_rate": 9.943284707406186e-06, "loss": 0.0017, "step": 35020 }, { "epoch": 0.2864619536329067, "grad_norm": 0.18248052895069122, "learning_rate": 9.943177475711459e-06, "loss": 0.0027, "step": 35030 }, { "epoch": 0.2865437298115059, "grad_norm": 0.14814133942127228, "learning_rate": 9.943070143320186e-06, "loss": 0.0022, "step": 35040 }, { "epoch": 0.28662550599010506, "grad_norm": 0.05961274355649948, "learning_rate": 9.942962710234559e-06, "loss": 0.0022, "step": 35050 }, { "epoch": 0.2867072821687043, "grad_norm": 0.06521140038967133, "learning_rate": 9.942855176456763e-06, "loss": 0.0021, "step": 35060 }, { "epoch": 0.28678905834730345, "grad_norm": 0.18015457689762115, "learning_rate": 9.942747541988989e-06, "loss": 0.0036, "step": 35070 }, { "epoch": 0.2868708345259026, "grad_norm": 0.0835411474108696, "learning_rate": 9.94263980683343e-06, "loss": 0.0039, "step": 35080 }, { "epoch": 0.2869526107045018, "grad_norm": 0.13210146129131317, "learning_rate": 9.94253197099228e-06, "loss": 0.0035, "step": 35090 }, { "epoch": 0.28703438688310096, "grad_norm": 0.09182898700237274, "learning_rate": 9.942424034467736e-06, "loss": 0.0035, "step": 35100 }, { "epoch": 0.2871161630617001, "grad_norm": 0.11543023586273193, "learning_rate": 9.942315997261998e-06, "loss": 0.0031, "step": 35110 }, { "epoch": 0.2871979392402993, "grad_norm": 0.12674330174922943, "learning_rate": 9.942207859377267e-06, "loss": 0.005, "step": 35120 }, { "epoch": 0.28727971541889846, "grad_norm": 0.18198956549167633, "learning_rate": 9.942099620815743e-06, "loss": 0.0017, "step": 35130 }, { "epoch": 0.28736149159749763, "grad_norm": 0.043891292065382004, "learning_rate": 9.941991281579633e-06, "loss": 0.0053, "step": 35140 }, { "epoch": 0.2874432677760968, "grad_norm": 0.09266799688339233, "learning_rate": 9.941882841671142e-06, "loss": 0.0024, "step": 35150 }, { "epoch": 0.287525043954696, "grad_norm": 0.15444506704807281, "learning_rate": 9.941774301092483e-06, "loss": 0.0022, "step": 35160 }, { "epoch": 0.2876068201332952, "grad_norm": 0.02575453743338585, "learning_rate": 9.941665659845863e-06, "loss": 0.0026, "step": 35170 }, { "epoch": 0.28768859631189436, "grad_norm": 0.10612848401069641, "learning_rate": 9.941556917933497e-06, "loss": 0.003, "step": 35180 }, { "epoch": 0.28777037249049353, "grad_norm": 0.11741107702255249, "learning_rate": 9.9414480753576e-06, "loss": 0.0029, "step": 35190 }, { "epoch": 0.2878521486690927, "grad_norm": 0.023076247423887253, "learning_rate": 9.94133913212039e-06, "loss": 0.0012, "step": 35200 }, { "epoch": 0.28793392484769187, "grad_norm": 0.2657022178173065, "learning_rate": 9.941230088224085e-06, "loss": 0.0032, "step": 35210 }, { "epoch": 0.28801570102629104, "grad_norm": 0.09780433028936386, "learning_rate": 9.941120943670906e-06, "loss": 0.0041, "step": 35220 }, { "epoch": 0.2880974772048902, "grad_norm": 0.6367831826210022, "learning_rate": 9.941011698463077e-06, "loss": 0.0056, "step": 35230 }, { "epoch": 0.2881792533834894, "grad_norm": 0.2798191010951996, "learning_rate": 9.940902352602823e-06, "loss": 0.0042, "step": 35240 }, { "epoch": 0.28826102956208854, "grad_norm": 0.09780846536159515, "learning_rate": 9.940792906092373e-06, "loss": 0.0027, "step": 35250 }, { "epoch": 0.2883428057406877, "grad_norm": 0.07046477496623993, "learning_rate": 9.940683358933955e-06, "loss": 0.0038, "step": 35260 }, { "epoch": 0.28842458191928694, "grad_norm": 0.02252020686864853, "learning_rate": 9.940573711129801e-06, "loss": 0.0029, "step": 35270 }, { "epoch": 0.2885063580978861, "grad_norm": 0.046119142323732376, "learning_rate": 9.940463962682145e-06, "loss": 0.0039, "step": 35280 }, { "epoch": 0.2885881342764853, "grad_norm": 0.06011781096458435, "learning_rate": 9.940354113593222e-06, "loss": 0.0053, "step": 35290 }, { "epoch": 0.28866991045508444, "grad_norm": 0.10982952266931534, "learning_rate": 9.940244163865272e-06, "loss": 0.0034, "step": 35300 }, { "epoch": 0.2887516866336836, "grad_norm": 0.19051796197891235, "learning_rate": 9.94013411350053e-06, "loss": 0.0035, "step": 35310 }, { "epoch": 0.2888334628122828, "grad_norm": 0.18963445723056793, "learning_rate": 9.94002396250124e-06, "loss": 0.0044, "step": 35320 }, { "epoch": 0.28891523899088195, "grad_norm": 0.19234822690486908, "learning_rate": 9.939913710869648e-06, "loss": 0.0057, "step": 35330 }, { "epoch": 0.2889970151694811, "grad_norm": 0.12551914155483246, "learning_rate": 9.939803358607998e-06, "loss": 0.0025, "step": 35340 }, { "epoch": 0.2890787913480803, "grad_norm": 0.06111976504325867, "learning_rate": 9.939692905718538e-06, "loss": 0.003, "step": 35350 }, { "epoch": 0.28916056752667946, "grad_norm": 0.1485566794872284, "learning_rate": 9.93958235220352e-06, "loss": 0.0045, "step": 35360 }, { "epoch": 0.2892423437052786, "grad_norm": 0.08817394077777863, "learning_rate": 9.939471698065193e-06, "loss": 0.0048, "step": 35370 }, { "epoch": 0.28932411988387785, "grad_norm": 0.06970670074224472, "learning_rate": 9.939360943305811e-06, "loss": 0.0026, "step": 35380 }, { "epoch": 0.289405896062477, "grad_norm": 0.11738906055688858, "learning_rate": 9.939250087927634e-06, "loss": 0.0023, "step": 35390 }, { "epoch": 0.2894876722410762, "grad_norm": 0.26842910051345825, "learning_rate": 9.939139131932918e-06, "loss": 0.0032, "step": 35400 }, { "epoch": 0.28956944841967536, "grad_norm": 0.19090449810028076, "learning_rate": 9.939028075323922e-06, "loss": 0.0048, "step": 35410 }, { "epoch": 0.2896512245982745, "grad_norm": 0.07639428973197937, "learning_rate": 9.93891691810291e-06, "loss": 0.0033, "step": 35420 }, { "epoch": 0.2897330007768737, "grad_norm": 0.07548720389604568, "learning_rate": 9.938805660272144e-06, "loss": 0.0025, "step": 35430 }, { "epoch": 0.28981477695547286, "grad_norm": 0.11265066266059875, "learning_rate": 9.938694301833896e-06, "loss": 0.0053, "step": 35440 }, { "epoch": 0.28989655313407203, "grad_norm": 0.13191184401512146, "learning_rate": 9.938582842790428e-06, "loss": 0.0042, "step": 35450 }, { "epoch": 0.2899783293126712, "grad_norm": 0.12264850735664368, "learning_rate": 9.938471283144014e-06, "loss": 0.0034, "step": 35460 }, { "epoch": 0.29006010549127037, "grad_norm": 0.0787077248096466, "learning_rate": 9.938359622896926e-06, "loss": 0.0048, "step": 35470 }, { "epoch": 0.2901418816698696, "grad_norm": 0.10795720666646957, "learning_rate": 9.938247862051437e-06, "loss": 0.0026, "step": 35480 }, { "epoch": 0.29022365784846876, "grad_norm": 0.044469352811574936, "learning_rate": 9.938136000609826e-06, "loss": 0.0027, "step": 35490 }, { "epoch": 0.29030543402706793, "grad_norm": 0.1210469976067543, "learning_rate": 9.93802403857437e-06, "loss": 0.0035, "step": 35500 }, { "epoch": 0.2903872102056671, "grad_norm": 0.12233828008174896, "learning_rate": 9.937911975947352e-06, "loss": 0.0043, "step": 35510 }, { "epoch": 0.29046898638426627, "grad_norm": 0.0711759552359581, "learning_rate": 9.937799812731051e-06, "loss": 0.0028, "step": 35520 }, { "epoch": 0.29055076256286544, "grad_norm": 0.0780133605003357, "learning_rate": 9.937687548927758e-06, "loss": 0.0042, "step": 35530 }, { "epoch": 0.2906325387414646, "grad_norm": 0.030558191239833832, "learning_rate": 9.937575184539752e-06, "loss": 0.0051, "step": 35540 }, { "epoch": 0.2907143149200638, "grad_norm": 0.08364079147577286, "learning_rate": 9.937462719569327e-06, "loss": 0.0018, "step": 35550 }, { "epoch": 0.29079609109866295, "grad_norm": 0.12154722213745117, "learning_rate": 9.937350154018775e-06, "loss": 0.0034, "step": 35560 }, { "epoch": 0.2908778672772621, "grad_norm": 0.13722394406795502, "learning_rate": 9.937237487890386e-06, "loss": 0.0027, "step": 35570 }, { "epoch": 0.2909596434558613, "grad_norm": 0.11419154703617096, "learning_rate": 9.937124721186456e-06, "loss": 0.0025, "step": 35580 }, { "epoch": 0.2910414196344605, "grad_norm": 0.1559363752603531, "learning_rate": 9.937011853909283e-06, "loss": 0.0023, "step": 35590 }, { "epoch": 0.2911231958130597, "grad_norm": 0.03618314862251282, "learning_rate": 9.936898886061163e-06, "loss": 0.0028, "step": 35600 }, { "epoch": 0.29120497199165885, "grad_norm": 0.08693714439868927, "learning_rate": 9.936785817644402e-06, "loss": 0.0045, "step": 35610 }, { "epoch": 0.291286748170258, "grad_norm": 0.14441633224487305, "learning_rate": 9.936672648661301e-06, "loss": 0.0027, "step": 35620 }, { "epoch": 0.2913685243488572, "grad_norm": 0.08890361338853836, "learning_rate": 9.936559379114166e-06, "loss": 0.0042, "step": 35630 }, { "epoch": 0.29145030052745635, "grad_norm": 0.02638852968811989, "learning_rate": 9.936446009005303e-06, "loss": 0.0033, "step": 35640 }, { "epoch": 0.2915320767060555, "grad_norm": 0.39575275778770447, "learning_rate": 9.936332538337022e-06, "loss": 0.0036, "step": 35650 }, { "epoch": 0.2916138528846547, "grad_norm": 0.06531286984682083, "learning_rate": 9.936218967111635e-06, "loss": 0.0042, "step": 35660 }, { "epoch": 0.29169562906325386, "grad_norm": 0.07855545729398727, "learning_rate": 9.936105295331456e-06, "loss": 0.0026, "step": 35670 }, { "epoch": 0.291777405241853, "grad_norm": 0.0834827572107315, "learning_rate": 9.935991522998797e-06, "loss": 0.0032, "step": 35680 }, { "epoch": 0.2918591814204522, "grad_norm": 0.08879228681325912, "learning_rate": 9.935877650115981e-06, "loss": 0.0036, "step": 35690 }, { "epoch": 0.2919409575990514, "grad_norm": 0.16098186373710632, "learning_rate": 9.935763676685326e-06, "loss": 0.0031, "step": 35700 }, { "epoch": 0.2920227337776506, "grad_norm": 0.03429548442363739, "learning_rate": 9.93564960270915e-06, "loss": 0.0035, "step": 35710 }, { "epoch": 0.29210450995624976, "grad_norm": 0.09365818649530411, "learning_rate": 9.935535428189782e-06, "loss": 0.0042, "step": 35720 }, { "epoch": 0.2921862861348489, "grad_norm": 0.07440430670976639, "learning_rate": 9.935421153129546e-06, "loss": 0.0027, "step": 35730 }, { "epoch": 0.2922680623134481, "grad_norm": 0.03630334883928299, "learning_rate": 9.935306777530766e-06, "loss": 0.0031, "step": 35740 }, { "epoch": 0.29234983849204726, "grad_norm": 0.07795511186122894, "learning_rate": 9.935192301395779e-06, "loss": 0.0043, "step": 35750 }, { "epoch": 0.29243161467064643, "grad_norm": 0.06783099472522736, "learning_rate": 9.935077724726912e-06, "loss": 0.0036, "step": 35760 }, { "epoch": 0.2925133908492456, "grad_norm": 0.03594313561916351, "learning_rate": 9.9349630475265e-06, "loss": 0.003, "step": 35770 }, { "epoch": 0.29259516702784477, "grad_norm": 0.03351833298802376, "learning_rate": 9.93484826979688e-06, "loss": 0.0021, "step": 35780 }, { "epoch": 0.29267694320644394, "grad_norm": 0.10566811263561249, "learning_rate": 9.934733391540389e-06, "loss": 0.0028, "step": 35790 }, { "epoch": 0.29275871938504316, "grad_norm": 0.10445506125688553, "learning_rate": 9.934618412759368e-06, "loss": 0.0031, "step": 35800 }, { "epoch": 0.29284049556364233, "grad_norm": 0.1789662390947342, "learning_rate": 9.934503333456157e-06, "loss": 0.0036, "step": 35810 }, { "epoch": 0.2929222717422415, "grad_norm": 0.1908639669418335, "learning_rate": 9.934388153633106e-06, "loss": 0.0046, "step": 35820 }, { "epoch": 0.29300404792084067, "grad_norm": 0.12138226628303528, "learning_rate": 9.934272873292553e-06, "loss": 0.0024, "step": 35830 }, { "epoch": 0.29308582409943984, "grad_norm": 0.12674422562122345, "learning_rate": 9.934157492436854e-06, "loss": 0.0053, "step": 35840 }, { "epoch": 0.293167600278039, "grad_norm": 0.038174815475940704, "learning_rate": 9.934042011068356e-06, "loss": 0.0016, "step": 35850 }, { "epoch": 0.2932493764566382, "grad_norm": 0.19528181850910187, "learning_rate": 9.933926429189412e-06, "loss": 0.0028, "step": 35860 }, { "epoch": 0.29333115263523735, "grad_norm": 0.20882616937160492, "learning_rate": 9.933810746802373e-06, "loss": 0.0032, "step": 35870 }, { "epoch": 0.2934129288138365, "grad_norm": 0.3341614902019501, "learning_rate": 9.9336949639096e-06, "loss": 0.0053, "step": 35880 }, { "epoch": 0.2934947049924357, "grad_norm": 0.11777131259441376, "learning_rate": 9.933579080513451e-06, "loss": 0.0028, "step": 35890 }, { "epoch": 0.29357648117103485, "grad_norm": 0.041572656482458115, "learning_rate": 9.933463096616287e-06, "loss": 0.0022, "step": 35900 }, { "epoch": 0.2936582573496341, "grad_norm": 0.04464872553944588, "learning_rate": 9.933347012220468e-06, "loss": 0.003, "step": 35910 }, { "epoch": 0.29374003352823325, "grad_norm": 0.16548490524291992, "learning_rate": 9.933230827328361e-06, "loss": 0.006, "step": 35920 }, { "epoch": 0.2938218097068324, "grad_norm": 0.03159281611442566, "learning_rate": 9.933114541942332e-06, "loss": 0.0023, "step": 35930 }, { "epoch": 0.2939035858854316, "grad_norm": 0.13309313356876373, "learning_rate": 9.93299815606475e-06, "loss": 0.0041, "step": 35940 }, { "epoch": 0.29398536206403075, "grad_norm": 0.06677492707967758, "learning_rate": 9.932881669697986e-06, "loss": 0.0045, "step": 35950 }, { "epoch": 0.2940671382426299, "grad_norm": 0.0334053561091423, "learning_rate": 9.932765082844412e-06, "loss": 0.0031, "step": 35960 }, { "epoch": 0.2941489144212291, "grad_norm": 0.12628424167633057, "learning_rate": 9.932648395506404e-06, "loss": 0.0043, "step": 35970 }, { "epoch": 0.29423069059982826, "grad_norm": 0.17654214799404144, "learning_rate": 9.932531607686339e-06, "loss": 0.0038, "step": 35980 }, { "epoch": 0.29431246677842743, "grad_norm": 0.040737126022577286, "learning_rate": 9.932414719386597e-06, "loss": 0.0041, "step": 35990 }, { "epoch": 0.2943942429570266, "grad_norm": 0.03169206902384758, "learning_rate": 9.932297730609554e-06, "loss": 0.0023, "step": 36000 }, { "epoch": 0.29447601913562577, "grad_norm": 0.06569335609674454, "learning_rate": 9.932180641357601e-06, "loss": 0.0033, "step": 36010 }, { "epoch": 0.294557795314225, "grad_norm": 0.07817908376455307, "learning_rate": 9.932063451633119e-06, "loss": 0.004, "step": 36020 }, { "epoch": 0.29463957149282416, "grad_norm": 0.09828518331050873, "learning_rate": 9.931946161438493e-06, "loss": 0.0028, "step": 36030 }, { "epoch": 0.29472134767142333, "grad_norm": 0.0923520177602768, "learning_rate": 9.931828770776116e-06, "loss": 0.0029, "step": 36040 }, { "epoch": 0.2948031238500225, "grad_norm": 0.06636962294578552, "learning_rate": 9.931711279648378e-06, "loss": 0.0025, "step": 36050 }, { "epoch": 0.29488490002862167, "grad_norm": 0.057072531431913376, "learning_rate": 9.931593688057674e-06, "loss": 0.002, "step": 36060 }, { "epoch": 0.29496667620722083, "grad_norm": 0.0641530454158783, "learning_rate": 9.931475996006396e-06, "loss": 0.0058, "step": 36070 }, { "epoch": 0.29504845238582, "grad_norm": 0.1348123699426651, "learning_rate": 9.931358203496943e-06, "loss": 0.003, "step": 36080 }, { "epoch": 0.2951302285644192, "grad_norm": 0.17111043632030487, "learning_rate": 9.931240310531718e-06, "loss": 0.0054, "step": 36090 }, { "epoch": 0.29521200474301834, "grad_norm": 0.1240982636809349, "learning_rate": 9.931122317113117e-06, "loss": 0.0043, "step": 36100 }, { "epoch": 0.2952937809216175, "grad_norm": 0.06572122126817703, "learning_rate": 9.931004223243547e-06, "loss": 0.0036, "step": 36110 }, { "epoch": 0.29537555710021673, "grad_norm": 0.08471349626779556, "learning_rate": 9.930886028925413e-06, "loss": 0.0029, "step": 36120 }, { "epoch": 0.2954573332788159, "grad_norm": 0.18920506536960602, "learning_rate": 9.930767734161123e-06, "loss": 0.0033, "step": 36130 }, { "epoch": 0.2955391094574151, "grad_norm": 0.08827762305736542, "learning_rate": 9.930649338953085e-06, "loss": 0.004, "step": 36140 }, { "epoch": 0.29562088563601424, "grad_norm": 0.1345812827348709, "learning_rate": 9.930530843303713e-06, "loss": 0.0028, "step": 36150 }, { "epoch": 0.2957026618146134, "grad_norm": 0.22981806099414825, "learning_rate": 9.930412247215419e-06, "loss": 0.003, "step": 36160 }, { "epoch": 0.2957844379932126, "grad_norm": 0.04172317311167717, "learning_rate": 9.930293550690622e-06, "loss": 0.0027, "step": 36170 }, { "epoch": 0.29586621417181175, "grad_norm": 0.13899382948875427, "learning_rate": 9.930174753731737e-06, "loss": 0.0028, "step": 36180 }, { "epoch": 0.2959479903504109, "grad_norm": 0.04280700162053108, "learning_rate": 9.930055856341182e-06, "loss": 0.0041, "step": 36190 }, { "epoch": 0.2960297665290101, "grad_norm": 0.35020989179611206, "learning_rate": 9.929936858521387e-06, "loss": 0.0064, "step": 36200 }, { "epoch": 0.29611154270760925, "grad_norm": 0.05617950111627579, "learning_rate": 9.929817760274767e-06, "loss": 0.0044, "step": 36210 }, { "epoch": 0.2961933188862084, "grad_norm": 0.06106046959757805, "learning_rate": 9.929698561603753e-06, "loss": 0.0042, "step": 36220 }, { "epoch": 0.29627509506480765, "grad_norm": 0.10099640488624573, "learning_rate": 9.929579262510774e-06, "loss": 0.0028, "step": 36230 }, { "epoch": 0.2963568712434068, "grad_norm": 0.12479256093502045, "learning_rate": 9.929459862998256e-06, "loss": 0.0054, "step": 36240 }, { "epoch": 0.296438647422006, "grad_norm": 0.2300807535648346, "learning_rate": 9.929340363068634e-06, "loss": 0.005, "step": 36250 }, { "epoch": 0.29652042360060515, "grad_norm": 0.17686854302883148, "learning_rate": 9.929220762724344e-06, "loss": 0.0032, "step": 36260 }, { "epoch": 0.2966021997792043, "grad_norm": 0.10516782850027084, "learning_rate": 9.929101061967818e-06, "loss": 0.0033, "step": 36270 }, { "epoch": 0.2966839759578035, "grad_norm": 0.09819374233484268, "learning_rate": 9.928981260801498e-06, "loss": 0.0032, "step": 36280 }, { "epoch": 0.29676575213640266, "grad_norm": 0.09138073027133942, "learning_rate": 9.928861359227824e-06, "loss": 0.0035, "step": 36290 }, { "epoch": 0.29684752831500183, "grad_norm": 0.2721363604068756, "learning_rate": 9.928741357249236e-06, "loss": 0.0035, "step": 36300 }, { "epoch": 0.296929304493601, "grad_norm": 0.10557273030281067, "learning_rate": 9.928621254868181e-06, "loss": 0.0024, "step": 36310 }, { "epoch": 0.29701108067220017, "grad_norm": 0.15982528030872345, "learning_rate": 9.928501052087105e-06, "loss": 0.0035, "step": 36320 }, { "epoch": 0.29709285685079934, "grad_norm": 0.057128701359033585, "learning_rate": 9.928380748908458e-06, "loss": 0.0032, "step": 36330 }, { "epoch": 0.29717463302939856, "grad_norm": 0.10042771697044373, "learning_rate": 9.928260345334686e-06, "loss": 0.0054, "step": 36340 }, { "epoch": 0.29725640920799773, "grad_norm": 0.13636305928230286, "learning_rate": 9.928139841368247e-06, "loss": 0.0034, "step": 36350 }, { "epoch": 0.2973381853865969, "grad_norm": 0.14707982540130615, "learning_rate": 9.928019237011593e-06, "loss": 0.0031, "step": 36360 }, { "epoch": 0.29741996156519607, "grad_norm": 0.046254925429821014, "learning_rate": 9.927898532267181e-06, "loss": 0.0031, "step": 36370 }, { "epoch": 0.29750173774379524, "grad_norm": 0.04500706121325493, "learning_rate": 9.92777772713747e-06, "loss": 0.0038, "step": 36380 }, { "epoch": 0.2975835139223944, "grad_norm": 0.08834520727396011, "learning_rate": 9.927656821624923e-06, "loss": 0.0021, "step": 36390 }, { "epoch": 0.2976652901009936, "grad_norm": 0.11428151279687881, "learning_rate": 9.927535815732002e-06, "loss": 0.0047, "step": 36400 }, { "epoch": 0.29774706627959274, "grad_norm": 0.06771322339773178, "learning_rate": 9.927414709461167e-06, "loss": 0.005, "step": 36410 }, { "epoch": 0.2978288424581919, "grad_norm": 0.12411604821681976, "learning_rate": 9.927293502814892e-06, "loss": 0.0034, "step": 36420 }, { "epoch": 0.2979106186367911, "grad_norm": 0.10176490992307663, "learning_rate": 9.927172195795642e-06, "loss": 0.0025, "step": 36430 }, { "epoch": 0.29799239481539025, "grad_norm": 0.041776057332754135, "learning_rate": 9.927050788405892e-06, "loss": 0.0032, "step": 36440 }, { "epoch": 0.2980741709939895, "grad_norm": 0.11511188745498657, "learning_rate": 9.92692928064811e-06, "loss": 0.005, "step": 36450 }, { "epoch": 0.29815594717258864, "grad_norm": 0.08014586567878723, "learning_rate": 9.926807672524774e-06, "loss": 0.0035, "step": 36460 }, { "epoch": 0.2982377233511878, "grad_norm": 0.06831270456314087, "learning_rate": 9.926685964038362e-06, "loss": 0.0024, "step": 36470 }, { "epoch": 0.298319499529787, "grad_norm": 0.2258106768131256, "learning_rate": 9.926564155191351e-06, "loss": 0.0037, "step": 36480 }, { "epoch": 0.29840127570838615, "grad_norm": 0.06288804858922958, "learning_rate": 9.926442245986225e-06, "loss": 0.0028, "step": 36490 }, { "epoch": 0.2984830518869853, "grad_norm": 0.05273565277457237, "learning_rate": 9.926320236425466e-06, "loss": 0.0033, "step": 36500 }, { "epoch": 0.2985648280655845, "grad_norm": 0.13721156120300293, "learning_rate": 9.926198126511556e-06, "loss": 0.0026, "step": 36510 }, { "epoch": 0.29864660424418366, "grad_norm": 0.08701542764902115, "learning_rate": 9.92607591624699e-06, "loss": 0.0035, "step": 36520 }, { "epoch": 0.2987283804227828, "grad_norm": 0.04393363371491432, "learning_rate": 9.925953605634254e-06, "loss": 0.004, "step": 36530 }, { "epoch": 0.298810156601382, "grad_norm": 0.172753244638443, "learning_rate": 9.925831194675838e-06, "loss": 0.0051, "step": 36540 }, { "epoch": 0.2988919327799812, "grad_norm": 0.036883555352687836, "learning_rate": 9.925708683374233e-06, "loss": 0.003, "step": 36550 }, { "epoch": 0.2989737089585804, "grad_norm": 0.06653615087270737, "learning_rate": 9.925586071731943e-06, "loss": 0.0032, "step": 36560 }, { "epoch": 0.29905548513717956, "grad_norm": 0.038452502340078354, "learning_rate": 9.925463359751458e-06, "loss": 0.0036, "step": 36570 }, { "epoch": 0.2991372613157787, "grad_norm": 0.07988383620977402, "learning_rate": 9.925340547435282e-06, "loss": 0.0026, "step": 36580 }, { "epoch": 0.2992190374943779, "grad_norm": 0.3231379985809326, "learning_rate": 9.925217634785916e-06, "loss": 0.006, "step": 36590 }, { "epoch": 0.29930081367297706, "grad_norm": 0.17980389297008514, "learning_rate": 9.925094621805863e-06, "loss": 0.0044, "step": 36600 }, { "epoch": 0.29938258985157623, "grad_norm": 0.08247332274913788, "learning_rate": 9.924971508497629e-06, "loss": 0.0029, "step": 36610 }, { "epoch": 0.2994643660301754, "grad_norm": 0.1277390718460083, "learning_rate": 9.924848294863722e-06, "loss": 0.0033, "step": 36620 }, { "epoch": 0.29954614220877457, "grad_norm": 0.06361080706119537, "learning_rate": 9.924724980906653e-06, "loss": 0.0029, "step": 36630 }, { "epoch": 0.29962791838737374, "grad_norm": 0.032881852239370346, "learning_rate": 9.924601566628932e-06, "loss": 0.0022, "step": 36640 }, { "epoch": 0.2997096945659729, "grad_norm": 0.08941551297903061, "learning_rate": 9.924478052033076e-06, "loss": 0.0037, "step": 36650 }, { "epoch": 0.29979147074457213, "grad_norm": 0.08812883496284485, "learning_rate": 9.924354437121597e-06, "loss": 0.0048, "step": 36660 }, { "epoch": 0.2998732469231713, "grad_norm": 0.10532015562057495, "learning_rate": 9.924230721897015e-06, "loss": 0.0038, "step": 36670 }, { "epoch": 0.29995502310177047, "grad_norm": 0.15489356219768524, "learning_rate": 9.924106906361852e-06, "loss": 0.0033, "step": 36680 }, { "epoch": 0.30003679928036964, "grad_norm": 0.11886090040206909, "learning_rate": 9.92398299051863e-06, "loss": 0.0079, "step": 36690 }, { "epoch": 0.3001185754589688, "grad_norm": 0.16367913782596588, "learning_rate": 9.923858974369872e-06, "loss": 0.0028, "step": 36700 }, { "epoch": 0.300200351637568, "grad_norm": 0.13494081795215607, "learning_rate": 9.9237348579181e-06, "loss": 0.0027, "step": 36710 }, { "epoch": 0.30028212781616714, "grad_norm": 0.13285142183303833, "learning_rate": 9.92361064116585e-06, "loss": 0.0132, "step": 36720 }, { "epoch": 0.3003639039947663, "grad_norm": 0.14481595158576965, "learning_rate": 9.92348632411565e-06, "loss": 0.0038, "step": 36730 }, { "epoch": 0.3004456801733655, "grad_norm": 0.22247911989688873, "learning_rate": 9.923361906770031e-06, "loss": 0.0035, "step": 36740 }, { "epoch": 0.30052745635196465, "grad_norm": 0.203705832362175, "learning_rate": 9.923237389131528e-06, "loss": 0.0053, "step": 36750 }, { "epoch": 0.3006092325305638, "grad_norm": 0.07258419692516327, "learning_rate": 9.923112771202676e-06, "loss": 0.0029, "step": 36760 }, { "epoch": 0.30069100870916304, "grad_norm": 0.09704571962356567, "learning_rate": 9.922988052986017e-06, "loss": 0.0032, "step": 36770 }, { "epoch": 0.3007727848877622, "grad_norm": 0.14186063408851624, "learning_rate": 9.922863234484088e-06, "loss": 0.0034, "step": 36780 }, { "epoch": 0.3008545610663614, "grad_norm": 0.29805517196655273, "learning_rate": 9.922738315699437e-06, "loss": 0.0013, "step": 36790 }, { "epoch": 0.30093633724496055, "grad_norm": 0.13268797099590302, "learning_rate": 9.922613296634602e-06, "loss": 0.0049, "step": 36800 }, { "epoch": 0.3010181134235597, "grad_norm": 0.15317749977111816, "learning_rate": 9.922488177292133e-06, "loss": 0.0034, "step": 36810 }, { "epoch": 0.3010998896021589, "grad_norm": 0.1284603625535965, "learning_rate": 9.922362957674579e-06, "loss": 0.0051, "step": 36820 }, { "epoch": 0.30118166578075806, "grad_norm": 0.0809970274567604, "learning_rate": 9.92223763778449e-06, "loss": 0.0024, "step": 36830 }, { "epoch": 0.3012634419593572, "grad_norm": 0.05531705915927887, "learning_rate": 9.92211221762442e-06, "loss": 0.0032, "step": 36840 }, { "epoch": 0.3013452181379564, "grad_norm": 0.104256771504879, "learning_rate": 9.921986697196923e-06, "loss": 0.0047, "step": 36850 }, { "epoch": 0.30142699431655556, "grad_norm": 0.06565428525209427, "learning_rate": 9.921861076504557e-06, "loss": 0.0035, "step": 36860 }, { "epoch": 0.3015087704951548, "grad_norm": 0.1631917506456375, "learning_rate": 9.92173535554988e-06, "loss": 0.0024, "step": 36870 }, { "epoch": 0.30159054667375396, "grad_norm": 0.16382604837417603, "learning_rate": 9.921609534335452e-06, "loss": 0.0031, "step": 36880 }, { "epoch": 0.3016723228523531, "grad_norm": 0.03044945001602173, "learning_rate": 9.921483612863838e-06, "loss": 0.0025, "step": 36890 }, { "epoch": 0.3017540990309523, "grad_norm": 0.07675189524888992, "learning_rate": 9.921357591137601e-06, "loss": 0.0024, "step": 36900 }, { "epoch": 0.30183587520955146, "grad_norm": 0.129400372505188, "learning_rate": 9.921231469159312e-06, "loss": 0.0034, "step": 36910 }, { "epoch": 0.30191765138815063, "grad_norm": 0.11532621830701828, "learning_rate": 9.921105246931537e-06, "loss": 0.0041, "step": 36920 }, { "epoch": 0.3019994275667498, "grad_norm": 0.07201151549816132, "learning_rate": 9.920978924456848e-06, "loss": 0.0041, "step": 36930 }, { "epoch": 0.30208120374534897, "grad_norm": 0.06136823073029518, "learning_rate": 9.920852501737817e-06, "loss": 0.0021, "step": 36940 }, { "epoch": 0.30216297992394814, "grad_norm": 0.18149122595787048, "learning_rate": 9.920725978777023e-06, "loss": 0.0046, "step": 36950 }, { "epoch": 0.3022447561025473, "grad_norm": 0.08885589241981506, "learning_rate": 9.920599355577038e-06, "loss": 0.003, "step": 36960 }, { "epoch": 0.3023265322811465, "grad_norm": 0.09543449431657791, "learning_rate": 9.920472632140447e-06, "loss": 0.0033, "step": 36970 }, { "epoch": 0.3024083084597457, "grad_norm": 0.0858636349439621, "learning_rate": 9.92034580846983e-06, "loss": 0.005, "step": 36980 }, { "epoch": 0.30249008463834487, "grad_norm": 0.05460560321807861, "learning_rate": 9.920218884567767e-06, "loss": 0.002, "step": 36990 }, { "epoch": 0.30257186081694404, "grad_norm": 0.08823078870773315, "learning_rate": 9.920091860436846e-06, "loss": 0.0023, "step": 37000 }, { "epoch": 0.3026536369955432, "grad_norm": 0.1826135218143463, "learning_rate": 9.919964736079654e-06, "loss": 0.0034, "step": 37010 }, { "epoch": 0.3027354131741424, "grad_norm": 0.10932762920856476, "learning_rate": 9.919837511498784e-06, "loss": 0.0041, "step": 37020 }, { "epoch": 0.30281718935274154, "grad_norm": 0.06472879648208618, "learning_rate": 9.919710186696822e-06, "loss": 0.0028, "step": 37030 }, { "epoch": 0.3028989655313407, "grad_norm": 0.1489359438419342, "learning_rate": 9.919582761676368e-06, "loss": 0.0043, "step": 37040 }, { "epoch": 0.3029807417099399, "grad_norm": 0.14487692713737488, "learning_rate": 9.91945523644001e-06, "loss": 0.0029, "step": 37050 }, { "epoch": 0.30306251788853905, "grad_norm": 0.2578354775905609, "learning_rate": 9.919327610990353e-06, "loss": 0.0033, "step": 37060 }, { "epoch": 0.3031442940671382, "grad_norm": 0.2504293918609619, "learning_rate": 9.919199885329992e-06, "loss": 0.0035, "step": 37070 }, { "epoch": 0.3032260702457374, "grad_norm": 0.058348096907138824, "learning_rate": 9.919072059461533e-06, "loss": 0.0027, "step": 37080 }, { "epoch": 0.3033078464243366, "grad_norm": 0.24201449751853943, "learning_rate": 9.918944133387575e-06, "loss": 0.0029, "step": 37090 }, { "epoch": 0.3033896226029358, "grad_norm": 0.08358119428157806, "learning_rate": 9.918816107110729e-06, "loss": 0.0023, "step": 37100 }, { "epoch": 0.30347139878153495, "grad_norm": 0.12432091683149338, "learning_rate": 9.9186879806336e-06, "loss": 0.0042, "step": 37110 }, { "epoch": 0.3035531749601341, "grad_norm": 0.11008919775485992, "learning_rate": 9.9185597539588e-06, "loss": 0.0034, "step": 37120 }, { "epoch": 0.3036349511387333, "grad_norm": 0.08574902266263962, "learning_rate": 9.918431427088938e-06, "loss": 0.0041, "step": 37130 }, { "epoch": 0.30371672731733246, "grad_norm": 0.13059508800506592, "learning_rate": 9.91830300002663e-06, "loss": 0.002, "step": 37140 }, { "epoch": 0.3037985034959316, "grad_norm": 0.0286191888153553, "learning_rate": 9.918174472774494e-06, "loss": 0.0031, "step": 37150 }, { "epoch": 0.3038802796745308, "grad_norm": 0.1684986650943756, "learning_rate": 9.918045845335145e-06, "loss": 0.0035, "step": 37160 }, { "epoch": 0.30396205585312996, "grad_norm": 0.13940323889255524, "learning_rate": 9.917917117711203e-06, "loss": 0.0034, "step": 37170 }, { "epoch": 0.30404383203172913, "grad_norm": 0.05454206466674805, "learning_rate": 9.917788289905294e-06, "loss": 0.002, "step": 37180 }, { "epoch": 0.30412560821032836, "grad_norm": 0.1494109183549881, "learning_rate": 9.917659361920039e-06, "loss": 0.004, "step": 37190 }, { "epoch": 0.3042073843889275, "grad_norm": 0.3947841227054596, "learning_rate": 9.917530333758066e-06, "loss": 0.0032, "step": 37200 }, { "epoch": 0.3042891605675267, "grad_norm": 0.10017157346010208, "learning_rate": 9.917401205422002e-06, "loss": 0.0028, "step": 37210 }, { "epoch": 0.30437093674612586, "grad_norm": 0.13186907768249512, "learning_rate": 9.91727197691448e-06, "loss": 0.0036, "step": 37220 }, { "epoch": 0.30445271292472503, "grad_norm": 0.039281658828258514, "learning_rate": 9.91714264823813e-06, "loss": 0.0038, "step": 37230 }, { "epoch": 0.3045344891033242, "grad_norm": 0.09320668131113052, "learning_rate": 9.917013219395588e-06, "loss": 0.0025, "step": 37240 }, { "epoch": 0.30461626528192337, "grad_norm": 0.031000182032585144, "learning_rate": 9.91688369038949e-06, "loss": 0.0024, "step": 37250 }, { "epoch": 0.30469804146052254, "grad_norm": 0.07827352732419968, "learning_rate": 9.916754061222473e-06, "loss": 0.0027, "step": 37260 }, { "epoch": 0.3047798176391217, "grad_norm": 0.09681306034326553, "learning_rate": 9.91662433189718e-06, "loss": 0.0033, "step": 37270 }, { "epoch": 0.3048615938177209, "grad_norm": 0.11283696442842484, "learning_rate": 9.916494502416254e-06, "loss": 0.0037, "step": 37280 }, { "epoch": 0.30494336999632005, "grad_norm": 0.05795988813042641, "learning_rate": 9.916364572782338e-06, "loss": 0.0041, "step": 37290 }, { "epoch": 0.30502514617491927, "grad_norm": 0.17333096265792847, "learning_rate": 9.916234542998079e-06, "loss": 0.0024, "step": 37300 }, { "epoch": 0.30510692235351844, "grad_norm": 0.09152255952358246, "learning_rate": 9.916104413066126e-06, "loss": 0.0033, "step": 37310 }, { "epoch": 0.3051886985321176, "grad_norm": 0.12407618016004562, "learning_rate": 9.91597418298913e-06, "loss": 0.0054, "step": 37320 }, { "epoch": 0.3052704747107168, "grad_norm": 0.17801953852176666, "learning_rate": 9.915843852769746e-06, "loss": 0.0036, "step": 37330 }, { "epoch": 0.30535225088931595, "grad_norm": 0.4254058003425598, "learning_rate": 9.915713422410624e-06, "loss": 0.0038, "step": 37340 }, { "epoch": 0.3054340270679151, "grad_norm": 0.056423768401145935, "learning_rate": 9.915582891914426e-06, "loss": 0.0051, "step": 37350 }, { "epoch": 0.3055158032465143, "grad_norm": 0.07211907207965851, "learning_rate": 9.915452261283808e-06, "loss": 0.0052, "step": 37360 }, { "epoch": 0.30559757942511345, "grad_norm": 0.11523666977882385, "learning_rate": 9.915321530521433e-06, "loss": 0.0025, "step": 37370 }, { "epoch": 0.3056793556037126, "grad_norm": 0.2774188816547394, "learning_rate": 9.915190699629962e-06, "loss": 0.0022, "step": 37380 }, { "epoch": 0.3057611317823118, "grad_norm": 0.17204764485359192, "learning_rate": 9.91505976861206e-06, "loss": 0.0036, "step": 37390 }, { "epoch": 0.30584290796091096, "grad_norm": 0.11738008260726929, "learning_rate": 9.914928737470398e-06, "loss": 0.0051, "step": 37400 }, { "epoch": 0.3059246841395102, "grad_norm": 0.08308838307857513, "learning_rate": 9.914797606207642e-06, "loss": 0.0029, "step": 37410 }, { "epoch": 0.30600646031810935, "grad_norm": 0.21045279502868652, "learning_rate": 9.914666374826462e-06, "loss": 0.0039, "step": 37420 }, { "epoch": 0.3060882364967085, "grad_norm": 0.02536752074956894, "learning_rate": 9.914535043329535e-06, "loss": 0.0024, "step": 37430 }, { "epoch": 0.3061700126753077, "grad_norm": 0.24582692980766296, "learning_rate": 9.914403611719535e-06, "loss": 0.0021, "step": 37440 }, { "epoch": 0.30625178885390686, "grad_norm": 0.38653889298439026, "learning_rate": 9.914272079999137e-06, "loss": 0.0028, "step": 37450 }, { "epoch": 0.30633356503250603, "grad_norm": 0.10677693039178848, "learning_rate": 9.914140448171023e-06, "loss": 0.0022, "step": 37460 }, { "epoch": 0.3064153412111052, "grad_norm": 0.08991928398609161, "learning_rate": 9.914008716237872e-06, "loss": 0.0038, "step": 37470 }, { "epoch": 0.30649711738970437, "grad_norm": 0.06399353593587875, "learning_rate": 9.913876884202372e-06, "loss": 0.0026, "step": 37480 }, { "epoch": 0.30657889356830353, "grad_norm": 0.20263469219207764, "learning_rate": 9.913744952067203e-06, "loss": 0.0046, "step": 37490 }, { "epoch": 0.3066606697469027, "grad_norm": 0.12950316071510315, "learning_rate": 9.913612919835057e-06, "loss": 0.0033, "step": 37500 }, { "epoch": 0.3067424459255019, "grad_norm": 0.08820205926895142, "learning_rate": 9.913480787508621e-06, "loss": 0.0045, "step": 37510 }, { "epoch": 0.3068242221041011, "grad_norm": 0.0268060602247715, "learning_rate": 9.913348555090589e-06, "loss": 0.0026, "step": 37520 }, { "epoch": 0.30690599828270027, "grad_norm": 0.05542019382119179, "learning_rate": 9.91321622258365e-06, "loss": 0.0033, "step": 37530 }, { "epoch": 0.30698777446129943, "grad_norm": 0.04904759302735329, "learning_rate": 9.913083789990505e-06, "loss": 0.0034, "step": 37540 }, { "epoch": 0.3070695506398986, "grad_norm": 0.05104585364460945, "learning_rate": 9.91295125731385e-06, "loss": 0.0026, "step": 37550 }, { "epoch": 0.30715132681849777, "grad_norm": 0.2330123484134674, "learning_rate": 9.912818624556385e-06, "loss": 0.0021, "step": 37560 }, { "epoch": 0.30723310299709694, "grad_norm": 0.17553454637527466, "learning_rate": 9.91268589172081e-06, "loss": 0.0065, "step": 37570 }, { "epoch": 0.3073148791756961, "grad_norm": 0.02138783037662506, "learning_rate": 9.91255305880983e-06, "loss": 0.0032, "step": 37580 }, { "epoch": 0.3073966553542953, "grad_norm": 0.10560108721256256, "learning_rate": 9.912420125826151e-06, "loss": 0.0034, "step": 37590 }, { "epoch": 0.30747843153289445, "grad_norm": 0.4307064116001129, "learning_rate": 9.91228709277248e-06, "loss": 0.0057, "step": 37600 }, { "epoch": 0.3075602077114936, "grad_norm": 0.11056641489267349, "learning_rate": 9.912153959651532e-06, "loss": 0.0038, "step": 37610 }, { "epoch": 0.30764198389009284, "grad_norm": 0.25538575649261475, "learning_rate": 9.912020726466012e-06, "loss": 0.005, "step": 37620 }, { "epoch": 0.307723760068692, "grad_norm": 0.5098254084587097, "learning_rate": 9.911887393218637e-06, "loss": 0.0052, "step": 37630 }, { "epoch": 0.3078055362472912, "grad_norm": 0.10425916314125061, "learning_rate": 9.911753959912124e-06, "loss": 0.0019, "step": 37640 }, { "epoch": 0.30788731242589035, "grad_norm": 0.1625686138868332, "learning_rate": 9.91162042654919e-06, "loss": 0.003, "step": 37650 }, { "epoch": 0.3079690886044895, "grad_norm": 0.05855906009674072, "learning_rate": 9.911486793132556e-06, "loss": 0.0023, "step": 37660 }, { "epoch": 0.3080508647830887, "grad_norm": 0.16001415252685547, "learning_rate": 9.911353059664943e-06, "loss": 0.003, "step": 37670 }, { "epoch": 0.30813264096168785, "grad_norm": 0.09230709075927734, "learning_rate": 9.911219226149077e-06, "loss": 0.0024, "step": 37680 }, { "epoch": 0.308214417140287, "grad_norm": 0.1314920336008072, "learning_rate": 9.911085292587684e-06, "loss": 0.0033, "step": 37690 }, { "epoch": 0.3082961933188862, "grad_norm": 0.13231267035007477, "learning_rate": 9.91095125898349e-06, "loss": 0.0024, "step": 37700 }, { "epoch": 0.30837796949748536, "grad_norm": 0.039307404309511185, "learning_rate": 9.910817125339228e-06, "loss": 0.004, "step": 37710 }, { "epoch": 0.30845974567608453, "grad_norm": 0.11130400747060776, "learning_rate": 9.910682891657628e-06, "loss": 0.0022, "step": 37720 }, { "epoch": 0.30854152185468375, "grad_norm": 0.12008846551179886, "learning_rate": 9.910548557941426e-06, "loss": 0.0044, "step": 37730 }, { "epoch": 0.3086232980332829, "grad_norm": 0.31605035066604614, "learning_rate": 9.910414124193359e-06, "loss": 0.0021, "step": 37740 }, { "epoch": 0.3087050742118821, "grad_norm": 0.20151576399803162, "learning_rate": 9.910279590416166e-06, "loss": 0.0033, "step": 37750 }, { "epoch": 0.30878685039048126, "grad_norm": 0.13525831699371338, "learning_rate": 9.910144956612585e-06, "loss": 0.0031, "step": 37760 }, { "epoch": 0.30886862656908043, "grad_norm": 0.13824570178985596, "learning_rate": 9.910010222785361e-06, "loss": 0.0029, "step": 37770 }, { "epoch": 0.3089504027476796, "grad_norm": 0.07766985893249512, "learning_rate": 9.909875388937238e-06, "loss": 0.0027, "step": 37780 }, { "epoch": 0.30903217892627877, "grad_norm": 0.0738568976521492, "learning_rate": 9.909740455070962e-06, "loss": 0.0032, "step": 37790 }, { "epoch": 0.30911395510487794, "grad_norm": 0.17023539543151855, "learning_rate": 9.90960542118928e-06, "loss": 0.0036, "step": 37800 }, { "epoch": 0.3091957312834771, "grad_norm": 0.14776574075222015, "learning_rate": 9.909470287294948e-06, "loss": 0.0022, "step": 37810 }, { "epoch": 0.3092775074620763, "grad_norm": 0.010184288024902344, "learning_rate": 9.909335053390714e-06, "loss": 0.0026, "step": 37820 }, { "epoch": 0.3093592836406755, "grad_norm": 0.1557367742061615, "learning_rate": 9.909199719479334e-06, "loss": 0.0026, "step": 37830 }, { "epoch": 0.30944105981927467, "grad_norm": 0.10981618613004684, "learning_rate": 9.909064285563568e-06, "loss": 0.0025, "step": 37840 }, { "epoch": 0.30952283599787384, "grad_norm": 0.14150641858577728, "learning_rate": 9.90892875164617e-06, "loss": 0.0041, "step": 37850 }, { "epoch": 0.309604612176473, "grad_norm": 0.09389884024858475, "learning_rate": 9.908793117729902e-06, "loss": 0.0027, "step": 37860 }, { "epoch": 0.3096863883550722, "grad_norm": 0.10747452825307846, "learning_rate": 9.90865738381753e-06, "loss": 0.0052, "step": 37870 }, { "epoch": 0.30976816453367134, "grad_norm": 0.23200049996376038, "learning_rate": 9.908521549911817e-06, "loss": 0.0033, "step": 37880 }, { "epoch": 0.3098499407122705, "grad_norm": 0.2224397361278534, "learning_rate": 9.90838561601553e-06, "loss": 0.004, "step": 37890 }, { "epoch": 0.3099317168908697, "grad_norm": 0.17487306892871857, "learning_rate": 9.908249582131438e-06, "loss": 0.0085, "step": 37900 }, { "epoch": 0.31001349306946885, "grad_norm": 0.2145022451877594, "learning_rate": 9.908113448262314e-06, "loss": 0.0053, "step": 37910 }, { "epoch": 0.310095269248068, "grad_norm": 0.06535923480987549, "learning_rate": 9.907977214410927e-06, "loss": 0.0053, "step": 37920 }, { "epoch": 0.3101770454266672, "grad_norm": 0.2080136090517044, "learning_rate": 9.907840880580054e-06, "loss": 0.0031, "step": 37930 }, { "epoch": 0.3102588216052664, "grad_norm": 0.100168377161026, "learning_rate": 9.907704446772477e-06, "loss": 0.0027, "step": 37940 }, { "epoch": 0.3103405977838656, "grad_norm": 0.05559662729501724, "learning_rate": 9.907567912990969e-06, "loss": 0.0033, "step": 37950 }, { "epoch": 0.31042237396246475, "grad_norm": 0.12254935503005981, "learning_rate": 9.907431279238313e-06, "loss": 0.0042, "step": 37960 }, { "epoch": 0.3105041501410639, "grad_norm": 0.11677790433168411, "learning_rate": 9.907294545517295e-06, "loss": 0.0038, "step": 37970 }, { "epoch": 0.3105859263196631, "grad_norm": 0.07781272381544113, "learning_rate": 9.907157711830695e-06, "loss": 0.0028, "step": 37980 }, { "epoch": 0.31066770249826225, "grad_norm": 0.10296205431222916, "learning_rate": 9.907020778181305e-06, "loss": 0.0032, "step": 37990 }, { "epoch": 0.3107494786768614, "grad_norm": 0.0441121868789196, "learning_rate": 9.906883744571914e-06, "loss": 0.0043, "step": 38000 }, { "epoch": 0.3108312548554606, "grad_norm": 0.1406482458114624, "learning_rate": 9.906746611005313e-06, "loss": 0.0027, "step": 38010 }, { "epoch": 0.31091303103405976, "grad_norm": 0.07564853876829147, "learning_rate": 9.906609377484293e-06, "loss": 0.0018, "step": 38020 }, { "epoch": 0.31099480721265893, "grad_norm": 0.1233968436717987, "learning_rate": 9.906472044011653e-06, "loss": 0.0024, "step": 38030 }, { "epoch": 0.3110765833912581, "grad_norm": 0.06842197477817535, "learning_rate": 9.90633461059019e-06, "loss": 0.0043, "step": 38040 }, { "epoch": 0.3111583595698573, "grad_norm": 0.0987224131822586, "learning_rate": 9.906197077222701e-06, "loss": 0.0032, "step": 38050 }, { "epoch": 0.3112401357484565, "grad_norm": 0.10022330284118652, "learning_rate": 9.906059443911991e-06, "loss": 0.002, "step": 38060 }, { "epoch": 0.31132191192705566, "grad_norm": 0.09857793897390366, "learning_rate": 9.905921710660862e-06, "loss": 0.0028, "step": 38070 }, { "epoch": 0.31140368810565483, "grad_norm": 0.034711647778749466, "learning_rate": 9.905783877472121e-06, "loss": 0.0034, "step": 38080 }, { "epoch": 0.311485464284254, "grad_norm": 0.26291412115097046, "learning_rate": 9.905645944348573e-06, "loss": 0.0018, "step": 38090 }, { "epoch": 0.31156724046285317, "grad_norm": 0.0726628303527832, "learning_rate": 9.90550791129303e-06, "loss": 0.0033, "step": 38100 }, { "epoch": 0.31164901664145234, "grad_norm": 0.12359990179538727, "learning_rate": 9.905369778308304e-06, "loss": 0.0024, "step": 38110 }, { "epoch": 0.3117307928200515, "grad_norm": 0.07519210129976273, "learning_rate": 9.90523154539721e-06, "loss": 0.0025, "step": 38120 }, { "epoch": 0.3118125689986507, "grad_norm": 0.07466939091682434, "learning_rate": 9.90509321256256e-06, "loss": 0.0035, "step": 38130 }, { "epoch": 0.31189434517724984, "grad_norm": 0.1951335221529007, "learning_rate": 9.904954779807173e-06, "loss": 0.0046, "step": 38140 }, { "epoch": 0.31197612135584907, "grad_norm": 0.14110878109931946, "learning_rate": 9.904816247133872e-06, "loss": 0.0031, "step": 38150 }, { "epoch": 0.31205789753444824, "grad_norm": 0.24945318698883057, "learning_rate": 9.904677614545477e-06, "loss": 0.0047, "step": 38160 }, { "epoch": 0.3121396737130474, "grad_norm": 0.24191711843013763, "learning_rate": 9.904538882044812e-06, "loss": 0.0022, "step": 38170 }, { "epoch": 0.3122214498916466, "grad_norm": 0.03813208267092705, "learning_rate": 9.904400049634704e-06, "loss": 0.0032, "step": 38180 }, { "epoch": 0.31230322607024574, "grad_norm": 0.13325214385986328, "learning_rate": 9.904261117317981e-06, "loss": 0.0033, "step": 38190 }, { "epoch": 0.3123850022488449, "grad_norm": 0.10973820090293884, "learning_rate": 9.904122085097472e-06, "loss": 0.0022, "step": 38200 }, { "epoch": 0.3124667784274441, "grad_norm": 0.16976119577884674, "learning_rate": 9.90398295297601e-06, "loss": 0.0029, "step": 38210 }, { "epoch": 0.31254855460604325, "grad_norm": 0.13752688467502594, "learning_rate": 9.903843720956427e-06, "loss": 0.0034, "step": 38220 }, { "epoch": 0.3126303307846424, "grad_norm": 0.01954893209040165, "learning_rate": 9.903704389041564e-06, "loss": 0.0021, "step": 38230 }, { "epoch": 0.3127121069632416, "grad_norm": 0.14697183668613434, "learning_rate": 9.903564957234256e-06, "loss": 0.0047, "step": 38240 }, { "epoch": 0.31279388314184076, "grad_norm": 0.1422737091779709, "learning_rate": 9.903425425537343e-06, "loss": 0.0035, "step": 38250 }, { "epoch": 0.31287565932044, "grad_norm": 0.05010201409459114, "learning_rate": 9.90328579395367e-06, "loss": 0.0023, "step": 38260 }, { "epoch": 0.31295743549903915, "grad_norm": 0.13415132462978363, "learning_rate": 9.903146062486077e-06, "loss": 0.0037, "step": 38270 }, { "epoch": 0.3130392116776383, "grad_norm": 0.04383888095617294, "learning_rate": 9.903006231137417e-06, "loss": 0.0052, "step": 38280 }, { "epoch": 0.3131209878562375, "grad_norm": 0.148223415017128, "learning_rate": 9.902866299910531e-06, "loss": 0.0033, "step": 38290 }, { "epoch": 0.31320276403483666, "grad_norm": 0.15200357139110565, "learning_rate": 9.902726268808276e-06, "loss": 0.0037, "step": 38300 }, { "epoch": 0.3132845402134358, "grad_norm": 0.20924746990203857, "learning_rate": 9.902586137833501e-06, "loss": 0.0042, "step": 38310 }, { "epoch": 0.313366316392035, "grad_norm": 0.08080127835273743, "learning_rate": 9.902445906989061e-06, "loss": 0.0036, "step": 38320 }, { "epoch": 0.31344809257063416, "grad_norm": 0.1324995458126068, "learning_rate": 9.902305576277812e-06, "loss": 0.0037, "step": 38330 }, { "epoch": 0.31352986874923333, "grad_norm": 0.1407477855682373, "learning_rate": 9.902165145702615e-06, "loss": 0.003, "step": 38340 }, { "epoch": 0.3136116449278325, "grad_norm": 0.11941145360469818, "learning_rate": 9.90202461526633e-06, "loss": 0.0019, "step": 38350 }, { "epoch": 0.31369342110643167, "grad_norm": 0.05513123422861099, "learning_rate": 9.901883984971818e-06, "loss": 0.0019, "step": 38360 }, { "epoch": 0.3137751972850309, "grad_norm": 0.2072637379169464, "learning_rate": 9.901743254821946e-06, "loss": 0.0039, "step": 38370 }, { "epoch": 0.31385697346363006, "grad_norm": 0.20225097239017487, "learning_rate": 9.901602424819579e-06, "loss": 0.0034, "step": 38380 }, { "epoch": 0.31393874964222923, "grad_norm": 0.07307428866624832, "learning_rate": 9.901461494967585e-06, "loss": 0.0043, "step": 38390 }, { "epoch": 0.3140205258208284, "grad_norm": 0.0849013552069664, "learning_rate": 9.901320465268836e-06, "loss": 0.0035, "step": 38400 }, { "epoch": 0.31410230199942757, "grad_norm": 0.06902239471673965, "learning_rate": 9.901179335726208e-06, "loss": 0.0037, "step": 38410 }, { "epoch": 0.31418407817802674, "grad_norm": 0.08569420874118805, "learning_rate": 9.90103810634257e-06, "loss": 0.0031, "step": 38420 }, { "epoch": 0.3142658543566259, "grad_norm": 0.09627140313386917, "learning_rate": 9.900896777120804e-06, "loss": 0.0015, "step": 38430 }, { "epoch": 0.3143476305352251, "grad_norm": 0.06240862235426903, "learning_rate": 9.900755348063787e-06, "loss": 0.0021, "step": 38440 }, { "epoch": 0.31442940671382424, "grad_norm": 0.17056933045387268, "learning_rate": 9.900613819174398e-06, "loss": 0.0025, "step": 38450 }, { "epoch": 0.3145111828924234, "grad_norm": 0.03600102290511131, "learning_rate": 9.900472190455524e-06, "loss": 0.0026, "step": 38460 }, { "epoch": 0.31459295907102264, "grad_norm": 0.12301401048898697, "learning_rate": 9.900330461910046e-06, "loss": 0.0033, "step": 38470 }, { "epoch": 0.3146747352496218, "grad_norm": 0.03771182522177696, "learning_rate": 9.900188633540855e-06, "loss": 0.0024, "step": 38480 }, { "epoch": 0.314756511428221, "grad_norm": 0.05524560809135437, "learning_rate": 9.900046705350838e-06, "loss": 0.0031, "step": 38490 }, { "epoch": 0.31483828760682014, "grad_norm": 0.7756600975990295, "learning_rate": 9.899904677342887e-06, "loss": 0.0024, "step": 38500 }, { "epoch": 0.3149200637854193, "grad_norm": 0.018928611651062965, "learning_rate": 9.899762549519896e-06, "loss": 0.0042, "step": 38510 }, { "epoch": 0.3150018399640185, "grad_norm": 0.1301824003458023, "learning_rate": 9.899620321884757e-06, "loss": 0.0029, "step": 38520 }, { "epoch": 0.31508361614261765, "grad_norm": 0.21820345520973206, "learning_rate": 9.899477994440371e-06, "loss": 0.0024, "step": 38530 }, { "epoch": 0.3151653923212168, "grad_norm": 0.6930935978889465, "learning_rate": 9.899335567189636e-06, "loss": 0.0026, "step": 38540 }, { "epoch": 0.315247168499816, "grad_norm": 0.09989941120147705, "learning_rate": 9.899193040135451e-06, "loss": 0.0027, "step": 38550 }, { "epoch": 0.31532894467841516, "grad_norm": 0.1407797783613205, "learning_rate": 9.899050413280722e-06, "loss": 0.0028, "step": 38560 }, { "epoch": 0.3154107208570143, "grad_norm": 0.094597727060318, "learning_rate": 9.898907686628356e-06, "loss": 0.0048, "step": 38570 }, { "epoch": 0.31549249703561355, "grad_norm": 0.0600091889500618, "learning_rate": 9.898764860181257e-06, "loss": 0.0034, "step": 38580 }, { "epoch": 0.3155742732142127, "grad_norm": 0.053142666816711426, "learning_rate": 9.898621933942335e-06, "loss": 0.0024, "step": 38590 }, { "epoch": 0.3156560493928119, "grad_norm": 0.04313679412007332, "learning_rate": 9.898478907914504e-06, "loss": 0.0025, "step": 38600 }, { "epoch": 0.31573782557141106, "grad_norm": 0.06835699826478958, "learning_rate": 9.898335782100676e-06, "loss": 0.0046, "step": 38610 }, { "epoch": 0.3158196017500102, "grad_norm": 0.13823391497135162, "learning_rate": 9.898192556503764e-06, "loss": 0.003, "step": 38620 }, { "epoch": 0.3159013779286094, "grad_norm": 0.03548337146639824, "learning_rate": 9.898049231126692e-06, "loss": 0.0026, "step": 38630 }, { "epoch": 0.31598315410720856, "grad_norm": 0.11158620566129684, "learning_rate": 9.897905805972374e-06, "loss": 0.0025, "step": 38640 }, { "epoch": 0.31606493028580773, "grad_norm": 0.08195433765649796, "learning_rate": 9.897762281043733e-06, "loss": 0.0036, "step": 38650 }, { "epoch": 0.3161467064644069, "grad_norm": 0.0761672779917717, "learning_rate": 9.897618656343694e-06, "loss": 0.0031, "step": 38660 }, { "epoch": 0.31622848264300607, "grad_norm": 0.25533559918403625, "learning_rate": 9.897474931875181e-06, "loss": 0.0029, "step": 38670 }, { "epoch": 0.31631025882160524, "grad_norm": 0.09012357890605927, "learning_rate": 9.897331107641124e-06, "loss": 0.003, "step": 38680 }, { "epoch": 0.31639203500020446, "grad_norm": 0.13286004960536957, "learning_rate": 9.89718718364445e-06, "loss": 0.0037, "step": 38690 }, { "epoch": 0.31647381117880363, "grad_norm": 0.04075619950890541, "learning_rate": 9.897043159888094e-06, "loss": 0.0047, "step": 38700 }, { "epoch": 0.3165555873574028, "grad_norm": 1.00094735622406, "learning_rate": 9.896899036374987e-06, "loss": 0.0027, "step": 38710 }, { "epoch": 0.31663736353600197, "grad_norm": 0.2581416070461273, "learning_rate": 9.896754813108067e-06, "loss": 0.0039, "step": 38720 }, { "epoch": 0.31671913971460114, "grad_norm": 0.11630509793758392, "learning_rate": 9.896610490090273e-06, "loss": 0.0045, "step": 38730 }, { "epoch": 0.3168009158932003, "grad_norm": 0.030801083892583847, "learning_rate": 9.89646606732454e-06, "loss": 0.0032, "step": 38740 }, { "epoch": 0.3168826920717995, "grad_norm": 0.26873189210891724, "learning_rate": 9.896321544813814e-06, "loss": 0.0022, "step": 38750 }, { "epoch": 0.31696446825039865, "grad_norm": 0.0909053161740303, "learning_rate": 9.89617692256104e-06, "loss": 0.0023, "step": 38760 }, { "epoch": 0.3170462444289978, "grad_norm": 0.05380889028310776, "learning_rate": 9.89603220056916e-06, "loss": 0.0048, "step": 38770 }, { "epoch": 0.317128020607597, "grad_norm": 0.06588419526815414, "learning_rate": 9.895887378841124e-06, "loss": 0.0027, "step": 38780 }, { "epoch": 0.3172097967861962, "grad_norm": 0.11058725416660309, "learning_rate": 9.895742457379886e-06, "loss": 0.0031, "step": 38790 }, { "epoch": 0.3172915729647954, "grad_norm": 0.19261936843395233, "learning_rate": 9.89559743618839e-06, "loss": 0.0024, "step": 38800 }, { "epoch": 0.31737334914339455, "grad_norm": 0.2296925187110901, "learning_rate": 9.895452315269599e-06, "loss": 0.0041, "step": 38810 }, { "epoch": 0.3174551253219937, "grad_norm": 0.0565347857773304, "learning_rate": 9.895307094626463e-06, "loss": 0.0052, "step": 38820 }, { "epoch": 0.3175369015005929, "grad_norm": 0.1545044481754303, "learning_rate": 9.895161774261943e-06, "loss": 0.003, "step": 38830 }, { "epoch": 0.31761867767919205, "grad_norm": 0.09985829144716263, "learning_rate": 9.895016354178998e-06, "loss": 0.0023, "step": 38840 }, { "epoch": 0.3177004538577912, "grad_norm": 0.2169230580329895, "learning_rate": 9.894870834380593e-06, "loss": 0.0034, "step": 38850 }, { "epoch": 0.3177822300363904, "grad_norm": 0.04092775285243988, "learning_rate": 9.894725214869687e-06, "loss": 0.0038, "step": 38860 }, { "epoch": 0.31786400621498956, "grad_norm": 0.03382478281855583, "learning_rate": 9.894579495649253e-06, "loss": 0.0037, "step": 38870 }, { "epoch": 0.3179457823935887, "grad_norm": 0.0958426371216774, "learning_rate": 9.894433676722255e-06, "loss": 0.0038, "step": 38880 }, { "epoch": 0.3180275585721879, "grad_norm": 0.02224031277000904, "learning_rate": 9.894287758091664e-06, "loss": 0.0051, "step": 38890 }, { "epoch": 0.3181093347507871, "grad_norm": 0.0947590097784996, "learning_rate": 9.894141739760454e-06, "loss": 0.0026, "step": 38900 }, { "epoch": 0.3181911109293863, "grad_norm": 0.26882579922676086, "learning_rate": 9.893995621731597e-06, "loss": 0.003, "step": 38910 }, { "epoch": 0.31827288710798546, "grad_norm": 0.056040164083242416, "learning_rate": 9.893849404008074e-06, "loss": 0.0046, "step": 38920 }, { "epoch": 0.3183546632865846, "grad_norm": 0.07213643193244934, "learning_rate": 9.893703086592859e-06, "loss": 0.0025, "step": 38930 }, { "epoch": 0.3184364394651838, "grad_norm": 0.09040533751249313, "learning_rate": 9.893556669488934e-06, "loss": 0.0023, "step": 38940 }, { "epoch": 0.31851821564378296, "grad_norm": 0.10514456033706665, "learning_rate": 9.893410152699281e-06, "loss": 0.002, "step": 38950 }, { "epoch": 0.31859999182238213, "grad_norm": 0.22251065075397491, "learning_rate": 9.893263536226887e-06, "loss": 0.006, "step": 38960 }, { "epoch": 0.3186817680009813, "grad_norm": 0.12205414474010468, "learning_rate": 9.893116820074737e-06, "loss": 0.0047, "step": 38970 }, { "epoch": 0.31876354417958047, "grad_norm": 0.15284541249275208, "learning_rate": 9.892970004245822e-06, "loss": 0.0021, "step": 38980 }, { "epoch": 0.31884532035817964, "grad_norm": 0.06579596549272537, "learning_rate": 9.892823088743128e-06, "loss": 0.0019, "step": 38990 }, { "epoch": 0.3189270965367788, "grad_norm": 0.15276509523391724, "learning_rate": 9.892676073569652e-06, "loss": 0.0047, "step": 39000 }, { "epoch": 0.31900887271537803, "grad_norm": 0.20653991401195526, "learning_rate": 9.892528958728387e-06, "loss": 0.0033, "step": 39010 }, { "epoch": 0.3190906488939772, "grad_norm": 0.08048754185438156, "learning_rate": 9.89238174422233e-06, "loss": 0.0026, "step": 39020 }, { "epoch": 0.31917242507257637, "grad_norm": 0.1340285688638687, "learning_rate": 9.89223443005448e-06, "loss": 0.0025, "step": 39030 }, { "epoch": 0.31925420125117554, "grad_norm": 0.26651695370674133, "learning_rate": 9.892087016227837e-06, "loss": 0.0028, "step": 39040 }, { "epoch": 0.3193359774297747, "grad_norm": 0.12960870563983917, "learning_rate": 9.891939502745409e-06, "loss": 0.0039, "step": 39050 }, { "epoch": 0.3194177536083739, "grad_norm": 0.12144215404987335, "learning_rate": 9.891791889610194e-06, "loss": 0.0041, "step": 39060 }, { "epoch": 0.31949952978697305, "grad_norm": 0.2172379046678543, "learning_rate": 9.891644176825201e-06, "loss": 0.004, "step": 39070 }, { "epoch": 0.3195813059655722, "grad_norm": 0.24000082910060883, "learning_rate": 9.891496364393443e-06, "loss": 0.0036, "step": 39080 }, { "epoch": 0.3196630821441714, "grad_norm": 0.1395643949508667, "learning_rate": 9.891348452317926e-06, "loss": 0.0045, "step": 39090 }, { "epoch": 0.31974485832277055, "grad_norm": 0.11410173028707504, "learning_rate": 9.891200440601665e-06, "loss": 0.0036, "step": 39100 }, { "epoch": 0.3198266345013698, "grad_norm": 0.17544588446617126, "learning_rate": 9.891052329247676e-06, "loss": 0.0021, "step": 39110 }, { "epoch": 0.31990841067996895, "grad_norm": 0.16108693182468414, "learning_rate": 9.890904118258976e-06, "loss": 0.0032, "step": 39120 }, { "epoch": 0.3199901868585681, "grad_norm": 0.10321754962205887, "learning_rate": 9.890755807638582e-06, "loss": 0.0039, "step": 39130 }, { "epoch": 0.3200719630371673, "grad_norm": 0.16565857827663422, "learning_rate": 9.890607397389518e-06, "loss": 0.0051, "step": 39140 }, { "epoch": 0.32015373921576645, "grad_norm": 0.13342374563217163, "learning_rate": 9.890458887514806e-06, "loss": 0.0027, "step": 39150 }, { "epoch": 0.3202355153943656, "grad_norm": 0.12086708843708038, "learning_rate": 9.89031027801747e-06, "loss": 0.0026, "step": 39160 }, { "epoch": 0.3203172915729648, "grad_norm": 0.13015055656433105, "learning_rate": 9.89016156890054e-06, "loss": 0.0038, "step": 39170 }, { "epoch": 0.32039906775156396, "grad_norm": 0.011008171364665031, "learning_rate": 9.890012760167043e-06, "loss": 0.0022, "step": 39180 }, { "epoch": 0.32048084393016313, "grad_norm": 0.043506473302841187, "learning_rate": 9.889863851820012e-06, "loss": 0.0034, "step": 39190 }, { "epoch": 0.3205626201087623, "grad_norm": 0.16226719319820404, "learning_rate": 9.889714843862479e-06, "loss": 0.0052, "step": 39200 }, { "epoch": 0.32064439628736147, "grad_norm": 0.01274100225418806, "learning_rate": 9.889565736297483e-06, "loss": 0.0044, "step": 39210 }, { "epoch": 0.3207261724659607, "grad_norm": 0.02209519036114216, "learning_rate": 9.889416529128054e-06, "loss": 0.0032, "step": 39220 }, { "epoch": 0.32080794864455986, "grad_norm": 0.17208480834960938, "learning_rate": 9.889267222357238e-06, "loss": 0.0024, "step": 39230 }, { "epoch": 0.32088972482315903, "grad_norm": 0.09480523318052292, "learning_rate": 9.889117815988075e-06, "loss": 0.0028, "step": 39240 }, { "epoch": 0.3209715010017582, "grad_norm": 0.0846313089132309, "learning_rate": 9.888968310023609e-06, "loss": 0.0024, "step": 39250 }, { "epoch": 0.32105327718035737, "grad_norm": 0.06583674252033234, "learning_rate": 9.88881870446688e-06, "loss": 0.0031, "step": 39260 }, { "epoch": 0.32113505335895653, "grad_norm": 0.2631874680519104, "learning_rate": 9.888668999320945e-06, "loss": 0.004, "step": 39270 }, { "epoch": 0.3212168295375557, "grad_norm": 0.06530717760324478, "learning_rate": 9.888519194588846e-06, "loss": 0.004, "step": 39280 }, { "epoch": 0.3212986057161549, "grad_norm": 0.15179798007011414, "learning_rate": 9.888369290273638e-06, "loss": 0.0018, "step": 39290 }, { "epoch": 0.32138038189475404, "grad_norm": 0.03865648806095123, "learning_rate": 9.888219286378375e-06, "loss": 0.0039, "step": 39300 }, { "epoch": 0.3214621580733532, "grad_norm": 0.07691849768161774, "learning_rate": 9.888069182906112e-06, "loss": 0.0038, "step": 39310 }, { "epoch": 0.3215439342519524, "grad_norm": 0.14214110374450684, "learning_rate": 9.887918979859905e-06, "loss": 0.0031, "step": 39320 }, { "epoch": 0.3216257104305516, "grad_norm": 0.07355671375989914, "learning_rate": 9.887768677242816e-06, "loss": 0.0074, "step": 39330 }, { "epoch": 0.3217074866091508, "grad_norm": 0.1738034039735794, "learning_rate": 9.887618275057906e-06, "loss": 0.0035, "step": 39340 }, { "epoch": 0.32178926278774994, "grad_norm": 0.09148386865854263, "learning_rate": 9.88746777330824e-06, "loss": 0.0027, "step": 39350 }, { "epoch": 0.3218710389663491, "grad_norm": 0.04727174714207649, "learning_rate": 9.887317171996881e-06, "loss": 0.004, "step": 39360 }, { "epoch": 0.3219528151449483, "grad_norm": 0.07276250422000885, "learning_rate": 9.887166471126899e-06, "loss": 0.0026, "step": 39370 }, { "epoch": 0.32203459132354745, "grad_norm": 0.07468269765377045, "learning_rate": 9.887015670701363e-06, "loss": 0.002, "step": 39380 }, { "epoch": 0.3221163675021466, "grad_norm": 0.09173697978258133, "learning_rate": 9.886864770723348e-06, "loss": 0.0048, "step": 39390 }, { "epoch": 0.3221981436807458, "grad_norm": 0.1628381907939911, "learning_rate": 9.886713771195923e-06, "loss": 0.0032, "step": 39400 }, { "epoch": 0.32227991985934495, "grad_norm": 0.10178238898515701, "learning_rate": 9.886562672122167e-06, "loss": 0.0028, "step": 39410 }, { "epoch": 0.3223616960379441, "grad_norm": 0.03725139796733856, "learning_rate": 9.886411473505157e-06, "loss": 0.0036, "step": 39420 }, { "epoch": 0.32244347221654335, "grad_norm": 0.06858452409505844, "learning_rate": 9.886260175347974e-06, "loss": 0.0036, "step": 39430 }, { "epoch": 0.3225252483951425, "grad_norm": 0.17846912145614624, "learning_rate": 9.886108777653699e-06, "loss": 0.003, "step": 39440 }, { "epoch": 0.3226070245737417, "grad_norm": 0.07266217470169067, "learning_rate": 9.885957280425418e-06, "loss": 0.002, "step": 39450 }, { "epoch": 0.32268880075234085, "grad_norm": 0.372444748878479, "learning_rate": 9.885805683666214e-06, "loss": 0.0023, "step": 39460 }, { "epoch": 0.32277057693094, "grad_norm": 0.3060673475265503, "learning_rate": 9.885653987379177e-06, "loss": 0.0025, "step": 39470 }, { "epoch": 0.3228523531095392, "grad_norm": 0.10057384520769119, "learning_rate": 9.885502191567398e-06, "loss": 0.0029, "step": 39480 }, { "epoch": 0.32293412928813836, "grad_norm": 0.05063389241695404, "learning_rate": 9.885350296233966e-06, "loss": 0.0039, "step": 39490 }, { "epoch": 0.32301590546673753, "grad_norm": 0.06699343025684357, "learning_rate": 9.885198301381979e-06, "loss": 0.0034, "step": 39500 }, { "epoch": 0.3230976816453367, "grad_norm": 0.1515154093503952, "learning_rate": 9.885046207014532e-06, "loss": 0.0038, "step": 39510 }, { "epoch": 0.32317945782393587, "grad_norm": 0.19749847054481506, "learning_rate": 9.884894013134722e-06, "loss": 0.0038, "step": 39520 }, { "epoch": 0.32326123400253504, "grad_norm": 0.12091690301895142, "learning_rate": 9.884741719745651e-06, "loss": 0.0052, "step": 39530 }, { "epoch": 0.32334301018113426, "grad_norm": 0.15829068422317505, "learning_rate": 9.884589326850423e-06, "loss": 0.0051, "step": 39540 }, { "epoch": 0.32342478635973343, "grad_norm": 0.07982543855905533, "learning_rate": 9.884436834452136e-06, "loss": 0.0027, "step": 39550 }, { "epoch": 0.3235065625383326, "grad_norm": 0.1354052722454071, "learning_rate": 9.884284242553903e-06, "loss": 0.0032, "step": 39560 }, { "epoch": 0.32358833871693177, "grad_norm": 0.04395002871751785, "learning_rate": 9.884131551158829e-06, "loss": 0.0031, "step": 39570 }, { "epoch": 0.32367011489553094, "grad_norm": 0.150294691324234, "learning_rate": 9.883978760270026e-06, "loss": 0.0023, "step": 39580 }, { "epoch": 0.3237518910741301, "grad_norm": 0.29017528891563416, "learning_rate": 9.883825869890604e-06, "loss": 0.0033, "step": 39590 }, { "epoch": 0.3238336672527293, "grad_norm": 0.18844406306743622, "learning_rate": 9.883672880023681e-06, "loss": 0.0039, "step": 39600 }, { "epoch": 0.32391544343132844, "grad_norm": 0.20682871341705322, "learning_rate": 9.88351979067237e-06, "loss": 0.0035, "step": 39610 }, { "epoch": 0.3239972196099276, "grad_norm": 0.05685088038444519, "learning_rate": 9.883366601839792e-06, "loss": 0.0019, "step": 39620 }, { "epoch": 0.3240789957885268, "grad_norm": 0.030597588047385216, "learning_rate": 9.883213313529067e-06, "loss": 0.0023, "step": 39630 }, { "epoch": 0.32416077196712595, "grad_norm": 0.12430798262357712, "learning_rate": 9.88305992574332e-06, "loss": 0.0042, "step": 39640 }, { "epoch": 0.3242425481457252, "grad_norm": 0.03492049500346184, "learning_rate": 9.882906438485668e-06, "loss": 0.0018, "step": 39650 }, { "epoch": 0.32432432432432434, "grad_norm": 0.0685187354683876, "learning_rate": 9.882752851759247e-06, "loss": 0.0037, "step": 39660 }, { "epoch": 0.3244061005029235, "grad_norm": 0.0420360192656517, "learning_rate": 9.88259916556718e-06, "loss": 0.0036, "step": 39670 }, { "epoch": 0.3244878766815227, "grad_norm": 0.10470139235258102, "learning_rate": 9.8824453799126e-06, "loss": 0.0033, "step": 39680 }, { "epoch": 0.32456965286012185, "grad_norm": 0.10245702415704727, "learning_rate": 9.882291494798639e-06, "loss": 0.0045, "step": 39690 }, { "epoch": 0.324651429038721, "grad_norm": 0.1686919927597046, "learning_rate": 9.882137510228432e-06, "loss": 0.0027, "step": 39700 }, { "epoch": 0.3247332052173202, "grad_norm": 0.022252492606639862, "learning_rate": 9.881983426205113e-06, "loss": 0.0024, "step": 39710 }, { "epoch": 0.32481498139591936, "grad_norm": 0.04094761982560158, "learning_rate": 9.881829242731826e-06, "loss": 0.0034, "step": 39720 }, { "epoch": 0.3248967575745185, "grad_norm": 0.06954128295183182, "learning_rate": 9.881674959811708e-06, "loss": 0.0018, "step": 39730 }, { "epoch": 0.3249785337531177, "grad_norm": 0.003511706367135048, "learning_rate": 9.881520577447903e-06, "loss": 0.0022, "step": 39740 }, { "epoch": 0.3250603099317169, "grad_norm": 0.05698525160551071, "learning_rate": 9.881366095643556e-06, "loss": 0.0037, "step": 39750 }, { "epoch": 0.3251420861103161, "grad_norm": 0.069041907787323, "learning_rate": 9.881211514401814e-06, "loss": 0.0036, "step": 39760 }, { "epoch": 0.32522386228891526, "grad_norm": 0.11198991537094116, "learning_rate": 9.881056833725826e-06, "loss": 0.0021, "step": 39770 }, { "epoch": 0.3253056384675144, "grad_norm": 0.0641515702009201, "learning_rate": 9.880902053618743e-06, "loss": 0.0028, "step": 39780 }, { "epoch": 0.3253874146461136, "grad_norm": 0.06428170949220657, "learning_rate": 9.880747174083718e-06, "loss": 0.0028, "step": 39790 }, { "epoch": 0.32546919082471276, "grad_norm": 0.20588836073875427, "learning_rate": 9.880592195123905e-06, "loss": 0.003, "step": 39800 }, { "epoch": 0.32555096700331193, "grad_norm": 0.11706335097551346, "learning_rate": 9.880437116742462e-06, "loss": 0.0037, "step": 39810 }, { "epoch": 0.3256327431819111, "grad_norm": 0.058299221098423004, "learning_rate": 9.880281938942548e-06, "loss": 0.0057, "step": 39820 }, { "epoch": 0.32571451936051027, "grad_norm": 0.13259218633174896, "learning_rate": 9.880126661727322e-06, "loss": 0.0026, "step": 39830 }, { "epoch": 0.32579629553910944, "grad_norm": 0.11683389544487, "learning_rate": 9.879971285099953e-06, "loss": 0.0046, "step": 39840 }, { "epoch": 0.3258780717177086, "grad_norm": 0.13614407181739807, "learning_rate": 9.8798158090636e-06, "loss": 0.0037, "step": 39850 }, { "epoch": 0.32595984789630783, "grad_norm": 0.1515888124704361, "learning_rate": 9.879660233621433e-06, "loss": 0.0062, "step": 39860 }, { "epoch": 0.326041624074907, "grad_norm": 0.19860783219337463, "learning_rate": 9.87950455877662e-06, "loss": 0.0026, "step": 39870 }, { "epoch": 0.32612340025350617, "grad_norm": 0.06255951523780823, "learning_rate": 9.879348784532334e-06, "loss": 0.0035, "step": 39880 }, { "epoch": 0.32620517643210534, "grad_norm": 0.2940182685852051, "learning_rate": 9.879192910891745e-06, "loss": 0.0033, "step": 39890 }, { "epoch": 0.3262869526107045, "grad_norm": 0.09057287126779556, "learning_rate": 9.879036937858034e-06, "loss": 0.0023, "step": 39900 }, { "epoch": 0.3263687287893037, "grad_norm": 0.13621477782726288, "learning_rate": 9.878880865434372e-06, "loss": 0.003, "step": 39910 }, { "epoch": 0.32645050496790284, "grad_norm": 0.22461269795894623, "learning_rate": 9.878724693623942e-06, "loss": 0.0036, "step": 39920 }, { "epoch": 0.326532281146502, "grad_norm": 0.11711890250444412, "learning_rate": 9.878568422429923e-06, "loss": 0.0032, "step": 39930 }, { "epoch": 0.3266140573251012, "grad_norm": 0.07239599525928497, "learning_rate": 9.878412051855502e-06, "loss": 0.0032, "step": 39940 }, { "epoch": 0.32669583350370035, "grad_norm": 0.10033787041902542, "learning_rate": 9.87825558190386e-06, "loss": 0.0034, "step": 39950 }, { "epoch": 0.3267776096822995, "grad_norm": 0.023437967523932457, "learning_rate": 9.878099012578187e-06, "loss": 0.0022, "step": 39960 }, { "epoch": 0.32685938586089874, "grad_norm": 0.21784913539886475, "learning_rate": 9.877942343881672e-06, "loss": 0.0034, "step": 39970 }, { "epoch": 0.3269411620394979, "grad_norm": 0.05776089057326317, "learning_rate": 9.877785575817507e-06, "loss": 0.0021, "step": 39980 }, { "epoch": 0.3270229382180971, "grad_norm": 0.06208192929625511, "learning_rate": 9.877628708388885e-06, "loss": 0.0018, "step": 39990 }, { "epoch": 0.32710471439669625, "grad_norm": 0.28509417176246643, "learning_rate": 9.877471741599001e-06, "loss": 0.0016, "step": 40000 }, { "epoch": 0.3271864905752954, "grad_norm": 0.3251793384552002, "learning_rate": 9.877314675451053e-06, "loss": 0.0041, "step": 40010 }, { "epoch": 0.3272682667538946, "grad_norm": 0.07598826289176941, "learning_rate": 9.87715750994824e-06, "loss": 0.0028, "step": 40020 }, { "epoch": 0.32735004293249376, "grad_norm": 0.07156430929899216, "learning_rate": 9.877000245093766e-06, "loss": 0.004, "step": 40030 }, { "epoch": 0.3274318191110929, "grad_norm": 0.20402027666568756, "learning_rate": 9.876842880890833e-06, "loss": 0.0029, "step": 40040 }, { "epoch": 0.3275135952896921, "grad_norm": 0.11373905092477798, "learning_rate": 9.876685417342644e-06, "loss": 0.002, "step": 40050 }, { "epoch": 0.32759537146829126, "grad_norm": 0.16935944557189941, "learning_rate": 9.876527854452412e-06, "loss": 0.0024, "step": 40060 }, { "epoch": 0.3276771476468905, "grad_norm": 0.06633497774600983, "learning_rate": 9.876370192223342e-06, "loss": 0.0034, "step": 40070 }, { "epoch": 0.32775892382548966, "grad_norm": 0.12372661381959915, "learning_rate": 9.876212430658647e-06, "loss": 0.0024, "step": 40080 }, { "epoch": 0.3278407000040888, "grad_norm": 0.09543728083372116, "learning_rate": 9.876054569761541e-06, "loss": 0.0049, "step": 40090 }, { "epoch": 0.327922476182688, "grad_norm": 0.05826156958937645, "learning_rate": 9.87589660953524e-06, "loss": 0.0031, "step": 40100 }, { "epoch": 0.32800425236128716, "grad_norm": 0.2538983225822449, "learning_rate": 9.875738549982963e-06, "loss": 0.004, "step": 40110 }, { "epoch": 0.32808602853988633, "grad_norm": 0.06698358803987503, "learning_rate": 9.875580391107929e-06, "loss": 0.0029, "step": 40120 }, { "epoch": 0.3281678047184855, "grad_norm": 0.13592271506786346, "learning_rate": 9.875422132913356e-06, "loss": 0.0015, "step": 40130 }, { "epoch": 0.32824958089708467, "grad_norm": 0.033848877996206284, "learning_rate": 9.875263775402475e-06, "loss": 0.0029, "step": 40140 }, { "epoch": 0.32833135707568384, "grad_norm": 0.1814841777086258, "learning_rate": 9.875105318578505e-06, "loss": 0.0038, "step": 40150 }, { "epoch": 0.328413133254283, "grad_norm": 0.1193646714091301, "learning_rate": 9.87494676244468e-06, "loss": 0.0025, "step": 40160 }, { "epoch": 0.3284949094328822, "grad_norm": 0.04797453433275223, "learning_rate": 9.874788107004227e-06, "loss": 0.0026, "step": 40170 }, { "epoch": 0.3285766856114814, "grad_norm": 0.08755431324243546, "learning_rate": 9.874629352260375e-06, "loss": 0.0018, "step": 40180 }, { "epoch": 0.32865846179008057, "grad_norm": 0.07154510170221329, "learning_rate": 9.874470498216361e-06, "loss": 0.0026, "step": 40190 }, { "epoch": 0.32874023796867974, "grad_norm": 0.2889571487903595, "learning_rate": 9.874311544875424e-06, "loss": 0.0022, "step": 40200 }, { "epoch": 0.3288220141472789, "grad_norm": 0.09941496700048447, "learning_rate": 9.874152492240797e-06, "loss": 0.0017, "step": 40210 }, { "epoch": 0.3289037903258781, "grad_norm": 0.1256294995546341, "learning_rate": 9.87399334031572e-06, "loss": 0.0028, "step": 40220 }, { "epoch": 0.32898556650447724, "grad_norm": 0.23141823709011078, "learning_rate": 9.87383408910344e-06, "loss": 0.004, "step": 40230 }, { "epoch": 0.3290673426830764, "grad_norm": 0.08002953231334686, "learning_rate": 9.873674738607194e-06, "loss": 0.0036, "step": 40240 }, { "epoch": 0.3291491188616756, "grad_norm": 0.13158755004405975, "learning_rate": 9.873515288830236e-06, "loss": 0.0037, "step": 40250 }, { "epoch": 0.32923089504027475, "grad_norm": 0.12722593545913696, "learning_rate": 9.87335573977581e-06, "loss": 0.0028, "step": 40260 }, { "epoch": 0.3293126712188739, "grad_norm": 0.05232938379049301, "learning_rate": 9.873196091447164e-06, "loss": 0.0037, "step": 40270 }, { "epoch": 0.3293944473974731, "grad_norm": 0.07674544304609299, "learning_rate": 9.873036343847553e-06, "loss": 0.0022, "step": 40280 }, { "epoch": 0.3294762235760723, "grad_norm": 0.02753489278256893, "learning_rate": 9.87287649698023e-06, "loss": 0.0034, "step": 40290 }, { "epoch": 0.3295579997546715, "grad_norm": 0.06886480003595352, "learning_rate": 9.872716550848453e-06, "loss": 0.0034, "step": 40300 }, { "epoch": 0.32963977593327065, "grad_norm": 0.06637192517518997, "learning_rate": 9.87255650545548e-06, "loss": 0.0025, "step": 40310 }, { "epoch": 0.3297215521118698, "grad_norm": 0.1408696323633194, "learning_rate": 9.872396360804568e-06, "loss": 0.0032, "step": 40320 }, { "epoch": 0.329803328290469, "grad_norm": 0.15510036051273346, "learning_rate": 9.872236116898983e-06, "loss": 0.0019, "step": 40330 }, { "epoch": 0.32988510446906816, "grad_norm": 0.08188015967607498, "learning_rate": 9.87207577374199e-06, "loss": 0.0041, "step": 40340 }, { "epoch": 0.3299668806476673, "grad_norm": 0.2776038646697998, "learning_rate": 9.87191533133685e-06, "loss": 0.0025, "step": 40350 }, { "epoch": 0.3300486568262665, "grad_norm": 0.16104756295681, "learning_rate": 9.871754789686836e-06, "loss": 0.0046, "step": 40360 }, { "epoch": 0.33013043300486566, "grad_norm": 0.08783181011676788, "learning_rate": 9.871594148795217e-06, "loss": 0.0026, "step": 40370 }, { "epoch": 0.33021220918346483, "grad_norm": 0.07394921779632568, "learning_rate": 9.871433408665266e-06, "loss": 0.0027, "step": 40380 }, { "epoch": 0.33029398536206406, "grad_norm": 0.044419314712285995, "learning_rate": 9.871272569300256e-06, "loss": 0.0029, "step": 40390 }, { "epoch": 0.3303757615406632, "grad_norm": 0.06860511004924774, "learning_rate": 9.871111630703463e-06, "loss": 0.0021, "step": 40400 }, { "epoch": 0.3304575377192624, "grad_norm": 0.06877167522907257, "learning_rate": 9.870950592878168e-06, "loss": 0.0016, "step": 40410 }, { "epoch": 0.33053931389786156, "grad_norm": 0.305754154920578, "learning_rate": 9.87078945582765e-06, "loss": 0.002, "step": 40420 }, { "epoch": 0.33062109007646073, "grad_norm": 0.11611048132181168, "learning_rate": 9.870628219555192e-06, "loss": 0.0042, "step": 40430 }, { "epoch": 0.3307028662550599, "grad_norm": 0.21134495735168457, "learning_rate": 9.870466884064078e-06, "loss": 0.0027, "step": 40440 }, { "epoch": 0.33078464243365907, "grad_norm": 0.07465017586946487, "learning_rate": 9.870305449357594e-06, "loss": 0.0036, "step": 40450 }, { "epoch": 0.33086641861225824, "grad_norm": 0.10121829807758331, "learning_rate": 9.870143915439028e-06, "loss": 0.0029, "step": 40460 }, { "epoch": 0.3309481947908574, "grad_norm": 0.08458656072616577, "learning_rate": 9.869982282311675e-06, "loss": 0.003, "step": 40470 }, { "epoch": 0.3310299709694566, "grad_norm": 0.19040599465370178, "learning_rate": 9.869820549978821e-06, "loss": 0.0057, "step": 40480 }, { "epoch": 0.33111174714805575, "grad_norm": 0.20971432328224182, "learning_rate": 9.869658718443766e-06, "loss": 0.0035, "step": 40490 }, { "epoch": 0.33119352332665497, "grad_norm": 0.04225558415055275, "learning_rate": 9.869496787709804e-06, "loss": 0.0038, "step": 40500 }, { "epoch": 0.33127529950525414, "grad_norm": 0.15269289910793304, "learning_rate": 9.869334757780235e-06, "loss": 0.0029, "step": 40510 }, { "epoch": 0.3313570756838533, "grad_norm": 0.07612203061580658, "learning_rate": 9.869172628658359e-06, "loss": 0.0027, "step": 40520 }, { "epoch": 0.3314388518624525, "grad_norm": 0.20517198741436005, "learning_rate": 9.869010400347479e-06, "loss": 0.0028, "step": 40530 }, { "epoch": 0.33152062804105165, "grad_norm": 0.10118605941534042, "learning_rate": 9.868848072850899e-06, "loss": 0.0031, "step": 40540 }, { "epoch": 0.3316024042196508, "grad_norm": 0.19786138832569122, "learning_rate": 9.868685646171925e-06, "loss": 0.0024, "step": 40550 }, { "epoch": 0.33168418039825, "grad_norm": 0.045200418680906296, "learning_rate": 9.86852312031387e-06, "loss": 0.0042, "step": 40560 }, { "epoch": 0.33176595657684915, "grad_norm": 0.08643277734518051, "learning_rate": 9.868360495280038e-06, "loss": 0.0032, "step": 40570 }, { "epoch": 0.3318477327554483, "grad_norm": 0.1386396288871765, "learning_rate": 9.868197771073746e-06, "loss": 0.0022, "step": 40580 }, { "epoch": 0.3319295089340475, "grad_norm": 0.023005304858088493, "learning_rate": 9.868034947698311e-06, "loss": 0.0062, "step": 40590 }, { "epoch": 0.33201128511264666, "grad_norm": 0.3678697645664215, "learning_rate": 9.867872025157045e-06, "loss": 0.0033, "step": 40600 }, { "epoch": 0.3320930612912459, "grad_norm": 0.06257188320159912, "learning_rate": 9.86770900345327e-06, "loss": 0.0038, "step": 40610 }, { "epoch": 0.33217483746984505, "grad_norm": 0.24753515422344208, "learning_rate": 9.867545882590304e-06, "loss": 0.0057, "step": 40620 }, { "epoch": 0.3322566136484442, "grad_norm": 0.06159808859229088, "learning_rate": 9.867382662571474e-06, "loss": 0.0022, "step": 40630 }, { "epoch": 0.3323383898270434, "grad_norm": 0.18359099328517914, "learning_rate": 9.867219343400102e-06, "loss": 0.0027, "step": 40640 }, { "epoch": 0.33242016600564256, "grad_norm": 0.09726720303297043, "learning_rate": 9.867055925079516e-06, "loss": 0.0039, "step": 40650 }, { "epoch": 0.33250194218424173, "grad_norm": 0.039910804480314255, "learning_rate": 9.866892407613044e-06, "loss": 0.0035, "step": 40660 }, { "epoch": 0.3325837183628409, "grad_norm": 0.08073653280735016, "learning_rate": 9.866728791004018e-06, "loss": 0.0023, "step": 40670 }, { "epoch": 0.33266549454144007, "grad_norm": 0.11153268069028854, "learning_rate": 9.866565075255771e-06, "loss": 0.005, "step": 40680 }, { "epoch": 0.33274727072003923, "grad_norm": 0.04202694073319435, "learning_rate": 9.866401260371638e-06, "loss": 0.0028, "step": 40690 }, { "epoch": 0.3328290468986384, "grad_norm": 0.04703335464000702, "learning_rate": 9.866237346354955e-06, "loss": 0.0028, "step": 40700 }, { "epoch": 0.3329108230772376, "grad_norm": 0.11272620409727097, "learning_rate": 9.866073333209061e-06, "loss": 0.0025, "step": 40710 }, { "epoch": 0.3329925992558368, "grad_norm": 0.0628562718629837, "learning_rate": 9.8659092209373e-06, "loss": 0.0018, "step": 40720 }, { "epoch": 0.33307437543443597, "grad_norm": 0.17363880574703217, "learning_rate": 9.865745009543011e-06, "loss": 0.0045, "step": 40730 }, { "epoch": 0.33315615161303513, "grad_norm": 0.10423113405704498, "learning_rate": 9.865580699029542e-06, "loss": 0.0035, "step": 40740 }, { "epoch": 0.3332379277916343, "grad_norm": 0.18845610320568085, "learning_rate": 9.86541628940024e-06, "loss": 0.0038, "step": 40750 }, { "epoch": 0.33331970397023347, "grad_norm": 0.18593047559261322, "learning_rate": 9.86525178065845e-06, "loss": 0.0051, "step": 40760 }, { "epoch": 0.33340148014883264, "grad_norm": 0.16690973937511444, "learning_rate": 9.865087172807531e-06, "loss": 0.0043, "step": 40770 }, { "epoch": 0.3334832563274318, "grad_norm": 0.15010401606559753, "learning_rate": 9.864922465850831e-06, "loss": 0.0029, "step": 40780 }, { "epoch": 0.333565032506031, "grad_norm": 0.20715627074241638, "learning_rate": 9.864757659791705e-06, "loss": 0.005, "step": 40790 }, { "epoch": 0.33364680868463015, "grad_norm": 0.11695601046085358, "learning_rate": 9.864592754633508e-06, "loss": 0.0036, "step": 40800 }, { "epoch": 0.3337285848632293, "grad_norm": 0.04403870552778244, "learning_rate": 9.864427750379605e-06, "loss": 0.0019, "step": 40810 }, { "epoch": 0.33381036104182854, "grad_norm": 0.24755209684371948, "learning_rate": 9.864262647033356e-06, "loss": 0.0048, "step": 40820 }, { "epoch": 0.3338921372204277, "grad_norm": 0.07772163301706314, "learning_rate": 9.864097444598122e-06, "loss": 0.0029, "step": 40830 }, { "epoch": 0.3339739133990269, "grad_norm": 0.12028302252292633, "learning_rate": 9.863932143077271e-06, "loss": 0.0024, "step": 40840 }, { "epoch": 0.33405568957762605, "grad_norm": 0.08806764334440231, "learning_rate": 9.863766742474166e-06, "loss": 0.0034, "step": 40850 }, { "epoch": 0.3341374657562252, "grad_norm": 0.09642941504716873, "learning_rate": 9.86360124279218e-06, "loss": 0.0056, "step": 40860 }, { "epoch": 0.3342192419348244, "grad_norm": 0.14542853832244873, "learning_rate": 9.863435644034683e-06, "loss": 0.003, "step": 40870 }, { "epoch": 0.33430101811342355, "grad_norm": 0.0308955367654562, "learning_rate": 9.86326994620505e-06, "loss": 0.0022, "step": 40880 }, { "epoch": 0.3343827942920227, "grad_norm": 0.5004720687866211, "learning_rate": 9.863104149306653e-06, "loss": 0.0032, "step": 40890 }, { "epoch": 0.3344645704706219, "grad_norm": 0.09251883625984192, "learning_rate": 9.862938253342873e-06, "loss": 0.0031, "step": 40900 }, { "epoch": 0.33454634664922106, "grad_norm": 0.05748830363154411, "learning_rate": 9.862772258317088e-06, "loss": 0.0031, "step": 40910 }, { "epoch": 0.33462812282782023, "grad_norm": 0.048399586230516434, "learning_rate": 9.86260616423268e-06, "loss": 0.0026, "step": 40920 }, { "epoch": 0.33470989900641945, "grad_norm": 0.21151508390903473, "learning_rate": 9.86243997109303e-06, "loss": 0.0027, "step": 40930 }, { "epoch": 0.3347916751850186, "grad_norm": 0.028591062873601913, "learning_rate": 9.862273678901527e-06, "loss": 0.0036, "step": 40940 }, { "epoch": 0.3348734513636178, "grad_norm": 0.051520876586437225, "learning_rate": 9.862107287661556e-06, "loss": 0.0038, "step": 40950 }, { "epoch": 0.33495522754221696, "grad_norm": 0.04650541767477989, "learning_rate": 9.861940797376509e-06, "loss": 0.0022, "step": 40960 }, { "epoch": 0.33503700372081613, "grad_norm": 0.0596747025847435, "learning_rate": 9.861774208049774e-06, "loss": 0.0034, "step": 40970 }, { "epoch": 0.3351187798994153, "grad_norm": 0.14003275334835052, "learning_rate": 9.861607519684747e-06, "loss": 0.0023, "step": 40980 }, { "epoch": 0.33520055607801447, "grad_norm": 0.028637520968914032, "learning_rate": 9.861440732284824e-06, "loss": 0.0025, "step": 40990 }, { "epoch": 0.33528233225661364, "grad_norm": 0.21541334688663483, "learning_rate": 9.8612738458534e-06, "loss": 0.004, "step": 41000 }, { "epoch": 0.3353641084352128, "grad_norm": 0.09440692514181137, "learning_rate": 9.861106860393878e-06, "loss": 0.0036, "step": 41010 }, { "epoch": 0.335445884613812, "grad_norm": 0.3189733028411865, "learning_rate": 9.860939775909657e-06, "loss": 0.0075, "step": 41020 }, { "epoch": 0.3355276607924112, "grad_norm": 0.04457555338740349, "learning_rate": 9.860772592404142e-06, "loss": 0.0027, "step": 41030 }, { "epoch": 0.33560943697101037, "grad_norm": 0.13031230866909027, "learning_rate": 9.86060530988074e-06, "loss": 0.0037, "step": 41040 }, { "epoch": 0.33569121314960954, "grad_norm": 0.08467110991477966, "learning_rate": 9.860437928342854e-06, "loss": 0.0024, "step": 41050 }, { "epoch": 0.3357729893282087, "grad_norm": 0.22125761210918427, "learning_rate": 9.860270447793899e-06, "loss": 0.0053, "step": 41060 }, { "epoch": 0.3358547655068079, "grad_norm": 0.09921028465032578, "learning_rate": 9.860102868237282e-06, "loss": 0.0038, "step": 41070 }, { "epoch": 0.33593654168540704, "grad_norm": 0.13357660174369812, "learning_rate": 9.859935189676421e-06, "loss": 0.0027, "step": 41080 }, { "epoch": 0.3360183178640062, "grad_norm": 0.08982257544994354, "learning_rate": 9.859767412114729e-06, "loss": 0.0031, "step": 41090 }, { "epoch": 0.3361000940426054, "grad_norm": 0.09381797164678574, "learning_rate": 9.859599535555624e-06, "loss": 0.0035, "step": 41100 }, { "epoch": 0.33618187022120455, "grad_norm": 0.07539445161819458, "learning_rate": 9.859431560002529e-06, "loss": 0.0018, "step": 41110 }, { "epoch": 0.3362636463998037, "grad_norm": 0.004960055463016033, "learning_rate": 9.859263485458859e-06, "loss": 0.0024, "step": 41120 }, { "epoch": 0.3363454225784029, "grad_norm": 0.09570816904306412, "learning_rate": 9.859095311928045e-06, "loss": 0.0022, "step": 41130 }, { "epoch": 0.3364271987570021, "grad_norm": 0.060880281031131744, "learning_rate": 9.858927039413509e-06, "loss": 0.002, "step": 41140 }, { "epoch": 0.3365089749356013, "grad_norm": 0.060635343194007874, "learning_rate": 9.85875866791868e-06, "loss": 0.0029, "step": 41150 }, { "epoch": 0.33659075111420045, "grad_norm": 0.11821414530277252, "learning_rate": 9.858590197446987e-06, "loss": 0.0024, "step": 41160 }, { "epoch": 0.3366725272927996, "grad_norm": 0.10581571608781815, "learning_rate": 9.858421628001863e-06, "loss": 0.0051, "step": 41170 }, { "epoch": 0.3367543034713988, "grad_norm": 0.2188176065683365, "learning_rate": 9.85825295958674e-06, "loss": 0.0053, "step": 41180 }, { "epoch": 0.33683607964999795, "grad_norm": 0.08716884255409241, "learning_rate": 9.858084192205058e-06, "loss": 0.0039, "step": 41190 }, { "epoch": 0.3369178558285971, "grad_norm": 0.05615820735692978, "learning_rate": 9.85791532586025e-06, "loss": 0.0033, "step": 41200 }, { "epoch": 0.3369996320071963, "grad_norm": 0.14848874509334564, "learning_rate": 9.857746360555757e-06, "loss": 0.0026, "step": 41210 }, { "epoch": 0.33708140818579546, "grad_norm": 0.08711704611778259, "learning_rate": 9.857577296295024e-06, "loss": 0.0036, "step": 41220 }, { "epoch": 0.33716318436439463, "grad_norm": 0.06237363815307617, "learning_rate": 9.857408133081493e-06, "loss": 0.0032, "step": 41230 }, { "epoch": 0.3372449605429938, "grad_norm": 0.0808502659201622, "learning_rate": 9.857238870918609e-06, "loss": 0.0026, "step": 41240 }, { "epoch": 0.337326736721593, "grad_norm": 0.022988352924585342, "learning_rate": 9.857069509809823e-06, "loss": 0.0028, "step": 41250 }, { "epoch": 0.3374085129001922, "grad_norm": 0.03707180172204971, "learning_rate": 9.856900049758582e-06, "loss": 0.0043, "step": 41260 }, { "epoch": 0.33749028907879136, "grad_norm": 0.151680588722229, "learning_rate": 9.856730490768339e-06, "loss": 0.0045, "step": 41270 }, { "epoch": 0.33757206525739053, "grad_norm": 0.08668021112680435, "learning_rate": 9.856560832842547e-06, "loss": 0.0017, "step": 41280 }, { "epoch": 0.3376538414359897, "grad_norm": 0.042376499623060226, "learning_rate": 9.856391075984664e-06, "loss": 0.0026, "step": 41290 }, { "epoch": 0.33773561761458887, "grad_norm": 0.05622687190771103, "learning_rate": 9.856221220198147e-06, "loss": 0.0027, "step": 41300 }, { "epoch": 0.33781739379318804, "grad_norm": 0.15238963067531586, "learning_rate": 9.856051265486458e-06, "loss": 0.0041, "step": 41310 }, { "epoch": 0.3378991699717872, "grad_norm": 0.1477244794368744, "learning_rate": 9.855881211853055e-06, "loss": 0.0031, "step": 41320 }, { "epoch": 0.3379809461503864, "grad_norm": 0.30206024646759033, "learning_rate": 9.855711059301406e-06, "loss": 0.0034, "step": 41330 }, { "epoch": 0.33806272232898554, "grad_norm": 0.1091311052441597, "learning_rate": 9.855540807834977e-06, "loss": 0.0021, "step": 41340 }, { "epoch": 0.33814449850758477, "grad_norm": 0.1269030123949051, "learning_rate": 9.855370457457235e-06, "loss": 0.0033, "step": 41350 }, { "epoch": 0.33822627468618394, "grad_norm": 0.08977305889129639, "learning_rate": 9.855200008171649e-06, "loss": 0.0039, "step": 41360 }, { "epoch": 0.3383080508647831, "grad_norm": 0.058241523802280426, "learning_rate": 9.855029459981692e-06, "loss": 0.0029, "step": 41370 }, { "epoch": 0.3383898270433823, "grad_norm": 0.08519021421670914, "learning_rate": 9.854858812890838e-06, "loss": 0.0023, "step": 41380 }, { "epoch": 0.33847160322198144, "grad_norm": 0.10208968818187714, "learning_rate": 9.854688066902565e-06, "loss": 0.005, "step": 41390 }, { "epoch": 0.3385533794005806, "grad_norm": 0.048121169209480286, "learning_rate": 9.85451722202035e-06, "loss": 0.0065, "step": 41400 }, { "epoch": 0.3386351555791798, "grad_norm": 0.020773442462086678, "learning_rate": 9.854346278247671e-06, "loss": 0.0041, "step": 41410 }, { "epoch": 0.33871693175777895, "grad_norm": 0.15416333079338074, "learning_rate": 9.854175235588014e-06, "loss": 0.003, "step": 41420 }, { "epoch": 0.3387987079363781, "grad_norm": 0.07215192914009094, "learning_rate": 9.854004094044864e-06, "loss": 0.0028, "step": 41430 }, { "epoch": 0.3388804841149773, "grad_norm": 0.14058616757392883, "learning_rate": 9.853832853621703e-06, "loss": 0.003, "step": 41440 }, { "epoch": 0.33896226029357646, "grad_norm": 0.04624434933066368, "learning_rate": 9.853661514322021e-06, "loss": 0.0032, "step": 41450 }, { "epoch": 0.3390440364721757, "grad_norm": 0.12426363676786423, "learning_rate": 9.85349007614931e-06, "loss": 0.0027, "step": 41460 }, { "epoch": 0.33912581265077485, "grad_norm": 0.07913604378700256, "learning_rate": 9.853318539107059e-06, "loss": 0.0026, "step": 41470 }, { "epoch": 0.339207588829374, "grad_norm": 0.10284637659788132, "learning_rate": 9.853146903198767e-06, "loss": 0.0032, "step": 41480 }, { "epoch": 0.3392893650079732, "grad_norm": 0.03430546447634697, "learning_rate": 9.852975168427925e-06, "loss": 0.0024, "step": 41490 }, { "epoch": 0.33937114118657236, "grad_norm": 0.030089233070611954, "learning_rate": 9.852803334798036e-06, "loss": 0.0047, "step": 41500 }, { "epoch": 0.3394529173651715, "grad_norm": 0.2542685270309448, "learning_rate": 9.852631402312597e-06, "loss": 0.0032, "step": 41510 }, { "epoch": 0.3395346935437707, "grad_norm": 0.3836933672428131, "learning_rate": 9.852459370975114e-06, "loss": 0.0048, "step": 41520 }, { "epoch": 0.33961646972236986, "grad_norm": 0.19184911251068115, "learning_rate": 9.852287240789088e-06, "loss": 0.0031, "step": 41530 }, { "epoch": 0.33969824590096903, "grad_norm": 0.1745183765888214, "learning_rate": 9.852115011758028e-06, "loss": 0.0024, "step": 41540 }, { "epoch": 0.3397800220795682, "grad_norm": 0.17640967667102814, "learning_rate": 9.851942683885441e-06, "loss": 0.0025, "step": 41550 }, { "epoch": 0.33986179825816737, "grad_norm": 0.07861081510782242, "learning_rate": 9.851770257174835e-06, "loss": 0.0031, "step": 41560 }, { "epoch": 0.3399435744367666, "grad_norm": 0.06772343814373016, "learning_rate": 9.851597731629727e-06, "loss": 0.0021, "step": 41570 }, { "epoch": 0.34002535061536576, "grad_norm": 0.1426919400691986, "learning_rate": 9.851425107253631e-06, "loss": 0.0032, "step": 41580 }, { "epoch": 0.34010712679396493, "grad_norm": 0.10063280165195465, "learning_rate": 9.851252384050061e-06, "loss": 0.0024, "step": 41590 }, { "epoch": 0.3401889029725641, "grad_norm": 0.08605603128671646, "learning_rate": 9.851079562022536e-06, "loss": 0.0033, "step": 41600 }, { "epoch": 0.34027067915116327, "grad_norm": 0.1349363476037979, "learning_rate": 9.850906641174578e-06, "loss": 0.0039, "step": 41610 }, { "epoch": 0.34035245532976244, "grad_norm": 0.17589634656906128, "learning_rate": 9.850733621509707e-06, "loss": 0.0026, "step": 41620 }, { "epoch": 0.3404342315083616, "grad_norm": 0.10536739975214005, "learning_rate": 9.850560503031452e-06, "loss": 0.0028, "step": 41630 }, { "epoch": 0.3405160076869608, "grad_norm": 0.09661267697811127, "learning_rate": 9.850387285743335e-06, "loss": 0.002, "step": 41640 }, { "epoch": 0.34059778386555994, "grad_norm": 0.46966832876205444, "learning_rate": 9.850213969648888e-06, "loss": 0.0029, "step": 41650 }, { "epoch": 0.3406795600441591, "grad_norm": 0.06326483935117722, "learning_rate": 9.850040554751637e-06, "loss": 0.0035, "step": 41660 }, { "epoch": 0.34076133622275834, "grad_norm": 0.10063426196575165, "learning_rate": 9.84986704105512e-06, "loss": 0.0032, "step": 41670 }, { "epoch": 0.3408431124013575, "grad_norm": 0.02311413362622261, "learning_rate": 9.849693428562868e-06, "loss": 0.0019, "step": 41680 }, { "epoch": 0.3409248885799567, "grad_norm": 0.06526147574186325, "learning_rate": 9.84951971727842e-06, "loss": 0.0022, "step": 41690 }, { "epoch": 0.34100666475855584, "grad_norm": 0.09546401351690292, "learning_rate": 9.849345907205313e-06, "loss": 0.0035, "step": 41700 }, { "epoch": 0.341088440937155, "grad_norm": 0.06408344954252243, "learning_rate": 9.849171998347089e-06, "loss": 0.0016, "step": 41710 }, { "epoch": 0.3411702171157542, "grad_norm": 0.016469797119498253, "learning_rate": 9.848997990707288e-06, "loss": 0.0021, "step": 41720 }, { "epoch": 0.34125199329435335, "grad_norm": 0.055709678679704666, "learning_rate": 9.848823884289457e-06, "loss": 0.003, "step": 41730 }, { "epoch": 0.3413337694729525, "grad_norm": 0.10182826966047287, "learning_rate": 9.848649679097142e-06, "loss": 0.003, "step": 41740 }, { "epoch": 0.3414155456515517, "grad_norm": 0.05359814688563347, "learning_rate": 9.848475375133893e-06, "loss": 0.0022, "step": 41750 }, { "epoch": 0.34149732183015086, "grad_norm": 0.11840090155601501, "learning_rate": 9.848300972403258e-06, "loss": 0.0037, "step": 41760 }, { "epoch": 0.34157909800875, "grad_norm": 0.2659514546394348, "learning_rate": 9.848126470908791e-06, "loss": 0.0027, "step": 41770 }, { "epoch": 0.34166087418734925, "grad_norm": 0.08466795086860657, "learning_rate": 9.84795187065405e-06, "loss": 0.002, "step": 41780 }, { "epoch": 0.3417426503659484, "grad_norm": 0.15156243741512299, "learning_rate": 9.847777171642587e-06, "loss": 0.003, "step": 41790 }, { "epoch": 0.3418244265445476, "grad_norm": 0.0925324335694313, "learning_rate": 9.847602373877962e-06, "loss": 0.0034, "step": 41800 }, { "epoch": 0.34190620272314676, "grad_norm": 0.13043510913848877, "learning_rate": 9.847427477363738e-06, "loss": 0.0025, "step": 41810 }, { "epoch": 0.3419879789017459, "grad_norm": 0.05930202454328537, "learning_rate": 9.847252482103474e-06, "loss": 0.0021, "step": 41820 }, { "epoch": 0.3420697550803451, "grad_norm": 0.32239830493927, "learning_rate": 9.847077388100738e-06, "loss": 0.0059, "step": 41830 }, { "epoch": 0.34215153125894426, "grad_norm": 0.1664191037416458, "learning_rate": 9.846902195359096e-06, "loss": 0.0032, "step": 41840 }, { "epoch": 0.34223330743754343, "grad_norm": 0.05478585511445999, "learning_rate": 9.846726903882115e-06, "loss": 0.0095, "step": 41850 }, { "epoch": 0.3423150836161426, "grad_norm": 0.02822929620742798, "learning_rate": 9.846551513673369e-06, "loss": 0.0025, "step": 41860 }, { "epoch": 0.34239685979474177, "grad_norm": 0.12459482252597809, "learning_rate": 9.846376024736429e-06, "loss": 0.0025, "step": 41870 }, { "epoch": 0.34247863597334094, "grad_norm": 0.07907335460186005, "learning_rate": 9.846200437074868e-06, "loss": 0.0029, "step": 41880 }, { "epoch": 0.34256041215194016, "grad_norm": 0.06932010501623154, "learning_rate": 9.846024750692268e-06, "loss": 0.0037, "step": 41890 }, { "epoch": 0.34264218833053933, "grad_norm": 0.20276860892772675, "learning_rate": 9.845848965592203e-06, "loss": 0.0052, "step": 41900 }, { "epoch": 0.3427239645091385, "grad_norm": 0.11913265287876129, "learning_rate": 9.845673081778255e-06, "loss": 0.002, "step": 41910 }, { "epoch": 0.34280574068773767, "grad_norm": 0.08522304892539978, "learning_rate": 9.845497099254009e-06, "loss": 0.0021, "step": 41920 }, { "epoch": 0.34288751686633684, "grad_norm": 0.04857531934976578, "learning_rate": 9.845321018023048e-06, "loss": 0.0024, "step": 41930 }, { "epoch": 0.342969293044936, "grad_norm": 0.10808496922254562, "learning_rate": 9.84514483808896e-06, "loss": 0.0033, "step": 41940 }, { "epoch": 0.3430510692235352, "grad_norm": 0.10142815113067627, "learning_rate": 9.844968559455332e-06, "loss": 0.002, "step": 41950 }, { "epoch": 0.34313284540213435, "grad_norm": 0.21120403707027435, "learning_rate": 9.844792182125757e-06, "loss": 0.0037, "step": 41960 }, { "epoch": 0.3432146215807335, "grad_norm": 0.10336332023143768, "learning_rate": 9.844615706103829e-06, "loss": 0.0019, "step": 41970 }, { "epoch": 0.3432963977593327, "grad_norm": 0.20586444437503815, "learning_rate": 9.844439131393138e-06, "loss": 0.0044, "step": 41980 }, { "epoch": 0.3433781739379319, "grad_norm": 0.07200132310390472, "learning_rate": 9.844262457997285e-06, "loss": 0.003, "step": 41990 }, { "epoch": 0.3434599501165311, "grad_norm": 0.10185776650905609, "learning_rate": 9.84408568591987e-06, "loss": 0.0023, "step": 42000 }, { "epoch": 0.34354172629513025, "grad_norm": 0.0996575728058815, "learning_rate": 9.843908815164489e-06, "loss": 0.0021, "step": 42010 }, { "epoch": 0.3436235024737294, "grad_norm": 0.1610236018896103, "learning_rate": 9.84373184573475e-06, "loss": 0.0029, "step": 42020 }, { "epoch": 0.3437052786523286, "grad_norm": 0.1675468385219574, "learning_rate": 9.843554777634256e-06, "loss": 0.0024, "step": 42030 }, { "epoch": 0.34378705483092775, "grad_norm": 0.21530894935131073, "learning_rate": 9.843377610866614e-06, "loss": 0.0041, "step": 42040 }, { "epoch": 0.3438688310095269, "grad_norm": 0.04647409915924072, "learning_rate": 9.843200345435432e-06, "loss": 0.0026, "step": 42050 }, { "epoch": 0.3439506071881261, "grad_norm": 0.06448878347873688, "learning_rate": 9.843022981344326e-06, "loss": 0.0017, "step": 42060 }, { "epoch": 0.34403238336672526, "grad_norm": 0.08443199843168259, "learning_rate": 9.842845518596903e-06, "loss": 0.0024, "step": 42070 }, { "epoch": 0.3441141595453244, "grad_norm": 0.12124121189117432, "learning_rate": 9.842667957196778e-06, "loss": 0.0037, "step": 42080 }, { "epoch": 0.3441959357239236, "grad_norm": 0.12358011305332184, "learning_rate": 9.842490297147573e-06, "loss": 0.0034, "step": 42090 }, { "epoch": 0.3442777119025228, "grad_norm": 0.028925573453307152, "learning_rate": 9.842312538452905e-06, "loss": 0.0041, "step": 42100 }, { "epoch": 0.344359488081122, "grad_norm": 0.12676075100898743, "learning_rate": 9.842134681116393e-06, "loss": 0.0038, "step": 42110 }, { "epoch": 0.34444126425972116, "grad_norm": 0.08383291214704514, "learning_rate": 9.841956725141662e-06, "loss": 0.0028, "step": 42120 }, { "epoch": 0.3445230404383203, "grad_norm": 0.1127503365278244, "learning_rate": 9.841778670532336e-06, "loss": 0.0023, "step": 42130 }, { "epoch": 0.3446048166169195, "grad_norm": 0.006563338916748762, "learning_rate": 9.841600517292044e-06, "loss": 0.002, "step": 42140 }, { "epoch": 0.34468659279551866, "grad_norm": 0.06987761706113815, "learning_rate": 9.841422265424414e-06, "loss": 0.0021, "step": 42150 }, { "epoch": 0.34476836897411783, "grad_norm": 0.09631387889385223, "learning_rate": 9.841243914933078e-06, "loss": 0.0031, "step": 42160 }, { "epoch": 0.344850145152717, "grad_norm": 0.09149367362260818, "learning_rate": 9.841065465821667e-06, "loss": 0.0042, "step": 42170 }, { "epoch": 0.34493192133131617, "grad_norm": 0.07315000146627426, "learning_rate": 9.840886918093817e-06, "loss": 0.0028, "step": 42180 }, { "epoch": 0.34501369750991534, "grad_norm": 0.07745639979839325, "learning_rate": 9.840708271753166e-06, "loss": 0.0026, "step": 42190 }, { "epoch": 0.3450954736885145, "grad_norm": 0.09256739169359207, "learning_rate": 9.840529526803352e-06, "loss": 0.0023, "step": 42200 }, { "epoch": 0.34517724986711373, "grad_norm": 0.1687823385000229, "learning_rate": 9.840350683248019e-06, "loss": 0.0033, "step": 42210 }, { "epoch": 0.3452590260457129, "grad_norm": 0.19404783844947815, "learning_rate": 9.840171741090806e-06, "loss": 0.0041, "step": 42220 }, { "epoch": 0.34534080222431207, "grad_norm": 0.2209528088569641, "learning_rate": 9.839992700335364e-06, "loss": 0.0057, "step": 42230 }, { "epoch": 0.34542257840291124, "grad_norm": 0.18173369765281677, "learning_rate": 9.839813560985333e-06, "loss": 0.0027, "step": 42240 }, { "epoch": 0.3455043545815104, "grad_norm": 0.18297308683395386, "learning_rate": 9.839634323044367e-06, "loss": 0.0021, "step": 42250 }, { "epoch": 0.3455861307601096, "grad_norm": 0.22851119935512543, "learning_rate": 9.839454986516114e-06, "loss": 0.0027, "step": 42260 }, { "epoch": 0.34566790693870875, "grad_norm": 0.08367306739091873, "learning_rate": 9.839275551404233e-06, "loss": 0.0025, "step": 42270 }, { "epoch": 0.3457496831173079, "grad_norm": 0.10175002366304398, "learning_rate": 9.839096017712373e-06, "loss": 0.002, "step": 42280 }, { "epoch": 0.3458314592959071, "grad_norm": 0.05614710599184036, "learning_rate": 9.838916385444195e-06, "loss": 0.0024, "step": 42290 }, { "epoch": 0.34591323547450625, "grad_norm": 0.09564420580863953, "learning_rate": 9.838736654603357e-06, "loss": 0.0028, "step": 42300 }, { "epoch": 0.3459950116531055, "grad_norm": 0.005313972942531109, "learning_rate": 9.838556825193518e-06, "loss": 0.0023, "step": 42310 }, { "epoch": 0.34607678783170465, "grad_norm": 0.07373779267072678, "learning_rate": 9.838376897218345e-06, "loss": 0.0038, "step": 42320 }, { "epoch": 0.3461585640103038, "grad_norm": 0.05490924417972565, "learning_rate": 9.838196870681501e-06, "loss": 0.0036, "step": 42330 }, { "epoch": 0.346240340188903, "grad_norm": 0.23881882429122925, "learning_rate": 9.838016745586656e-06, "loss": 0.0036, "step": 42340 }, { "epoch": 0.34632211636750215, "grad_norm": 0.12724295258522034, "learning_rate": 9.837836521937475e-06, "loss": 0.0026, "step": 42350 }, { "epoch": 0.3464038925461013, "grad_norm": 0.06802333146333694, "learning_rate": 9.837656199737633e-06, "loss": 0.0027, "step": 42360 }, { "epoch": 0.3464856687247005, "grad_norm": 0.3697448670864105, "learning_rate": 9.837475778990801e-06, "loss": 0.0019, "step": 42370 }, { "epoch": 0.34656744490329966, "grad_norm": 0.10239233076572418, "learning_rate": 9.837295259700657e-06, "loss": 0.0023, "step": 42380 }, { "epoch": 0.34664922108189883, "grad_norm": 0.25076156854629517, "learning_rate": 9.837114641870876e-06, "loss": 0.0032, "step": 42390 }, { "epoch": 0.346730997260498, "grad_norm": 0.013042384758591652, "learning_rate": 9.836933925505136e-06, "loss": 0.0024, "step": 42400 }, { "epoch": 0.34681277343909717, "grad_norm": 0.03056250885128975, "learning_rate": 9.836753110607123e-06, "loss": 0.002, "step": 42410 }, { "epoch": 0.3468945496176964, "grad_norm": 0.0192570723593235, "learning_rate": 9.836572197180515e-06, "loss": 0.0029, "step": 42420 }, { "epoch": 0.34697632579629556, "grad_norm": 0.12300708144903183, "learning_rate": 9.836391185229002e-06, "loss": 0.0035, "step": 42430 }, { "epoch": 0.34705810197489473, "grad_norm": 0.09673581272363663, "learning_rate": 9.836210074756268e-06, "loss": 0.0042, "step": 42440 }, { "epoch": 0.3471398781534939, "grad_norm": 0.16670234501361847, "learning_rate": 9.836028865766004e-06, "loss": 0.0025, "step": 42450 }, { "epoch": 0.34722165433209307, "grad_norm": 0.1743813157081604, "learning_rate": 9.835847558261902e-06, "loss": 0.0033, "step": 42460 }, { "epoch": 0.34730343051069223, "grad_norm": 0.04480774700641632, "learning_rate": 9.835666152247654e-06, "loss": 0.0028, "step": 42470 }, { "epoch": 0.3473852066892914, "grad_norm": 0.13390541076660156, "learning_rate": 9.835484647726956e-06, "loss": 0.0038, "step": 42480 }, { "epoch": 0.3474669828678906, "grad_norm": 0.09377749264240265, "learning_rate": 9.835303044703506e-06, "loss": 0.0028, "step": 42490 }, { "epoch": 0.34754875904648974, "grad_norm": 0.08804147690534592, "learning_rate": 9.835121343181e-06, "loss": 0.0021, "step": 42500 }, { "epoch": 0.3476305352250889, "grad_norm": 0.15059204399585724, "learning_rate": 9.834939543163145e-06, "loss": 0.0032, "step": 42510 }, { "epoch": 0.3477123114036881, "grad_norm": 0.2729356586933136, "learning_rate": 9.83475764465364e-06, "loss": 0.0041, "step": 42520 }, { "epoch": 0.3477940875822873, "grad_norm": 0.07299979776144028, "learning_rate": 9.834575647656193e-06, "loss": 0.0027, "step": 42530 }, { "epoch": 0.3478758637608865, "grad_norm": 0.06353349983692169, "learning_rate": 9.834393552174508e-06, "loss": 0.0034, "step": 42540 }, { "epoch": 0.34795763993948564, "grad_norm": 0.07729922980070114, "learning_rate": 9.834211358212298e-06, "loss": 0.002, "step": 42550 }, { "epoch": 0.3480394161180848, "grad_norm": 0.07640622556209564, "learning_rate": 9.834029065773274e-06, "loss": 0.0032, "step": 42560 }, { "epoch": 0.348121192296684, "grad_norm": 0.2903355360031128, "learning_rate": 9.833846674861147e-06, "loss": 0.0024, "step": 42570 }, { "epoch": 0.34820296847528315, "grad_norm": 0.1397457718849182, "learning_rate": 9.833664185479636e-06, "loss": 0.0041, "step": 42580 }, { "epoch": 0.3482847446538823, "grad_norm": 0.019124621525406837, "learning_rate": 9.833481597632454e-06, "loss": 0.004, "step": 42590 }, { "epoch": 0.3483665208324815, "grad_norm": 0.08722269535064697, "learning_rate": 9.833298911323326e-06, "loss": 0.0024, "step": 42600 }, { "epoch": 0.34844829701108065, "grad_norm": 0.13487312197685242, "learning_rate": 9.83311612655597e-06, "loss": 0.0028, "step": 42610 }, { "epoch": 0.3485300731896798, "grad_norm": 0.040359094738960266, "learning_rate": 9.832933243334108e-06, "loss": 0.0015, "step": 42620 }, { "epoch": 0.34861184936827905, "grad_norm": 0.3007899224758148, "learning_rate": 9.83275026166147e-06, "loss": 0.0046, "step": 42630 }, { "epoch": 0.3486936255468782, "grad_norm": 0.034907542169094086, "learning_rate": 9.83256718154178e-06, "loss": 0.0054, "step": 42640 }, { "epoch": 0.3487754017254774, "grad_norm": 0.12413496524095535, "learning_rate": 9.832384002978765e-06, "loss": 0.0025, "step": 42650 }, { "epoch": 0.34885717790407655, "grad_norm": 0.14850440621376038, "learning_rate": 9.832200725976164e-06, "loss": 0.0021, "step": 42660 }, { "epoch": 0.3489389540826757, "grad_norm": 0.12317712604999542, "learning_rate": 9.832017350537706e-06, "loss": 0.0019, "step": 42670 }, { "epoch": 0.3490207302612749, "grad_norm": 0.16912171244621277, "learning_rate": 9.831833876667125e-06, "loss": 0.0029, "step": 42680 }, { "epoch": 0.34910250643987406, "grad_norm": 0.04498135298490524, "learning_rate": 9.831650304368162e-06, "loss": 0.0023, "step": 42690 }, { "epoch": 0.34918428261847323, "grad_norm": 0.11354931443929672, "learning_rate": 9.831466633644552e-06, "loss": 0.0031, "step": 42700 }, { "epoch": 0.3492660587970724, "grad_norm": 0.01815333217382431, "learning_rate": 9.831282864500043e-06, "loss": 0.0023, "step": 42710 }, { "epoch": 0.34934783497567157, "grad_norm": 0.09170964360237122, "learning_rate": 9.831098996938374e-06, "loss": 0.0023, "step": 42720 }, { "epoch": 0.34942961115427074, "grad_norm": 0.05171837657690048, "learning_rate": 9.830915030963291e-06, "loss": 0.0032, "step": 42730 }, { "epoch": 0.34951138733286996, "grad_norm": 0.04763196408748627, "learning_rate": 9.830730966578543e-06, "loss": 0.0028, "step": 42740 }, { "epoch": 0.34959316351146913, "grad_norm": 0.2653171718120575, "learning_rate": 9.830546803787878e-06, "loss": 0.0024, "step": 42750 }, { "epoch": 0.3496749396900683, "grad_norm": 0.07868986576795578, "learning_rate": 9.830362542595048e-06, "loss": 0.001, "step": 42760 }, { "epoch": 0.34975671586866747, "grad_norm": 0.054105766117572784, "learning_rate": 9.830178183003807e-06, "loss": 0.0025, "step": 42770 }, { "epoch": 0.34983849204726664, "grad_norm": 0.04841721057891846, "learning_rate": 9.82999372501791e-06, "loss": 0.0049, "step": 42780 }, { "epoch": 0.3499202682258658, "grad_norm": 0.08976512402296066, "learning_rate": 9.829809168641114e-06, "loss": 0.0028, "step": 42790 }, { "epoch": 0.350002044404465, "grad_norm": 0.19362427294254303, "learning_rate": 9.82962451387718e-06, "loss": 0.0022, "step": 42800 }, { "epoch": 0.35008382058306414, "grad_norm": 0.0042134546674788, "learning_rate": 9.82943976072987e-06, "loss": 0.0032, "step": 42810 }, { "epoch": 0.3501655967616633, "grad_norm": 0.05213988572359085, "learning_rate": 9.829254909202945e-06, "loss": 0.0023, "step": 42820 }, { "epoch": 0.3502473729402625, "grad_norm": 0.05557418242096901, "learning_rate": 9.829069959300172e-06, "loss": 0.0036, "step": 42830 }, { "epoch": 0.35032914911886165, "grad_norm": 0.17088885605335236, "learning_rate": 9.82888491102532e-06, "loss": 0.0035, "step": 42840 }, { "epoch": 0.3504109252974609, "grad_norm": 0.025284379720687866, "learning_rate": 9.828699764382158e-06, "loss": 0.0032, "step": 42850 }, { "epoch": 0.35049270147606004, "grad_norm": 0.3049684762954712, "learning_rate": 9.828514519374454e-06, "loss": 0.0037, "step": 42860 }, { "epoch": 0.3505744776546592, "grad_norm": 0.016912290826439857, "learning_rate": 9.828329176005986e-06, "loss": 0.0022, "step": 42870 }, { "epoch": 0.3506562538332584, "grad_norm": 0.24513575434684753, "learning_rate": 9.828143734280528e-06, "loss": 0.0039, "step": 42880 }, { "epoch": 0.35073803001185755, "grad_norm": 0.06395503133535385, "learning_rate": 9.82795819420186e-06, "loss": 0.0032, "step": 42890 }, { "epoch": 0.3508198061904567, "grad_norm": 0.048717133700847626, "learning_rate": 9.827772555773756e-06, "loss": 0.0022, "step": 42900 }, { "epoch": 0.3509015823690559, "grad_norm": 0.10088524967432022, "learning_rate": 9.827586819000001e-06, "loss": 0.0018, "step": 42910 }, { "epoch": 0.35098335854765506, "grad_norm": 0.10759712010622025, "learning_rate": 9.82740098388438e-06, "loss": 0.0018, "step": 42920 }, { "epoch": 0.3510651347262542, "grad_norm": 0.16166256368160248, "learning_rate": 9.827215050430674e-06, "loss": 0.0028, "step": 42930 }, { "epoch": 0.3511469109048534, "grad_norm": 0.4063735902309418, "learning_rate": 9.827029018642677e-06, "loss": 0.002, "step": 42940 }, { "epoch": 0.3512286870834526, "grad_norm": 0.021441906690597534, "learning_rate": 9.826842888524172e-06, "loss": 0.0025, "step": 42950 }, { "epoch": 0.3513104632620518, "grad_norm": 0.06141702085733414, "learning_rate": 9.826656660078957e-06, "loss": 0.0034, "step": 42960 }, { "epoch": 0.35139223944065096, "grad_norm": 0.028042428195476532, "learning_rate": 9.826470333310822e-06, "loss": 0.0022, "step": 42970 }, { "epoch": 0.3514740156192501, "grad_norm": 0.04366861656308174, "learning_rate": 9.826283908223562e-06, "loss": 0.002, "step": 42980 }, { "epoch": 0.3515557917978493, "grad_norm": 0.3447876572608948, "learning_rate": 9.826097384820977e-06, "loss": 0.0067, "step": 42990 }, { "epoch": 0.35163756797644846, "grad_norm": 0.2636047899723053, "learning_rate": 9.825910763106865e-06, "loss": 0.0047, "step": 43000 }, { "epoch": 0.35171934415504763, "grad_norm": 0.12884533405303955, "learning_rate": 9.825724043085028e-06, "loss": 0.0017, "step": 43010 }, { "epoch": 0.3518011203336468, "grad_norm": 0.14172080159187317, "learning_rate": 9.82553722475927e-06, "loss": 0.0032, "step": 43020 }, { "epoch": 0.35188289651224597, "grad_norm": 0.21519815921783447, "learning_rate": 9.825350308133394e-06, "loss": 0.0026, "step": 43030 }, { "epoch": 0.35196467269084514, "grad_norm": 0.16223052144050598, "learning_rate": 9.825163293211213e-06, "loss": 0.0026, "step": 43040 }, { "epoch": 0.3520464488694443, "grad_norm": 0.11506707966327667, "learning_rate": 9.824976179996533e-06, "loss": 0.0035, "step": 43050 }, { "epoch": 0.35212822504804353, "grad_norm": 0.07193424552679062, "learning_rate": 9.824788968493166e-06, "loss": 0.0021, "step": 43060 }, { "epoch": 0.3522100012266427, "grad_norm": 0.05533626675605774, "learning_rate": 9.824601658704926e-06, "loss": 0.0018, "step": 43070 }, { "epoch": 0.35229177740524187, "grad_norm": 0.018262725323438644, "learning_rate": 9.824414250635628e-06, "loss": 0.0023, "step": 43080 }, { "epoch": 0.35237355358384104, "grad_norm": 0.06774060428142548, "learning_rate": 9.82422674428909e-06, "loss": 0.0025, "step": 43090 }, { "epoch": 0.3524553297624402, "grad_norm": 0.21477289497852325, "learning_rate": 9.824039139669132e-06, "loss": 0.0024, "step": 43100 }, { "epoch": 0.3525371059410394, "grad_norm": 0.2869933247566223, "learning_rate": 9.823851436779577e-06, "loss": 0.0038, "step": 43110 }, { "epoch": 0.35261888211963854, "grad_norm": 0.20825375616550446, "learning_rate": 9.823663635624245e-06, "loss": 0.0029, "step": 43120 }, { "epoch": 0.3527006582982377, "grad_norm": 0.014039912261068821, "learning_rate": 9.823475736206967e-06, "loss": 0.0037, "step": 43130 }, { "epoch": 0.3527824344768369, "grad_norm": 0.19684256613254547, "learning_rate": 9.823287738531565e-06, "loss": 0.0041, "step": 43140 }, { "epoch": 0.35286421065543605, "grad_norm": 0.26942119002342224, "learning_rate": 9.82309964260187e-06, "loss": 0.0025, "step": 43150 }, { "epoch": 0.3529459868340352, "grad_norm": 0.10708748549222946, "learning_rate": 9.822911448421718e-06, "loss": 0.0038, "step": 43160 }, { "epoch": 0.35302776301263444, "grad_norm": 0.22338338196277618, "learning_rate": 9.822723155994937e-06, "loss": 0.0051, "step": 43170 }, { "epoch": 0.3531095391912336, "grad_norm": 0.10108974575996399, "learning_rate": 9.822534765325367e-06, "loss": 0.0065, "step": 43180 }, { "epoch": 0.3531913153698328, "grad_norm": 0.42136961221694946, "learning_rate": 9.822346276416842e-06, "loss": 0.0032, "step": 43190 }, { "epoch": 0.35327309154843195, "grad_norm": 0.27638664841651917, "learning_rate": 9.822157689273206e-06, "loss": 0.0051, "step": 43200 }, { "epoch": 0.3533548677270311, "grad_norm": 0.034474488347768784, "learning_rate": 9.821969003898297e-06, "loss": 0.0025, "step": 43210 }, { "epoch": 0.3534366439056303, "grad_norm": 0.13601382076740265, "learning_rate": 9.82178022029596e-06, "loss": 0.0028, "step": 43220 }, { "epoch": 0.35351842008422946, "grad_norm": 0.26029789447784424, "learning_rate": 9.82159133847004e-06, "loss": 0.0028, "step": 43230 }, { "epoch": 0.3536001962628286, "grad_norm": 0.06462807208299637, "learning_rate": 9.821402358424386e-06, "loss": 0.0038, "step": 43240 }, { "epoch": 0.3536819724414278, "grad_norm": 0.09971544146537781, "learning_rate": 9.821213280162846e-06, "loss": 0.002, "step": 43250 }, { "epoch": 0.35376374862002696, "grad_norm": 0.3098011910915375, "learning_rate": 9.821024103689274e-06, "loss": 0.0032, "step": 43260 }, { "epoch": 0.3538455247986262, "grad_norm": 0.11419185250997543, "learning_rate": 9.820834829007521e-06, "loss": 0.0025, "step": 43270 }, { "epoch": 0.35392730097722536, "grad_norm": 0.09401174634695053, "learning_rate": 9.820645456121445e-06, "loss": 0.0031, "step": 43280 }, { "epoch": 0.3540090771558245, "grad_norm": 0.1240810677409172, "learning_rate": 9.820455985034903e-06, "loss": 0.005, "step": 43290 }, { "epoch": 0.3540908533344237, "grad_norm": 0.037284594029188156, "learning_rate": 9.820266415751753e-06, "loss": 0.0026, "step": 43300 }, { "epoch": 0.35417262951302286, "grad_norm": 0.07159391045570374, "learning_rate": 9.82007674827586e-06, "loss": 0.002, "step": 43310 }, { "epoch": 0.35425440569162203, "grad_norm": 0.08079446852207184, "learning_rate": 9.819886982611083e-06, "loss": 0.0046, "step": 43320 }, { "epoch": 0.3543361818702212, "grad_norm": 0.1416424810886383, "learning_rate": 9.81969711876129e-06, "loss": 0.0025, "step": 43330 }, { "epoch": 0.35441795804882037, "grad_norm": 0.08709566295146942, "learning_rate": 9.819507156730352e-06, "loss": 0.0024, "step": 43340 }, { "epoch": 0.35449973422741954, "grad_norm": 0.6726314425468445, "learning_rate": 9.819317096522134e-06, "loss": 0.0025, "step": 43350 }, { "epoch": 0.3545815104060187, "grad_norm": 0.049801770597696304, "learning_rate": 9.81912693814051e-06, "loss": 0.0045, "step": 43360 }, { "epoch": 0.3546632865846179, "grad_norm": 0.07328253984451294, "learning_rate": 9.818936681589352e-06, "loss": 0.0047, "step": 43370 }, { "epoch": 0.3547450627632171, "grad_norm": 0.047160804271698, "learning_rate": 9.818746326872539e-06, "loss": 0.0027, "step": 43380 }, { "epoch": 0.35482683894181627, "grad_norm": 0.11853503435850143, "learning_rate": 9.818555873993945e-06, "loss": 0.0035, "step": 43390 }, { "epoch": 0.35490861512041544, "grad_norm": 0.014659583568572998, "learning_rate": 9.81836532295745e-06, "loss": 0.003, "step": 43400 }, { "epoch": 0.3549903912990146, "grad_norm": 0.16953375935554504, "learning_rate": 9.81817467376694e-06, "loss": 0.0021, "step": 43410 }, { "epoch": 0.3550721674776138, "grad_norm": 0.10280296951532364, "learning_rate": 9.817983926426292e-06, "loss": 0.0046, "step": 43420 }, { "epoch": 0.35515394365621294, "grad_norm": 0.40115466713905334, "learning_rate": 9.817793080939397e-06, "loss": 0.0029, "step": 43430 }, { "epoch": 0.3552357198348121, "grad_norm": 0.04632275924086571, "learning_rate": 9.81760213731014e-06, "loss": 0.0023, "step": 43440 }, { "epoch": 0.3553174960134113, "grad_norm": 0.1541217714548111, "learning_rate": 9.817411095542413e-06, "loss": 0.0028, "step": 43450 }, { "epoch": 0.35539927219201045, "grad_norm": 0.18687781691551208, "learning_rate": 9.817219955640105e-06, "loss": 0.0044, "step": 43460 }, { "epoch": 0.3554810483706096, "grad_norm": 0.02389291115105152, "learning_rate": 9.817028717607112e-06, "loss": 0.0016, "step": 43470 }, { "epoch": 0.3555628245492088, "grad_norm": 0.35670822858810425, "learning_rate": 9.816837381447325e-06, "loss": 0.0028, "step": 43480 }, { "epoch": 0.355644600727808, "grad_norm": 0.03241559490561485, "learning_rate": 9.816645947164649e-06, "loss": 0.0027, "step": 43490 }, { "epoch": 0.3557263769064072, "grad_norm": 0.2529861330986023, "learning_rate": 9.816454414762978e-06, "loss": 0.0025, "step": 43500 }, { "epoch": 0.35580815308500635, "grad_norm": 0.08878139406442642, "learning_rate": 9.816262784246217e-06, "loss": 0.002, "step": 43510 }, { "epoch": 0.3558899292636055, "grad_norm": 0.010304897092282772, "learning_rate": 9.816071055618268e-06, "loss": 0.0012, "step": 43520 }, { "epoch": 0.3559717054422047, "grad_norm": 0.08038905262947083, "learning_rate": 9.815879228883037e-06, "loss": 0.0025, "step": 43530 }, { "epoch": 0.35605348162080386, "grad_norm": 0.11198150366544724, "learning_rate": 9.81568730404443e-06, "loss": 0.0021, "step": 43540 }, { "epoch": 0.356135257799403, "grad_norm": 0.00573609909042716, "learning_rate": 9.815495281106358e-06, "loss": 0.0021, "step": 43550 }, { "epoch": 0.3562170339780022, "grad_norm": 0.08386726677417755, "learning_rate": 9.815303160072736e-06, "loss": 0.003, "step": 43560 }, { "epoch": 0.35629881015660136, "grad_norm": 0.04874806106090546, "learning_rate": 9.815110940947472e-06, "loss": 0.0029, "step": 43570 }, { "epoch": 0.35638058633520053, "grad_norm": 0.1325991451740265, "learning_rate": 9.814918623734484e-06, "loss": 0.0022, "step": 43580 }, { "epoch": 0.35646236251379976, "grad_norm": 0.12157895416021347, "learning_rate": 9.81472620843769e-06, "loss": 0.0016, "step": 43590 }, { "epoch": 0.3565441386923989, "grad_norm": 0.06473837792873383, "learning_rate": 9.81453369506101e-06, "loss": 0.0044, "step": 43600 }, { "epoch": 0.3566259148709981, "grad_norm": 0.06756468117237091, "learning_rate": 9.814341083608366e-06, "loss": 0.0039, "step": 43610 }, { "epoch": 0.35670769104959726, "grad_norm": 0.05859381705522537, "learning_rate": 9.814148374083681e-06, "loss": 0.0024, "step": 43620 }, { "epoch": 0.35678946722819643, "grad_norm": 0.25008177757263184, "learning_rate": 9.81395556649088e-06, "loss": 0.0033, "step": 43630 }, { "epoch": 0.3568712434067956, "grad_norm": 0.23027357459068298, "learning_rate": 9.813762660833889e-06, "loss": 0.0056, "step": 43640 }, { "epoch": 0.35695301958539477, "grad_norm": 0.05768469348549843, "learning_rate": 9.813569657116643e-06, "loss": 0.0032, "step": 43650 }, { "epoch": 0.35703479576399394, "grad_norm": 0.2825648784637451, "learning_rate": 9.813376555343068e-06, "loss": 0.0032, "step": 43660 }, { "epoch": 0.3571165719425931, "grad_norm": 0.1381831020116806, "learning_rate": 9.813183355517102e-06, "loss": 0.0034, "step": 43670 }, { "epoch": 0.3571983481211923, "grad_norm": 0.13379579782485962, "learning_rate": 9.812990057642678e-06, "loss": 0.0051, "step": 43680 }, { "epoch": 0.35728012429979145, "grad_norm": 0.05763549357652664, "learning_rate": 9.812796661723734e-06, "loss": 0.0029, "step": 43690 }, { "epoch": 0.35736190047839067, "grad_norm": 0.1187363713979721, "learning_rate": 9.81260316776421e-06, "loss": 0.0031, "step": 43700 }, { "epoch": 0.35744367665698984, "grad_norm": 0.05626245215535164, "learning_rate": 9.812409575768048e-06, "loss": 0.0035, "step": 43710 }, { "epoch": 0.357525452835589, "grad_norm": 0.09240470826625824, "learning_rate": 9.812215885739192e-06, "loss": 0.0024, "step": 43720 }, { "epoch": 0.3576072290141882, "grad_norm": 0.09200157225131989, "learning_rate": 9.812022097681583e-06, "loss": 0.0017, "step": 43730 }, { "epoch": 0.35768900519278735, "grad_norm": 0.09249358624219894, "learning_rate": 9.811828211599177e-06, "loss": 0.0031, "step": 43740 }, { "epoch": 0.3577707813713865, "grad_norm": 0.26595017313957214, "learning_rate": 9.811634227495917e-06, "loss": 0.0024, "step": 43750 }, { "epoch": 0.3578525575499857, "grad_norm": 0.03161296248435974, "learning_rate": 9.811440145375757e-06, "loss": 0.0033, "step": 43760 }, { "epoch": 0.35793433372858485, "grad_norm": 0.09103939682245255, "learning_rate": 9.811245965242651e-06, "loss": 0.0026, "step": 43770 }, { "epoch": 0.358016109907184, "grad_norm": 0.32476717233657837, "learning_rate": 9.811051687100553e-06, "loss": 0.0036, "step": 43780 }, { "epoch": 0.3580978860857832, "grad_norm": 0.175290048122406, "learning_rate": 9.810857310953422e-06, "loss": 0.0042, "step": 43790 }, { "epoch": 0.35817966226438236, "grad_norm": 0.005297990050166845, "learning_rate": 9.810662836805216e-06, "loss": 0.0056, "step": 43800 }, { "epoch": 0.3582614384429816, "grad_norm": 0.0880964994430542, "learning_rate": 9.810468264659898e-06, "loss": 0.0021, "step": 43810 }, { "epoch": 0.35834321462158075, "grad_norm": 0.0666908547282219, "learning_rate": 9.810273594521432e-06, "loss": 0.0024, "step": 43820 }, { "epoch": 0.3584249908001799, "grad_norm": 0.015630193054676056, "learning_rate": 9.810078826393783e-06, "loss": 0.0014, "step": 43830 }, { "epoch": 0.3585067669787791, "grad_norm": 0.09590902924537659, "learning_rate": 9.809883960280918e-06, "loss": 0.0027, "step": 43840 }, { "epoch": 0.35858854315737826, "grad_norm": 0.05422423034906387, "learning_rate": 9.809688996186807e-06, "loss": 0.0025, "step": 43850 }, { "epoch": 0.35867031933597743, "grad_norm": 0.042719658464193344, "learning_rate": 9.809493934115421e-06, "loss": 0.0029, "step": 43860 }, { "epoch": 0.3587520955145766, "grad_norm": 0.02016429975628853, "learning_rate": 9.809298774070738e-06, "loss": 0.0018, "step": 43870 }, { "epoch": 0.35883387169317577, "grad_norm": 0.20598866045475006, "learning_rate": 9.809103516056724e-06, "loss": 0.005, "step": 43880 }, { "epoch": 0.35891564787177493, "grad_norm": 0.09056892991065979, "learning_rate": 9.808908160077366e-06, "loss": 0.006, "step": 43890 }, { "epoch": 0.3589974240503741, "grad_norm": 0.03582390770316124, "learning_rate": 9.808712706136639e-06, "loss": 0.0038, "step": 43900 }, { "epoch": 0.3590792002289733, "grad_norm": 0.056307729333639145, "learning_rate": 9.808517154238526e-06, "loss": 0.0025, "step": 43910 }, { "epoch": 0.3591609764075725, "grad_norm": 0.10908222943544388, "learning_rate": 9.80832150438701e-06, "loss": 0.0032, "step": 43920 }, { "epoch": 0.35924275258617167, "grad_norm": 0.07450760155916214, "learning_rate": 9.808125756586075e-06, "loss": 0.0018, "step": 43930 }, { "epoch": 0.35932452876477083, "grad_norm": 0.1015547662973404, "learning_rate": 9.807929910839711e-06, "loss": 0.0031, "step": 43940 }, { "epoch": 0.35940630494337, "grad_norm": 0.7583059072494507, "learning_rate": 9.807733967151907e-06, "loss": 0.0073, "step": 43950 }, { "epoch": 0.35948808112196917, "grad_norm": 0.10122832655906677, "learning_rate": 9.807537925526654e-06, "loss": 0.0036, "step": 43960 }, { "epoch": 0.35956985730056834, "grad_norm": 0.035247109830379486, "learning_rate": 9.807341785967947e-06, "loss": 0.0023, "step": 43970 }, { "epoch": 0.3596516334791675, "grad_norm": 0.08012321591377258, "learning_rate": 9.807145548479779e-06, "loss": 0.0023, "step": 43980 }, { "epoch": 0.3597334096577667, "grad_norm": 0.10093390196561813, "learning_rate": 9.806949213066149e-06, "loss": 0.0024, "step": 43990 }, { "epoch": 0.35981518583636585, "grad_norm": 0.06409142911434174, "learning_rate": 9.806752779731054e-06, "loss": 0.0027, "step": 44000 }, { "epoch": 0.359896962014965, "grad_norm": 0.21906910836696625, "learning_rate": 9.8065562484785e-06, "loss": 0.003, "step": 44010 }, { "epoch": 0.35997873819356424, "grad_norm": 0.10311445593833923, "learning_rate": 9.806359619312489e-06, "loss": 0.0027, "step": 44020 }, { "epoch": 0.3600605143721634, "grad_norm": 0.05883088707923889, "learning_rate": 9.806162892237023e-06, "loss": 0.0037, "step": 44030 }, { "epoch": 0.3601422905507626, "grad_norm": 0.07254231721162796, "learning_rate": 9.805966067256114e-06, "loss": 0.0019, "step": 44040 }, { "epoch": 0.36022406672936175, "grad_norm": 0.04287763312458992, "learning_rate": 9.805769144373771e-06, "loss": 0.0047, "step": 44050 }, { "epoch": 0.3603058429079609, "grad_norm": 0.05241323262453079, "learning_rate": 9.805572123594003e-06, "loss": 0.0022, "step": 44060 }, { "epoch": 0.3603876190865601, "grad_norm": 0.20570917427539825, "learning_rate": 9.805375004920824e-06, "loss": 0.0029, "step": 44070 }, { "epoch": 0.36046939526515925, "grad_norm": 0.11230213195085526, "learning_rate": 9.805177788358251e-06, "loss": 0.003, "step": 44080 }, { "epoch": 0.3605511714437584, "grad_norm": 0.16721133887767792, "learning_rate": 9.8049804739103e-06, "loss": 0.0029, "step": 44090 }, { "epoch": 0.3606329476223576, "grad_norm": 0.14507097005844116, "learning_rate": 9.804783061580991e-06, "loss": 0.0032, "step": 44100 }, { "epoch": 0.36071472380095676, "grad_norm": 0.1632896065711975, "learning_rate": 9.804585551374345e-06, "loss": 0.0028, "step": 44110 }, { "epoch": 0.36079649997955593, "grad_norm": 0.04536205902695656, "learning_rate": 9.804387943294388e-06, "loss": 0.0022, "step": 44120 }, { "epoch": 0.36087827615815515, "grad_norm": 0.08928457647562027, "learning_rate": 9.804190237345142e-06, "loss": 0.0026, "step": 44130 }, { "epoch": 0.3609600523367543, "grad_norm": 0.09809321910142899, "learning_rate": 9.803992433530635e-06, "loss": 0.0042, "step": 44140 }, { "epoch": 0.3610418285153535, "grad_norm": 0.056049130856990814, "learning_rate": 9.803794531854899e-06, "loss": 0.0019, "step": 44150 }, { "epoch": 0.36112360469395266, "grad_norm": 0.023251159116625786, "learning_rate": 9.803596532321963e-06, "loss": 0.0017, "step": 44160 }, { "epoch": 0.36120538087255183, "grad_norm": 0.17514322698116302, "learning_rate": 9.80339843493586e-06, "loss": 0.0029, "step": 44170 }, { "epoch": 0.361287157051151, "grad_norm": 0.061362579464912415, "learning_rate": 9.803200239700629e-06, "loss": 0.0021, "step": 44180 }, { "epoch": 0.36136893322975017, "grad_norm": 0.11446976661682129, "learning_rate": 9.803001946620303e-06, "loss": 0.0024, "step": 44190 }, { "epoch": 0.36145070940834934, "grad_norm": 0.04265381768345833, "learning_rate": 9.802803555698923e-06, "loss": 0.0042, "step": 44200 }, { "epoch": 0.3615324855869485, "grad_norm": 0.0411822684109211, "learning_rate": 9.802605066940531e-06, "loss": 0.0019, "step": 44210 }, { "epoch": 0.3616142617655477, "grad_norm": 0.13803955912590027, "learning_rate": 9.802406480349172e-06, "loss": 0.0048, "step": 44220 }, { "epoch": 0.3616960379441469, "grad_norm": 0.047517310827970505, "learning_rate": 9.802207795928886e-06, "loss": 0.003, "step": 44230 }, { "epoch": 0.36177781412274607, "grad_norm": 0.20779158174991608, "learning_rate": 9.802009013683726e-06, "loss": 0.0028, "step": 44240 }, { "epoch": 0.36185959030134524, "grad_norm": 0.18707753717899323, "learning_rate": 9.801810133617739e-06, "loss": 0.0028, "step": 44250 }, { "epoch": 0.3619413664799444, "grad_norm": 0.06881874799728394, "learning_rate": 9.801611155734976e-06, "loss": 0.0054, "step": 44260 }, { "epoch": 0.3620231426585436, "grad_norm": 0.06495344638824463, "learning_rate": 9.801412080039491e-06, "loss": 0.0035, "step": 44270 }, { "epoch": 0.36210491883714274, "grad_norm": 0.0786723718047142, "learning_rate": 9.801212906535338e-06, "loss": 0.0045, "step": 44280 }, { "epoch": 0.3621866950157419, "grad_norm": 0.15587468445301056, "learning_rate": 9.801013635226577e-06, "loss": 0.0042, "step": 44290 }, { "epoch": 0.3622684711943411, "grad_norm": 0.018310582265257835, "learning_rate": 9.800814266117265e-06, "loss": 0.0069, "step": 44300 }, { "epoch": 0.36235024737294025, "grad_norm": 0.14313842356204987, "learning_rate": 9.800614799211465e-06, "loss": 0.0022, "step": 44310 }, { "epoch": 0.3624320235515394, "grad_norm": 0.0753258466720581, "learning_rate": 9.800415234513238e-06, "loss": 0.0034, "step": 44320 }, { "epoch": 0.3625137997301386, "grad_norm": 0.015072081238031387, "learning_rate": 9.800215572026653e-06, "loss": 0.0025, "step": 44330 }, { "epoch": 0.3625955759087378, "grad_norm": 0.11284106224775314, "learning_rate": 9.800015811755772e-06, "loss": 0.0029, "step": 44340 }, { "epoch": 0.362677352087337, "grad_norm": 0.13782218098640442, "learning_rate": 9.79981595370467e-06, "loss": 0.004, "step": 44350 }, { "epoch": 0.36275912826593615, "grad_norm": 0.08276514708995819, "learning_rate": 9.799615997877414e-06, "loss": 0.0032, "step": 44360 }, { "epoch": 0.3628409044445353, "grad_norm": 0.11641357839107513, "learning_rate": 9.799415944278078e-06, "loss": 0.0055, "step": 44370 }, { "epoch": 0.3629226806231345, "grad_norm": 0.07594715803861618, "learning_rate": 9.799215792910739e-06, "loss": 0.0034, "step": 44380 }, { "epoch": 0.36300445680173365, "grad_norm": 0.1235041543841362, "learning_rate": 9.799015543779473e-06, "loss": 0.0031, "step": 44390 }, { "epoch": 0.3630862329803328, "grad_norm": 0.06115501746535301, "learning_rate": 9.798815196888361e-06, "loss": 0.0035, "step": 44400 }, { "epoch": 0.363168009158932, "grad_norm": 0.10525670647621155, "learning_rate": 9.798614752241481e-06, "loss": 0.002, "step": 44410 }, { "epoch": 0.36324978533753116, "grad_norm": 0.14739853143692017, "learning_rate": 9.798414209842918e-06, "loss": 0.0025, "step": 44420 }, { "epoch": 0.36333156151613033, "grad_norm": 0.12771287560462952, "learning_rate": 9.79821356969676e-06, "loss": 0.0013, "step": 44430 }, { "epoch": 0.3634133376947295, "grad_norm": 0.27616259455680847, "learning_rate": 9.798012831807089e-06, "loss": 0.0029, "step": 44440 }, { "epoch": 0.3634951138733287, "grad_norm": 0.1904064118862152, "learning_rate": 9.797811996177997e-06, "loss": 0.0026, "step": 44450 }, { "epoch": 0.3635768900519279, "grad_norm": 0.09685935825109482, "learning_rate": 9.797611062813574e-06, "loss": 0.0032, "step": 44460 }, { "epoch": 0.36365866623052706, "grad_norm": 0.1214338168501854, "learning_rate": 9.797410031717916e-06, "loss": 0.0031, "step": 44470 }, { "epoch": 0.36374044240912623, "grad_norm": 0.07062813639640808, "learning_rate": 9.797208902895114e-06, "loss": 0.0051, "step": 44480 }, { "epoch": 0.3638222185877254, "grad_norm": 0.035106826573610306, "learning_rate": 9.797007676349268e-06, "loss": 0.0021, "step": 44490 }, { "epoch": 0.36390399476632457, "grad_norm": 0.12057375907897949, "learning_rate": 9.796806352084476e-06, "loss": 0.0023, "step": 44500 }, { "epoch": 0.36398577094492374, "grad_norm": 0.24555830657482147, "learning_rate": 9.796604930104841e-06, "loss": 0.0027, "step": 44510 }, { "epoch": 0.3640675471235229, "grad_norm": 0.11971227824687958, "learning_rate": 9.796403410414463e-06, "loss": 0.0027, "step": 44520 }, { "epoch": 0.3641493233021221, "grad_norm": 0.07577741146087646, "learning_rate": 9.79620179301745e-06, "loss": 0.0028, "step": 44530 }, { "epoch": 0.36423109948072124, "grad_norm": 0.2062568962574005, "learning_rate": 9.796000077917906e-06, "loss": 0.0025, "step": 44540 }, { "epoch": 0.36431287565932047, "grad_norm": 0.1020083874464035, "learning_rate": 9.795798265119945e-06, "loss": 0.0052, "step": 44550 }, { "epoch": 0.36439465183791964, "grad_norm": 0.09354955703020096, "learning_rate": 9.795596354627673e-06, "loss": 0.0041, "step": 44560 }, { "epoch": 0.3644764280165188, "grad_norm": 0.04260861873626709, "learning_rate": 9.795394346445206e-06, "loss": 0.0026, "step": 44570 }, { "epoch": 0.364558204195118, "grad_norm": 0.07869783788919449, "learning_rate": 9.795192240576658e-06, "loss": 0.0025, "step": 44580 }, { "epoch": 0.36463998037371714, "grad_norm": 0.14724762737751007, "learning_rate": 9.794990037026146e-06, "loss": 0.003, "step": 44590 }, { "epoch": 0.3647217565523163, "grad_norm": 0.030271150171756744, "learning_rate": 9.79478773579779e-06, "loss": 0.0031, "step": 44600 }, { "epoch": 0.3648035327309155, "grad_norm": 0.04288248345255852, "learning_rate": 9.79458533689571e-06, "loss": 0.0023, "step": 44610 }, { "epoch": 0.36488530890951465, "grad_norm": 0.15729519724845886, "learning_rate": 9.794382840324031e-06, "loss": 0.0013, "step": 44620 }, { "epoch": 0.3649670850881138, "grad_norm": 0.023559655994176865, "learning_rate": 9.794180246086873e-06, "loss": 0.0047, "step": 44630 }, { "epoch": 0.365048861266713, "grad_norm": 0.023647870868444443, "learning_rate": 9.79397755418837e-06, "loss": 0.0053, "step": 44640 }, { "epoch": 0.36513063744531216, "grad_norm": 0.1466439962387085, "learning_rate": 9.793774764632647e-06, "loss": 0.0022, "step": 44650 }, { "epoch": 0.3652124136239114, "grad_norm": 0.11815124750137329, "learning_rate": 9.793571877423833e-06, "loss": 0.0024, "step": 44660 }, { "epoch": 0.36529418980251055, "grad_norm": 0.06384097784757614, "learning_rate": 9.793368892566065e-06, "loss": 0.0022, "step": 44670 }, { "epoch": 0.3653759659811097, "grad_norm": 0.08804363012313843, "learning_rate": 9.793165810063477e-06, "loss": 0.0021, "step": 44680 }, { "epoch": 0.3654577421597089, "grad_norm": 0.1660842001438141, "learning_rate": 9.792962629920204e-06, "loss": 0.0027, "step": 44690 }, { "epoch": 0.36553951833830806, "grad_norm": 0.043408557772636414, "learning_rate": 9.792759352140389e-06, "loss": 0.0028, "step": 44700 }, { "epoch": 0.3656212945169072, "grad_norm": 0.17913974821567535, "learning_rate": 9.792555976728166e-06, "loss": 0.0029, "step": 44710 }, { "epoch": 0.3657030706955064, "grad_norm": 0.6148196458816528, "learning_rate": 9.792352503687685e-06, "loss": 0.0033, "step": 44720 }, { "epoch": 0.36578484687410556, "grad_norm": 0.03641035407781601, "learning_rate": 9.792148933023089e-06, "loss": 0.0019, "step": 44730 }, { "epoch": 0.36586662305270473, "grad_norm": 0.007581030484288931, "learning_rate": 9.791945264738523e-06, "loss": 0.0026, "step": 44740 }, { "epoch": 0.3659483992313039, "grad_norm": 0.049446988850831985, "learning_rate": 9.791741498838135e-06, "loss": 0.002, "step": 44750 }, { "epoch": 0.36603017540990307, "grad_norm": 0.08063068985939026, "learning_rate": 9.79153763532608e-06, "loss": 0.002, "step": 44760 }, { "epoch": 0.3661119515885023, "grad_norm": 0.09354675561189651, "learning_rate": 9.791333674206507e-06, "loss": 0.0032, "step": 44770 }, { "epoch": 0.36619372776710146, "grad_norm": 0.022425206378102303, "learning_rate": 9.791129615483573e-06, "loss": 0.0018, "step": 44780 }, { "epoch": 0.36627550394570063, "grad_norm": 0.00681948522105813, "learning_rate": 9.790925459161436e-06, "loss": 0.0035, "step": 44790 }, { "epoch": 0.3663572801242998, "grad_norm": 0.03917519748210907, "learning_rate": 9.790721205244252e-06, "loss": 0.0024, "step": 44800 }, { "epoch": 0.36643905630289897, "grad_norm": 0.09167362004518509, "learning_rate": 9.790516853736182e-06, "loss": 0.0017, "step": 44810 }, { "epoch": 0.36652083248149814, "grad_norm": 0.12094443291425705, "learning_rate": 9.79031240464139e-06, "loss": 0.0023, "step": 44820 }, { "epoch": 0.3666026086600973, "grad_norm": 0.11426606774330139, "learning_rate": 9.79010785796404e-06, "loss": 0.0027, "step": 44830 }, { "epoch": 0.3666843848386965, "grad_norm": 0.09733224660158157, "learning_rate": 9.7899032137083e-06, "loss": 0.0023, "step": 44840 }, { "epoch": 0.36676616101729564, "grad_norm": 0.07122700661420822, "learning_rate": 9.78969847187834e-06, "loss": 0.0029, "step": 44850 }, { "epoch": 0.3668479371958948, "grad_norm": 0.02224421501159668, "learning_rate": 9.789493632478327e-06, "loss": 0.0022, "step": 44860 }, { "epoch": 0.36692971337449404, "grad_norm": 0.10619308799505234, "learning_rate": 9.789288695512436e-06, "loss": 0.0035, "step": 44870 }, { "epoch": 0.3670114895530932, "grad_norm": 0.12305757403373718, "learning_rate": 9.789083660984841e-06, "loss": 0.0029, "step": 44880 }, { "epoch": 0.3670932657316924, "grad_norm": 0.10648138076066971, "learning_rate": 9.788878528899719e-06, "loss": 0.0034, "step": 44890 }, { "epoch": 0.36717504191029154, "grad_norm": 0.06909186393022537, "learning_rate": 9.78867329926125e-06, "loss": 0.0049, "step": 44900 }, { "epoch": 0.3672568180888907, "grad_norm": 0.01819135621190071, "learning_rate": 9.788467972073612e-06, "loss": 0.0053, "step": 44910 }, { "epoch": 0.3673385942674899, "grad_norm": 0.035546984523534775, "learning_rate": 9.78826254734099e-06, "loss": 0.001, "step": 44920 }, { "epoch": 0.36742037044608905, "grad_norm": 0.12100351601839066, "learning_rate": 9.788057025067568e-06, "loss": 0.0044, "step": 44930 }, { "epoch": 0.3675021466246882, "grad_norm": 0.1301340013742447, "learning_rate": 9.78785140525753e-06, "loss": 0.0023, "step": 44940 }, { "epoch": 0.3675839228032874, "grad_norm": 0.06707103550434113, "learning_rate": 9.787645687915071e-06, "loss": 0.0039, "step": 44950 }, { "epoch": 0.36766569898188656, "grad_norm": 0.2341526299715042, "learning_rate": 9.787439873044376e-06, "loss": 0.0021, "step": 44960 }, { "epoch": 0.3677474751604857, "grad_norm": 0.02347066067159176, "learning_rate": 9.787233960649637e-06, "loss": 0.0027, "step": 44970 }, { "epoch": 0.36782925133908495, "grad_norm": 0.38207513093948364, "learning_rate": 9.787027950735054e-06, "loss": 0.0033, "step": 44980 }, { "epoch": 0.3679110275176841, "grad_norm": 0.12664523720741272, "learning_rate": 9.78682184330482e-06, "loss": 0.003, "step": 44990 }, { "epoch": 0.3679928036962833, "grad_norm": 0.1823272556066513, "learning_rate": 9.786615638363135e-06, "loss": 0.003, "step": 45000 }, { "epoch": 0.36807457987488246, "grad_norm": 0.1115228608250618, "learning_rate": 9.786409335914196e-06, "loss": 0.0029, "step": 45010 }, { "epoch": 0.3681563560534816, "grad_norm": 0.001796708907932043, "learning_rate": 9.78620293596221e-06, "loss": 0.0027, "step": 45020 }, { "epoch": 0.3682381322320808, "grad_norm": 0.015124679543077946, "learning_rate": 9.78599643851138e-06, "loss": 0.0028, "step": 45030 }, { "epoch": 0.36831990841067996, "grad_norm": 0.19360660016536713, "learning_rate": 9.785789843565911e-06, "loss": 0.0023, "step": 45040 }, { "epoch": 0.36840168458927913, "grad_norm": 0.12254029512405396, "learning_rate": 9.785583151130014e-06, "loss": 0.0021, "step": 45050 }, { "epoch": 0.3684834607678783, "grad_norm": 0.2075800597667694, "learning_rate": 9.785376361207897e-06, "loss": 0.0024, "step": 45060 }, { "epoch": 0.36856523694647747, "grad_norm": 0.09616612643003464, "learning_rate": 9.785169473803775e-06, "loss": 0.0024, "step": 45070 }, { "epoch": 0.36864701312507664, "grad_norm": 0.16374999284744263, "learning_rate": 9.78496248892186e-06, "loss": 0.0025, "step": 45080 }, { "epoch": 0.36872878930367586, "grad_norm": 0.0847349762916565, "learning_rate": 9.784755406566372e-06, "loss": 0.0026, "step": 45090 }, { "epoch": 0.36881056548227503, "grad_norm": 0.24270957708358765, "learning_rate": 9.784548226741525e-06, "loss": 0.0025, "step": 45100 }, { "epoch": 0.3688923416608742, "grad_norm": 0.34504228830337524, "learning_rate": 9.784340949451543e-06, "loss": 0.0025, "step": 45110 }, { "epoch": 0.36897411783947337, "grad_norm": 0.1410946398973465, "learning_rate": 9.784133574700645e-06, "loss": 0.0035, "step": 45120 }, { "epoch": 0.36905589401807254, "grad_norm": 0.046230994164943695, "learning_rate": 9.78392610249306e-06, "loss": 0.0031, "step": 45130 }, { "epoch": 0.3691376701966717, "grad_norm": 0.08287244290113449, "learning_rate": 9.783718532833011e-06, "loss": 0.0028, "step": 45140 }, { "epoch": 0.3692194463752709, "grad_norm": 0.17645804584026337, "learning_rate": 9.783510865724726e-06, "loss": 0.0031, "step": 45150 }, { "epoch": 0.36930122255387005, "grad_norm": 0.14434461295604706, "learning_rate": 9.783303101172437e-06, "loss": 0.0034, "step": 45160 }, { "epoch": 0.3693829987324692, "grad_norm": 0.07944583147764206, "learning_rate": 9.783095239180375e-06, "loss": 0.0043, "step": 45170 }, { "epoch": 0.3694647749110684, "grad_norm": 0.03663389012217522, "learning_rate": 9.782887279752775e-06, "loss": 0.0037, "step": 45180 }, { "epoch": 0.3695465510896676, "grad_norm": 0.1055150032043457, "learning_rate": 9.782679222893874e-06, "loss": 0.0024, "step": 45190 }, { "epoch": 0.3696283272682668, "grad_norm": 0.027208399027585983, "learning_rate": 9.782471068607911e-06, "loss": 0.0029, "step": 45200 }, { "epoch": 0.36971010344686595, "grad_norm": 0.21096479892730713, "learning_rate": 9.782262816899122e-06, "loss": 0.0038, "step": 45210 }, { "epoch": 0.3697918796254651, "grad_norm": 0.079734668135643, "learning_rate": 9.782054467771756e-06, "loss": 0.0035, "step": 45220 }, { "epoch": 0.3698736558040643, "grad_norm": 0.12141775339841843, "learning_rate": 9.78184602123005e-06, "loss": 0.0026, "step": 45230 }, { "epoch": 0.36995543198266345, "grad_norm": 0.06694432348012924, "learning_rate": 9.781637477278255e-06, "loss": 0.0034, "step": 45240 }, { "epoch": 0.3700372081612626, "grad_norm": 0.13785409927368164, "learning_rate": 9.781428835920618e-06, "loss": 0.0026, "step": 45250 }, { "epoch": 0.3701189843398618, "grad_norm": 0.07202921062707901, "learning_rate": 9.781220097161388e-06, "loss": 0.0024, "step": 45260 }, { "epoch": 0.37020076051846096, "grad_norm": 0.1707170456647873, "learning_rate": 9.78101126100482e-06, "loss": 0.0026, "step": 45270 }, { "epoch": 0.3702825366970601, "grad_norm": 0.0031919977627694607, "learning_rate": 9.780802327455165e-06, "loss": 0.0036, "step": 45280 }, { "epoch": 0.3703643128756593, "grad_norm": 0.06174309179186821, "learning_rate": 9.780593296516682e-06, "loss": 0.0027, "step": 45290 }, { "epoch": 0.3704460890542585, "grad_norm": 0.010640653781592846, "learning_rate": 9.780384168193626e-06, "loss": 0.0023, "step": 45300 }, { "epoch": 0.3705278652328577, "grad_norm": 0.14674343168735504, "learning_rate": 9.78017494249026e-06, "loss": 0.0025, "step": 45310 }, { "epoch": 0.37060964141145686, "grad_norm": 0.12888804078102112, "learning_rate": 9.779965619410844e-06, "loss": 0.0035, "step": 45320 }, { "epoch": 0.370691417590056, "grad_norm": 0.1002676710486412, "learning_rate": 9.779756198959644e-06, "loss": 0.0032, "step": 45330 }, { "epoch": 0.3707731937686552, "grad_norm": 0.140619158744812, "learning_rate": 9.779546681140924e-06, "loss": 0.004, "step": 45340 }, { "epoch": 0.37085496994725436, "grad_norm": 0.05536266043782234, "learning_rate": 9.779337065958953e-06, "loss": 0.0033, "step": 45350 }, { "epoch": 0.37093674612585353, "grad_norm": 0.02845699153840542, "learning_rate": 9.779127353418004e-06, "loss": 0.0024, "step": 45360 }, { "epoch": 0.3710185223044527, "grad_norm": 0.029782235622406006, "learning_rate": 9.778917543522344e-06, "loss": 0.0026, "step": 45370 }, { "epoch": 0.37110029848305187, "grad_norm": 0.07045002281665802, "learning_rate": 9.778707636276248e-06, "loss": 0.0028, "step": 45380 }, { "epoch": 0.37118207466165104, "grad_norm": 0.13064372539520264, "learning_rate": 9.778497631683994e-06, "loss": 0.0035, "step": 45390 }, { "epoch": 0.3712638508402502, "grad_norm": 0.10572236776351929, "learning_rate": 9.778287529749859e-06, "loss": 0.0043, "step": 45400 }, { "epoch": 0.37134562701884943, "grad_norm": 0.1564873903989792, "learning_rate": 9.778077330478125e-06, "loss": 0.003, "step": 45410 }, { "epoch": 0.3714274031974486, "grad_norm": 0.021625662222504616, "learning_rate": 9.77786703387307e-06, "loss": 0.0009, "step": 45420 }, { "epoch": 0.37150917937604777, "grad_norm": 0.26896387338638306, "learning_rate": 9.77765663993898e-06, "loss": 0.0062, "step": 45430 }, { "epoch": 0.37159095555464694, "grad_norm": 0.1141398698091507, "learning_rate": 9.77744614868014e-06, "loss": 0.0023, "step": 45440 }, { "epoch": 0.3716727317332461, "grad_norm": 0.0812150090932846, "learning_rate": 9.777235560100839e-06, "loss": 0.003, "step": 45450 }, { "epoch": 0.3717545079118453, "grad_norm": 0.09037493914365768, "learning_rate": 9.777024874205368e-06, "loss": 0.0023, "step": 45460 }, { "epoch": 0.37183628409044445, "grad_norm": 0.22405190765857697, "learning_rate": 9.776814090998014e-06, "loss": 0.003, "step": 45470 }, { "epoch": 0.3719180602690436, "grad_norm": 0.1933710277080536, "learning_rate": 9.776603210483078e-06, "loss": 0.0033, "step": 45480 }, { "epoch": 0.3719998364476428, "grad_norm": 0.3273651897907257, "learning_rate": 9.776392232664851e-06, "loss": 0.0037, "step": 45490 }, { "epoch": 0.37208161262624195, "grad_norm": 0.11345479637384415, "learning_rate": 9.776181157547631e-06, "loss": 0.0032, "step": 45500 }, { "epoch": 0.3721633888048411, "grad_norm": 0.054182328283786774, "learning_rate": 9.775969985135717e-06, "loss": 0.0041, "step": 45510 }, { "epoch": 0.37224516498344035, "grad_norm": 0.04643452912569046, "learning_rate": 9.775758715433417e-06, "loss": 0.0016, "step": 45520 }, { "epoch": 0.3723269411620395, "grad_norm": 0.03643225133419037, "learning_rate": 9.775547348445028e-06, "loss": 0.0031, "step": 45530 }, { "epoch": 0.3724087173406387, "grad_norm": 0.20282399654388428, "learning_rate": 9.775335884174854e-06, "loss": 0.0019, "step": 45540 }, { "epoch": 0.37249049351923785, "grad_norm": 0.18194952607154846, "learning_rate": 9.775124322627209e-06, "loss": 0.0038, "step": 45550 }, { "epoch": 0.372572269697837, "grad_norm": 0.5593550205230713, "learning_rate": 9.774912663806402e-06, "loss": 0.0046, "step": 45560 }, { "epoch": 0.3726540458764362, "grad_norm": 0.14350783824920654, "learning_rate": 9.77470090771674e-06, "loss": 0.0041, "step": 45570 }, { "epoch": 0.37273582205503536, "grad_norm": 0.12331263720989227, "learning_rate": 9.774489054362542e-06, "loss": 0.0038, "step": 45580 }, { "epoch": 0.37281759823363453, "grad_norm": 0.13553117215633392, "learning_rate": 9.77427710374812e-06, "loss": 0.0041, "step": 45590 }, { "epoch": 0.3728993744122337, "grad_norm": 0.07981979101896286, "learning_rate": 9.774065055877792e-06, "loss": 0.0035, "step": 45600 }, { "epoch": 0.37298115059083287, "grad_norm": 0.1178194060921669, "learning_rate": 9.773852910755878e-06, "loss": 0.0033, "step": 45610 }, { "epoch": 0.3730629267694321, "grad_norm": 0.04459541663527489, "learning_rate": 9.7736406683867e-06, "loss": 0.0041, "step": 45620 }, { "epoch": 0.37314470294803126, "grad_norm": 0.12469707429409027, "learning_rate": 9.773428328774583e-06, "loss": 0.0025, "step": 45630 }, { "epoch": 0.37322647912663043, "grad_norm": 0.015265559777617455, "learning_rate": 9.773215891923851e-06, "loss": 0.0014, "step": 45640 }, { "epoch": 0.3733082553052296, "grad_norm": 0.021434010937809944, "learning_rate": 9.773003357838829e-06, "loss": 0.0014, "step": 45650 }, { "epoch": 0.37339003148382877, "grad_norm": 0.11115855723619461, "learning_rate": 9.77279072652385e-06, "loss": 0.0047, "step": 45660 }, { "epoch": 0.37347180766242793, "grad_norm": 0.0878709927201271, "learning_rate": 9.772577997983245e-06, "loss": 0.0019, "step": 45670 }, { "epoch": 0.3735535838410271, "grad_norm": 0.1916891634464264, "learning_rate": 9.772365172221347e-06, "loss": 0.0028, "step": 45680 }, { "epoch": 0.3736353600196263, "grad_norm": 0.13194334506988525, "learning_rate": 9.77215224924249e-06, "loss": 0.0025, "step": 45690 }, { "epoch": 0.37371713619822544, "grad_norm": 0.02383049763739109, "learning_rate": 9.771939229051015e-06, "loss": 0.0034, "step": 45700 }, { "epoch": 0.3737989123768246, "grad_norm": 0.16115829348564148, "learning_rate": 9.771726111651256e-06, "loss": 0.0036, "step": 45710 }, { "epoch": 0.3738806885554238, "grad_norm": 0.016123512759804726, "learning_rate": 9.771512897047558e-06, "loss": 0.0026, "step": 45720 }, { "epoch": 0.373962464734023, "grad_norm": 0.1785677820444107, "learning_rate": 9.771299585244265e-06, "loss": 0.0026, "step": 45730 }, { "epoch": 0.3740442409126222, "grad_norm": 0.03499288484454155, "learning_rate": 9.77108617624572e-06, "loss": 0.0024, "step": 45740 }, { "epoch": 0.37412601709122134, "grad_norm": 0.05494605749845505, "learning_rate": 9.770872670056274e-06, "loss": 0.0025, "step": 45750 }, { "epoch": 0.3742077932698205, "grad_norm": 0.09436303377151489, "learning_rate": 9.770659066680272e-06, "loss": 0.0026, "step": 45760 }, { "epoch": 0.3742895694484197, "grad_norm": 0.15148311853408813, "learning_rate": 9.770445366122067e-06, "loss": 0.0052, "step": 45770 }, { "epoch": 0.37437134562701885, "grad_norm": 0.012203192338347435, "learning_rate": 9.770231568386013e-06, "loss": 0.0016, "step": 45780 }, { "epoch": 0.374453121805618, "grad_norm": 0.09808705747127533, "learning_rate": 9.770017673476463e-06, "loss": 0.0033, "step": 45790 }, { "epoch": 0.3745348979842172, "grad_norm": 0.05907176807522774, "learning_rate": 9.769803681397776e-06, "loss": 0.0029, "step": 45800 }, { "epoch": 0.37461667416281635, "grad_norm": 0.09211281687021255, "learning_rate": 9.769589592154312e-06, "loss": 0.0019, "step": 45810 }, { "epoch": 0.3746984503414155, "grad_norm": 0.0203991886228323, "learning_rate": 9.76937540575043e-06, "loss": 0.0011, "step": 45820 }, { "epoch": 0.3747802265200147, "grad_norm": 0.10735299438238144, "learning_rate": 9.769161122190496e-06, "loss": 0.002, "step": 45830 }, { "epoch": 0.3748620026986139, "grad_norm": 0.0025082093197852373, "learning_rate": 9.76894674147887e-06, "loss": 0.0029, "step": 45840 }, { "epoch": 0.3749437788772131, "grad_norm": 0.0983278751373291, "learning_rate": 9.768732263619927e-06, "loss": 0.0033, "step": 45850 }, { "epoch": 0.37502555505581225, "grad_norm": 0.3038986623287201, "learning_rate": 9.76851768861803e-06, "loss": 0.0029, "step": 45860 }, { "epoch": 0.3751073312344114, "grad_norm": 0.02509288676083088, "learning_rate": 9.76830301647755e-06, "loss": 0.0031, "step": 45870 }, { "epoch": 0.3751891074130106, "grad_norm": 0.10392958670854568, "learning_rate": 9.768088247202862e-06, "loss": 0.0041, "step": 45880 }, { "epoch": 0.37527088359160976, "grad_norm": 0.04008576646447182, "learning_rate": 9.76787338079834e-06, "loss": 0.0053, "step": 45890 }, { "epoch": 0.37535265977020893, "grad_norm": 0.07101287692785263, "learning_rate": 9.767658417268364e-06, "loss": 0.0021, "step": 45900 }, { "epoch": 0.3754344359488081, "grad_norm": 0.11329741030931473, "learning_rate": 9.76744335661731e-06, "loss": 0.0052, "step": 45910 }, { "epoch": 0.37551621212740727, "grad_norm": 0.05307993292808533, "learning_rate": 9.767228198849558e-06, "loss": 0.0022, "step": 45920 }, { "epoch": 0.37559798830600644, "grad_norm": 0.18040411174297333, "learning_rate": 9.767012943969494e-06, "loss": 0.0022, "step": 45930 }, { "epoch": 0.37567976448460566, "grad_norm": 0.0229710191488266, "learning_rate": 9.766797591981502e-06, "loss": 0.0013, "step": 45940 }, { "epoch": 0.37576154066320483, "grad_norm": 0.16713908314704895, "learning_rate": 9.766582142889969e-06, "loss": 0.0036, "step": 45950 }, { "epoch": 0.375843316841804, "grad_norm": 0.06454317271709442, "learning_rate": 9.766366596699282e-06, "loss": 0.0015, "step": 45960 }, { "epoch": 0.37592509302040317, "grad_norm": 0.2361760139465332, "learning_rate": 9.766150953413832e-06, "loss": 0.0026, "step": 45970 }, { "epoch": 0.37600686919900234, "grad_norm": 0.07220674306154251, "learning_rate": 9.765935213038014e-06, "loss": 0.002, "step": 45980 }, { "epoch": 0.3760886453776015, "grad_norm": 0.19638848304748535, "learning_rate": 9.765719375576222e-06, "loss": 0.0028, "step": 45990 }, { "epoch": 0.3761704215562007, "grad_norm": 0.09720662981271744, "learning_rate": 9.765503441032854e-06, "loss": 0.0049, "step": 46000 }, { "epoch": 0.37625219773479984, "grad_norm": 0.15718594193458557, "learning_rate": 9.765287409412305e-06, "loss": 0.003, "step": 46010 }, { "epoch": 0.376333973913399, "grad_norm": 0.16809386014938354, "learning_rate": 9.765071280718978e-06, "loss": 0.0029, "step": 46020 }, { "epoch": 0.3764157500919982, "grad_norm": 0.07455707341432571, "learning_rate": 9.764855054957277e-06, "loss": 0.0027, "step": 46030 }, { "epoch": 0.37649752627059735, "grad_norm": 0.1704779863357544, "learning_rate": 9.764638732131605e-06, "loss": 0.0023, "step": 46040 }, { "epoch": 0.3765793024491966, "grad_norm": 0.1138095110654831, "learning_rate": 9.76442231224637e-06, "loss": 0.0038, "step": 46050 }, { "epoch": 0.37666107862779574, "grad_norm": 0.23464912176132202, "learning_rate": 9.76420579530598e-06, "loss": 0.0043, "step": 46060 }, { "epoch": 0.3767428548063949, "grad_norm": 0.015126342885196209, "learning_rate": 9.763989181314843e-06, "loss": 0.0022, "step": 46070 }, { "epoch": 0.3768246309849941, "grad_norm": 0.5337918400764465, "learning_rate": 9.763772470277374e-06, "loss": 0.0022, "step": 46080 }, { "epoch": 0.37690640716359325, "grad_norm": 0.029785456135869026, "learning_rate": 9.76355566219799e-06, "loss": 0.003, "step": 46090 }, { "epoch": 0.3769881833421924, "grad_norm": 0.21416382491588593, "learning_rate": 9.763338757081105e-06, "loss": 0.0024, "step": 46100 }, { "epoch": 0.3770699595207916, "grad_norm": 0.1017773449420929, "learning_rate": 9.763121754931135e-06, "loss": 0.004, "step": 46110 }, { "epoch": 0.37715173569939076, "grad_norm": 0.14443722367286682, "learning_rate": 9.762904655752505e-06, "loss": 0.0028, "step": 46120 }, { "epoch": 0.3772335118779899, "grad_norm": 0.08628768473863602, "learning_rate": 9.762687459549636e-06, "loss": 0.0015, "step": 46130 }, { "epoch": 0.3773152880565891, "grad_norm": 0.059467412531375885, "learning_rate": 9.762470166326952e-06, "loss": 0.0041, "step": 46140 }, { "epoch": 0.37739706423518826, "grad_norm": 0.04873768240213394, "learning_rate": 9.76225277608888e-06, "loss": 0.0025, "step": 46150 }, { "epoch": 0.3774788404137875, "grad_norm": 0.08252324908971786, "learning_rate": 9.762035288839848e-06, "loss": 0.004, "step": 46160 }, { "epoch": 0.37756061659238666, "grad_norm": 0.028091389685869217, "learning_rate": 9.761817704584284e-06, "loss": 0.0027, "step": 46170 }, { "epoch": 0.3776423927709858, "grad_norm": 0.27514708042144775, "learning_rate": 9.761600023326625e-06, "loss": 0.0026, "step": 46180 }, { "epoch": 0.377724168949585, "grad_norm": 0.03851001709699631, "learning_rate": 9.761382245071303e-06, "loss": 0.002, "step": 46190 }, { "epoch": 0.37780594512818416, "grad_norm": 0.08711067587137222, "learning_rate": 9.761164369822754e-06, "loss": 0.0028, "step": 46200 }, { "epoch": 0.37788772130678333, "grad_norm": 0.0464634895324707, "learning_rate": 9.760946397585416e-06, "loss": 0.0035, "step": 46210 }, { "epoch": 0.3779694974853825, "grad_norm": 0.07985211163759232, "learning_rate": 9.760728328363731e-06, "loss": 0.0025, "step": 46220 }, { "epoch": 0.37805127366398167, "grad_norm": 0.08690538257360458, "learning_rate": 9.76051016216214e-06, "loss": 0.0023, "step": 46230 }, { "epoch": 0.37813304984258084, "grad_norm": 0.07236308604478836, "learning_rate": 9.760291898985089e-06, "loss": 0.003, "step": 46240 }, { "epoch": 0.37821482602118, "grad_norm": 0.028013257309794426, "learning_rate": 9.76007353883702e-06, "loss": 0.0014, "step": 46250 }, { "epoch": 0.37829660219977923, "grad_norm": 0.21129396557807922, "learning_rate": 9.759855081722384e-06, "loss": 0.0025, "step": 46260 }, { "epoch": 0.3783783783783784, "grad_norm": 0.10730747133493423, "learning_rate": 9.759636527645633e-06, "loss": 0.002, "step": 46270 }, { "epoch": 0.37846015455697757, "grad_norm": 0.5732728242874146, "learning_rate": 9.759417876611217e-06, "loss": 0.0022, "step": 46280 }, { "epoch": 0.37854193073557674, "grad_norm": 0.04123618081212044, "learning_rate": 9.759199128623588e-06, "loss": 0.003, "step": 46290 }, { "epoch": 0.3786237069141759, "grad_norm": 0.027476036921143532, "learning_rate": 9.758980283687205e-06, "loss": 0.0018, "step": 46300 }, { "epoch": 0.3787054830927751, "grad_norm": 0.14274811744689941, "learning_rate": 9.758761341806526e-06, "loss": 0.0033, "step": 46310 }, { "epoch": 0.37878725927137424, "grad_norm": 0.09131184220314026, "learning_rate": 9.758542302986011e-06, "loss": 0.0038, "step": 46320 }, { "epoch": 0.3788690354499734, "grad_norm": 0.07178118824958801, "learning_rate": 9.758323167230122e-06, "loss": 0.0019, "step": 46330 }, { "epoch": 0.3789508116285726, "grad_norm": 0.10325862467288971, "learning_rate": 9.75810393454332e-06, "loss": 0.0036, "step": 46340 }, { "epoch": 0.37903258780717175, "grad_norm": 0.054447025060653687, "learning_rate": 9.757884604930073e-06, "loss": 0.0027, "step": 46350 }, { "epoch": 0.3791143639857709, "grad_norm": 0.15815213322639465, "learning_rate": 9.75766517839485e-06, "loss": 0.0031, "step": 46360 }, { "epoch": 0.37919614016437014, "grad_norm": 0.1110207736492157, "learning_rate": 9.757445654942121e-06, "loss": 0.003, "step": 46370 }, { "epoch": 0.3792779163429693, "grad_norm": 0.045710839331150055, "learning_rate": 9.757226034576357e-06, "loss": 0.0023, "step": 46380 }, { "epoch": 0.3793596925215685, "grad_norm": 0.08001989126205444, "learning_rate": 9.757006317302031e-06, "loss": 0.0029, "step": 46390 }, { "epoch": 0.37944146870016765, "grad_norm": 0.07036411762237549, "learning_rate": 9.75678650312362e-06, "loss": 0.003, "step": 46400 }, { "epoch": 0.3795232448787668, "grad_norm": 0.06627289950847626, "learning_rate": 9.7565665920456e-06, "loss": 0.003, "step": 46410 }, { "epoch": 0.379605021057366, "grad_norm": 0.09622822701931, "learning_rate": 9.756346584072454e-06, "loss": 0.0023, "step": 46420 }, { "epoch": 0.37968679723596516, "grad_norm": 0.16172319650650024, "learning_rate": 9.756126479208662e-06, "loss": 0.0026, "step": 46430 }, { "epoch": 0.3797685734145643, "grad_norm": 0.1549449861049652, "learning_rate": 9.755906277458707e-06, "loss": 0.0023, "step": 46440 }, { "epoch": 0.3798503495931635, "grad_norm": 0.032909177243709564, "learning_rate": 9.755685978827075e-06, "loss": 0.0024, "step": 46450 }, { "epoch": 0.37993212577176266, "grad_norm": 0.1396043300628662, "learning_rate": 9.755465583318254e-06, "loss": 0.002, "step": 46460 }, { "epoch": 0.38001390195036183, "grad_norm": 0.003379527013748884, "learning_rate": 9.755245090936735e-06, "loss": 0.0033, "step": 46470 }, { "epoch": 0.38009567812896106, "grad_norm": 0.29684072732925415, "learning_rate": 9.755024501687008e-06, "loss": 0.0063, "step": 46480 }, { "epoch": 0.3801774543075602, "grad_norm": 0.05180387571454048, "learning_rate": 9.754803815573564e-06, "loss": 0.0047, "step": 46490 }, { "epoch": 0.3802592304861594, "grad_norm": 0.13091042637825012, "learning_rate": 9.754583032600904e-06, "loss": 0.0052, "step": 46500 }, { "epoch": 0.38034100666475856, "grad_norm": 0.10648305714130402, "learning_rate": 9.754362152773522e-06, "loss": 0.003, "step": 46510 }, { "epoch": 0.38042278284335773, "grad_norm": 0.03309452533721924, "learning_rate": 9.75414117609592e-06, "loss": 0.002, "step": 46520 }, { "epoch": 0.3805045590219569, "grad_norm": 0.08220798522233963, "learning_rate": 9.753920102572596e-06, "loss": 0.001, "step": 46530 }, { "epoch": 0.38058633520055607, "grad_norm": 0.13758987188339233, "learning_rate": 9.753698932208055e-06, "loss": 0.0026, "step": 46540 }, { "epoch": 0.38066811137915524, "grad_norm": 0.15768465399742126, "learning_rate": 9.753477665006805e-06, "loss": 0.0023, "step": 46550 }, { "epoch": 0.3807498875577544, "grad_norm": 0.07314643263816833, "learning_rate": 9.753256300973349e-06, "loss": 0.0016, "step": 46560 }, { "epoch": 0.3808316637363536, "grad_norm": 0.09046445041894913, "learning_rate": 9.7530348401122e-06, "loss": 0.0017, "step": 46570 }, { "epoch": 0.3809134399149528, "grad_norm": 0.07386182993650436, "learning_rate": 9.752813282427866e-06, "loss": 0.0024, "step": 46580 }, { "epoch": 0.38099521609355197, "grad_norm": 0.06059899553656578, "learning_rate": 9.752591627924864e-06, "loss": 0.0027, "step": 46590 }, { "epoch": 0.38107699227215114, "grad_norm": 0.04150755703449249, "learning_rate": 9.752369876607708e-06, "loss": 0.002, "step": 46600 }, { "epoch": 0.3811587684507503, "grad_norm": 0.05526848882436752, "learning_rate": 9.752148028480913e-06, "loss": 0.0025, "step": 46610 }, { "epoch": 0.3812405446293495, "grad_norm": 0.10476133227348328, "learning_rate": 9.751926083548999e-06, "loss": 0.003, "step": 46620 }, { "epoch": 0.38132232080794864, "grad_norm": 0.02983546070754528, "learning_rate": 9.75170404181649e-06, "loss": 0.0023, "step": 46630 }, { "epoch": 0.3814040969865478, "grad_norm": 0.0593661405146122, "learning_rate": 9.751481903287906e-06, "loss": 0.0038, "step": 46640 }, { "epoch": 0.381485873165147, "grad_norm": 0.058045223355293274, "learning_rate": 9.751259667967775e-06, "loss": 0.0027, "step": 46650 }, { "epoch": 0.38156764934374615, "grad_norm": 0.11885865777730942, "learning_rate": 9.751037335860622e-06, "loss": 0.0017, "step": 46660 }, { "epoch": 0.3816494255223453, "grad_norm": 0.09224909543991089, "learning_rate": 9.750814906970975e-06, "loss": 0.0025, "step": 46670 }, { "epoch": 0.3817312017009445, "grad_norm": 0.13271291553974152, "learning_rate": 9.75059238130337e-06, "loss": 0.0031, "step": 46680 }, { "epoch": 0.3818129778795437, "grad_norm": 0.2408873289823532, "learning_rate": 9.750369758862334e-06, "loss": 0.0041, "step": 46690 }, { "epoch": 0.3818947540581429, "grad_norm": 0.2098979353904724, "learning_rate": 9.750147039652406e-06, "loss": 0.0033, "step": 46700 }, { "epoch": 0.38197653023674205, "grad_norm": 0.116108737885952, "learning_rate": 9.749924223678122e-06, "loss": 0.0022, "step": 46710 }, { "epoch": 0.3820583064153412, "grad_norm": 0.04606722295284271, "learning_rate": 9.749701310944018e-06, "loss": 0.0026, "step": 46720 }, { "epoch": 0.3821400825939404, "grad_norm": 0.04265061020851135, "learning_rate": 9.74947830145464e-06, "loss": 0.0039, "step": 46730 }, { "epoch": 0.38222185877253956, "grad_norm": 0.06894248723983765, "learning_rate": 9.749255195214528e-06, "loss": 0.0028, "step": 46740 }, { "epoch": 0.3823036349511387, "grad_norm": 0.10657026618719101, "learning_rate": 9.749031992228227e-06, "loss": 0.0027, "step": 46750 }, { "epoch": 0.3823854111297379, "grad_norm": 0.011156663298606873, "learning_rate": 9.748808692500284e-06, "loss": 0.0024, "step": 46760 }, { "epoch": 0.38246718730833706, "grad_norm": 0.12667518854141235, "learning_rate": 9.74858529603525e-06, "loss": 0.0018, "step": 46770 }, { "epoch": 0.38254896348693623, "grad_norm": 0.03650352731347084, "learning_rate": 9.74836180283767e-06, "loss": 0.0029, "step": 46780 }, { "epoch": 0.3826307396655354, "grad_norm": 0.03965701907873154, "learning_rate": 9.748138212912102e-06, "loss": 0.0015, "step": 46790 }, { "epoch": 0.3827125158441346, "grad_norm": 0.07346440106630325, "learning_rate": 9.7479145262631e-06, "loss": 0.0023, "step": 46800 }, { "epoch": 0.3827942920227338, "grad_norm": 0.27199336886405945, "learning_rate": 9.74769074289522e-06, "loss": 0.0074, "step": 46810 }, { "epoch": 0.38287606820133296, "grad_norm": 0.06315005570650101, "learning_rate": 9.747466862813018e-06, "loss": 0.0027, "step": 46820 }, { "epoch": 0.38295784437993213, "grad_norm": 0.03191467002034187, "learning_rate": 9.74724288602106e-06, "loss": 0.0022, "step": 46830 }, { "epoch": 0.3830396205585313, "grad_norm": 0.015733186155557632, "learning_rate": 9.747018812523903e-06, "loss": 0.0027, "step": 46840 }, { "epoch": 0.38312139673713047, "grad_norm": 0.013499530032277107, "learning_rate": 9.746794642326116e-06, "loss": 0.0026, "step": 46850 }, { "epoch": 0.38320317291572964, "grad_norm": 0.09615778177976608, "learning_rate": 9.746570375432262e-06, "loss": 0.0032, "step": 46860 }, { "epoch": 0.3832849490943288, "grad_norm": 0.06451598554849625, "learning_rate": 9.746346011846914e-06, "loss": 0.0017, "step": 46870 }, { "epoch": 0.383366725272928, "grad_norm": 0.08583105355501175, "learning_rate": 9.746121551574637e-06, "loss": 0.0018, "step": 46880 }, { "epoch": 0.38344850145152715, "grad_norm": 0.033333901315927505, "learning_rate": 9.745896994620007e-06, "loss": 0.0029, "step": 46890 }, { "epoch": 0.38353027763012637, "grad_norm": 0.09226980805397034, "learning_rate": 9.7456723409876e-06, "loss": 0.0033, "step": 46900 }, { "epoch": 0.38361205380872554, "grad_norm": 0.3545267581939697, "learning_rate": 9.745447590681987e-06, "loss": 0.0029, "step": 46910 }, { "epoch": 0.3836938299873247, "grad_norm": 0.02480330504477024, "learning_rate": 9.74522274370775e-06, "loss": 0.0019, "step": 46920 }, { "epoch": 0.3837756061659239, "grad_norm": 0.05501996725797653, "learning_rate": 9.744997800069469e-06, "loss": 0.0026, "step": 46930 }, { "epoch": 0.38385738234452305, "grad_norm": 0.05729299038648605, "learning_rate": 9.744772759771725e-06, "loss": 0.0029, "step": 46940 }, { "epoch": 0.3839391585231222, "grad_norm": 0.055053871124982834, "learning_rate": 9.744547622819104e-06, "loss": 0.0022, "step": 46950 }, { "epoch": 0.3840209347017214, "grad_norm": 0.11218039691448212, "learning_rate": 9.744322389216192e-06, "loss": 0.0031, "step": 46960 }, { "epoch": 0.38410271088032055, "grad_norm": 0.07078703492879868, "learning_rate": 9.744097058967575e-06, "loss": 0.0029, "step": 46970 }, { "epoch": 0.3841844870589197, "grad_norm": 0.07284585386514664, "learning_rate": 9.743871632077846e-06, "loss": 0.0043, "step": 46980 }, { "epoch": 0.3842662632375189, "grad_norm": 0.047974348068237305, "learning_rate": 9.743646108551594e-06, "loss": 0.0021, "step": 46990 }, { "epoch": 0.38434803941611806, "grad_norm": 0.019786639139056206, "learning_rate": 9.743420488393418e-06, "loss": 0.0028, "step": 47000 }, { "epoch": 0.3844298155947173, "grad_norm": 0.05440758541226387, "learning_rate": 9.743194771607908e-06, "loss": 0.0027, "step": 47010 }, { "epoch": 0.38451159177331645, "grad_norm": 0.1775173842906952, "learning_rate": 9.742968958199666e-06, "loss": 0.0032, "step": 47020 }, { "epoch": 0.3845933679519156, "grad_norm": 0.04560722038149834, "learning_rate": 9.742743048173291e-06, "loss": 0.0033, "step": 47030 }, { "epoch": 0.3846751441305148, "grad_norm": 0.19163233041763306, "learning_rate": 9.742517041533387e-06, "loss": 0.0022, "step": 47040 }, { "epoch": 0.38475692030911396, "grad_norm": 0.06858845800161362, "learning_rate": 9.742290938284553e-06, "loss": 0.0025, "step": 47050 }, { "epoch": 0.38483869648771313, "grad_norm": 0.006548217497766018, "learning_rate": 9.742064738431398e-06, "loss": 0.0026, "step": 47060 }, { "epoch": 0.3849204726663123, "grad_norm": 0.20580461621284485, "learning_rate": 9.741838441978532e-06, "loss": 0.0032, "step": 47070 }, { "epoch": 0.38500224884491147, "grad_norm": 0.13643738627433777, "learning_rate": 9.74161204893056e-06, "loss": 0.003, "step": 47080 }, { "epoch": 0.38508402502351063, "grad_norm": 0.13865622878074646, "learning_rate": 9.7413855592921e-06, "loss": 0.0026, "step": 47090 }, { "epoch": 0.3851658012021098, "grad_norm": 0.06852401793003082, "learning_rate": 9.74115897306776e-06, "loss": 0.0016, "step": 47100 }, { "epoch": 0.38524757738070897, "grad_norm": 0.06463983654975891, "learning_rate": 9.740932290262158e-06, "loss": 0.003, "step": 47110 }, { "epoch": 0.3853293535593082, "grad_norm": 0.07690692692995071, "learning_rate": 9.740705510879911e-06, "loss": 0.0026, "step": 47120 }, { "epoch": 0.38541112973790737, "grad_norm": 0.053168442100286484, "learning_rate": 9.740478634925639e-06, "loss": 0.0023, "step": 47130 }, { "epoch": 0.38549290591650653, "grad_norm": 0.04705578833818436, "learning_rate": 9.740251662403964e-06, "loss": 0.0013, "step": 47140 }, { "epoch": 0.3855746820951057, "grad_norm": 0.1031842902302742, "learning_rate": 9.74002459331951e-06, "loss": 0.0022, "step": 47150 }, { "epoch": 0.38565645827370487, "grad_norm": 0.08718644082546234, "learning_rate": 9.739797427676904e-06, "loss": 0.0025, "step": 47160 }, { "epoch": 0.38573823445230404, "grad_norm": 0.05881369486451149, "learning_rate": 9.73957016548077e-06, "loss": 0.0026, "step": 47170 }, { "epoch": 0.3858200106309032, "grad_norm": 0.05623273551464081, "learning_rate": 9.73934280673574e-06, "loss": 0.003, "step": 47180 }, { "epoch": 0.3859017868095024, "grad_norm": 0.09277475625276566, "learning_rate": 9.739115351446444e-06, "loss": 0.0044, "step": 47190 }, { "epoch": 0.38598356298810155, "grad_norm": 0.011206992901861668, "learning_rate": 9.738887799617515e-06, "loss": 0.0036, "step": 47200 }, { "epoch": 0.3860653391667007, "grad_norm": 0.1252444088459015, "learning_rate": 9.738660151253592e-06, "loss": 0.0037, "step": 47210 }, { "epoch": 0.38614711534529994, "grad_norm": 0.017474215477705002, "learning_rate": 9.738432406359309e-06, "loss": 0.0024, "step": 47220 }, { "epoch": 0.3862288915238991, "grad_norm": 0.017143337056040764, "learning_rate": 9.738204564939304e-06, "loss": 0.0029, "step": 47230 }, { "epoch": 0.3863106677024983, "grad_norm": 0.08249908685684204, "learning_rate": 9.737976626998223e-06, "loss": 0.0028, "step": 47240 }, { "epoch": 0.38639244388109745, "grad_norm": 0.03828879073262215, "learning_rate": 9.737748592540707e-06, "loss": 0.0031, "step": 47250 }, { "epoch": 0.3864742200596966, "grad_norm": 0.10827487707138062, "learning_rate": 9.7375204615714e-06, "loss": 0.0029, "step": 47260 }, { "epoch": 0.3865559962382958, "grad_norm": 0.23721611499786377, "learning_rate": 9.73729223409495e-06, "loss": 0.003, "step": 47270 }, { "epoch": 0.38663777241689495, "grad_norm": 0.03241680562496185, "learning_rate": 9.73706391011601e-06, "loss": 0.003, "step": 47280 }, { "epoch": 0.3867195485954941, "grad_norm": 0.17036712169647217, "learning_rate": 9.736835489639224e-06, "loss": 0.0032, "step": 47290 }, { "epoch": 0.3868013247740933, "grad_norm": 0.17871646583080292, "learning_rate": 9.736606972669249e-06, "loss": 0.0016, "step": 47300 }, { "epoch": 0.38688310095269246, "grad_norm": 0.004169042222201824, "learning_rate": 9.736378359210742e-06, "loss": 0.0023, "step": 47310 }, { "epoch": 0.38696487713129163, "grad_norm": 0.2033664882183075, "learning_rate": 9.736149649268356e-06, "loss": 0.0053, "step": 47320 }, { "epoch": 0.38704665330989085, "grad_norm": 0.03318466618657112, "learning_rate": 9.735920842846754e-06, "loss": 0.0021, "step": 47330 }, { "epoch": 0.38712842948849, "grad_norm": 0.17633844912052155, "learning_rate": 9.735691939950592e-06, "loss": 0.0028, "step": 47340 }, { "epoch": 0.3872102056670892, "grad_norm": 0.22747570276260376, "learning_rate": 9.735462940584537e-06, "loss": 0.0044, "step": 47350 }, { "epoch": 0.38729198184568836, "grad_norm": 0.06525027006864548, "learning_rate": 9.735233844753253e-06, "loss": 0.0022, "step": 47360 }, { "epoch": 0.38737375802428753, "grad_norm": 0.274265855550766, "learning_rate": 9.735004652461407e-06, "loss": 0.0025, "step": 47370 }, { "epoch": 0.3874555342028867, "grad_norm": 0.08742529898881912, "learning_rate": 9.734775363713666e-06, "loss": 0.0015, "step": 47380 }, { "epoch": 0.38753731038148587, "grad_norm": 0.05023670196533203, "learning_rate": 9.734545978514705e-06, "loss": 0.0043, "step": 47390 }, { "epoch": 0.38761908656008504, "grad_norm": 0.02793215960264206, "learning_rate": 9.734316496869191e-06, "loss": 0.0021, "step": 47400 }, { "epoch": 0.3877008627386842, "grad_norm": 0.07594700902700424, "learning_rate": 9.734086918781804e-06, "loss": 0.0028, "step": 47410 }, { "epoch": 0.3877826389172834, "grad_norm": 0.03154768794775009, "learning_rate": 9.733857244257216e-06, "loss": 0.0026, "step": 47420 }, { "epoch": 0.38786441509588254, "grad_norm": 0.09020072221755981, "learning_rate": 9.733627473300112e-06, "loss": 0.0033, "step": 47430 }, { "epoch": 0.38794619127448177, "grad_norm": 0.3422609865665436, "learning_rate": 9.733397605915165e-06, "loss": 0.0044, "step": 47440 }, { "epoch": 0.38802796745308094, "grad_norm": 0.06720351427793503, "learning_rate": 9.733167642107063e-06, "loss": 0.0036, "step": 47450 }, { "epoch": 0.3881097436316801, "grad_norm": 0.23216336965560913, "learning_rate": 9.732937581880487e-06, "loss": 0.0032, "step": 47460 }, { "epoch": 0.3881915198102793, "grad_norm": 0.11942055821418762, "learning_rate": 9.732707425240126e-06, "loss": 0.0028, "step": 47470 }, { "epoch": 0.38827329598887844, "grad_norm": 0.050176654011011124, "learning_rate": 9.732477172190669e-06, "loss": 0.003, "step": 47480 }, { "epoch": 0.3883550721674776, "grad_norm": 0.4808288812637329, "learning_rate": 9.732246822736804e-06, "loss": 0.0028, "step": 47490 }, { "epoch": 0.3884368483460768, "grad_norm": 0.14120520651340485, "learning_rate": 9.732016376883227e-06, "loss": 0.004, "step": 47500 }, { "epoch": 0.38851862452467595, "grad_norm": 0.0983179584145546, "learning_rate": 9.731785834634627e-06, "loss": 0.0026, "step": 47510 }, { "epoch": 0.3886004007032751, "grad_norm": 0.15447461605072021, "learning_rate": 9.731555195995706e-06, "loss": 0.0032, "step": 47520 }, { "epoch": 0.3886821768818743, "grad_norm": 0.05981575697660446, "learning_rate": 9.731324460971156e-06, "loss": 0.0025, "step": 47530 }, { "epoch": 0.3887639530604735, "grad_norm": 0.0927041545510292, "learning_rate": 9.731093629565685e-06, "loss": 0.0017, "step": 47540 }, { "epoch": 0.3888457292390727, "grad_norm": 0.1140967532992363, "learning_rate": 9.730862701783988e-06, "loss": 0.0024, "step": 47550 }, { "epoch": 0.38892750541767185, "grad_norm": 0.0410456657409668, "learning_rate": 9.730631677630774e-06, "loss": 0.0053, "step": 47560 }, { "epoch": 0.389009281596271, "grad_norm": 0.04345278814435005, "learning_rate": 9.730400557110748e-06, "loss": 0.0021, "step": 47570 }, { "epoch": 0.3890910577748702, "grad_norm": 0.1290552169084549, "learning_rate": 9.730169340228617e-06, "loss": 0.004, "step": 47580 }, { "epoch": 0.38917283395346935, "grad_norm": 0.4626428186893463, "learning_rate": 9.729938026989089e-06, "loss": 0.0025, "step": 47590 }, { "epoch": 0.3892546101320685, "grad_norm": 0.06102938950061798, "learning_rate": 9.729706617396883e-06, "loss": 0.0026, "step": 47600 }, { "epoch": 0.3893363863106677, "grad_norm": 0.08901752531528473, "learning_rate": 9.729475111456707e-06, "loss": 0.003, "step": 47610 }, { "epoch": 0.38941816248926686, "grad_norm": 0.04080070182681084, "learning_rate": 9.729243509173278e-06, "loss": 0.0019, "step": 47620 }, { "epoch": 0.38949993866786603, "grad_norm": 0.12157026678323746, "learning_rate": 9.729011810551314e-06, "loss": 0.0034, "step": 47630 }, { "epoch": 0.3895817148464652, "grad_norm": 0.15597395598888397, "learning_rate": 9.728780015595538e-06, "loss": 0.0031, "step": 47640 }, { "epoch": 0.3896634910250644, "grad_norm": 0.0846724882721901, "learning_rate": 9.728548124310667e-06, "loss": 0.002, "step": 47650 }, { "epoch": 0.3897452672036636, "grad_norm": 0.07746553421020508, "learning_rate": 9.728316136701428e-06, "loss": 0.0019, "step": 47660 }, { "epoch": 0.38982704338226276, "grad_norm": 0.08107434958219528, "learning_rate": 9.728084052772547e-06, "loss": 0.002, "step": 47670 }, { "epoch": 0.38990881956086193, "grad_norm": 0.09045872837305069, "learning_rate": 9.727851872528749e-06, "loss": 0.0034, "step": 47680 }, { "epoch": 0.3899905957394611, "grad_norm": 0.03300534561276436, "learning_rate": 9.727619595974767e-06, "loss": 0.0024, "step": 47690 }, { "epoch": 0.39007237191806027, "grad_norm": 0.044352732598781586, "learning_rate": 9.72738722311533e-06, "loss": 0.0021, "step": 47700 }, { "epoch": 0.39015414809665944, "grad_norm": 0.006027503404766321, "learning_rate": 9.727154753955171e-06, "loss": 0.0036, "step": 47710 }, { "epoch": 0.3902359242752586, "grad_norm": 0.2603992819786072, "learning_rate": 9.726922188499029e-06, "loss": 0.0023, "step": 47720 }, { "epoch": 0.3903177004538578, "grad_norm": 0.10417363047599792, "learning_rate": 9.72668952675164e-06, "loss": 0.0036, "step": 47730 }, { "epoch": 0.39039947663245694, "grad_norm": 0.3511233925819397, "learning_rate": 9.726456768717742e-06, "loss": 0.0032, "step": 47740 }, { "epoch": 0.3904812528110561, "grad_norm": 0.24202659726142883, "learning_rate": 9.72622391440208e-06, "loss": 0.0033, "step": 47750 }, { "epoch": 0.39056302898965534, "grad_norm": 0.1353006213903427, "learning_rate": 9.725990963809393e-06, "loss": 0.0042, "step": 47760 }, { "epoch": 0.3906448051682545, "grad_norm": 0.10344698280096054, "learning_rate": 9.72575791694443e-06, "loss": 0.0019, "step": 47770 }, { "epoch": 0.3907265813468537, "grad_norm": 0.1959882229566574, "learning_rate": 9.725524773811936e-06, "loss": 0.0022, "step": 47780 }, { "epoch": 0.39080835752545284, "grad_norm": 0.06250765174627304, "learning_rate": 9.725291534416663e-06, "loss": 0.0017, "step": 47790 }, { "epoch": 0.390890133704052, "grad_norm": 0.21024906635284424, "learning_rate": 9.725058198763358e-06, "loss": 0.0029, "step": 47800 }, { "epoch": 0.3909719098826512, "grad_norm": 0.11534822732210159, "learning_rate": 9.724824766856779e-06, "loss": 0.0033, "step": 47810 }, { "epoch": 0.39105368606125035, "grad_norm": 0.04284803941845894, "learning_rate": 9.724591238701678e-06, "loss": 0.0022, "step": 47820 }, { "epoch": 0.3911354622398495, "grad_norm": 0.09522664546966553, "learning_rate": 9.724357614302814e-06, "loss": 0.0033, "step": 47830 }, { "epoch": 0.3912172384184487, "grad_norm": 0.00991299282759428, "learning_rate": 9.724123893664944e-06, "loss": 0.0024, "step": 47840 }, { "epoch": 0.39129901459704786, "grad_norm": 0.03873152285814285, "learning_rate": 9.72389007679283e-06, "loss": 0.004, "step": 47850 }, { "epoch": 0.3913807907756471, "grad_norm": 0.07013244181871414, "learning_rate": 9.723656163691236e-06, "loss": 0.0021, "step": 47860 }, { "epoch": 0.39146256695424625, "grad_norm": 0.23557910323143005, "learning_rate": 9.723422154364927e-06, "loss": 0.0037, "step": 47870 }, { "epoch": 0.3915443431328454, "grad_norm": 0.15916891396045685, "learning_rate": 9.723188048818669e-06, "loss": 0.0043, "step": 47880 }, { "epoch": 0.3916261193114446, "grad_norm": 0.037532906979322433, "learning_rate": 9.72295384705723e-06, "loss": 0.002, "step": 47890 }, { "epoch": 0.39170789549004376, "grad_norm": 0.05663686245679855, "learning_rate": 9.722719549085382e-06, "loss": 0.0015, "step": 47900 }, { "epoch": 0.3917896716686429, "grad_norm": 0.07600684463977814, "learning_rate": 9.7224851549079e-06, "loss": 0.002, "step": 47910 }, { "epoch": 0.3918714478472421, "grad_norm": 0.04884156212210655, "learning_rate": 9.722250664529556e-06, "loss": 0.0019, "step": 47920 }, { "epoch": 0.39195322402584126, "grad_norm": 0.0949745923280716, "learning_rate": 9.722016077955127e-06, "loss": 0.0022, "step": 47930 }, { "epoch": 0.39203500020444043, "grad_norm": 0.09073596447706223, "learning_rate": 9.721781395189393e-06, "loss": 0.0036, "step": 47940 }, { "epoch": 0.3921167763830396, "grad_norm": 0.07292591035366058, "learning_rate": 9.721546616237134e-06, "loss": 0.0013, "step": 47950 }, { "epoch": 0.39219855256163877, "grad_norm": 0.010393590666353703, "learning_rate": 9.721311741103132e-06, "loss": 0.0032, "step": 47960 }, { "epoch": 0.392280328740238, "grad_norm": 0.060493629425764084, "learning_rate": 9.721076769792172e-06, "loss": 0.0027, "step": 47970 }, { "epoch": 0.39236210491883716, "grad_norm": 0.1486886888742447, "learning_rate": 9.720841702309041e-06, "loss": 0.0018, "step": 47980 }, { "epoch": 0.39244388109743633, "grad_norm": 0.07003310322761536, "learning_rate": 9.720606538658527e-06, "loss": 0.0031, "step": 47990 }, { "epoch": 0.3925256572760355, "grad_norm": 0.07525093853473663, "learning_rate": 9.720371278845421e-06, "loss": 0.0027, "step": 48000 }, { "epoch": 0.39260743345463467, "grad_norm": 0.17301391065120697, "learning_rate": 9.720135922874515e-06, "loss": 0.0023, "step": 48010 }, { "epoch": 0.39268920963323384, "grad_norm": 0.035054679960012436, "learning_rate": 9.719900470750603e-06, "loss": 0.0017, "step": 48020 }, { "epoch": 0.392770985811833, "grad_norm": 0.10253459960222244, "learning_rate": 9.719664922478483e-06, "loss": 0.0033, "step": 48030 }, { "epoch": 0.3928527619904322, "grad_norm": 0.05976541340351105, "learning_rate": 9.719429278062953e-06, "loss": 0.0028, "step": 48040 }, { "epoch": 0.39293453816903134, "grad_norm": 0.06917253881692886, "learning_rate": 9.71919353750881e-06, "loss": 0.0023, "step": 48050 }, { "epoch": 0.3930163143476305, "grad_norm": 0.05928331986069679, "learning_rate": 9.71895770082086e-06, "loss": 0.0018, "step": 48060 }, { "epoch": 0.3930980905262297, "grad_norm": 0.09033550322055817, "learning_rate": 9.718721768003904e-06, "loss": 0.0028, "step": 48070 }, { "epoch": 0.3931798667048289, "grad_norm": 0.14470253884792328, "learning_rate": 9.718485739062753e-06, "loss": 0.0027, "step": 48080 }, { "epoch": 0.3932616428834281, "grad_norm": 0.24971884489059448, "learning_rate": 9.71824961400221e-06, "loss": 0.0037, "step": 48090 }, { "epoch": 0.39334341906202724, "grad_norm": 0.1359589695930481, "learning_rate": 9.718013392827087e-06, "loss": 0.0027, "step": 48100 }, { "epoch": 0.3934251952406264, "grad_norm": 0.15652619302272797, "learning_rate": 9.717777075542199e-06, "loss": 0.0031, "step": 48110 }, { "epoch": 0.3935069714192256, "grad_norm": 0.13116765022277832, "learning_rate": 9.717540662152354e-06, "loss": 0.0027, "step": 48120 }, { "epoch": 0.39358874759782475, "grad_norm": 0.06092429906129837, "learning_rate": 9.717304152662373e-06, "loss": 0.0038, "step": 48130 }, { "epoch": 0.3936705237764239, "grad_norm": 0.021958595141768456, "learning_rate": 9.717067547077073e-06, "loss": 0.005, "step": 48140 }, { "epoch": 0.3937522999550231, "grad_norm": 0.045528799295425415, "learning_rate": 9.71683084540127e-06, "loss": 0.0022, "step": 48150 }, { "epoch": 0.39383407613362226, "grad_norm": 0.13309629261493683, "learning_rate": 9.71659404763979e-06, "loss": 0.0021, "step": 48160 }, { "epoch": 0.3939158523122214, "grad_norm": 0.04477890208363533, "learning_rate": 9.716357153797456e-06, "loss": 0.0036, "step": 48170 }, { "epoch": 0.39399762849082065, "grad_norm": 0.04764917492866516, "learning_rate": 9.716120163879093e-06, "loss": 0.002, "step": 48180 }, { "epoch": 0.3940794046694198, "grad_norm": 0.17253093421459198, "learning_rate": 9.715883077889528e-06, "loss": 0.0055, "step": 48190 }, { "epoch": 0.394161180848019, "grad_norm": 0.01988922618329525, "learning_rate": 9.71564589583359e-06, "loss": 0.0016, "step": 48200 }, { "epoch": 0.39424295702661816, "grad_norm": 0.05322853848338127, "learning_rate": 9.715408617716114e-06, "loss": 0.003, "step": 48210 }, { "epoch": 0.3943247332052173, "grad_norm": 0.09821110218763351, "learning_rate": 9.715171243541932e-06, "loss": 0.0034, "step": 48220 }, { "epoch": 0.3944065093838165, "grad_norm": 0.18527324497699738, "learning_rate": 9.714933773315878e-06, "loss": 0.0034, "step": 48230 }, { "epoch": 0.39448828556241566, "grad_norm": 0.0690765231847763, "learning_rate": 9.71469620704279e-06, "loss": 0.0019, "step": 48240 }, { "epoch": 0.39457006174101483, "grad_norm": 0.02985117956995964, "learning_rate": 9.71445854472751e-06, "loss": 0.0044, "step": 48250 }, { "epoch": 0.394651837919614, "grad_norm": 0.22356972098350525, "learning_rate": 9.714220786374876e-06, "loss": 0.0027, "step": 48260 }, { "epoch": 0.39473361409821317, "grad_norm": 0.047461219131946564, "learning_rate": 9.713982931989733e-06, "loss": 0.0023, "step": 48270 }, { "epoch": 0.39481539027681234, "grad_norm": 0.18782438337802887, "learning_rate": 9.713744981576926e-06, "loss": 0.0035, "step": 48280 }, { "epoch": 0.39489716645541156, "grad_norm": 0.06607513129711151, "learning_rate": 9.713506935141302e-06, "loss": 0.0027, "step": 48290 }, { "epoch": 0.39497894263401073, "grad_norm": 0.07541091740131378, "learning_rate": 9.713268792687711e-06, "loss": 0.0018, "step": 48300 }, { "epoch": 0.3950607188126099, "grad_norm": 0.07929152250289917, "learning_rate": 9.713030554221002e-06, "loss": 0.0025, "step": 48310 }, { "epoch": 0.39514249499120907, "grad_norm": 0.2104053646326065, "learning_rate": 9.712792219746031e-06, "loss": 0.003, "step": 48320 }, { "epoch": 0.39522427116980824, "grad_norm": 0.014798007905483246, "learning_rate": 9.712553789267652e-06, "loss": 0.0028, "step": 48330 }, { "epoch": 0.3953060473484074, "grad_norm": 0.1265399158000946, "learning_rate": 9.712315262790721e-06, "loss": 0.0028, "step": 48340 }, { "epoch": 0.3953878235270066, "grad_norm": 0.03612815961241722, "learning_rate": 9.712076640320099e-06, "loss": 0.0018, "step": 48350 }, { "epoch": 0.39546959970560575, "grad_norm": 0.04190688952803612, "learning_rate": 9.711837921860643e-06, "loss": 0.0023, "step": 48360 }, { "epoch": 0.3955513758842049, "grad_norm": 0.06948679685592651, "learning_rate": 9.71159910741722e-06, "loss": 0.0021, "step": 48370 }, { "epoch": 0.3956331520628041, "grad_norm": 0.09581221640110016, "learning_rate": 9.711360196994695e-06, "loss": 0.0021, "step": 48380 }, { "epoch": 0.39571492824140325, "grad_norm": 0.1008683443069458, "learning_rate": 9.71112119059793e-06, "loss": 0.0034, "step": 48390 }, { "epoch": 0.3957967044200025, "grad_norm": 0.184195876121521, "learning_rate": 9.710882088231798e-06, "loss": 0.0038, "step": 48400 }, { "epoch": 0.39587848059860165, "grad_norm": 0.1146397814154625, "learning_rate": 9.710642889901171e-06, "loss": 0.0029, "step": 48410 }, { "epoch": 0.3959602567772008, "grad_norm": 0.07011739164590836, "learning_rate": 9.710403595610918e-06, "loss": 0.0032, "step": 48420 }, { "epoch": 0.3960420329558, "grad_norm": 0.0034020335879176855, "learning_rate": 9.710164205365913e-06, "loss": 0.0023, "step": 48430 }, { "epoch": 0.39612380913439915, "grad_norm": 0.12316178530454636, "learning_rate": 9.709924719171038e-06, "loss": 0.0032, "step": 48440 }, { "epoch": 0.3962055853129983, "grad_norm": 0.027240483090281487, "learning_rate": 9.709685137031166e-06, "loss": 0.0066, "step": 48450 }, { "epoch": 0.3962873614915975, "grad_norm": 0.04457925632596016, "learning_rate": 9.709445458951179e-06, "loss": 0.0016, "step": 48460 }, { "epoch": 0.39636913767019666, "grad_norm": 0.1026555672287941, "learning_rate": 9.709205684935961e-06, "loss": 0.0014, "step": 48470 }, { "epoch": 0.3964509138487958, "grad_norm": 0.05631641671061516, "learning_rate": 9.708965814990395e-06, "loss": 0.0017, "step": 48480 }, { "epoch": 0.396532690027395, "grad_norm": 0.1414976269006729, "learning_rate": 9.708725849119366e-06, "loss": 0.0042, "step": 48490 }, { "epoch": 0.3966144662059942, "grad_norm": 0.030690684914588928, "learning_rate": 9.708485787327768e-06, "loss": 0.0026, "step": 48500 }, { "epoch": 0.3966962423845934, "grad_norm": 0.026220083236694336, "learning_rate": 9.708245629620485e-06, "loss": 0.0019, "step": 48510 }, { "epoch": 0.39677801856319256, "grad_norm": 0.059355076402425766, "learning_rate": 9.708005376002412e-06, "loss": 0.0033, "step": 48520 }, { "epoch": 0.3968597947417917, "grad_norm": 0.019518304616212845, "learning_rate": 9.707765026478441e-06, "loss": 0.0033, "step": 48530 }, { "epoch": 0.3969415709203909, "grad_norm": 0.12327030301094055, "learning_rate": 9.70752458105347e-06, "loss": 0.0014, "step": 48540 }, { "epoch": 0.39702334709899006, "grad_norm": 0.045970045030117035, "learning_rate": 9.707284039732398e-06, "loss": 0.0032, "step": 48550 }, { "epoch": 0.39710512327758923, "grad_norm": 0.081728994846344, "learning_rate": 9.707043402520124e-06, "loss": 0.0025, "step": 48560 }, { "epoch": 0.3971868994561884, "grad_norm": 0.0749935582280159, "learning_rate": 9.70680266942155e-06, "loss": 0.0044, "step": 48570 }, { "epoch": 0.39726867563478757, "grad_norm": 0.09602098166942596, "learning_rate": 9.70656184044158e-06, "loss": 0.0018, "step": 48580 }, { "epoch": 0.39735045181338674, "grad_norm": 0.061816271394491196, "learning_rate": 9.706320915585118e-06, "loss": 0.0022, "step": 48590 }, { "epoch": 0.3974322279919859, "grad_norm": 0.050489459186792374, "learning_rate": 9.706079894857074e-06, "loss": 0.0021, "step": 48600 }, { "epoch": 0.39751400417058513, "grad_norm": 0.027741609141230583, "learning_rate": 9.705838778262358e-06, "loss": 0.0067, "step": 48610 }, { "epoch": 0.3975957803491843, "grad_norm": 0.14647679030895233, "learning_rate": 9.70559756580588e-06, "loss": 0.0043, "step": 48620 }, { "epoch": 0.39767755652778347, "grad_norm": 0.11989491432905197, "learning_rate": 9.705356257492556e-06, "loss": 0.0019, "step": 48630 }, { "epoch": 0.39775933270638264, "grad_norm": 0.005987108685076237, "learning_rate": 9.705114853327298e-06, "loss": 0.0018, "step": 48640 }, { "epoch": 0.3978411088849818, "grad_norm": 0.0674513578414917, "learning_rate": 9.704873353315027e-06, "loss": 0.002, "step": 48650 }, { "epoch": 0.397922885063581, "grad_norm": 0.021129362285137177, "learning_rate": 9.704631757460663e-06, "loss": 0.0041, "step": 48660 }, { "epoch": 0.39800466124218015, "grad_norm": 0.04722980409860611, "learning_rate": 9.704390065769124e-06, "loss": 0.0023, "step": 48670 }, { "epoch": 0.3980864374207793, "grad_norm": 0.06953674554824829, "learning_rate": 9.704148278245336e-06, "loss": 0.0036, "step": 48680 }, { "epoch": 0.3981682135993785, "grad_norm": 0.07751716673374176, "learning_rate": 9.703906394894224e-06, "loss": 0.0015, "step": 48690 }, { "epoch": 0.39824998977797765, "grad_norm": 0.11075339466333389, "learning_rate": 9.703664415720716e-06, "loss": 0.004, "step": 48700 }, { "epoch": 0.3983317659565768, "grad_norm": 0.09253112971782684, "learning_rate": 9.70342234072974e-06, "loss": 0.0031, "step": 48710 }, { "epoch": 0.39841354213517605, "grad_norm": 0.022578813135623932, "learning_rate": 9.703180169926228e-06, "loss": 0.0026, "step": 48720 }, { "epoch": 0.3984953183137752, "grad_norm": 0.391902893781662, "learning_rate": 9.702937903315113e-06, "loss": 0.0036, "step": 48730 }, { "epoch": 0.3985770944923744, "grad_norm": 0.11925578862428665, "learning_rate": 9.70269554090133e-06, "loss": 0.0031, "step": 48740 }, { "epoch": 0.39865887067097355, "grad_norm": 0.10352781414985657, "learning_rate": 9.702453082689815e-06, "loss": 0.0023, "step": 48750 }, { "epoch": 0.3987406468495727, "grad_norm": 0.3391132354736328, "learning_rate": 9.70221052868551e-06, "loss": 0.0031, "step": 48760 }, { "epoch": 0.3988224230281719, "grad_norm": 0.06853105127811432, "learning_rate": 9.701967878893352e-06, "loss": 0.0025, "step": 48770 }, { "epoch": 0.39890419920677106, "grad_norm": 0.14559097588062286, "learning_rate": 9.701725133318289e-06, "loss": 0.0028, "step": 48780 }, { "epoch": 0.39898597538537023, "grad_norm": 0.09871822595596313, "learning_rate": 9.701482291965263e-06, "loss": 0.0017, "step": 48790 }, { "epoch": 0.3990677515639694, "grad_norm": 0.11592784523963928, "learning_rate": 9.70123935483922e-06, "loss": 0.0035, "step": 48800 }, { "epoch": 0.39914952774256857, "grad_norm": 0.060627274215221405, "learning_rate": 9.70099632194511e-06, "loss": 0.0016, "step": 48810 }, { "epoch": 0.3992313039211678, "grad_norm": 0.1273396760225296, "learning_rate": 9.700753193287886e-06, "loss": 0.0038, "step": 48820 }, { "epoch": 0.39931308009976696, "grad_norm": 0.09854864329099655, "learning_rate": 9.700509968872495e-06, "loss": 0.0054, "step": 48830 }, { "epoch": 0.39939485627836613, "grad_norm": 0.08470698446035385, "learning_rate": 9.700266648703897e-06, "loss": 0.0043, "step": 48840 }, { "epoch": 0.3994766324569653, "grad_norm": 0.025642115622758865, "learning_rate": 9.700023232787048e-06, "loss": 0.0015, "step": 48850 }, { "epoch": 0.39955840863556447, "grad_norm": 0.1584187150001526, "learning_rate": 9.699779721126904e-06, "loss": 0.0028, "step": 48860 }, { "epoch": 0.39964018481416363, "grad_norm": 0.24313271045684814, "learning_rate": 9.699536113728426e-06, "loss": 0.0037, "step": 48870 }, { "epoch": 0.3997219609927628, "grad_norm": 0.09099625796079636, "learning_rate": 9.699292410596579e-06, "loss": 0.0041, "step": 48880 }, { "epoch": 0.399803737171362, "grad_norm": 0.046857140958309174, "learning_rate": 9.699048611736327e-06, "loss": 0.0015, "step": 48890 }, { "epoch": 0.39988551334996114, "grad_norm": 0.0280633382499218, "learning_rate": 9.698804717152632e-06, "loss": 0.0019, "step": 48900 }, { "epoch": 0.3999672895285603, "grad_norm": 0.0749816820025444, "learning_rate": 9.698560726850467e-06, "loss": 0.0015, "step": 48910 }, { "epoch": 0.4000490657071595, "grad_norm": 0.050030164420604706, "learning_rate": 9.698316640834801e-06, "loss": 0.0027, "step": 48920 }, { "epoch": 0.4001308418857587, "grad_norm": 0.164841428399086, "learning_rate": 9.698072459110605e-06, "loss": 0.0021, "step": 48930 }, { "epoch": 0.4002126180643579, "grad_norm": 0.02278093248605728, "learning_rate": 9.697828181682855e-06, "loss": 0.0024, "step": 48940 }, { "epoch": 0.40029439424295704, "grad_norm": 0.0333966389298439, "learning_rate": 9.697583808556528e-06, "loss": 0.0035, "step": 48950 }, { "epoch": 0.4003761704215562, "grad_norm": 0.05648637190461159, "learning_rate": 9.697339339736598e-06, "loss": 0.0023, "step": 48960 }, { "epoch": 0.4004579466001554, "grad_norm": 0.030392013490200043, "learning_rate": 9.69709477522805e-06, "loss": 0.0021, "step": 48970 }, { "epoch": 0.40053972277875455, "grad_norm": 0.24136857688426971, "learning_rate": 9.696850115035862e-06, "loss": 0.0022, "step": 48980 }, { "epoch": 0.4006214989573537, "grad_norm": 0.1442355215549469, "learning_rate": 9.696605359165017e-06, "loss": 0.002, "step": 48990 }, { "epoch": 0.4007032751359529, "grad_norm": 0.11199258267879486, "learning_rate": 9.696360507620506e-06, "loss": 0.0025, "step": 49000 }, { "epoch": 0.40078505131455205, "grad_norm": 0.08280228823423386, "learning_rate": 9.696115560407313e-06, "loss": 0.0031, "step": 49010 }, { "epoch": 0.4008668274931512, "grad_norm": 0.13051575422286987, "learning_rate": 9.69587051753043e-06, "loss": 0.0032, "step": 49020 }, { "epoch": 0.4009486036717504, "grad_norm": 0.04800092428922653, "learning_rate": 9.695625378994846e-06, "loss": 0.002, "step": 49030 }, { "epoch": 0.4010303798503496, "grad_norm": 0.058497704565525055, "learning_rate": 9.695380144805557e-06, "loss": 0.0023, "step": 49040 }, { "epoch": 0.4011121560289488, "grad_norm": 0.0871199294924736, "learning_rate": 9.695134814967559e-06, "loss": 0.0022, "step": 49050 }, { "epoch": 0.40119393220754795, "grad_norm": 0.0944748967885971, "learning_rate": 9.694889389485847e-06, "loss": 0.0014, "step": 49060 }, { "epoch": 0.4012757083861471, "grad_norm": 0.1309269815683365, "learning_rate": 9.694643868365423e-06, "loss": 0.0023, "step": 49070 }, { "epoch": 0.4013574845647463, "grad_norm": 0.23875188827514648, "learning_rate": 9.694398251611285e-06, "loss": 0.0018, "step": 49080 }, { "epoch": 0.40143926074334546, "grad_norm": 0.14615461230278015, "learning_rate": 9.694152539228442e-06, "loss": 0.0028, "step": 49090 }, { "epoch": 0.40152103692194463, "grad_norm": 0.02736954763531685, "learning_rate": 9.693906731221893e-06, "loss": 0.0031, "step": 49100 }, { "epoch": 0.4016028131005438, "grad_norm": 0.045188065618276596, "learning_rate": 9.693660827596652e-06, "loss": 0.002, "step": 49110 }, { "epoch": 0.40168458927914297, "grad_norm": 0.5953157544136047, "learning_rate": 9.693414828357722e-06, "loss": 0.0029, "step": 49120 }, { "epoch": 0.40176636545774214, "grad_norm": 0.054262105375528336, "learning_rate": 9.693168733510119e-06, "loss": 0.005, "step": 49130 }, { "epoch": 0.40184814163634136, "grad_norm": 0.05466437339782715, "learning_rate": 9.692922543058852e-06, "loss": 0.0019, "step": 49140 }, { "epoch": 0.40192991781494053, "grad_norm": 0.12934216856956482, "learning_rate": 9.69267625700894e-06, "loss": 0.0034, "step": 49150 }, { "epoch": 0.4020116939935397, "grad_norm": 0.11002421379089355, "learning_rate": 9.692429875365396e-06, "loss": 0.0022, "step": 49160 }, { "epoch": 0.40209347017213887, "grad_norm": 0.1250961422920227, "learning_rate": 9.692183398133243e-06, "loss": 0.0037, "step": 49170 }, { "epoch": 0.40217524635073804, "grad_norm": 0.05025361850857735, "learning_rate": 9.6919368253175e-06, "loss": 0.0019, "step": 49180 }, { "epoch": 0.4022570225293372, "grad_norm": 0.03285502642393112, "learning_rate": 9.69169015692319e-06, "loss": 0.004, "step": 49190 }, { "epoch": 0.4023387987079364, "grad_norm": 0.047594103962183, "learning_rate": 9.691443392955338e-06, "loss": 0.0023, "step": 49200 }, { "epoch": 0.40242057488653554, "grad_norm": 0.1489904373884201, "learning_rate": 9.69119653341897e-06, "loss": 0.0032, "step": 49210 }, { "epoch": 0.4025023510651347, "grad_norm": 0.15310829877853394, "learning_rate": 9.690949578319117e-06, "loss": 0.0022, "step": 49220 }, { "epoch": 0.4025841272437339, "grad_norm": 0.10421034693717957, "learning_rate": 9.690702527660807e-06, "loss": 0.0028, "step": 49230 }, { "epoch": 0.40266590342233305, "grad_norm": 0.07734541594982147, "learning_rate": 9.690455381449074e-06, "loss": 0.0058, "step": 49240 }, { "epoch": 0.4027476796009323, "grad_norm": 0.05910644680261612, "learning_rate": 9.69020813968895e-06, "loss": 0.0017, "step": 49250 }, { "epoch": 0.40282945577953144, "grad_norm": 0.09984961897134781, "learning_rate": 9.689960802385476e-06, "loss": 0.0029, "step": 49260 }, { "epoch": 0.4029112319581306, "grad_norm": 0.059880971908569336, "learning_rate": 9.68971336954369e-06, "loss": 0.0022, "step": 49270 }, { "epoch": 0.4029930081367298, "grad_norm": 0.13778056204319, "learning_rate": 9.689465841168628e-06, "loss": 0.0042, "step": 49280 }, { "epoch": 0.40307478431532895, "grad_norm": 0.12270131707191467, "learning_rate": 9.689218217265335e-06, "loss": 0.0024, "step": 49290 }, { "epoch": 0.4031565604939281, "grad_norm": 0.10292617231607437, "learning_rate": 9.688970497838857e-06, "loss": 0.0031, "step": 49300 }, { "epoch": 0.4032383366725273, "grad_norm": 0.15414653718471527, "learning_rate": 9.688722682894236e-06, "loss": 0.0026, "step": 49310 }, { "epoch": 0.40332011285112646, "grad_norm": 0.07087934762239456, "learning_rate": 9.688474772436525e-06, "loss": 0.002, "step": 49320 }, { "epoch": 0.4034018890297256, "grad_norm": 0.03343648090958595, "learning_rate": 9.688226766470772e-06, "loss": 0.0037, "step": 49330 }, { "epoch": 0.4034836652083248, "grad_norm": 0.0632423534989357, "learning_rate": 9.687978665002027e-06, "loss": 0.0025, "step": 49340 }, { "epoch": 0.40356544138692396, "grad_norm": 0.10874682664871216, "learning_rate": 9.687730468035347e-06, "loss": 0.0021, "step": 49350 }, { "epoch": 0.4036472175655232, "grad_norm": 0.03585052117705345, "learning_rate": 9.687482175575789e-06, "loss": 0.0026, "step": 49360 }, { "epoch": 0.40372899374412236, "grad_norm": 0.09493881464004517, "learning_rate": 9.687233787628407e-06, "loss": 0.0023, "step": 49370 }, { "epoch": 0.4038107699227215, "grad_norm": 0.06695599853992462, "learning_rate": 9.686985304198262e-06, "loss": 0.0026, "step": 49380 }, { "epoch": 0.4038925461013207, "grad_norm": 0.10294628888368607, "learning_rate": 9.686736725290418e-06, "loss": 0.0032, "step": 49390 }, { "epoch": 0.40397432227991986, "grad_norm": 0.079105444252491, "learning_rate": 9.686488050909936e-06, "loss": 0.0033, "step": 49400 }, { "epoch": 0.40405609845851903, "grad_norm": 0.06887683272361755, "learning_rate": 9.686239281061885e-06, "loss": 0.0058, "step": 49410 }, { "epoch": 0.4041378746371182, "grad_norm": 0.06563787907361984, "learning_rate": 9.685990415751329e-06, "loss": 0.0018, "step": 49420 }, { "epoch": 0.40421965081571737, "grad_norm": 0.06463272124528885, "learning_rate": 9.685741454983341e-06, "loss": 0.0032, "step": 49430 }, { "epoch": 0.40430142699431654, "grad_norm": 0.3470010459423065, "learning_rate": 9.68549239876299e-06, "loss": 0.0029, "step": 49440 }, { "epoch": 0.4043832031729157, "grad_norm": 0.05528227612376213, "learning_rate": 9.68524324709535e-06, "loss": 0.0024, "step": 49450 }, { "epoch": 0.40446497935151493, "grad_norm": 0.09859880059957504, "learning_rate": 9.684993999985495e-06, "loss": 0.0023, "step": 49460 }, { "epoch": 0.4045467555301141, "grad_norm": 0.03655855730175972, "learning_rate": 9.684744657438508e-06, "loss": 0.0027, "step": 49470 }, { "epoch": 0.40462853170871327, "grad_norm": 0.10548138618469238, "learning_rate": 9.68449521945946e-06, "loss": 0.0029, "step": 49480 }, { "epoch": 0.40471030788731244, "grad_norm": 0.05325500667095184, "learning_rate": 9.684245686053439e-06, "loss": 0.0034, "step": 49490 }, { "epoch": 0.4047920840659116, "grad_norm": 0.2062518298625946, "learning_rate": 9.683996057225527e-06, "loss": 0.0067, "step": 49500 }, { "epoch": 0.4048738602445108, "grad_norm": 0.2238469272851944, "learning_rate": 9.683746332980806e-06, "loss": 0.0046, "step": 49510 }, { "epoch": 0.40495563642310994, "grad_norm": 0.14304739236831665, "learning_rate": 9.683496513324365e-06, "loss": 0.0031, "step": 49520 }, { "epoch": 0.4050374126017091, "grad_norm": 0.033258676528930664, "learning_rate": 9.683246598261293e-06, "loss": 0.0023, "step": 49530 }, { "epoch": 0.4051191887803083, "grad_norm": 0.046739690005779266, "learning_rate": 9.682996587796682e-06, "loss": 0.0033, "step": 49540 }, { "epoch": 0.40520096495890745, "grad_norm": 0.1413975954055786, "learning_rate": 9.682746481935625e-06, "loss": 0.0048, "step": 49550 }, { "epoch": 0.4052827411375066, "grad_norm": 0.1587369590997696, "learning_rate": 9.682496280683213e-06, "loss": 0.0022, "step": 49560 }, { "epoch": 0.40536451731610584, "grad_norm": 0.20479929447174072, "learning_rate": 9.682245984044548e-06, "loss": 0.0025, "step": 49570 }, { "epoch": 0.405446293494705, "grad_norm": 0.11011554300785065, "learning_rate": 9.681995592024725e-06, "loss": 0.0027, "step": 49580 }, { "epoch": 0.4055280696733042, "grad_norm": 0.06362732499837875, "learning_rate": 9.681745104628848e-06, "loss": 0.0018, "step": 49590 }, { "epoch": 0.40560984585190335, "grad_norm": 0.020955044776201248, "learning_rate": 9.681494521862016e-06, "loss": 0.0018, "step": 49600 }, { "epoch": 0.4056916220305025, "grad_norm": 0.03848183527588844, "learning_rate": 9.681243843729335e-06, "loss": 0.0015, "step": 49610 }, { "epoch": 0.4057733982091017, "grad_norm": 0.06272491812705994, "learning_rate": 9.680993070235913e-06, "loss": 0.0013, "step": 49620 }, { "epoch": 0.40585517438770086, "grad_norm": 0.188309445977211, "learning_rate": 9.680742201386857e-06, "loss": 0.0031, "step": 49630 }, { "epoch": 0.4059369505663, "grad_norm": 0.04891892150044441, "learning_rate": 9.680491237187276e-06, "loss": 0.0044, "step": 49640 }, { "epoch": 0.4060187267448992, "grad_norm": 0.16351129114627838, "learning_rate": 9.680240177642286e-06, "loss": 0.0032, "step": 49650 }, { "epoch": 0.40610050292349836, "grad_norm": 0.026171090081334114, "learning_rate": 9.679989022756997e-06, "loss": 0.0039, "step": 49660 }, { "epoch": 0.40618227910209753, "grad_norm": 0.03892083093523979, "learning_rate": 9.679737772536531e-06, "loss": 0.0021, "step": 49670 }, { "epoch": 0.40626405528069676, "grad_norm": 0.1289360076189041, "learning_rate": 9.679486426986e-06, "loss": 0.0033, "step": 49680 }, { "epoch": 0.4063458314592959, "grad_norm": 0.09252247959375381, "learning_rate": 9.679234986110528e-06, "loss": 0.003, "step": 49690 }, { "epoch": 0.4064276076378951, "grad_norm": 0.09395337104797363, "learning_rate": 9.678983449915236e-06, "loss": 0.0023, "step": 49700 }, { "epoch": 0.40650938381649426, "grad_norm": 0.06471297144889832, "learning_rate": 9.678731818405247e-06, "loss": 0.0024, "step": 49710 }, { "epoch": 0.40659115999509343, "grad_norm": 0.17768940329551697, "learning_rate": 9.678480091585687e-06, "loss": 0.0033, "step": 49720 }, { "epoch": 0.4066729361736926, "grad_norm": 0.1882152557373047, "learning_rate": 9.678228269461687e-06, "loss": 0.0025, "step": 49730 }, { "epoch": 0.40675471235229177, "grad_norm": 0.0382625013589859, "learning_rate": 9.677976352038372e-06, "loss": 0.0032, "step": 49740 }, { "epoch": 0.40683648853089094, "grad_norm": 0.09167458862066269, "learning_rate": 9.677724339320878e-06, "loss": 0.0026, "step": 49750 }, { "epoch": 0.4069182647094901, "grad_norm": 0.05407720059156418, "learning_rate": 9.677472231314337e-06, "loss": 0.0029, "step": 49760 }, { "epoch": 0.4070000408880893, "grad_norm": 0.06973335891962051, "learning_rate": 9.677220028023884e-06, "loss": 0.0054, "step": 49770 }, { "epoch": 0.4070818170666885, "grad_norm": 0.06970670819282532, "learning_rate": 9.676967729454658e-06, "loss": 0.0027, "step": 49780 }, { "epoch": 0.40716359324528767, "grad_norm": 0.024206897243857384, "learning_rate": 9.676715335611797e-06, "loss": 0.0026, "step": 49790 }, { "epoch": 0.40724536942388684, "grad_norm": 0.12118108570575714, "learning_rate": 9.676462846500444e-06, "loss": 0.0059, "step": 49800 }, { "epoch": 0.407327145602486, "grad_norm": 0.0851365327835083, "learning_rate": 9.67621026212574e-06, "loss": 0.0048, "step": 49810 }, { "epoch": 0.4074089217810852, "grad_norm": 0.09294435381889343, "learning_rate": 9.675957582492832e-06, "loss": 0.0021, "step": 49820 }, { "epoch": 0.40749069795968434, "grad_norm": 0.07170885801315308, "learning_rate": 9.675704807606867e-06, "loss": 0.0021, "step": 49830 }, { "epoch": 0.4075724741382835, "grad_norm": 0.1999044418334961, "learning_rate": 9.675451937472998e-06, "loss": 0.0021, "step": 49840 }, { "epoch": 0.4076542503168827, "grad_norm": 0.031309522688388824, "learning_rate": 9.675198972096369e-06, "loss": 0.0031, "step": 49850 }, { "epoch": 0.40773602649548185, "grad_norm": 0.06388922780752182, "learning_rate": 9.674945911482137e-06, "loss": 0.0024, "step": 49860 }, { "epoch": 0.407817802674081, "grad_norm": 0.20580251514911652, "learning_rate": 9.674692755635457e-06, "loss": 0.0026, "step": 49870 }, { "epoch": 0.4078995788526802, "grad_norm": 0.3388758599758148, "learning_rate": 9.674439504561487e-06, "loss": 0.0036, "step": 49880 }, { "epoch": 0.4079813550312794, "grad_norm": 0.08339034765958786, "learning_rate": 9.674186158265384e-06, "loss": 0.0042, "step": 49890 }, { "epoch": 0.4080631312098786, "grad_norm": 0.03979477286338806, "learning_rate": 9.67393271675231e-06, "loss": 0.002, "step": 49900 }, { "epoch": 0.40814490738847775, "grad_norm": 0.14766499400138855, "learning_rate": 9.673679180027428e-06, "loss": 0.004, "step": 49910 }, { "epoch": 0.4082266835670769, "grad_norm": 0.09439312666654587, "learning_rate": 9.6734255480959e-06, "loss": 0.0018, "step": 49920 }, { "epoch": 0.4083084597456761, "grad_norm": 0.11033651232719421, "learning_rate": 9.673171820962896e-06, "loss": 0.0027, "step": 49930 }, { "epoch": 0.40839023592427526, "grad_norm": 0.20047089457511902, "learning_rate": 9.672917998633584e-06, "loss": 0.0014, "step": 49940 }, { "epoch": 0.4084720121028744, "grad_norm": 0.08171328157186508, "learning_rate": 9.672664081113135e-06, "loss": 0.0022, "step": 49950 }, { "epoch": 0.4085537882814736, "grad_norm": 0.11744613945484161, "learning_rate": 9.67241006840672e-06, "loss": 0.0025, "step": 49960 }, { "epoch": 0.40863556446007276, "grad_norm": 0.038512878119945526, "learning_rate": 9.672155960519511e-06, "loss": 0.002, "step": 49970 }, { "epoch": 0.40871734063867193, "grad_norm": 0.08099228143692017, "learning_rate": 9.67190175745669e-06, "loss": 0.0023, "step": 49980 }, { "epoch": 0.4087991168172711, "grad_norm": 0.12088492512702942, "learning_rate": 9.671647459223431e-06, "loss": 0.0019, "step": 49990 }, { "epoch": 0.4088808929958703, "grad_norm": 0.20063529908657074, "learning_rate": 9.671393065824919e-06, "loss": 0.0025, "step": 50000 }, { "epoch": 0.4089626691744695, "grad_norm": 0.034650567919015884, "learning_rate": 9.67113857726633e-06, "loss": 0.0025, "step": 50010 }, { "epoch": 0.40904444535306866, "grad_norm": 0.19551981985569, "learning_rate": 9.670883993552853e-06, "loss": 0.0028, "step": 50020 }, { "epoch": 0.40912622153166783, "grad_norm": 0.25090670585632324, "learning_rate": 9.670629314689672e-06, "loss": 0.0036, "step": 50030 }, { "epoch": 0.409207997710267, "grad_norm": 0.07889103144407272, "learning_rate": 9.670374540681974e-06, "loss": 0.0036, "step": 50040 }, { "epoch": 0.40928977388886617, "grad_norm": 0.07927330583333969, "learning_rate": 9.670119671534952e-06, "loss": 0.0028, "step": 50050 }, { "epoch": 0.40937155006746534, "grad_norm": 0.04015140235424042, "learning_rate": 9.669864707253797e-06, "loss": 0.004, "step": 50060 }, { "epoch": 0.4094533262460645, "grad_norm": 0.06705645471811295, "learning_rate": 9.669609647843701e-06, "loss": 0.0018, "step": 50070 }, { "epoch": 0.4095351024246637, "grad_norm": 0.02235124632716179, "learning_rate": 9.669354493309861e-06, "loss": 0.0037, "step": 50080 }, { "epoch": 0.40961687860326285, "grad_norm": 0.14627164602279663, "learning_rate": 9.669099243657473e-06, "loss": 0.0061, "step": 50090 }, { "epoch": 0.40969865478186207, "grad_norm": 0.048519689589738846, "learning_rate": 9.66884389889174e-06, "loss": 0.0045, "step": 50100 }, { "epoch": 0.40978043096046124, "grad_norm": 0.06524518877267838, "learning_rate": 9.66858845901786e-06, "loss": 0.0039, "step": 50110 }, { "epoch": 0.4098622071390604, "grad_norm": 0.01518089696764946, "learning_rate": 9.66833292404104e-06, "loss": 0.0034, "step": 50120 }, { "epoch": 0.4099439833176596, "grad_norm": 0.08657481521368027, "learning_rate": 9.668077293966483e-06, "loss": 0.0017, "step": 50130 }, { "epoch": 0.41002575949625875, "grad_norm": 0.026327084749937057, "learning_rate": 9.667821568799399e-06, "loss": 0.0027, "step": 50140 }, { "epoch": 0.4101075356748579, "grad_norm": 0.02240702696144581, "learning_rate": 9.667565748544992e-06, "loss": 0.0032, "step": 50150 }, { "epoch": 0.4101893118534571, "grad_norm": 0.106133833527565, "learning_rate": 9.66730983320848e-06, "loss": 0.005, "step": 50160 }, { "epoch": 0.41027108803205625, "grad_norm": 0.014497822150588036, "learning_rate": 9.66705382279507e-06, "loss": 0.0038, "step": 50170 }, { "epoch": 0.4103528642106554, "grad_norm": 0.06794076412916183, "learning_rate": 9.66679771730998e-06, "loss": 0.0019, "step": 50180 }, { "epoch": 0.4104346403892546, "grad_norm": 0.03850529342889786, "learning_rate": 9.66654151675843e-06, "loss": 0.0028, "step": 50190 }, { "epoch": 0.41051641656785376, "grad_norm": 0.1492929756641388, "learning_rate": 9.666285221145635e-06, "loss": 0.0038, "step": 50200 }, { "epoch": 0.410598192746453, "grad_norm": 0.07078240811824799, "learning_rate": 9.666028830476816e-06, "loss": 0.0031, "step": 50210 }, { "epoch": 0.41067996892505215, "grad_norm": 0.15328994393348694, "learning_rate": 9.665772344757199e-06, "loss": 0.0025, "step": 50220 }, { "epoch": 0.4107617451036513, "grad_norm": 0.061567023396492004, "learning_rate": 9.665515763992006e-06, "loss": 0.0039, "step": 50230 }, { "epoch": 0.4108435212822505, "grad_norm": 0.057687435299158096, "learning_rate": 9.665259088186464e-06, "loss": 0.0024, "step": 50240 }, { "epoch": 0.41092529746084966, "grad_norm": 0.0884142816066742, "learning_rate": 9.665002317345802e-06, "loss": 0.0023, "step": 50250 }, { "epoch": 0.41100707363944883, "grad_norm": 0.047308601438999176, "learning_rate": 9.664745451475251e-06, "loss": 0.0024, "step": 50260 }, { "epoch": 0.411088849818048, "grad_norm": 0.5391659736633301, "learning_rate": 9.664488490580044e-06, "loss": 0.0024, "step": 50270 }, { "epoch": 0.41117062599664717, "grad_norm": 0.04937112703919411, "learning_rate": 9.664231434665414e-06, "loss": 0.0047, "step": 50280 }, { "epoch": 0.41125240217524633, "grad_norm": 0.057990506291389465, "learning_rate": 9.6639742837366e-06, "loss": 0.0025, "step": 50290 }, { "epoch": 0.4113341783538455, "grad_norm": 0.09001109004020691, "learning_rate": 9.663717037798837e-06, "loss": 0.003, "step": 50300 }, { "epoch": 0.41141595453244467, "grad_norm": 0.12593573331832886, "learning_rate": 9.66345969685737e-06, "loss": 0.0014, "step": 50310 }, { "epoch": 0.4114977307110439, "grad_norm": 0.10192584991455078, "learning_rate": 9.663202260917435e-06, "loss": 0.0023, "step": 50320 }, { "epoch": 0.41157950688964307, "grad_norm": 0.1606808602809906, "learning_rate": 9.66294472998428e-06, "loss": 0.0024, "step": 50330 }, { "epoch": 0.41166128306824223, "grad_norm": 0.028947139158844948, "learning_rate": 9.662687104063154e-06, "loss": 0.0021, "step": 50340 }, { "epoch": 0.4117430592468414, "grad_norm": 0.039317864924669266, "learning_rate": 9.662429383159298e-06, "loss": 0.0025, "step": 50350 }, { "epoch": 0.41182483542544057, "grad_norm": 0.03677722066640854, "learning_rate": 9.662171567277966e-06, "loss": 0.0033, "step": 50360 }, { "epoch": 0.41190661160403974, "grad_norm": 0.044696565717458725, "learning_rate": 9.66191365642441e-06, "loss": 0.0019, "step": 50370 }, { "epoch": 0.4119883877826389, "grad_norm": 0.2238389402627945, "learning_rate": 9.661655650603885e-06, "loss": 0.0026, "step": 50380 }, { "epoch": 0.4120701639612381, "grad_norm": 0.09032011032104492, "learning_rate": 9.661397549821643e-06, "loss": 0.003, "step": 50390 }, { "epoch": 0.41215194013983725, "grad_norm": 0.1870184987783432, "learning_rate": 9.661139354082945e-06, "loss": 0.003, "step": 50400 }, { "epoch": 0.4122337163184364, "grad_norm": 0.24959346652030945, "learning_rate": 9.66088106339305e-06, "loss": 0.0029, "step": 50410 }, { "epoch": 0.41231549249703564, "grad_norm": 0.0644153282046318, "learning_rate": 9.660622677757218e-06, "loss": 0.002, "step": 50420 }, { "epoch": 0.4123972686756348, "grad_norm": 0.007603298407047987, "learning_rate": 9.660364197180713e-06, "loss": 0.0027, "step": 50430 }, { "epoch": 0.412479044854234, "grad_norm": 0.029610447585582733, "learning_rate": 9.660105621668805e-06, "loss": 0.0018, "step": 50440 }, { "epoch": 0.41256082103283315, "grad_norm": 0.07090721279382706, "learning_rate": 9.659846951226754e-06, "loss": 0.0026, "step": 50450 }, { "epoch": 0.4126425972114323, "grad_norm": 0.0020897602662444115, "learning_rate": 9.659588185859834e-06, "loss": 0.002, "step": 50460 }, { "epoch": 0.4127243733900315, "grad_norm": 0.11091871559619904, "learning_rate": 9.659329325573314e-06, "loss": 0.0026, "step": 50470 }, { "epoch": 0.41280614956863065, "grad_norm": 0.037955719977617264, "learning_rate": 9.659070370372471e-06, "loss": 0.0024, "step": 50480 }, { "epoch": 0.4128879257472298, "grad_norm": 0.10462194681167603, "learning_rate": 9.658811320262576e-06, "loss": 0.0026, "step": 50490 }, { "epoch": 0.412969701925829, "grad_norm": 0.04187440499663353, "learning_rate": 9.658552175248907e-06, "loss": 0.0034, "step": 50500 }, { "epoch": 0.41305147810442816, "grad_norm": 0.16468961536884308, "learning_rate": 9.658292935336744e-06, "loss": 0.0035, "step": 50510 }, { "epoch": 0.41313325428302733, "grad_norm": 0.08864203840494156, "learning_rate": 9.658033600531367e-06, "loss": 0.0023, "step": 50520 }, { "epoch": 0.41321503046162655, "grad_norm": 0.03841666504740715, "learning_rate": 9.657774170838061e-06, "loss": 0.002, "step": 50530 }, { "epoch": 0.4132968066402257, "grad_norm": 0.159953311085701, "learning_rate": 9.657514646262107e-06, "loss": 0.0031, "step": 50540 }, { "epoch": 0.4133785828188249, "grad_norm": 0.12329579144716263, "learning_rate": 9.657255026808794e-06, "loss": 0.0014, "step": 50550 }, { "epoch": 0.41346035899742406, "grad_norm": 0.08308586478233337, "learning_rate": 9.656995312483413e-06, "loss": 0.0019, "step": 50560 }, { "epoch": 0.41354213517602323, "grad_norm": 0.06014726683497429, "learning_rate": 9.65673550329125e-06, "loss": 0.0019, "step": 50570 }, { "epoch": 0.4136239113546224, "grad_norm": 0.0673380121588707, "learning_rate": 9.6564755992376e-06, "loss": 0.0029, "step": 50580 }, { "epoch": 0.41370568753322157, "grad_norm": 0.1398427039384842, "learning_rate": 9.656215600327756e-06, "loss": 0.0034, "step": 50590 }, { "epoch": 0.41378746371182074, "grad_norm": 0.21125191450119019, "learning_rate": 9.655955506567016e-06, "loss": 0.0036, "step": 50600 }, { "epoch": 0.4138692398904199, "grad_norm": 0.06862550228834152, "learning_rate": 9.655695317960679e-06, "loss": 0.0031, "step": 50610 }, { "epoch": 0.4139510160690191, "grad_norm": 0.042238879948854446, "learning_rate": 9.655435034514043e-06, "loss": 0.0014, "step": 50620 }, { "epoch": 0.41403279224761824, "grad_norm": 0.039241597056388855, "learning_rate": 9.65517465623241e-06, "loss": 0.0045, "step": 50630 }, { "epoch": 0.41411456842621747, "grad_norm": 0.05486422777175903, "learning_rate": 9.65491418312109e-06, "loss": 0.0023, "step": 50640 }, { "epoch": 0.41419634460481664, "grad_norm": 0.023542355746030807, "learning_rate": 9.65465361518538e-06, "loss": 0.0043, "step": 50650 }, { "epoch": 0.4142781207834158, "grad_norm": 0.12084642797708511, "learning_rate": 9.654392952430594e-06, "loss": 0.0019, "step": 50660 }, { "epoch": 0.414359896962015, "grad_norm": 0.08572418987751007, "learning_rate": 9.654132194862039e-06, "loss": 0.0031, "step": 50670 }, { "epoch": 0.41444167314061414, "grad_norm": 0.25181692838668823, "learning_rate": 9.65387134248503e-06, "loss": 0.0035, "step": 50680 }, { "epoch": 0.4145234493192133, "grad_norm": 0.05254467576742172, "learning_rate": 9.653610395304878e-06, "loss": 0.0018, "step": 50690 }, { "epoch": 0.4146052254978125, "grad_norm": 0.05579068884253502, "learning_rate": 9.653349353326899e-06, "loss": 0.0027, "step": 50700 }, { "epoch": 0.41468700167641165, "grad_norm": 0.04892479255795479, "learning_rate": 9.653088216556412e-06, "loss": 0.0108, "step": 50710 }, { "epoch": 0.4147687778550108, "grad_norm": 0.14160491526126862, "learning_rate": 9.652826984998735e-06, "loss": 0.0032, "step": 50720 }, { "epoch": 0.41485055403361, "grad_norm": 0.10317496210336685, "learning_rate": 9.65256565865919e-06, "loss": 0.0044, "step": 50730 }, { "epoch": 0.4149323302122092, "grad_norm": 0.08142006397247314, "learning_rate": 9.652304237543102e-06, "loss": 0.0036, "step": 50740 }, { "epoch": 0.4150141063908084, "grad_norm": 0.15136103332042694, "learning_rate": 9.652042721655794e-06, "loss": 0.003, "step": 50750 }, { "epoch": 0.41509588256940755, "grad_norm": 0.02277945540845394, "learning_rate": 9.651781111002596e-06, "loss": 0.0023, "step": 50760 }, { "epoch": 0.4151776587480067, "grad_norm": 0.14524836838245392, "learning_rate": 9.651519405588834e-06, "loss": 0.0035, "step": 50770 }, { "epoch": 0.4152594349266059, "grad_norm": 0.12260537594556808, "learning_rate": 9.651257605419841e-06, "loss": 0.0023, "step": 50780 }, { "epoch": 0.41534121110520505, "grad_norm": 0.004892442375421524, "learning_rate": 9.650995710500951e-06, "loss": 0.0028, "step": 50790 }, { "epoch": 0.4154229872838042, "grad_norm": 0.040695060044527054, "learning_rate": 9.650733720837497e-06, "loss": 0.0031, "step": 50800 }, { "epoch": 0.4155047634624034, "grad_norm": 0.0382089801132679, "learning_rate": 9.650471636434818e-06, "loss": 0.0022, "step": 50810 }, { "epoch": 0.41558653964100256, "grad_norm": 0.08057866990566254, "learning_rate": 9.650209457298249e-06, "loss": 0.0017, "step": 50820 }, { "epoch": 0.41566831581960173, "grad_norm": 0.10290109366178513, "learning_rate": 9.649947183433136e-06, "loss": 0.0034, "step": 50830 }, { "epoch": 0.4157500919982009, "grad_norm": 0.03607238084077835, "learning_rate": 9.649684814844817e-06, "loss": 0.0015, "step": 50840 }, { "epoch": 0.4158318681768001, "grad_norm": 0.10444662719964981, "learning_rate": 9.649422351538642e-06, "loss": 0.0023, "step": 50850 }, { "epoch": 0.4159136443553993, "grad_norm": 0.2434288114309311, "learning_rate": 9.649159793519952e-06, "loss": 0.0028, "step": 50860 }, { "epoch": 0.41599542053399846, "grad_norm": 0.19834502041339874, "learning_rate": 9.648897140794098e-06, "loss": 0.0029, "step": 50870 }, { "epoch": 0.41607719671259763, "grad_norm": 0.04997947812080383, "learning_rate": 9.648634393366432e-06, "loss": 0.0022, "step": 50880 }, { "epoch": 0.4161589728911968, "grad_norm": 0.05738363787531853, "learning_rate": 9.648371551242304e-06, "loss": 0.002, "step": 50890 }, { "epoch": 0.41624074906979597, "grad_norm": 0.1452932059764862, "learning_rate": 9.648108614427068e-06, "loss": 0.002, "step": 50900 }, { "epoch": 0.41632252524839514, "grad_norm": 0.07583979517221451, "learning_rate": 9.647845582926083e-06, "loss": 0.0022, "step": 50910 }, { "epoch": 0.4164043014269943, "grad_norm": 0.10230628401041031, "learning_rate": 9.647582456744703e-06, "loss": 0.0026, "step": 50920 }, { "epoch": 0.4164860776055935, "grad_norm": 0.07257736474275589, "learning_rate": 9.647319235888293e-06, "loss": 0.002, "step": 50930 }, { "epoch": 0.41656785378419264, "grad_norm": 0.06535810977220535, "learning_rate": 9.647055920362212e-06, "loss": 0.0019, "step": 50940 }, { "epoch": 0.4166496299627918, "grad_norm": 0.0992918312549591, "learning_rate": 9.646792510171826e-06, "loss": 0.0027, "step": 50950 }, { "epoch": 0.41673140614139104, "grad_norm": 0.04447116330265999, "learning_rate": 9.646529005322498e-06, "loss": 0.0034, "step": 50960 }, { "epoch": 0.4168131823199902, "grad_norm": 0.10264381021261215, "learning_rate": 9.646265405819598e-06, "loss": 0.0028, "step": 50970 }, { "epoch": 0.4168949584985894, "grad_norm": 0.23418456315994263, "learning_rate": 9.646001711668493e-06, "loss": 0.0074, "step": 50980 }, { "epoch": 0.41697673467718854, "grad_norm": 0.05958355590701103, "learning_rate": 9.645737922874559e-06, "loss": 0.0024, "step": 50990 }, { "epoch": 0.4170585108557877, "grad_norm": 0.056943848729133606, "learning_rate": 9.645474039443165e-06, "loss": 0.0032, "step": 51000 }, { "epoch": 0.4171402870343869, "grad_norm": 0.07608824968338013, "learning_rate": 9.645210061379691e-06, "loss": 0.0024, "step": 51010 }, { "epoch": 0.41722206321298605, "grad_norm": 0.5324780941009521, "learning_rate": 9.644945988689512e-06, "loss": 0.0051, "step": 51020 }, { "epoch": 0.4173038393915852, "grad_norm": 0.17068444192409515, "learning_rate": 9.644681821378006e-06, "loss": 0.0048, "step": 51030 }, { "epoch": 0.4173856155701844, "grad_norm": 0.03269399330019951, "learning_rate": 9.644417559450556e-06, "loss": 0.0023, "step": 51040 }, { "epoch": 0.41746739174878356, "grad_norm": 0.082297183573246, "learning_rate": 9.644153202912548e-06, "loss": 0.0015, "step": 51050 }, { "epoch": 0.4175491679273828, "grad_norm": 0.09621194750070572, "learning_rate": 9.64388875176936e-06, "loss": 0.0032, "step": 51060 }, { "epoch": 0.41763094410598195, "grad_norm": 0.1051316186785698, "learning_rate": 9.643624206026388e-06, "loss": 0.0048, "step": 51070 }, { "epoch": 0.4177127202845811, "grad_norm": 0.04049227014183998, "learning_rate": 9.643359565689012e-06, "loss": 0.0029, "step": 51080 }, { "epoch": 0.4177944964631803, "grad_norm": 0.1306440234184265, "learning_rate": 9.64309483076263e-06, "loss": 0.0041, "step": 51090 }, { "epoch": 0.41787627264177946, "grad_norm": 0.00926513597369194, "learning_rate": 9.642830001252632e-06, "loss": 0.0021, "step": 51100 }, { "epoch": 0.4179580488203786, "grad_norm": 0.10570590198040009, "learning_rate": 9.642565077164413e-06, "loss": 0.0025, "step": 51110 }, { "epoch": 0.4180398249989778, "grad_norm": 0.03599085658788681, "learning_rate": 9.642300058503369e-06, "loss": 0.0029, "step": 51120 }, { "epoch": 0.41812160117757696, "grad_norm": 0.030376799404621124, "learning_rate": 9.6420349452749e-06, "loss": 0.0039, "step": 51130 }, { "epoch": 0.41820337735617613, "grad_norm": 0.19239865243434906, "learning_rate": 9.641769737484406e-06, "loss": 0.0036, "step": 51140 }, { "epoch": 0.4182851535347753, "grad_norm": 0.23111318051815033, "learning_rate": 9.641504435137288e-06, "loss": 0.0039, "step": 51150 }, { "epoch": 0.41836692971337447, "grad_norm": 0.03902965784072876, "learning_rate": 9.641239038238952e-06, "loss": 0.0014, "step": 51160 }, { "epoch": 0.4184487058919737, "grad_norm": 0.033644065260887146, "learning_rate": 9.640973546794805e-06, "loss": 0.0022, "step": 51170 }, { "epoch": 0.41853048207057286, "grad_norm": 0.28095653653144836, "learning_rate": 9.640707960810256e-06, "loss": 0.0022, "step": 51180 }, { "epoch": 0.41861225824917203, "grad_norm": 0.11033552885055542, "learning_rate": 9.640442280290712e-06, "loss": 0.0024, "step": 51190 }, { "epoch": 0.4186940344277712, "grad_norm": 0.1157606914639473, "learning_rate": 9.640176505241586e-06, "loss": 0.0025, "step": 51200 }, { "epoch": 0.41877581060637037, "grad_norm": 0.1047564148902893, "learning_rate": 9.639910635668294e-06, "loss": 0.0034, "step": 51210 }, { "epoch": 0.41885758678496954, "grad_norm": 0.02202117070555687, "learning_rate": 9.639644671576251e-06, "loss": 0.0023, "step": 51220 }, { "epoch": 0.4189393629635687, "grad_norm": 0.11054432392120361, "learning_rate": 9.639378612970874e-06, "loss": 0.0025, "step": 51230 }, { "epoch": 0.4190211391421679, "grad_norm": 0.003410040633752942, "learning_rate": 9.639112459857585e-06, "loss": 0.0036, "step": 51240 }, { "epoch": 0.41910291532076704, "grad_norm": 0.09383589774370193, "learning_rate": 9.638846212241804e-06, "loss": 0.0029, "step": 51250 }, { "epoch": 0.4191846914993662, "grad_norm": 0.14187388122081757, "learning_rate": 9.638579870128953e-06, "loss": 0.0043, "step": 51260 }, { "epoch": 0.4192664676779654, "grad_norm": 0.09417986869812012, "learning_rate": 9.638313433524462e-06, "loss": 0.0044, "step": 51270 }, { "epoch": 0.4193482438565646, "grad_norm": 0.10816923528909683, "learning_rate": 9.638046902433755e-06, "loss": 0.0026, "step": 51280 }, { "epoch": 0.4194300200351638, "grad_norm": 0.2392985075712204, "learning_rate": 9.637780276862263e-06, "loss": 0.0019, "step": 51290 }, { "epoch": 0.41951179621376294, "grad_norm": 0.11497419327497482, "learning_rate": 9.637513556815417e-06, "loss": 0.003, "step": 51300 }, { "epoch": 0.4195935723923621, "grad_norm": 0.029005613178014755, "learning_rate": 9.637246742298649e-06, "loss": 0.0024, "step": 51310 }, { "epoch": 0.4196753485709613, "grad_norm": 0.13398821651935577, "learning_rate": 9.636979833317397e-06, "loss": 0.0036, "step": 51320 }, { "epoch": 0.41975712474956045, "grad_norm": 0.13859879970550537, "learning_rate": 9.636712829877096e-06, "loss": 0.0032, "step": 51330 }, { "epoch": 0.4198389009281596, "grad_norm": 0.05378830432891846, "learning_rate": 9.636445731983186e-06, "loss": 0.0021, "step": 51340 }, { "epoch": 0.4199206771067588, "grad_norm": 0.05258315056562424, "learning_rate": 9.636178539641109e-06, "loss": 0.0021, "step": 51350 }, { "epoch": 0.42000245328535796, "grad_norm": 0.034380920231342316, "learning_rate": 9.635911252856305e-06, "loss": 0.0026, "step": 51360 }, { "epoch": 0.4200842294639571, "grad_norm": 0.014664694666862488, "learning_rate": 9.635643871634219e-06, "loss": 0.0024, "step": 51370 }, { "epoch": 0.42016600564255635, "grad_norm": 0.06919606775045395, "learning_rate": 9.635376395980302e-06, "loss": 0.0026, "step": 51380 }, { "epoch": 0.4202477818211555, "grad_norm": 0.1460338830947876, "learning_rate": 9.6351088259e-06, "loss": 0.0018, "step": 51390 }, { "epoch": 0.4203295579997547, "grad_norm": 0.0855601578950882, "learning_rate": 9.634841161398762e-06, "loss": 0.0021, "step": 51400 }, { "epoch": 0.42041133417835386, "grad_norm": 0.05467887967824936, "learning_rate": 9.634573402482044e-06, "loss": 0.002, "step": 51410 }, { "epoch": 0.420493110356953, "grad_norm": 0.14637809991836548, "learning_rate": 9.634305549155296e-06, "loss": 0.0025, "step": 51420 }, { "epoch": 0.4205748865355522, "grad_norm": 0.04721357300877571, "learning_rate": 9.63403760142398e-06, "loss": 0.0032, "step": 51430 }, { "epoch": 0.42065666271415136, "grad_norm": 0.03716275468468666, "learning_rate": 9.633769559293549e-06, "loss": 0.0057, "step": 51440 }, { "epoch": 0.42073843889275053, "grad_norm": 0.051257144659757614, "learning_rate": 9.633501422769466e-06, "loss": 0.0029, "step": 51450 }, { "epoch": 0.4208202150713497, "grad_norm": 0.057602960616350174, "learning_rate": 9.633233191857194e-06, "loss": 0.0037, "step": 51460 }, { "epoch": 0.42090199124994887, "grad_norm": 0.1820359230041504, "learning_rate": 9.632964866562195e-06, "loss": 0.0039, "step": 51470 }, { "epoch": 0.42098376742854804, "grad_norm": 0.05014469474554062, "learning_rate": 9.632696446889935e-06, "loss": 0.0024, "step": 51480 }, { "epoch": 0.42106554360714726, "grad_norm": 0.05067456141114235, "learning_rate": 9.632427932845884e-06, "loss": 0.0034, "step": 51490 }, { "epoch": 0.42114731978574643, "grad_norm": 0.06515902280807495, "learning_rate": 9.63215932443551e-06, "loss": 0.0038, "step": 51500 }, { "epoch": 0.4212290959643456, "grad_norm": 0.03984116017818451, "learning_rate": 9.631890621664286e-06, "loss": 0.0016, "step": 51510 }, { "epoch": 0.42131087214294477, "grad_norm": 0.05862943455576897, "learning_rate": 9.631621824537685e-06, "loss": 0.0041, "step": 51520 }, { "epoch": 0.42139264832154394, "grad_norm": 0.1503070741891861, "learning_rate": 9.63135293306118e-06, "loss": 0.0023, "step": 51530 }, { "epoch": 0.4214744245001431, "grad_norm": 0.06722074002027512, "learning_rate": 9.631083947240254e-06, "loss": 0.0021, "step": 51540 }, { "epoch": 0.4215562006787423, "grad_norm": 0.03444289416074753, "learning_rate": 9.630814867080383e-06, "loss": 0.0048, "step": 51550 }, { "epoch": 0.42163797685734145, "grad_norm": 0.05397990345954895, "learning_rate": 9.63054569258705e-06, "loss": 0.0018, "step": 51560 }, { "epoch": 0.4217197530359406, "grad_norm": 0.30276742577552795, "learning_rate": 9.630276423765736e-06, "loss": 0.0019, "step": 51570 }, { "epoch": 0.4218015292145398, "grad_norm": 0.10285000503063202, "learning_rate": 9.630007060621928e-06, "loss": 0.0019, "step": 51580 }, { "epoch": 0.42188330539313895, "grad_norm": 0.1277557909488678, "learning_rate": 9.629737603161113e-06, "loss": 0.0022, "step": 51590 }, { "epoch": 0.4219650815717382, "grad_norm": 0.07377853989601135, "learning_rate": 9.62946805138878e-06, "loss": 0.0019, "step": 51600 }, { "epoch": 0.42204685775033735, "grad_norm": 0.1010628417134285, "learning_rate": 9.629198405310417e-06, "loss": 0.0049, "step": 51610 }, { "epoch": 0.4221286339289365, "grad_norm": 0.11900224536657333, "learning_rate": 9.628928664931523e-06, "loss": 0.0029, "step": 51620 }, { "epoch": 0.4222104101075357, "grad_norm": 0.2753504812717438, "learning_rate": 9.628658830257588e-06, "loss": 0.0032, "step": 51630 }, { "epoch": 0.42229218628613485, "grad_norm": 0.0839092880487442, "learning_rate": 9.628388901294113e-06, "loss": 0.0037, "step": 51640 }, { "epoch": 0.422373962464734, "grad_norm": 0.023289429023861885, "learning_rate": 9.62811887804659e-06, "loss": 0.0023, "step": 51650 }, { "epoch": 0.4224557386433332, "grad_norm": 0.02127448096871376, "learning_rate": 9.627848760520527e-06, "loss": 0.0029, "step": 51660 }, { "epoch": 0.42253751482193236, "grad_norm": 0.1282850056886673, "learning_rate": 9.627578548721418e-06, "loss": 0.0019, "step": 51670 }, { "epoch": 0.4226192910005315, "grad_norm": 0.13399004936218262, "learning_rate": 9.627308242654777e-06, "loss": 0.0028, "step": 51680 }, { "epoch": 0.4227010671791307, "grad_norm": 0.03267591819167137, "learning_rate": 9.627037842326104e-06, "loss": 0.0032, "step": 51690 }, { "epoch": 0.4227828433577299, "grad_norm": 0.2524784207344055, "learning_rate": 9.62676734774091e-06, "loss": 0.0032, "step": 51700 }, { "epoch": 0.4228646195363291, "grad_norm": 0.29859524965286255, "learning_rate": 9.626496758904704e-06, "loss": 0.0027, "step": 51710 }, { "epoch": 0.42294639571492826, "grad_norm": 0.11205694079399109, "learning_rate": 9.626226075822998e-06, "loss": 0.0045, "step": 51720 }, { "epoch": 0.4230281718935274, "grad_norm": 0.06902782618999481, "learning_rate": 9.625955298501306e-06, "loss": 0.0013, "step": 51730 }, { "epoch": 0.4231099480721266, "grad_norm": 0.09953899681568146, "learning_rate": 9.625684426945147e-06, "loss": 0.0014, "step": 51740 }, { "epoch": 0.42319172425072576, "grad_norm": 0.19410069286823273, "learning_rate": 9.625413461160034e-06, "loss": 0.0043, "step": 51750 }, { "epoch": 0.42327350042932493, "grad_norm": 0.11005079001188278, "learning_rate": 9.625142401151489e-06, "loss": 0.0029, "step": 51760 }, { "epoch": 0.4233552766079241, "grad_norm": 0.13583749532699585, "learning_rate": 9.624871246925035e-06, "loss": 0.0019, "step": 51770 }, { "epoch": 0.42343705278652327, "grad_norm": 0.018154622986912727, "learning_rate": 9.624599998486194e-06, "loss": 0.002, "step": 51780 }, { "epoch": 0.42351882896512244, "grad_norm": 0.06635221838951111, "learning_rate": 9.624328655840491e-06, "loss": 0.0014, "step": 51790 }, { "epoch": 0.4236006051437216, "grad_norm": 0.2547455132007599, "learning_rate": 9.624057218993456e-06, "loss": 0.0031, "step": 51800 }, { "epoch": 0.42368238132232083, "grad_norm": 0.1922292560338974, "learning_rate": 9.623785687950615e-06, "loss": 0.0041, "step": 51810 }, { "epoch": 0.42376415750092, "grad_norm": 0.1475137323141098, "learning_rate": 9.623514062717503e-06, "loss": 0.0025, "step": 51820 }, { "epoch": 0.42384593367951917, "grad_norm": 0.12390846014022827, "learning_rate": 9.623242343299651e-06, "loss": 0.0027, "step": 51830 }, { "epoch": 0.42392770985811834, "grad_norm": 0.051787182688713074, "learning_rate": 9.622970529702593e-06, "loss": 0.0022, "step": 51840 }, { "epoch": 0.4240094860367175, "grad_norm": 0.12167061865329742, "learning_rate": 9.622698621931868e-06, "loss": 0.0026, "step": 51850 }, { "epoch": 0.4240912622153167, "grad_norm": 0.046116266399621964, "learning_rate": 9.622426619993015e-06, "loss": 0.0026, "step": 51860 }, { "epoch": 0.42417303839391585, "grad_norm": 0.1848669797182083, "learning_rate": 9.622154523891575e-06, "loss": 0.0017, "step": 51870 }, { "epoch": 0.424254814572515, "grad_norm": 0.072219118475914, "learning_rate": 9.62188233363309e-06, "loss": 0.0035, "step": 51880 }, { "epoch": 0.4243365907511142, "grad_norm": 0.15445971488952637, "learning_rate": 9.621610049223104e-06, "loss": 0.003, "step": 51890 }, { "epoch": 0.42441836692971335, "grad_norm": 0.1170969307422638, "learning_rate": 9.621337670667166e-06, "loss": 0.0028, "step": 51900 }, { "epoch": 0.4245001431083125, "grad_norm": 0.05353034287691116, "learning_rate": 9.621065197970822e-06, "loss": 0.0031, "step": 51910 }, { "epoch": 0.42458191928691175, "grad_norm": 0.23615792393684387, "learning_rate": 9.620792631139625e-06, "loss": 0.0034, "step": 51920 }, { "epoch": 0.4246636954655109, "grad_norm": 0.06913977861404419, "learning_rate": 9.620519970179126e-06, "loss": 0.0028, "step": 51930 }, { "epoch": 0.4247454716441101, "grad_norm": 0.07605033367872238, "learning_rate": 9.62024721509488e-06, "loss": 0.002, "step": 51940 }, { "epoch": 0.42482724782270925, "grad_norm": 0.12799669802188873, "learning_rate": 9.619974365892441e-06, "loss": 0.0015, "step": 51950 }, { "epoch": 0.4249090240013084, "grad_norm": 0.16052624583244324, "learning_rate": 9.61970142257737e-06, "loss": 0.0026, "step": 51960 }, { "epoch": 0.4249908001799076, "grad_norm": 0.07204882800579071, "learning_rate": 9.619428385155225e-06, "loss": 0.0028, "step": 51970 }, { "epoch": 0.42507257635850676, "grad_norm": 0.14046181738376617, "learning_rate": 9.61915525363157e-06, "loss": 0.0016, "step": 51980 }, { "epoch": 0.42515435253710593, "grad_norm": 0.09761647880077362, "learning_rate": 9.618882028011967e-06, "loss": 0.003, "step": 51990 }, { "epoch": 0.4252361287157051, "grad_norm": 0.10944435000419617, "learning_rate": 9.618608708301983e-06, "loss": 0.003, "step": 52000 }, { "epoch": 0.42531790489430427, "grad_norm": 0.11816328763961792, "learning_rate": 9.618335294507186e-06, "loss": 0.003, "step": 52010 }, { "epoch": 0.4253996810729035, "grad_norm": 0.13653475046157837, "learning_rate": 9.618061786633144e-06, "loss": 0.0021, "step": 52020 }, { "epoch": 0.42548145725150266, "grad_norm": 0.012605582363903522, "learning_rate": 9.617788184685431e-06, "loss": 0.0015, "step": 52030 }, { "epoch": 0.42556323343010183, "grad_norm": 0.3527822196483612, "learning_rate": 9.617514488669617e-06, "loss": 0.0109, "step": 52040 }, { "epoch": 0.425645009608701, "grad_norm": 0.10098440200090408, "learning_rate": 9.617240698591282e-06, "loss": 0.0021, "step": 52050 }, { "epoch": 0.42572678578730017, "grad_norm": 0.10057394951581955, "learning_rate": 9.616966814455999e-06, "loss": 0.0035, "step": 52060 }, { "epoch": 0.42580856196589933, "grad_norm": 0.06913909316062927, "learning_rate": 9.61669283626935e-06, "loss": 0.0028, "step": 52070 }, { "epoch": 0.4258903381444985, "grad_norm": 0.04333658888936043, "learning_rate": 9.616418764036914e-06, "loss": 0.003, "step": 52080 }, { "epoch": 0.4259721143230977, "grad_norm": 0.010671486146748066, "learning_rate": 9.616144597764278e-06, "loss": 0.0022, "step": 52090 }, { "epoch": 0.42605389050169684, "grad_norm": 0.33922383189201355, "learning_rate": 9.615870337457023e-06, "loss": 0.0045, "step": 52100 }, { "epoch": 0.426135666680296, "grad_norm": 0.013292163610458374, "learning_rate": 9.615595983120736e-06, "loss": 0.0023, "step": 52110 }, { "epoch": 0.4262174428588952, "grad_norm": 0.16826003789901733, "learning_rate": 9.615321534761009e-06, "loss": 0.0041, "step": 52120 }, { "epoch": 0.4262992190374944, "grad_norm": 0.06934871524572372, "learning_rate": 9.61504699238343e-06, "loss": 0.0064, "step": 52130 }, { "epoch": 0.4263809952160936, "grad_norm": 0.06606709212064743, "learning_rate": 9.614772355993592e-06, "loss": 0.0025, "step": 52140 }, { "epoch": 0.42646277139469274, "grad_norm": 0.08962398022413254, "learning_rate": 9.61449762559709e-06, "loss": 0.0032, "step": 52150 }, { "epoch": 0.4265445475732919, "grad_norm": 0.03319687768816948, "learning_rate": 9.614222801199522e-06, "loss": 0.0024, "step": 52160 }, { "epoch": 0.4266263237518911, "grad_norm": 0.08752071112394333, "learning_rate": 9.613947882806484e-06, "loss": 0.0032, "step": 52170 }, { "epoch": 0.42670809993049025, "grad_norm": 0.07931403815746307, "learning_rate": 9.613672870423577e-06, "loss": 0.0024, "step": 52180 }, { "epoch": 0.4267898761090894, "grad_norm": 0.08022644370794296, "learning_rate": 9.613397764056404e-06, "loss": 0.0024, "step": 52190 }, { "epoch": 0.4268716522876886, "grad_norm": 0.08697712421417236, "learning_rate": 9.613122563710567e-06, "loss": 0.0025, "step": 52200 }, { "epoch": 0.42695342846628775, "grad_norm": 0.35527512431144714, "learning_rate": 9.612847269391676e-06, "loss": 0.0032, "step": 52210 }, { "epoch": 0.4270352046448869, "grad_norm": 0.11337387561798096, "learning_rate": 9.612571881105335e-06, "loss": 0.0016, "step": 52220 }, { "epoch": 0.4271169808234861, "grad_norm": 0.022628523409366608, "learning_rate": 9.612296398857156e-06, "loss": 0.0045, "step": 52230 }, { "epoch": 0.4271987570020853, "grad_norm": 0.16449983417987823, "learning_rate": 9.61202082265275e-06, "loss": 0.0034, "step": 52240 }, { "epoch": 0.4272805331806845, "grad_norm": 0.08868449181318283, "learning_rate": 9.611745152497731e-06, "loss": 0.0024, "step": 52250 }, { "epoch": 0.42736230935928365, "grad_norm": 0.265056312084198, "learning_rate": 9.611469388397715e-06, "loss": 0.0023, "step": 52260 }, { "epoch": 0.4274440855378828, "grad_norm": 0.12264197319746017, "learning_rate": 9.611193530358319e-06, "loss": 0.0051, "step": 52270 }, { "epoch": 0.427525861716482, "grad_norm": 0.015316539444029331, "learning_rate": 9.610917578385163e-06, "loss": 0.0024, "step": 52280 }, { "epoch": 0.42760763789508116, "grad_norm": 0.026318348944187164, "learning_rate": 9.610641532483867e-06, "loss": 0.0012, "step": 52290 }, { "epoch": 0.42768941407368033, "grad_norm": 0.07442181557416916, "learning_rate": 9.610365392660057e-06, "loss": 0.0033, "step": 52300 }, { "epoch": 0.4277711902522795, "grad_norm": 0.013479862362146378, "learning_rate": 9.610089158919354e-06, "loss": 0.0014, "step": 52310 }, { "epoch": 0.42785296643087867, "grad_norm": 0.2173176407814026, "learning_rate": 9.60981283126739e-06, "loss": 0.002, "step": 52320 }, { "epoch": 0.42793474260947784, "grad_norm": 0.21361452341079712, "learning_rate": 9.60953640970979e-06, "loss": 0.004, "step": 52330 }, { "epoch": 0.42801651878807706, "grad_norm": 0.02223058231174946, "learning_rate": 9.609259894252187e-06, "loss": 0.0021, "step": 52340 }, { "epoch": 0.42809829496667623, "grad_norm": 0.1078457236289978, "learning_rate": 9.608983284900214e-06, "loss": 0.0043, "step": 52350 }, { "epoch": 0.4281800711452754, "grad_norm": 0.15884146094322205, "learning_rate": 9.608706581659503e-06, "loss": 0.0032, "step": 52360 }, { "epoch": 0.42826184732387457, "grad_norm": 0.022704442963004112, "learning_rate": 9.608429784535696e-06, "loss": 0.0027, "step": 52370 }, { "epoch": 0.42834362350247374, "grad_norm": 0.04113931581377983, "learning_rate": 9.608152893534425e-06, "loss": 0.0016, "step": 52380 }, { "epoch": 0.4284253996810729, "grad_norm": 0.06325416266918182, "learning_rate": 9.607875908661336e-06, "loss": 0.0016, "step": 52390 }, { "epoch": 0.4285071758596721, "grad_norm": 0.1396055370569229, "learning_rate": 9.607598829922069e-06, "loss": 0.0036, "step": 52400 }, { "epoch": 0.42858895203827124, "grad_norm": 0.012205753475427628, "learning_rate": 9.607321657322268e-06, "loss": 0.0037, "step": 52410 }, { "epoch": 0.4286707282168704, "grad_norm": 0.013365445658564568, "learning_rate": 9.607044390867582e-06, "loss": 0.0016, "step": 52420 }, { "epoch": 0.4287525043954696, "grad_norm": 0.06041201949119568, "learning_rate": 9.606767030563655e-06, "loss": 0.0022, "step": 52430 }, { "epoch": 0.42883428057406875, "grad_norm": 0.07377098500728607, "learning_rate": 9.606489576416138e-06, "loss": 0.0034, "step": 52440 }, { "epoch": 0.428916056752668, "grad_norm": 0.06801575422286987, "learning_rate": 9.606212028430685e-06, "loss": 0.0019, "step": 52450 }, { "epoch": 0.42899783293126714, "grad_norm": 0.026289334520697594, "learning_rate": 9.60593438661295e-06, "loss": 0.0033, "step": 52460 }, { "epoch": 0.4290796091098663, "grad_norm": 0.07431306689977646, "learning_rate": 9.605656650968586e-06, "loss": 0.0027, "step": 52470 }, { "epoch": 0.4291613852884655, "grad_norm": 0.1942513883113861, "learning_rate": 9.605378821503253e-06, "loss": 0.0025, "step": 52480 }, { "epoch": 0.42924316146706465, "grad_norm": 0.05712141469120979, "learning_rate": 9.60510089822261e-06, "loss": 0.0026, "step": 52490 }, { "epoch": 0.4293249376456638, "grad_norm": 0.1578381061553955, "learning_rate": 9.604822881132319e-06, "loss": 0.0025, "step": 52500 }, { "epoch": 0.429406713824263, "grad_norm": 0.044690459966659546, "learning_rate": 9.604544770238043e-06, "loss": 0.0017, "step": 52510 }, { "epoch": 0.42948849000286216, "grad_norm": 0.08603311330080032, "learning_rate": 9.604266565545446e-06, "loss": 0.003, "step": 52520 }, { "epoch": 0.4295702661814613, "grad_norm": 0.17853452265262604, "learning_rate": 9.603988267060197e-06, "loss": 0.002, "step": 52530 }, { "epoch": 0.4296520423600605, "grad_norm": 0.12430351227521896, "learning_rate": 9.603709874787966e-06, "loss": 0.0031, "step": 52540 }, { "epoch": 0.42973381853865966, "grad_norm": 0.03151608258485794, "learning_rate": 9.603431388734421e-06, "loss": 0.0032, "step": 52550 }, { "epoch": 0.4298155947172589, "grad_norm": 0.013243505731225014, "learning_rate": 9.603152808905237e-06, "loss": 0.0033, "step": 52560 }, { "epoch": 0.42989737089585806, "grad_norm": 0.13987578451633453, "learning_rate": 9.602874135306088e-06, "loss": 0.0027, "step": 52570 }, { "epoch": 0.4299791470744572, "grad_norm": 0.024984337389469147, "learning_rate": 9.602595367942654e-06, "loss": 0.0029, "step": 52580 }, { "epoch": 0.4300609232530564, "grad_norm": 0.048629939556121826, "learning_rate": 9.602316506820609e-06, "loss": 0.0032, "step": 52590 }, { "epoch": 0.43014269943165556, "grad_norm": 0.05912444368004799, "learning_rate": 9.602037551945637e-06, "loss": 0.0027, "step": 52600 }, { "epoch": 0.43022447561025473, "grad_norm": 0.1285461038351059, "learning_rate": 9.601758503323418e-06, "loss": 0.0031, "step": 52610 }, { "epoch": 0.4303062517888539, "grad_norm": 0.004457474686205387, "learning_rate": 9.601479360959638e-06, "loss": 0.0031, "step": 52620 }, { "epoch": 0.43038802796745307, "grad_norm": 0.07187536358833313, "learning_rate": 9.601200124859985e-06, "loss": 0.002, "step": 52630 }, { "epoch": 0.43046980414605224, "grad_norm": 0.04678862541913986, "learning_rate": 9.600920795030143e-06, "loss": 0.0024, "step": 52640 }, { "epoch": 0.4305515803246514, "grad_norm": 0.06523504108190536, "learning_rate": 9.600641371475807e-06, "loss": 0.0024, "step": 52650 }, { "epoch": 0.43063335650325063, "grad_norm": 0.10904543846845627, "learning_rate": 9.600361854202665e-06, "loss": 0.0034, "step": 52660 }, { "epoch": 0.4307151326818498, "grad_norm": 0.04748097434639931, "learning_rate": 9.600082243216413e-06, "loss": 0.003, "step": 52670 }, { "epoch": 0.43079690886044897, "grad_norm": 0.1421944797039032, "learning_rate": 9.599802538522746e-06, "loss": 0.0028, "step": 52680 }, { "epoch": 0.43087868503904814, "grad_norm": 0.21498918533325195, "learning_rate": 9.599522740127364e-06, "loss": 0.0031, "step": 52690 }, { "epoch": 0.4309604612176473, "grad_norm": 0.09024567157030106, "learning_rate": 9.599242848035964e-06, "loss": 0.0032, "step": 52700 }, { "epoch": 0.4310422373962465, "grad_norm": 0.08886733651161194, "learning_rate": 9.59896286225425e-06, "loss": 0.0018, "step": 52710 }, { "epoch": 0.43112401357484564, "grad_norm": 0.030189795419573784, "learning_rate": 9.598682782787923e-06, "loss": 0.0021, "step": 52720 }, { "epoch": 0.4312057897534448, "grad_norm": 0.15356627106666565, "learning_rate": 9.59840260964269e-06, "loss": 0.0035, "step": 52730 }, { "epoch": 0.431287565932044, "grad_norm": 0.21269264817237854, "learning_rate": 9.598122342824258e-06, "loss": 0.0023, "step": 52740 }, { "epoch": 0.43136934211064315, "grad_norm": 0.11701948195695877, "learning_rate": 9.597841982338336e-06, "loss": 0.0013, "step": 52750 }, { "epoch": 0.4314511182892423, "grad_norm": 0.058789897710084915, "learning_rate": 9.597561528190635e-06, "loss": 0.0022, "step": 52760 }, { "epoch": 0.43153289446784154, "grad_norm": 0.024778129532933235, "learning_rate": 9.59728098038687e-06, "loss": 0.0014, "step": 52770 }, { "epoch": 0.4316146706464407, "grad_norm": 0.11849121749401093, "learning_rate": 9.597000338932754e-06, "loss": 0.0024, "step": 52780 }, { "epoch": 0.4316964468250399, "grad_norm": 0.0890142023563385, "learning_rate": 9.596719603834004e-06, "loss": 0.0051, "step": 52790 }, { "epoch": 0.43177822300363905, "grad_norm": 0.06551936268806458, "learning_rate": 9.596438775096339e-06, "loss": 0.0028, "step": 52800 }, { "epoch": 0.4318599991822382, "grad_norm": 0.3158327043056488, "learning_rate": 9.596157852725482e-06, "loss": 0.0049, "step": 52810 }, { "epoch": 0.4319417753608374, "grad_norm": 0.06427568197250366, "learning_rate": 9.595876836727152e-06, "loss": 0.0025, "step": 52820 }, { "epoch": 0.43202355153943656, "grad_norm": 0.07462857663631439, "learning_rate": 9.595595727107075e-06, "loss": 0.0018, "step": 52830 }, { "epoch": 0.4321053277180357, "grad_norm": 0.10149656981229782, "learning_rate": 9.595314523870978e-06, "loss": 0.0016, "step": 52840 }, { "epoch": 0.4321871038966349, "grad_norm": 0.12651893496513367, "learning_rate": 9.59503322702459e-06, "loss": 0.0025, "step": 52850 }, { "epoch": 0.43226888007523406, "grad_norm": 0.00893044751137495, "learning_rate": 9.594751836573638e-06, "loss": 0.0035, "step": 52860 }, { "epoch": 0.43235065625383323, "grad_norm": 0.02782508358359337, "learning_rate": 9.594470352523856e-06, "loss": 0.0031, "step": 52870 }, { "epoch": 0.43243243243243246, "grad_norm": 0.17687560617923737, "learning_rate": 9.594188774880981e-06, "loss": 0.0037, "step": 52880 }, { "epoch": 0.4325142086110316, "grad_norm": 0.07504795491695404, "learning_rate": 9.593907103650745e-06, "loss": 0.0034, "step": 52890 }, { "epoch": 0.4325959847896308, "grad_norm": 0.0496535487473011, "learning_rate": 9.593625338838887e-06, "loss": 0.0026, "step": 52900 }, { "epoch": 0.43267776096822996, "grad_norm": 0.24559204280376434, "learning_rate": 9.593343480451148e-06, "loss": 0.0028, "step": 52910 }, { "epoch": 0.43275953714682913, "grad_norm": 0.08556503802537918, "learning_rate": 9.593061528493266e-06, "loss": 0.0027, "step": 52920 }, { "epoch": 0.4328413133254283, "grad_norm": 0.06467261910438538, "learning_rate": 9.59277948297099e-06, "loss": 0.0031, "step": 52930 }, { "epoch": 0.43292308950402747, "grad_norm": 0.20683953166007996, "learning_rate": 9.59249734389006e-06, "loss": 0.0029, "step": 52940 }, { "epoch": 0.43300486568262664, "grad_norm": 0.032373473048210144, "learning_rate": 9.59221511125623e-06, "loss": 0.0034, "step": 52950 }, { "epoch": 0.4330866418612258, "grad_norm": 0.0062461132183671, "learning_rate": 9.591932785075241e-06, "loss": 0.0015, "step": 52960 }, { "epoch": 0.433168418039825, "grad_norm": 0.09075973927974701, "learning_rate": 9.591650365352852e-06, "loss": 0.0025, "step": 52970 }, { "epoch": 0.4332501942184242, "grad_norm": 0.1270720660686493, "learning_rate": 9.591367852094812e-06, "loss": 0.0031, "step": 52980 }, { "epoch": 0.43333197039702337, "grad_norm": 0.13644854724407196, "learning_rate": 9.591085245306877e-06, "loss": 0.0022, "step": 52990 }, { "epoch": 0.43341374657562254, "grad_norm": 0.19950859248638153, "learning_rate": 9.590802544994803e-06, "loss": 0.0024, "step": 53000 }, { "epoch": 0.4334955227542217, "grad_norm": 0.08379137516021729, "learning_rate": 9.590519751164348e-06, "loss": 0.0031, "step": 53010 }, { "epoch": 0.4335772989328209, "grad_norm": 0.09867610037326813, "learning_rate": 9.590236863821278e-06, "loss": 0.0026, "step": 53020 }, { "epoch": 0.43365907511142004, "grad_norm": 0.12122591584920883, "learning_rate": 9.58995388297135e-06, "loss": 0.0022, "step": 53030 }, { "epoch": 0.4337408512900192, "grad_norm": 0.11788962781429291, "learning_rate": 9.58967080862033e-06, "loss": 0.0025, "step": 53040 }, { "epoch": 0.4338226274686184, "grad_norm": 0.03607519343495369, "learning_rate": 9.589387640773987e-06, "loss": 0.0029, "step": 53050 }, { "epoch": 0.43390440364721755, "grad_norm": 0.08185256272554398, "learning_rate": 9.589104379438084e-06, "loss": 0.0026, "step": 53060 }, { "epoch": 0.4339861798258167, "grad_norm": 0.08836677670478821, "learning_rate": 9.588821024618397e-06, "loss": 0.0023, "step": 53070 }, { "epoch": 0.4340679560044159, "grad_norm": 0.10559635609388351, "learning_rate": 9.588537576320695e-06, "loss": 0.0017, "step": 53080 }, { "epoch": 0.4341497321830151, "grad_norm": 0.057505588978528976, "learning_rate": 9.588254034550752e-06, "loss": 0.0029, "step": 53090 }, { "epoch": 0.4342315083616143, "grad_norm": 0.07588637620210648, "learning_rate": 9.587970399314345e-06, "loss": 0.0023, "step": 53100 }, { "epoch": 0.43431328454021345, "grad_norm": 0.10530401021242142, "learning_rate": 9.587686670617253e-06, "loss": 0.0025, "step": 53110 }, { "epoch": 0.4343950607188126, "grad_norm": 0.09016682952642441, "learning_rate": 9.587402848465254e-06, "loss": 0.0017, "step": 53120 }, { "epoch": 0.4344768368974118, "grad_norm": 0.029488449916243553, "learning_rate": 9.58711893286413e-06, "loss": 0.002, "step": 53130 }, { "epoch": 0.43455861307601096, "grad_norm": 0.1140042170882225, "learning_rate": 9.586834923819666e-06, "loss": 0.0017, "step": 53140 }, { "epoch": 0.4346403892546101, "grad_norm": 0.11806608736515045, "learning_rate": 9.586550821337644e-06, "loss": 0.0026, "step": 53150 }, { "epoch": 0.4347221654332093, "grad_norm": 0.1965561807155609, "learning_rate": 9.586266625423855e-06, "loss": 0.002, "step": 53160 }, { "epoch": 0.43480394161180846, "grad_norm": 0.10041958838701248, "learning_rate": 9.585982336084086e-06, "loss": 0.0033, "step": 53170 }, { "epoch": 0.43488571779040763, "grad_norm": 0.08159169554710388, "learning_rate": 9.58569795332413e-06, "loss": 0.0025, "step": 53180 }, { "epoch": 0.4349674939690068, "grad_norm": 0.0600995272397995, "learning_rate": 9.585413477149779e-06, "loss": 0.0027, "step": 53190 }, { "epoch": 0.435049270147606, "grad_norm": 0.03445339575409889, "learning_rate": 9.585128907566828e-06, "loss": 0.0019, "step": 53200 }, { "epoch": 0.4351310463262052, "grad_norm": 0.07174636423587799, "learning_rate": 9.584844244581076e-06, "loss": 0.002, "step": 53210 }, { "epoch": 0.43521282250480436, "grad_norm": 0.11857567727565765, "learning_rate": 9.584559488198318e-06, "loss": 0.0051, "step": 53220 }, { "epoch": 0.43529459868340353, "grad_norm": 0.0904809981584549, "learning_rate": 9.584274638424356e-06, "loss": 0.006, "step": 53230 }, { "epoch": 0.4353763748620027, "grad_norm": 0.08589286357164383, "learning_rate": 9.583989695264994e-06, "loss": 0.0024, "step": 53240 }, { "epoch": 0.43545815104060187, "grad_norm": 0.14789095520973206, "learning_rate": 9.583704658726036e-06, "loss": 0.0031, "step": 53250 }, { "epoch": 0.43553992721920104, "grad_norm": 0.07719441503286362, "learning_rate": 9.58341952881329e-06, "loss": 0.0026, "step": 53260 }, { "epoch": 0.4356217033978002, "grad_norm": 0.08672844618558884, "learning_rate": 9.583134305532561e-06, "loss": 0.0021, "step": 53270 }, { "epoch": 0.4357034795763994, "grad_norm": 0.04601481929421425, "learning_rate": 9.582848988889662e-06, "loss": 0.0029, "step": 53280 }, { "epoch": 0.43578525575499855, "grad_norm": 0.11890078336000443, "learning_rate": 9.582563578890401e-06, "loss": 0.0035, "step": 53290 }, { "epoch": 0.43586703193359777, "grad_norm": 0.027203183621168137, "learning_rate": 9.582278075540598e-06, "loss": 0.0022, "step": 53300 }, { "epoch": 0.43594880811219694, "grad_norm": 0.06280023604631424, "learning_rate": 9.581992478846066e-06, "loss": 0.0015, "step": 53310 }, { "epoch": 0.4360305842907961, "grad_norm": 0.038822490721940994, "learning_rate": 9.58170678881262e-06, "loss": 0.0011, "step": 53320 }, { "epoch": 0.4361123604693953, "grad_norm": 0.20002441108226776, "learning_rate": 9.581421005446085e-06, "loss": 0.0041, "step": 53330 }, { "epoch": 0.43619413664799445, "grad_norm": 0.11164430528879166, "learning_rate": 9.581135128752279e-06, "loss": 0.0018, "step": 53340 }, { "epoch": 0.4362759128265936, "grad_norm": 0.2744261920452118, "learning_rate": 9.580849158737026e-06, "loss": 0.0032, "step": 53350 }, { "epoch": 0.4363576890051928, "grad_norm": 0.07573570311069489, "learning_rate": 9.580563095406154e-06, "loss": 0.0052, "step": 53360 }, { "epoch": 0.43643946518379195, "grad_norm": 0.09840934723615646, "learning_rate": 9.580276938765487e-06, "loss": 0.0022, "step": 53370 }, { "epoch": 0.4365212413623911, "grad_norm": 0.04945297911763191, "learning_rate": 9.579990688820858e-06, "loss": 0.0019, "step": 53380 }, { "epoch": 0.4366030175409903, "grad_norm": 0.08675084263086319, "learning_rate": 9.579704345578094e-06, "loss": 0.0028, "step": 53390 }, { "epoch": 0.43668479371958946, "grad_norm": 0.10919281840324402, "learning_rate": 9.57941790904303e-06, "loss": 0.002, "step": 53400 }, { "epoch": 0.4367665698981887, "grad_norm": 0.02776981145143509, "learning_rate": 9.5791313792215e-06, "loss": 0.002, "step": 53410 }, { "epoch": 0.43684834607678785, "grad_norm": 0.08692323416471481, "learning_rate": 9.578844756119343e-06, "loss": 0.0066, "step": 53420 }, { "epoch": 0.436930122255387, "grad_norm": 0.21485140919685364, "learning_rate": 9.578558039742396e-06, "loss": 0.0029, "step": 53430 }, { "epoch": 0.4370118984339862, "grad_norm": 0.1495879590511322, "learning_rate": 9.5782712300965e-06, "loss": 0.0019, "step": 53440 }, { "epoch": 0.43709367461258536, "grad_norm": 0.06472449004650116, "learning_rate": 9.577984327187498e-06, "loss": 0.0027, "step": 53450 }, { "epoch": 0.43717545079118453, "grad_norm": 0.05533613637089729, "learning_rate": 9.577697331021235e-06, "loss": 0.0042, "step": 53460 }, { "epoch": 0.4372572269697837, "grad_norm": 0.07105991244316101, "learning_rate": 9.577410241603556e-06, "loss": 0.0041, "step": 53470 }, { "epoch": 0.43733900314838287, "grad_norm": 0.07769479602575302, "learning_rate": 9.577123058940308e-06, "loss": 0.0036, "step": 53480 }, { "epoch": 0.43742077932698203, "grad_norm": 0.05074622854590416, "learning_rate": 9.576835783037343e-06, "loss": 0.0013, "step": 53490 }, { "epoch": 0.4375025555055812, "grad_norm": 0.0535598024725914, "learning_rate": 9.576548413900514e-06, "loss": 0.0018, "step": 53500 }, { "epoch": 0.43758433168418037, "grad_norm": 0.06848009675741196, "learning_rate": 9.576260951535673e-06, "loss": 0.0054, "step": 53510 }, { "epoch": 0.4376661078627796, "grad_norm": 0.14960017800331116, "learning_rate": 9.575973395948677e-06, "loss": 0.002, "step": 53520 }, { "epoch": 0.43774788404137877, "grad_norm": 0.07976007461547852, "learning_rate": 9.575685747145385e-06, "loss": 0.0021, "step": 53530 }, { "epoch": 0.43782966021997793, "grad_norm": 0.09997786581516266, "learning_rate": 9.575398005131654e-06, "loss": 0.0025, "step": 53540 }, { "epoch": 0.4379114363985771, "grad_norm": 0.048926930874586105, "learning_rate": 9.575110169913344e-06, "loss": 0.0021, "step": 53550 }, { "epoch": 0.43799321257717627, "grad_norm": 0.054424185305833817, "learning_rate": 9.574822241496324e-06, "loss": 0.0016, "step": 53560 }, { "epoch": 0.43807498875577544, "grad_norm": 0.12222295254468918, "learning_rate": 9.574534219886456e-06, "loss": 0.0031, "step": 53570 }, { "epoch": 0.4381567649343746, "grad_norm": 0.0471731536090374, "learning_rate": 9.574246105089606e-06, "loss": 0.003, "step": 53580 }, { "epoch": 0.4382385411129738, "grad_norm": 0.022361472249031067, "learning_rate": 9.573957897111647e-06, "loss": 0.003, "step": 53590 }, { "epoch": 0.43832031729157295, "grad_norm": 0.03998541086912155, "learning_rate": 9.573669595958446e-06, "loss": 0.0018, "step": 53600 }, { "epoch": 0.4384020934701721, "grad_norm": 0.02707846835255623, "learning_rate": 9.573381201635878e-06, "loss": 0.0025, "step": 53610 }, { "epoch": 0.43848386964877134, "grad_norm": 0.21161578595638275, "learning_rate": 9.573092714149817e-06, "loss": 0.0036, "step": 53620 }, { "epoch": 0.4385656458273705, "grad_norm": 0.16012053191661835, "learning_rate": 9.572804133506142e-06, "loss": 0.0022, "step": 53630 }, { "epoch": 0.4386474220059697, "grad_norm": 0.11577188968658447, "learning_rate": 9.572515459710728e-06, "loss": 0.0028, "step": 53640 }, { "epoch": 0.43872919818456885, "grad_norm": 0.07057441771030426, "learning_rate": 9.572226692769458e-06, "loss": 0.0015, "step": 53650 }, { "epoch": 0.438810974363168, "grad_norm": 0.04354610666632652, "learning_rate": 9.571937832688214e-06, "loss": 0.0032, "step": 53660 }, { "epoch": 0.4388927505417672, "grad_norm": 0.11708029359579086, "learning_rate": 9.57164887947288e-06, "loss": 0.0019, "step": 53670 }, { "epoch": 0.43897452672036635, "grad_norm": 0.055426422506570816, "learning_rate": 9.571359833129343e-06, "loss": 0.0018, "step": 53680 }, { "epoch": 0.4390563028989655, "grad_norm": 0.1268301159143448, "learning_rate": 9.571070693663489e-06, "loss": 0.0025, "step": 53690 }, { "epoch": 0.4391380790775647, "grad_norm": 0.01493656262755394, "learning_rate": 9.57078146108121e-06, "loss": 0.0026, "step": 53700 }, { "epoch": 0.43921985525616386, "grad_norm": 0.059253428131341934, "learning_rate": 9.570492135388397e-06, "loss": 0.0016, "step": 53710 }, { "epoch": 0.43930163143476303, "grad_norm": 0.07830413430929184, "learning_rate": 9.570202716590943e-06, "loss": 0.0023, "step": 53720 }, { "epoch": 0.43938340761336225, "grad_norm": 0.12100634723901749, "learning_rate": 9.569913204694748e-06, "loss": 0.0021, "step": 53730 }, { "epoch": 0.4394651837919614, "grad_norm": 0.08469925075769424, "learning_rate": 9.569623599705703e-06, "loss": 0.0029, "step": 53740 }, { "epoch": 0.4395469599705606, "grad_norm": 0.03931872174143791, "learning_rate": 9.569333901629713e-06, "loss": 0.001, "step": 53750 }, { "epoch": 0.43962873614915976, "grad_norm": 0.057528458535671234, "learning_rate": 9.569044110472678e-06, "loss": 0.0053, "step": 53760 }, { "epoch": 0.43971051232775893, "grad_norm": 0.11400420218706131, "learning_rate": 9.5687542262405e-06, "loss": 0.0033, "step": 53770 }, { "epoch": 0.4397922885063581, "grad_norm": 0.07038304954767227, "learning_rate": 9.568464248939085e-06, "loss": 0.0025, "step": 53780 }, { "epoch": 0.43987406468495727, "grad_norm": 0.023871375247836113, "learning_rate": 9.568174178574337e-06, "loss": 0.0026, "step": 53790 }, { "epoch": 0.43995584086355644, "grad_norm": 0.06018810719251633, "learning_rate": 9.56788401515217e-06, "loss": 0.0017, "step": 53800 }, { "epoch": 0.4400376170421556, "grad_norm": 0.04608527943491936, "learning_rate": 9.567593758678492e-06, "loss": 0.0018, "step": 53810 }, { "epoch": 0.4401193932207548, "grad_norm": 0.00816794577986002, "learning_rate": 9.567303409159218e-06, "loss": 0.0023, "step": 53820 }, { "epoch": 0.44020116939935394, "grad_norm": 0.06518129259347916, "learning_rate": 9.56701296660026e-06, "loss": 0.0023, "step": 53830 }, { "epoch": 0.44028294557795317, "grad_norm": 0.10967984795570374, "learning_rate": 9.566722431007536e-06, "loss": 0.0023, "step": 53840 }, { "epoch": 0.44036472175655234, "grad_norm": 0.1236552968621254, "learning_rate": 9.566431802386962e-06, "loss": 0.0075, "step": 53850 }, { "epoch": 0.4404464979351515, "grad_norm": 0.025396566838026047, "learning_rate": 9.566141080744463e-06, "loss": 0.0034, "step": 53860 }, { "epoch": 0.4405282741137507, "grad_norm": 0.04760859161615372, "learning_rate": 9.565850266085957e-06, "loss": 0.0058, "step": 53870 }, { "epoch": 0.44061005029234984, "grad_norm": 0.0544489361345768, "learning_rate": 9.565559358417372e-06, "loss": 0.0028, "step": 53880 }, { "epoch": 0.440691826470949, "grad_norm": 0.06694237887859344, "learning_rate": 9.56526835774463e-06, "loss": 0.0022, "step": 53890 }, { "epoch": 0.4407736026495482, "grad_norm": 0.0917840301990509, "learning_rate": 9.564977264073661e-06, "loss": 0.0017, "step": 53900 }, { "epoch": 0.44085537882814735, "grad_norm": 0.051490966230630875, "learning_rate": 9.564686077410394e-06, "loss": 0.0024, "step": 53910 }, { "epoch": 0.4409371550067465, "grad_norm": 0.019790198653936386, "learning_rate": 9.564394797760761e-06, "loss": 0.0013, "step": 53920 }, { "epoch": 0.4410189311853457, "grad_norm": 0.316768616437912, "learning_rate": 9.564103425130697e-06, "loss": 0.0049, "step": 53930 }, { "epoch": 0.4411007073639449, "grad_norm": 0.05014091357588768, "learning_rate": 9.563811959526134e-06, "loss": 0.0024, "step": 53940 }, { "epoch": 0.4411824835425441, "grad_norm": 0.058820921927690506, "learning_rate": 9.563520400953014e-06, "loss": 0.0027, "step": 53950 }, { "epoch": 0.44126425972114325, "grad_norm": 0.16007426381111145, "learning_rate": 9.563228749417274e-06, "loss": 0.0024, "step": 53960 }, { "epoch": 0.4413460358997424, "grad_norm": 0.10336918383836746, "learning_rate": 9.562937004924855e-06, "loss": 0.0028, "step": 53970 }, { "epoch": 0.4414278120783416, "grad_norm": 0.09246165305376053, "learning_rate": 9.5626451674817e-06, "loss": 0.0024, "step": 53980 }, { "epoch": 0.44150958825694075, "grad_norm": 0.08218418061733246, "learning_rate": 9.562353237093756e-06, "loss": 0.0007, "step": 53990 }, { "epoch": 0.4415913644355399, "grad_norm": 0.13466079533100128, "learning_rate": 9.562061213766965e-06, "loss": 0.0027, "step": 54000 }, { "epoch": 0.4416731406141391, "grad_norm": 0.1570388525724411, "learning_rate": 9.561769097507281e-06, "loss": 0.0026, "step": 54010 }, { "epoch": 0.44175491679273826, "grad_norm": 0.03955541178584099, "learning_rate": 9.561476888320651e-06, "loss": 0.002, "step": 54020 }, { "epoch": 0.44183669297133743, "grad_norm": 0.05887533351778984, "learning_rate": 9.561184586213031e-06, "loss": 0.0031, "step": 54030 }, { "epoch": 0.4419184691499366, "grad_norm": 0.0758318305015564, "learning_rate": 9.560892191190371e-06, "loss": 0.0034, "step": 54040 }, { "epoch": 0.4420002453285358, "grad_norm": 0.08238761872053146, "learning_rate": 9.56059970325863e-06, "loss": 0.0028, "step": 54050 }, { "epoch": 0.442082021507135, "grad_norm": 0.31052711606025696, "learning_rate": 9.560307122423769e-06, "loss": 0.0026, "step": 54060 }, { "epoch": 0.44216379768573416, "grad_norm": 0.05257100611925125, "learning_rate": 9.560014448691743e-06, "loss": 0.002, "step": 54070 }, { "epoch": 0.44224557386433333, "grad_norm": 0.04449637234210968, "learning_rate": 9.559721682068514e-06, "loss": 0.0015, "step": 54080 }, { "epoch": 0.4423273500429325, "grad_norm": 0.20625291764736176, "learning_rate": 9.559428822560051e-06, "loss": 0.0029, "step": 54090 }, { "epoch": 0.44240912622153167, "grad_norm": 0.04318041354417801, "learning_rate": 9.559135870172316e-06, "loss": 0.0021, "step": 54100 }, { "epoch": 0.44249090240013084, "grad_norm": 0.06999511271715164, "learning_rate": 9.558842824911278e-06, "loss": 0.0032, "step": 54110 }, { "epoch": 0.44257267857873, "grad_norm": 0.1030585840344429, "learning_rate": 9.558549686782905e-06, "loss": 0.0048, "step": 54120 }, { "epoch": 0.4426544547573292, "grad_norm": 0.06345333904027939, "learning_rate": 9.558256455793172e-06, "loss": 0.0022, "step": 54130 }, { "epoch": 0.44273623093592834, "grad_norm": 0.06824775040149689, "learning_rate": 9.557963131948048e-06, "loss": 0.0022, "step": 54140 }, { "epoch": 0.4428180071145275, "grad_norm": 0.005186682567000389, "learning_rate": 9.557669715253508e-06, "loss": 0.0017, "step": 54150 }, { "epoch": 0.44289978329312674, "grad_norm": 0.057212650775909424, "learning_rate": 9.557376205715535e-06, "loss": 0.0032, "step": 54160 }, { "epoch": 0.4429815594717259, "grad_norm": 0.03681696206331253, "learning_rate": 9.557082603340102e-06, "loss": 0.0028, "step": 54170 }, { "epoch": 0.4430633356503251, "grad_norm": 0.08450362086296082, "learning_rate": 9.556788908133191e-06, "loss": 0.0017, "step": 54180 }, { "epoch": 0.44314511182892424, "grad_norm": 0.05267033353447914, "learning_rate": 9.556495120100788e-06, "loss": 0.0025, "step": 54190 }, { "epoch": 0.4432268880075234, "grad_norm": 0.17672869563102722, "learning_rate": 9.556201239248877e-06, "loss": 0.0019, "step": 54200 }, { "epoch": 0.4433086641861226, "grad_norm": 0.0630650743842125, "learning_rate": 9.55590726558344e-06, "loss": 0.0015, "step": 54210 }, { "epoch": 0.44339044036472175, "grad_norm": 0.05000428482890129, "learning_rate": 9.555613199110472e-06, "loss": 0.0021, "step": 54220 }, { "epoch": 0.4434722165433209, "grad_norm": 0.053040314465761185, "learning_rate": 9.555319039835958e-06, "loss": 0.0016, "step": 54230 }, { "epoch": 0.4435539927219201, "grad_norm": 0.07280344516038895, "learning_rate": 9.555024787765893e-06, "loss": 0.0022, "step": 54240 }, { "epoch": 0.44363576890051926, "grad_norm": 0.1246943324804306, "learning_rate": 9.55473044290627e-06, "loss": 0.0038, "step": 54250 }, { "epoch": 0.4437175450791185, "grad_norm": 0.04086080938577652, "learning_rate": 9.554436005263086e-06, "loss": 0.0015, "step": 54260 }, { "epoch": 0.44379932125771765, "grad_norm": 0.015957502648234367, "learning_rate": 9.55414147484234e-06, "loss": 0.0018, "step": 54270 }, { "epoch": 0.4438810974363168, "grad_norm": 0.049487870186567307, "learning_rate": 9.553846851650028e-06, "loss": 0.0034, "step": 54280 }, { "epoch": 0.443962873614916, "grad_norm": 0.05261843651533127, "learning_rate": 9.553552135692156e-06, "loss": 0.0013, "step": 54290 }, { "epoch": 0.44404464979351516, "grad_norm": 0.0360620878636837, "learning_rate": 9.553257326974724e-06, "loss": 0.0023, "step": 54300 }, { "epoch": 0.4441264259721143, "grad_norm": 0.10629776120185852, "learning_rate": 9.552962425503741e-06, "loss": 0.0058, "step": 54310 }, { "epoch": 0.4442082021507135, "grad_norm": 0.0574653185904026, "learning_rate": 9.552667431285211e-06, "loss": 0.003, "step": 54320 }, { "epoch": 0.44428997832931266, "grad_norm": 0.028082970529794693, "learning_rate": 9.552372344325144e-06, "loss": 0.0013, "step": 54330 }, { "epoch": 0.44437175450791183, "grad_norm": 0.03954421728849411, "learning_rate": 9.552077164629556e-06, "loss": 0.0032, "step": 54340 }, { "epoch": 0.444453530686511, "grad_norm": 0.19937114417552948, "learning_rate": 9.551781892204452e-06, "loss": 0.0032, "step": 54350 }, { "epoch": 0.44453530686511017, "grad_norm": 0.19359883666038513, "learning_rate": 9.551486527055853e-06, "loss": 0.0021, "step": 54360 }, { "epoch": 0.4446170830437094, "grad_norm": 0.07503872364759445, "learning_rate": 9.551191069189774e-06, "loss": 0.0018, "step": 54370 }, { "epoch": 0.44469885922230856, "grad_norm": 0.09070128202438354, "learning_rate": 9.55089551861223e-06, "loss": 0.0027, "step": 54380 }, { "epoch": 0.44478063540090773, "grad_norm": 0.041592441499233246, "learning_rate": 9.550599875329249e-06, "loss": 0.0018, "step": 54390 }, { "epoch": 0.4448624115795069, "grad_norm": 0.02436288818717003, "learning_rate": 9.550304139346849e-06, "loss": 0.0019, "step": 54400 }, { "epoch": 0.44494418775810607, "grad_norm": 0.20221397280693054, "learning_rate": 9.550008310671055e-06, "loss": 0.0021, "step": 54410 }, { "epoch": 0.44502596393670524, "grad_norm": 0.0346648134291172, "learning_rate": 9.549712389307893e-06, "loss": 0.0022, "step": 54420 }, { "epoch": 0.4451077401153044, "grad_norm": 0.16484858095645905, "learning_rate": 9.549416375263391e-06, "loss": 0.0021, "step": 54430 }, { "epoch": 0.4451895162939036, "grad_norm": 0.058975156396627426, "learning_rate": 9.54912026854358e-06, "loss": 0.0028, "step": 54440 }, { "epoch": 0.44527129247250274, "grad_norm": 0.23578885197639465, "learning_rate": 9.54882406915449e-06, "loss": 0.0047, "step": 54450 }, { "epoch": 0.4453530686511019, "grad_norm": 0.05143086612224579, "learning_rate": 9.548527777102159e-06, "loss": 0.0016, "step": 54460 }, { "epoch": 0.4454348448297011, "grad_norm": 0.07304299622774124, "learning_rate": 9.548231392392617e-06, "loss": 0.0016, "step": 54470 }, { "epoch": 0.4455166210083003, "grad_norm": 0.12953650951385498, "learning_rate": 9.547934915031907e-06, "loss": 0.0022, "step": 54480 }, { "epoch": 0.4455983971868995, "grad_norm": 0.07247129827737808, "learning_rate": 9.547638345026064e-06, "loss": 0.0023, "step": 54490 }, { "epoch": 0.44568017336549864, "grad_norm": 0.0678652822971344, "learning_rate": 9.547341682381133e-06, "loss": 0.0026, "step": 54500 }, { "epoch": 0.4457619495440978, "grad_norm": 0.06373707950115204, "learning_rate": 9.547044927103154e-06, "loss": 0.0029, "step": 54510 }, { "epoch": 0.445843725722697, "grad_norm": 0.12832477688789368, "learning_rate": 9.546748079198176e-06, "loss": 0.0035, "step": 54520 }, { "epoch": 0.44592550190129615, "grad_norm": 0.03150981664657593, "learning_rate": 9.546451138672241e-06, "loss": 0.0019, "step": 54530 }, { "epoch": 0.4460072780798953, "grad_norm": 0.04715494066476822, "learning_rate": 9.546154105531404e-06, "loss": 0.0023, "step": 54540 }, { "epoch": 0.4460890542584945, "grad_norm": 0.06650818884372711, "learning_rate": 9.54585697978171e-06, "loss": 0.0026, "step": 54550 }, { "epoch": 0.44617083043709366, "grad_norm": 0.02780737727880478, "learning_rate": 9.545559761429214e-06, "loss": 0.0019, "step": 54560 }, { "epoch": 0.4462526066156928, "grad_norm": 0.023565614596009254, "learning_rate": 9.545262450479973e-06, "loss": 0.0018, "step": 54570 }, { "epoch": 0.446334382794292, "grad_norm": 0.044630520045757294, "learning_rate": 9.54496504694004e-06, "loss": 0.0016, "step": 54580 }, { "epoch": 0.4464161589728912, "grad_norm": 0.06728685647249222, "learning_rate": 9.544667550815476e-06, "loss": 0.0021, "step": 54590 }, { "epoch": 0.4464979351514904, "grad_norm": 0.049780815839767456, "learning_rate": 9.54436996211234e-06, "loss": 0.0028, "step": 54600 }, { "epoch": 0.44657971133008956, "grad_norm": 0.03329646959900856, "learning_rate": 9.544072280836692e-06, "loss": 0.0028, "step": 54610 }, { "epoch": 0.4466614875086887, "grad_norm": 0.26569920778274536, "learning_rate": 9.543774506994598e-06, "loss": 0.0021, "step": 54620 }, { "epoch": 0.4467432636872879, "grad_norm": 0.1378345936536789, "learning_rate": 9.543476640592124e-06, "loss": 0.0026, "step": 54630 }, { "epoch": 0.44682503986588706, "grad_norm": 0.06786259263753891, "learning_rate": 9.54317868163534e-06, "loss": 0.0021, "step": 54640 }, { "epoch": 0.44690681604448623, "grad_norm": 0.07571159303188324, "learning_rate": 9.542880630130312e-06, "loss": 0.001, "step": 54650 }, { "epoch": 0.4469885922230854, "grad_norm": 0.026970762759447098, "learning_rate": 9.542582486083113e-06, "loss": 0.002, "step": 54660 }, { "epoch": 0.44707036840168457, "grad_norm": 0.0853772982954979, "learning_rate": 9.542284249499816e-06, "loss": 0.0021, "step": 54670 }, { "epoch": 0.44715214458028374, "grad_norm": 0.04976584389805794, "learning_rate": 9.541985920386497e-06, "loss": 0.0022, "step": 54680 }, { "epoch": 0.44723392075888296, "grad_norm": 0.11044225841760635, "learning_rate": 9.541687498749232e-06, "loss": 0.0026, "step": 54690 }, { "epoch": 0.44731569693748213, "grad_norm": 0.07944474369287491, "learning_rate": 9.541388984594103e-06, "loss": 0.0045, "step": 54700 }, { "epoch": 0.4473974731160813, "grad_norm": 0.06598926335573196, "learning_rate": 9.541090377927189e-06, "loss": 0.0024, "step": 54710 }, { "epoch": 0.44747924929468047, "grad_norm": 0.1528780311346054, "learning_rate": 9.54079167875457e-06, "loss": 0.0019, "step": 54720 }, { "epoch": 0.44756102547327964, "grad_norm": 0.20743966102600098, "learning_rate": 9.540492887082335e-06, "loss": 0.0037, "step": 54730 }, { "epoch": 0.4476428016518788, "grad_norm": 0.21788296103477478, "learning_rate": 9.540194002916569e-06, "loss": 0.0043, "step": 54740 }, { "epoch": 0.447724577830478, "grad_norm": 0.042889926582574844, "learning_rate": 9.539895026263362e-06, "loss": 0.0036, "step": 54750 }, { "epoch": 0.44780635400907715, "grad_norm": 0.09340684115886688, "learning_rate": 9.539595957128803e-06, "loss": 0.0036, "step": 54760 }, { "epoch": 0.4478881301876763, "grad_norm": 0.0822371393442154, "learning_rate": 9.539296795518983e-06, "loss": 0.0044, "step": 54770 }, { "epoch": 0.4479699063662755, "grad_norm": 0.09906546771526337, "learning_rate": 9.538997541439999e-06, "loss": 0.0019, "step": 54780 }, { "epoch": 0.44805168254487465, "grad_norm": 0.03229589760303497, "learning_rate": 9.538698194897945e-06, "loss": 0.0025, "step": 54790 }, { "epoch": 0.4481334587234739, "grad_norm": 0.19973643124103546, "learning_rate": 9.538398755898917e-06, "loss": 0.0022, "step": 54800 }, { "epoch": 0.44821523490207305, "grad_norm": 0.30615234375, "learning_rate": 9.53809922444902e-06, "loss": 0.0042, "step": 54810 }, { "epoch": 0.4482970110806722, "grad_norm": 0.1928463876247406, "learning_rate": 9.537799600554353e-06, "loss": 0.0023, "step": 54820 }, { "epoch": 0.4483787872592714, "grad_norm": 0.10988768935203552, "learning_rate": 9.53749988422102e-06, "loss": 0.0038, "step": 54830 }, { "epoch": 0.44846056343787055, "grad_norm": 0.03909802436828613, "learning_rate": 9.537200075455124e-06, "loss": 0.0012, "step": 54840 }, { "epoch": 0.4485423396164697, "grad_norm": 0.0545610636472702, "learning_rate": 9.536900174262777e-06, "loss": 0.0031, "step": 54850 }, { "epoch": 0.4486241157950689, "grad_norm": 0.05156830698251724, "learning_rate": 9.536600180650084e-06, "loss": 0.0025, "step": 54860 }, { "epoch": 0.44870589197366806, "grad_norm": 0.01622265949845314, "learning_rate": 9.536300094623159e-06, "loss": 0.0029, "step": 54870 }, { "epoch": 0.4487876681522672, "grad_norm": 0.12541718780994415, "learning_rate": 9.535999916188112e-06, "loss": 0.0023, "step": 54880 }, { "epoch": 0.4488694443308664, "grad_norm": 0.05587282404303551, "learning_rate": 9.535699645351062e-06, "loss": 0.0026, "step": 54890 }, { "epoch": 0.44895122050946557, "grad_norm": 0.040953345596790314, "learning_rate": 9.53539928211812e-06, "loss": 0.003, "step": 54900 }, { "epoch": 0.4490329966880648, "grad_norm": 0.03544572368264198, "learning_rate": 9.535098826495411e-06, "loss": 0.001, "step": 54910 }, { "epoch": 0.44911477286666396, "grad_norm": 0.06397363543510437, "learning_rate": 9.534798278489053e-06, "loss": 0.0031, "step": 54920 }, { "epoch": 0.4491965490452631, "grad_norm": 0.056483905762434006, "learning_rate": 9.534497638105166e-06, "loss": 0.0027, "step": 54930 }, { "epoch": 0.4492783252238623, "grad_norm": 0.0876932144165039, "learning_rate": 9.534196905349877e-06, "loss": 0.0028, "step": 54940 }, { "epoch": 0.44936010140246146, "grad_norm": 0.0963459387421608, "learning_rate": 9.533896080229312e-06, "loss": 0.0019, "step": 54950 }, { "epoch": 0.44944187758106063, "grad_norm": 0.012932397425174713, "learning_rate": 9.533595162749598e-06, "loss": 0.002, "step": 54960 }, { "epoch": 0.4495236537596598, "grad_norm": 0.06100627779960632, "learning_rate": 9.533294152916865e-06, "loss": 0.002, "step": 54970 }, { "epoch": 0.44960542993825897, "grad_norm": 0.027171708643436432, "learning_rate": 9.532993050737244e-06, "loss": 0.0029, "step": 54980 }, { "epoch": 0.44968720611685814, "grad_norm": 0.04228748381137848, "learning_rate": 9.532691856216873e-06, "loss": 0.0026, "step": 54990 }, { "epoch": 0.4497689822954573, "grad_norm": 0.0475204773247242, "learning_rate": 9.532390569361884e-06, "loss": 0.0025, "step": 55000 }, { "epoch": 0.44985075847405653, "grad_norm": 0.13298837840557098, "learning_rate": 9.532089190178414e-06, "loss": 0.0016, "step": 55010 }, { "epoch": 0.4499325346526557, "grad_norm": 0.04477876052260399, "learning_rate": 9.531787718672602e-06, "loss": 0.0026, "step": 55020 }, { "epoch": 0.45001431083125487, "grad_norm": 0.11390150338411331, "learning_rate": 9.531486154850593e-06, "loss": 0.003, "step": 55030 }, { "epoch": 0.45009608700985404, "grad_norm": 0.05078566074371338, "learning_rate": 9.531184498718526e-06, "loss": 0.002, "step": 55040 }, { "epoch": 0.4501778631884532, "grad_norm": 0.15822958946228027, "learning_rate": 9.530882750282548e-06, "loss": 0.0018, "step": 55050 }, { "epoch": 0.4502596393670524, "grad_norm": 0.10699574649333954, "learning_rate": 9.530580909548807e-06, "loss": 0.0028, "step": 55060 }, { "epoch": 0.45034141554565155, "grad_norm": 0.05963391810655594, "learning_rate": 9.530278976523449e-06, "loss": 0.0023, "step": 55070 }, { "epoch": 0.4504231917242507, "grad_norm": 0.14397309720516205, "learning_rate": 9.529976951212626e-06, "loss": 0.0023, "step": 55080 }, { "epoch": 0.4505049679028499, "grad_norm": 0.052405714988708496, "learning_rate": 9.529674833622491e-06, "loss": 0.0022, "step": 55090 }, { "epoch": 0.45058674408144905, "grad_norm": 0.12851159274578094, "learning_rate": 9.529372623759197e-06, "loss": 0.0024, "step": 55100 }, { "epoch": 0.4506685202600482, "grad_norm": 0.10577679425477982, "learning_rate": 9.5290703216289e-06, "loss": 0.0021, "step": 55110 }, { "epoch": 0.45075029643864745, "grad_norm": 0.17212221026420593, "learning_rate": 9.52876792723776e-06, "loss": 0.003, "step": 55120 }, { "epoch": 0.4508320726172466, "grad_norm": 0.07594547420740128, "learning_rate": 9.528465440591936e-06, "loss": 0.0032, "step": 55130 }, { "epoch": 0.4509138487958458, "grad_norm": 0.11631368845701218, "learning_rate": 9.52816286169759e-06, "loss": 0.0026, "step": 55140 }, { "epoch": 0.45099562497444495, "grad_norm": 0.1249525174498558, "learning_rate": 9.527860190560886e-06, "loss": 0.0028, "step": 55150 }, { "epoch": 0.4510774011530441, "grad_norm": 0.06220652908086777, "learning_rate": 9.52755742718799e-06, "loss": 0.0012, "step": 55160 }, { "epoch": 0.4511591773316433, "grad_norm": 0.09747031331062317, "learning_rate": 9.52725457158507e-06, "loss": 0.0019, "step": 55170 }, { "epoch": 0.45124095351024246, "grad_norm": 0.07992032170295715, "learning_rate": 9.526951623758293e-06, "loss": 0.0044, "step": 55180 }, { "epoch": 0.45132272968884163, "grad_norm": 0.2034233659505844, "learning_rate": 9.52664858371383e-06, "loss": 0.0027, "step": 55190 }, { "epoch": 0.4514045058674408, "grad_norm": 0.029616601765155792, "learning_rate": 9.52634545145786e-06, "loss": 0.0014, "step": 55200 }, { "epoch": 0.45148628204603997, "grad_norm": 0.08727865666151047, "learning_rate": 9.52604222699655e-06, "loss": 0.0029, "step": 55210 }, { "epoch": 0.45156805822463914, "grad_norm": 0.033186692744493484, "learning_rate": 9.525738910336083e-06, "loss": 0.0049, "step": 55220 }, { "epoch": 0.45164983440323836, "grad_norm": 0.05132220685482025, "learning_rate": 9.525435501482635e-06, "loss": 0.0018, "step": 55230 }, { "epoch": 0.45173161058183753, "grad_norm": 0.09363671392202377, "learning_rate": 9.525132000442387e-06, "loss": 0.0045, "step": 55240 }, { "epoch": 0.4518133867604367, "grad_norm": 0.04789271950721741, "learning_rate": 9.524828407221524e-06, "loss": 0.0015, "step": 55250 }, { "epoch": 0.45189516293903587, "grad_norm": 0.049059297889471054, "learning_rate": 9.524524721826227e-06, "loss": 0.0019, "step": 55260 }, { "epoch": 0.45197693911763503, "grad_norm": 0.04384525865316391, "learning_rate": 9.524220944262683e-06, "loss": 0.0026, "step": 55270 }, { "epoch": 0.4520587152962342, "grad_norm": 0.35848841071128845, "learning_rate": 9.523917074537082e-06, "loss": 0.0027, "step": 55280 }, { "epoch": 0.4521404914748334, "grad_norm": 0.09104578197002411, "learning_rate": 9.523613112655613e-06, "loss": 0.0035, "step": 55290 }, { "epoch": 0.45222226765343254, "grad_norm": 0.0995369553565979, "learning_rate": 9.523309058624467e-06, "loss": 0.0062, "step": 55300 }, { "epoch": 0.4523040438320317, "grad_norm": 0.1841810792684555, "learning_rate": 9.52300491244984e-06, "loss": 0.0035, "step": 55310 }, { "epoch": 0.4523858200106309, "grad_norm": 0.11554733663797379, "learning_rate": 9.522700674137925e-06, "loss": 0.0035, "step": 55320 }, { "epoch": 0.4524675961892301, "grad_norm": 0.2721225619316101, "learning_rate": 9.522396343694922e-06, "loss": 0.0073, "step": 55330 }, { "epoch": 0.4525493723678293, "grad_norm": 0.1293119341135025, "learning_rate": 9.522091921127028e-06, "loss": 0.0022, "step": 55340 }, { "epoch": 0.45263114854642844, "grad_norm": 0.033248335123062134, "learning_rate": 9.521787406440447e-06, "loss": 0.0017, "step": 55350 }, { "epoch": 0.4527129247250276, "grad_norm": 0.07685323804616928, "learning_rate": 9.521482799641382e-06, "loss": 0.0017, "step": 55360 }, { "epoch": 0.4527947009036268, "grad_norm": 0.05918122082948685, "learning_rate": 9.521178100736035e-06, "loss": 0.0028, "step": 55370 }, { "epoch": 0.45287647708222595, "grad_norm": 0.12457641214132309, "learning_rate": 9.520873309730618e-06, "loss": 0.002, "step": 55380 }, { "epoch": 0.4529582532608251, "grad_norm": 0.04961340129375458, "learning_rate": 9.520568426631333e-06, "loss": 0.0025, "step": 55390 }, { "epoch": 0.4530400294394243, "grad_norm": 0.09624423086643219, "learning_rate": 9.520263451444398e-06, "loss": 0.0026, "step": 55400 }, { "epoch": 0.45312180561802345, "grad_norm": 0.1621144413948059, "learning_rate": 9.51995838417602e-06, "loss": 0.0021, "step": 55410 }, { "epoch": 0.4532035817966226, "grad_norm": 0.025371748954057693, "learning_rate": 9.519653224832417e-06, "loss": 0.0015, "step": 55420 }, { "epoch": 0.4532853579752218, "grad_norm": 0.4835375249385834, "learning_rate": 9.519347973419805e-06, "loss": 0.0022, "step": 55430 }, { "epoch": 0.453367134153821, "grad_norm": 0.14586570858955383, "learning_rate": 9.5190426299444e-06, "loss": 0.0042, "step": 55440 }, { "epoch": 0.4534489103324202, "grad_norm": 0.05394558608531952, "learning_rate": 9.518737194412424e-06, "loss": 0.0051, "step": 55450 }, { "epoch": 0.45353068651101935, "grad_norm": 0.0908425897359848, "learning_rate": 9.518431666830096e-06, "loss": 0.0038, "step": 55460 }, { "epoch": 0.4536124626896185, "grad_norm": 0.14040550589561462, "learning_rate": 9.518126047203644e-06, "loss": 0.0024, "step": 55470 }, { "epoch": 0.4536942388682177, "grad_norm": 0.05742378160357475, "learning_rate": 9.517820335539294e-06, "loss": 0.0022, "step": 55480 }, { "epoch": 0.45377601504681686, "grad_norm": 0.11159183830022812, "learning_rate": 9.517514531843268e-06, "loss": 0.0022, "step": 55490 }, { "epoch": 0.45385779122541603, "grad_norm": 0.11775700002908707, "learning_rate": 9.5172086361218e-06, "loss": 0.0023, "step": 55500 }, { "epoch": 0.4539395674040152, "grad_norm": 0.11159506440162659, "learning_rate": 9.516902648381121e-06, "loss": 0.0038, "step": 55510 }, { "epoch": 0.45402134358261437, "grad_norm": 0.26052117347717285, "learning_rate": 9.516596568627463e-06, "loss": 0.0022, "step": 55520 }, { "epoch": 0.45410311976121354, "grad_norm": 0.118492990732193, "learning_rate": 9.516290396867062e-06, "loss": 0.0033, "step": 55530 }, { "epoch": 0.4541848959398127, "grad_norm": 0.14662674069404602, "learning_rate": 9.515984133106154e-06, "loss": 0.0027, "step": 55540 }, { "epoch": 0.45426667211841193, "grad_norm": 0.054853178560733795, "learning_rate": 9.515677777350979e-06, "loss": 0.0051, "step": 55550 }, { "epoch": 0.4543484482970111, "grad_norm": 0.10029993206262589, "learning_rate": 9.515371329607777e-06, "loss": 0.0046, "step": 55560 }, { "epoch": 0.45443022447561027, "grad_norm": 0.10876504331827164, "learning_rate": 9.51506478988279e-06, "loss": 0.002, "step": 55570 }, { "epoch": 0.45451200065420944, "grad_norm": 0.05498076602816582, "learning_rate": 9.514758158182265e-06, "loss": 0.0025, "step": 55580 }, { "epoch": 0.4545937768328086, "grad_norm": 0.04471827670931816, "learning_rate": 9.514451434512445e-06, "loss": 0.0021, "step": 55590 }, { "epoch": 0.4546755530114078, "grad_norm": 0.09459366649389267, "learning_rate": 9.51414461887958e-06, "loss": 0.0022, "step": 55600 }, { "epoch": 0.45475732919000694, "grad_norm": 0.04644518345594406, "learning_rate": 9.513837711289919e-06, "loss": 0.0014, "step": 55610 }, { "epoch": 0.4548391053686061, "grad_norm": 0.2067161649465561, "learning_rate": 9.513530711749717e-06, "loss": 0.0043, "step": 55620 }, { "epoch": 0.4549208815472053, "grad_norm": 0.04325210675597191, "learning_rate": 9.513223620265224e-06, "loss": 0.002, "step": 55630 }, { "epoch": 0.45500265772580445, "grad_norm": 0.02310089021921158, "learning_rate": 9.512916436842698e-06, "loss": 0.0028, "step": 55640 }, { "epoch": 0.4550844339044037, "grad_norm": 0.09112659096717834, "learning_rate": 9.512609161488396e-06, "loss": 0.0025, "step": 55650 }, { "epoch": 0.45516621008300284, "grad_norm": 0.04096604883670807, "learning_rate": 9.512301794208576e-06, "loss": 0.0018, "step": 55660 }, { "epoch": 0.455247986261602, "grad_norm": 0.09957714378833771, "learning_rate": 9.511994335009503e-06, "loss": 0.0038, "step": 55670 }, { "epoch": 0.4553297624402012, "grad_norm": 0.07907184958457947, "learning_rate": 9.511686783897436e-06, "loss": 0.0009, "step": 55680 }, { "epoch": 0.45541153861880035, "grad_norm": 0.051763858646154404, "learning_rate": 9.511379140878643e-06, "loss": 0.003, "step": 55690 }, { "epoch": 0.4554933147973995, "grad_norm": 0.03900497406721115, "learning_rate": 9.511071405959388e-06, "loss": 0.0037, "step": 55700 }, { "epoch": 0.4555750909759987, "grad_norm": 0.020509792491793633, "learning_rate": 9.510763579145943e-06, "loss": 0.0017, "step": 55710 }, { "epoch": 0.45565686715459786, "grad_norm": 0.6491503119468689, "learning_rate": 9.510455660444577e-06, "loss": 0.0032, "step": 55720 }, { "epoch": 0.455738643333197, "grad_norm": 0.11352287977933884, "learning_rate": 9.510147649861564e-06, "loss": 0.0025, "step": 55730 }, { "epoch": 0.4558204195117962, "grad_norm": 0.12939368188381195, "learning_rate": 9.509839547403177e-06, "loss": 0.0038, "step": 55740 }, { "epoch": 0.45590219569039536, "grad_norm": 0.028870517387986183, "learning_rate": 9.509531353075692e-06, "loss": 0.0019, "step": 55750 }, { "epoch": 0.4559839718689946, "grad_norm": 0.010172426700592041, "learning_rate": 9.509223066885387e-06, "loss": 0.0025, "step": 55760 }, { "epoch": 0.45606574804759376, "grad_norm": 0.1367829442024231, "learning_rate": 9.508914688838544e-06, "loss": 0.0031, "step": 55770 }, { "epoch": 0.4561475242261929, "grad_norm": 0.10346204042434692, "learning_rate": 9.508606218941446e-06, "loss": 0.0046, "step": 55780 }, { "epoch": 0.4562293004047921, "grad_norm": 0.2122577279806137, "learning_rate": 9.50829765720037e-06, "loss": 0.0035, "step": 55790 }, { "epoch": 0.45631107658339126, "grad_norm": 0.026021018624305725, "learning_rate": 9.507989003621609e-06, "loss": 0.0018, "step": 55800 }, { "epoch": 0.45639285276199043, "grad_norm": 0.07577710598707199, "learning_rate": 9.507680258211447e-06, "loss": 0.0023, "step": 55810 }, { "epoch": 0.4564746289405896, "grad_norm": 0.18364383280277252, "learning_rate": 9.507371420976175e-06, "loss": 0.0034, "step": 55820 }, { "epoch": 0.45655640511918877, "grad_norm": 0.05053159222006798, "learning_rate": 9.50706249192208e-06, "loss": 0.0026, "step": 55830 }, { "epoch": 0.45663818129778794, "grad_norm": 0.026796692982316017, "learning_rate": 9.506753471055463e-06, "loss": 0.0032, "step": 55840 }, { "epoch": 0.4567199574763871, "grad_norm": 0.030508821830153465, "learning_rate": 9.506444358382611e-06, "loss": 0.0027, "step": 55850 }, { "epoch": 0.4568017336549863, "grad_norm": 0.055862922221422195, "learning_rate": 9.506135153909828e-06, "loss": 0.0031, "step": 55860 }, { "epoch": 0.4568835098335855, "grad_norm": 0.12589697539806366, "learning_rate": 9.505825857643407e-06, "loss": 0.0039, "step": 55870 }, { "epoch": 0.45696528601218467, "grad_norm": 0.12273462861776352, "learning_rate": 9.505516469589651e-06, "loss": 0.0029, "step": 55880 }, { "epoch": 0.45704706219078384, "grad_norm": 0.08308066427707672, "learning_rate": 9.505206989754861e-06, "loss": 0.0038, "step": 55890 }, { "epoch": 0.457128838369383, "grad_norm": 0.008925000205636024, "learning_rate": 9.504897418145344e-06, "loss": 0.0029, "step": 55900 }, { "epoch": 0.4572106145479822, "grad_norm": 0.08920904994010925, "learning_rate": 9.504587754767405e-06, "loss": 0.0014, "step": 55910 }, { "epoch": 0.45729239072658134, "grad_norm": 0.07937775552272797, "learning_rate": 9.50427799962735e-06, "loss": 0.0035, "step": 55920 }, { "epoch": 0.4573741669051805, "grad_norm": 0.09036500751972198, "learning_rate": 9.503968152731494e-06, "loss": 0.0018, "step": 55930 }, { "epoch": 0.4574559430837797, "grad_norm": 0.1203262135386467, "learning_rate": 9.503658214086145e-06, "loss": 0.0017, "step": 55940 }, { "epoch": 0.45753771926237885, "grad_norm": 0.10801438242197037, "learning_rate": 9.503348183697616e-06, "loss": 0.0027, "step": 55950 }, { "epoch": 0.457619495440978, "grad_norm": 0.08388058841228485, "learning_rate": 9.503038061572223e-06, "loss": 0.0021, "step": 55960 }, { "epoch": 0.45770127161957724, "grad_norm": 0.00642760843038559, "learning_rate": 9.502727847716286e-06, "loss": 0.0021, "step": 55970 }, { "epoch": 0.4577830477981764, "grad_norm": 0.1741131693124771, "learning_rate": 9.502417542136123e-06, "loss": 0.0042, "step": 55980 }, { "epoch": 0.4578648239767756, "grad_norm": 0.08599032461643219, "learning_rate": 9.502107144838056e-06, "loss": 0.0034, "step": 55990 }, { "epoch": 0.45794660015537475, "grad_norm": 0.09564334899187088, "learning_rate": 9.501796655828405e-06, "loss": 0.0029, "step": 56000 }, { "epoch": 0.4580283763339739, "grad_norm": 0.08331303298473358, "learning_rate": 9.501486075113498e-06, "loss": 0.0021, "step": 56010 }, { "epoch": 0.4581101525125731, "grad_norm": 0.1638924777507782, "learning_rate": 9.50117540269966e-06, "loss": 0.0044, "step": 56020 }, { "epoch": 0.45819192869117226, "grad_norm": 0.05508957430720329, "learning_rate": 9.500864638593218e-06, "loss": 0.0042, "step": 56030 }, { "epoch": 0.4582737048697714, "grad_norm": 0.04847125709056854, "learning_rate": 9.50055378280051e-06, "loss": 0.0026, "step": 56040 }, { "epoch": 0.4583554810483706, "grad_norm": 0.05996705964207649, "learning_rate": 9.500242835327859e-06, "loss": 0.0032, "step": 56050 }, { "epoch": 0.45843725722696976, "grad_norm": 0.09851466864347458, "learning_rate": 9.499931796181606e-06, "loss": 0.003, "step": 56060 }, { "epoch": 0.45851903340556893, "grad_norm": 0.09500668197870255, "learning_rate": 9.499620665368083e-06, "loss": 0.0027, "step": 56070 }, { "epoch": 0.45860080958416816, "grad_norm": 0.14898379147052765, "learning_rate": 9.49930944289363e-06, "loss": 0.0028, "step": 56080 }, { "epoch": 0.4586825857627673, "grad_norm": 0.07563140243291855, "learning_rate": 9.498998128764588e-06, "loss": 0.0028, "step": 56090 }, { "epoch": 0.4587643619413665, "grad_norm": 0.031245466321706772, "learning_rate": 9.498686722987294e-06, "loss": 0.0024, "step": 56100 }, { "epoch": 0.45884613811996566, "grad_norm": 0.1180388331413269, "learning_rate": 9.498375225568098e-06, "loss": 0.0022, "step": 56110 }, { "epoch": 0.45892791429856483, "grad_norm": 0.06902322173118591, "learning_rate": 9.498063636513339e-06, "loss": 0.0022, "step": 56120 }, { "epoch": 0.459009690477164, "grad_norm": 0.13047407567501068, "learning_rate": 9.49775195582937e-06, "loss": 0.0026, "step": 56130 }, { "epoch": 0.45909146665576317, "grad_norm": 0.07828712463378906, "learning_rate": 9.497440183522537e-06, "loss": 0.003, "step": 56140 }, { "epoch": 0.45917324283436234, "grad_norm": 0.06770051270723343, "learning_rate": 9.497128319599191e-06, "loss": 0.0028, "step": 56150 }, { "epoch": 0.4592550190129615, "grad_norm": 0.19968575239181519, "learning_rate": 9.496816364065686e-06, "loss": 0.003, "step": 56160 }, { "epoch": 0.4593367951915607, "grad_norm": 0.07751559466123581, "learning_rate": 9.496504316928377e-06, "loss": 0.0021, "step": 56170 }, { "epoch": 0.45941857137015985, "grad_norm": 0.09546225517988205, "learning_rate": 9.49619217819362e-06, "loss": 0.0033, "step": 56180 }, { "epoch": 0.45950034754875907, "grad_norm": 0.07115013152360916, "learning_rate": 9.495879947867772e-06, "loss": 0.0011, "step": 56190 }, { "epoch": 0.45958212372735824, "grad_norm": 0.08380081504583359, "learning_rate": 9.495567625957198e-06, "loss": 0.0034, "step": 56200 }, { "epoch": 0.4596638999059574, "grad_norm": 0.030975989997386932, "learning_rate": 9.495255212468254e-06, "loss": 0.0052, "step": 56210 }, { "epoch": 0.4597456760845566, "grad_norm": 0.03508919104933739, "learning_rate": 9.49494270740731e-06, "loss": 0.0025, "step": 56220 }, { "epoch": 0.45982745226315574, "grad_norm": 0.04228419065475464, "learning_rate": 9.49463011078073e-06, "loss": 0.0016, "step": 56230 }, { "epoch": 0.4599092284417549, "grad_norm": 0.050709813833236694, "learning_rate": 9.49431742259488e-06, "loss": 0.0031, "step": 56240 }, { "epoch": 0.4599910046203541, "grad_norm": 0.08669928461313248, "learning_rate": 9.49400464285613e-06, "loss": 0.0058, "step": 56250 }, { "epoch": 0.46007278079895325, "grad_norm": 0.11102564632892609, "learning_rate": 9.493691771570852e-06, "loss": 0.002, "step": 56260 }, { "epoch": 0.4601545569775524, "grad_norm": 0.05197376385331154, "learning_rate": 9.493378808745421e-06, "loss": 0.0021, "step": 56270 }, { "epoch": 0.4602363331561516, "grad_norm": 0.05651238188147545, "learning_rate": 9.493065754386213e-06, "loss": 0.0038, "step": 56280 }, { "epoch": 0.4603181093347508, "grad_norm": 0.09002408385276794, "learning_rate": 9.4927526084996e-06, "loss": 0.0025, "step": 56290 }, { "epoch": 0.46039988551335, "grad_norm": 0.1379380077123642, "learning_rate": 9.492439371091967e-06, "loss": 0.0025, "step": 56300 }, { "epoch": 0.46048166169194915, "grad_norm": 0.12917590141296387, "learning_rate": 9.49212604216969e-06, "loss": 0.0026, "step": 56310 }, { "epoch": 0.4605634378705483, "grad_norm": 0.11759080737829208, "learning_rate": 9.491812621739158e-06, "loss": 0.002, "step": 56320 }, { "epoch": 0.4606452140491475, "grad_norm": 0.2903626561164856, "learning_rate": 9.49149910980675e-06, "loss": 0.0034, "step": 56330 }, { "epoch": 0.46072699022774666, "grad_norm": 0.17338190972805023, "learning_rate": 9.491185506378853e-06, "loss": 0.0025, "step": 56340 }, { "epoch": 0.4608087664063458, "grad_norm": 0.15218673646450043, "learning_rate": 9.490871811461858e-06, "loss": 0.0049, "step": 56350 }, { "epoch": 0.460890542584945, "grad_norm": 0.2070273905992508, "learning_rate": 9.490558025062153e-06, "loss": 0.0018, "step": 56360 }, { "epoch": 0.46097231876354416, "grad_norm": 0.1456441432237625, "learning_rate": 9.490244147186132e-06, "loss": 0.004, "step": 56370 }, { "epoch": 0.46105409494214333, "grad_norm": 0.12066281586885452, "learning_rate": 9.489930177840186e-06, "loss": 0.0029, "step": 56380 }, { "epoch": 0.4611358711207425, "grad_norm": 0.17188195884227753, "learning_rate": 9.489616117030714e-06, "loss": 0.0032, "step": 56390 }, { "epoch": 0.4612176472993417, "grad_norm": 0.17355720698833466, "learning_rate": 9.489301964764113e-06, "loss": 0.0017, "step": 56400 }, { "epoch": 0.4612994234779409, "grad_norm": 0.11913366615772247, "learning_rate": 9.48898772104678e-06, "loss": 0.0039, "step": 56410 }, { "epoch": 0.46138119965654006, "grad_norm": 0.2688732147216797, "learning_rate": 9.48867338588512e-06, "loss": 0.002, "step": 56420 }, { "epoch": 0.46146297583513923, "grad_norm": 0.12900389730930328, "learning_rate": 9.488358959285533e-06, "loss": 0.002, "step": 56430 }, { "epoch": 0.4615447520137384, "grad_norm": 0.006490255240350962, "learning_rate": 9.488044441254428e-06, "loss": 0.0022, "step": 56440 }, { "epoch": 0.46162652819233757, "grad_norm": 0.1113785058259964, "learning_rate": 9.487729831798208e-06, "loss": 0.0023, "step": 56450 }, { "epoch": 0.46170830437093674, "grad_norm": 0.2239457666873932, "learning_rate": 9.487415130923283e-06, "loss": 0.002, "step": 56460 }, { "epoch": 0.4617900805495359, "grad_norm": 0.08859540522098541, "learning_rate": 9.487100338636065e-06, "loss": 0.0017, "step": 56470 }, { "epoch": 0.4618718567281351, "grad_norm": 0.047452788800001144, "learning_rate": 9.486785454942966e-06, "loss": 0.0031, "step": 56480 }, { "epoch": 0.46195363290673425, "grad_norm": 0.14540734887123108, "learning_rate": 9.4864704798504e-06, "loss": 0.0016, "step": 56490 }, { "epoch": 0.4620354090853334, "grad_norm": 0.07650738954544067, "learning_rate": 9.486155413364783e-06, "loss": 0.0071, "step": 56500 }, { "epoch": 0.46211718526393264, "grad_norm": 0.06442079693078995, "learning_rate": 9.485840255492534e-06, "loss": 0.0019, "step": 56510 }, { "epoch": 0.4621989614425318, "grad_norm": 0.11250394582748413, "learning_rate": 9.485525006240072e-06, "loss": 0.0021, "step": 56520 }, { "epoch": 0.462280737621131, "grad_norm": 0.10350444912910461, "learning_rate": 9.485209665613821e-06, "loss": 0.0022, "step": 56530 }, { "epoch": 0.46236251379973015, "grad_norm": 0.061246056109666824, "learning_rate": 9.484894233620203e-06, "loss": 0.0023, "step": 56540 }, { "epoch": 0.4624442899783293, "grad_norm": 0.15551884472370148, "learning_rate": 9.484578710265645e-06, "loss": 0.0034, "step": 56550 }, { "epoch": 0.4625260661569285, "grad_norm": 0.1672286093235016, "learning_rate": 9.484263095556572e-06, "loss": 0.0036, "step": 56560 }, { "epoch": 0.46260784233552765, "grad_norm": 0.029698889702558517, "learning_rate": 9.483947389499415e-06, "loss": 0.002, "step": 56570 }, { "epoch": 0.4626896185141268, "grad_norm": 0.1803702414035797, "learning_rate": 9.483631592100604e-06, "loss": 0.0033, "step": 56580 }, { "epoch": 0.462771394692726, "grad_norm": 0.07842140644788742, "learning_rate": 9.483315703366575e-06, "loss": 0.0019, "step": 56590 }, { "epoch": 0.46285317087132516, "grad_norm": 0.14658419787883759, "learning_rate": 9.48299972330376e-06, "loss": 0.0025, "step": 56600 }, { "epoch": 0.4629349470499244, "grad_norm": 0.07714467495679855, "learning_rate": 9.482683651918596e-06, "loss": 0.0013, "step": 56610 }, { "epoch": 0.46301672322852355, "grad_norm": 0.270389199256897, "learning_rate": 9.482367489217523e-06, "loss": 0.0068, "step": 56620 }, { "epoch": 0.4630984994071227, "grad_norm": 0.0675845816731453, "learning_rate": 9.48205123520698e-06, "loss": 0.0012, "step": 56630 }, { "epoch": 0.4631802755857219, "grad_norm": 0.022446749731898308, "learning_rate": 9.481734889893411e-06, "loss": 0.0022, "step": 56640 }, { "epoch": 0.46326205176432106, "grad_norm": 0.12636525928974152, "learning_rate": 9.481418453283259e-06, "loss": 0.0026, "step": 56650 }, { "epoch": 0.46334382794292023, "grad_norm": 0.03513146564364433, "learning_rate": 9.48110192538297e-06, "loss": 0.0033, "step": 56660 }, { "epoch": 0.4634256041215194, "grad_norm": 0.013964131474494934, "learning_rate": 9.480785306198993e-06, "loss": 0.0026, "step": 56670 }, { "epoch": 0.46350738030011857, "grad_norm": 0.06260640919208527, "learning_rate": 9.480468595737777e-06, "loss": 0.0028, "step": 56680 }, { "epoch": 0.46358915647871773, "grad_norm": 0.22656938433647156, "learning_rate": 9.480151794005775e-06, "loss": 0.0036, "step": 56690 }, { "epoch": 0.4636709326573169, "grad_norm": 0.004550219979137182, "learning_rate": 9.479834901009437e-06, "loss": 0.0009, "step": 56700 }, { "epoch": 0.46375270883591607, "grad_norm": 0.2998177409172058, "learning_rate": 9.479517916755224e-06, "loss": 0.0025, "step": 56710 }, { "epoch": 0.4638344850145153, "grad_norm": 0.06083710491657257, "learning_rate": 9.479200841249587e-06, "loss": 0.0017, "step": 56720 }, { "epoch": 0.46391626119311447, "grad_norm": 0.09430095553398132, "learning_rate": 9.478883674498989e-06, "loss": 0.0013, "step": 56730 }, { "epoch": 0.46399803737171363, "grad_norm": 0.028945207595825195, "learning_rate": 9.47856641650989e-06, "loss": 0.0033, "step": 56740 }, { "epoch": 0.4640798135503128, "grad_norm": 0.08957286924123764, "learning_rate": 9.478249067288752e-06, "loss": 0.0022, "step": 56750 }, { "epoch": 0.46416158972891197, "grad_norm": 0.07304486632347107, "learning_rate": 9.47793162684204e-06, "loss": 0.0031, "step": 56760 }, { "epoch": 0.46424336590751114, "grad_norm": 0.038333263248205185, "learning_rate": 9.477614095176222e-06, "loss": 0.0024, "step": 56770 }, { "epoch": 0.4643251420861103, "grad_norm": 0.1535453498363495, "learning_rate": 9.477296472297764e-06, "loss": 0.0019, "step": 56780 }, { "epoch": 0.4644069182647095, "grad_norm": 0.048842307180166245, "learning_rate": 9.47697875821314e-06, "loss": 0.0016, "step": 56790 }, { "epoch": 0.46448869444330865, "grad_norm": 0.09460362046957016, "learning_rate": 9.476660952928817e-06, "loss": 0.0021, "step": 56800 }, { "epoch": 0.4645704706219078, "grad_norm": 0.08040931820869446, "learning_rate": 9.476343056451273e-06, "loss": 0.0025, "step": 56810 }, { "epoch": 0.464652246800507, "grad_norm": 0.14117646217346191, "learning_rate": 9.476025068786982e-06, "loss": 0.0016, "step": 56820 }, { "epoch": 0.4647340229791062, "grad_norm": 0.09233653545379639, "learning_rate": 9.475706989942421e-06, "loss": 0.0018, "step": 56830 }, { "epoch": 0.4648157991577054, "grad_norm": 0.14087700843811035, "learning_rate": 9.475388819924072e-06, "loss": 0.003, "step": 56840 }, { "epoch": 0.46489757533630455, "grad_norm": 0.031718287616968155, "learning_rate": 9.475070558738416e-06, "loss": 0.0027, "step": 56850 }, { "epoch": 0.4649793515149037, "grad_norm": 0.017519574612379074, "learning_rate": 9.474752206391932e-06, "loss": 0.0016, "step": 56860 }, { "epoch": 0.4650611276935029, "grad_norm": 0.1069834902882576, "learning_rate": 9.47443376289111e-06, "loss": 0.0018, "step": 56870 }, { "epoch": 0.46514290387210205, "grad_norm": 0.14143501222133636, "learning_rate": 9.474115228242437e-06, "loss": 0.0022, "step": 56880 }, { "epoch": 0.4652246800507012, "grad_norm": 0.01070267241448164, "learning_rate": 9.473796602452399e-06, "loss": 0.0028, "step": 56890 }, { "epoch": 0.4653064562293004, "grad_norm": 0.0910831019282341, "learning_rate": 9.473477885527487e-06, "loss": 0.0051, "step": 56900 }, { "epoch": 0.46538823240789956, "grad_norm": 0.08821092545986176, "learning_rate": 9.473159077474194e-06, "loss": 0.0018, "step": 56910 }, { "epoch": 0.46547000858649873, "grad_norm": 0.11554417759180069, "learning_rate": 9.472840178299014e-06, "loss": 0.0025, "step": 56920 }, { "epoch": 0.46555178476509795, "grad_norm": 0.03220741078257561, "learning_rate": 9.472521188008446e-06, "loss": 0.0029, "step": 56930 }, { "epoch": 0.4656335609436971, "grad_norm": 0.04290972277522087, "learning_rate": 9.472202106608986e-06, "loss": 0.002, "step": 56940 }, { "epoch": 0.4657153371222963, "grad_norm": 0.2076737880706787, "learning_rate": 9.471882934107132e-06, "loss": 0.004, "step": 56950 }, { "epoch": 0.46579711330089546, "grad_norm": 0.04060642048716545, "learning_rate": 9.471563670509387e-06, "loss": 0.0014, "step": 56960 }, { "epoch": 0.46587888947949463, "grad_norm": 0.09365243464708328, "learning_rate": 9.471244315822257e-06, "loss": 0.0026, "step": 56970 }, { "epoch": 0.4659606656580938, "grad_norm": 0.07908559590578079, "learning_rate": 9.470924870052247e-06, "loss": 0.0025, "step": 56980 }, { "epoch": 0.46604244183669297, "grad_norm": 0.05593472719192505, "learning_rate": 9.470605333205862e-06, "loss": 0.0021, "step": 56990 }, { "epoch": 0.46612421801529214, "grad_norm": 0.1503702700138092, "learning_rate": 9.470285705289612e-06, "loss": 0.0048, "step": 57000 }, { "epoch": 0.4662059941938913, "grad_norm": 0.0830271914601326, "learning_rate": 9.469965986310009e-06, "loss": 0.0018, "step": 57010 }, { "epoch": 0.4662877703724905, "grad_norm": 0.07475399971008301, "learning_rate": 9.469646176273566e-06, "loss": 0.0023, "step": 57020 }, { "epoch": 0.46636954655108964, "grad_norm": 0.034486789256334305, "learning_rate": 9.469326275186797e-06, "loss": 0.0022, "step": 57030 }, { "epoch": 0.46645132272968887, "grad_norm": 0.03949034586548805, "learning_rate": 9.469006283056219e-06, "loss": 0.0019, "step": 57040 }, { "epoch": 0.46653309890828804, "grad_norm": 0.14381521940231323, "learning_rate": 9.468686199888349e-06, "loss": 0.0029, "step": 57050 }, { "epoch": 0.4666148750868872, "grad_norm": 0.03815285488963127, "learning_rate": 9.46836602568971e-06, "loss": 0.0018, "step": 57060 }, { "epoch": 0.4666966512654864, "grad_norm": 0.06867609918117523, "learning_rate": 9.468045760466822e-06, "loss": 0.0019, "step": 57070 }, { "epoch": 0.46677842744408554, "grad_norm": 0.07590770721435547, "learning_rate": 9.46772540422621e-06, "loss": 0.003, "step": 57080 }, { "epoch": 0.4668602036226847, "grad_norm": 0.04521320387721062, "learning_rate": 9.467404956974401e-06, "loss": 0.0022, "step": 57090 }, { "epoch": 0.4669419798012839, "grad_norm": 0.09355226159095764, "learning_rate": 9.467084418717923e-06, "loss": 0.0027, "step": 57100 }, { "epoch": 0.46702375597988305, "grad_norm": 0.07518158853054047, "learning_rate": 9.466763789463302e-06, "loss": 0.0023, "step": 57110 }, { "epoch": 0.4671055321584822, "grad_norm": 0.07237491011619568, "learning_rate": 9.466443069217073e-06, "loss": 0.0031, "step": 57120 }, { "epoch": 0.4671873083370814, "grad_norm": 0.04831636697053909, "learning_rate": 9.466122257985769e-06, "loss": 0.004, "step": 57130 }, { "epoch": 0.46726908451568056, "grad_norm": 0.09046666324138641, "learning_rate": 9.465801355775923e-06, "loss": 0.003, "step": 57140 }, { "epoch": 0.4673508606942798, "grad_norm": 0.11542239785194397, "learning_rate": 9.465480362594074e-06, "loss": 0.0025, "step": 57150 }, { "epoch": 0.46743263687287895, "grad_norm": 0.008224847726523876, "learning_rate": 9.46515927844676e-06, "loss": 0.0017, "step": 57160 }, { "epoch": 0.4675144130514781, "grad_norm": 0.035959094762802124, "learning_rate": 9.464838103340522e-06, "loss": 0.0026, "step": 57170 }, { "epoch": 0.4675961892300773, "grad_norm": 0.03767406567931175, "learning_rate": 9.464516837281903e-06, "loss": 0.0023, "step": 57180 }, { "epoch": 0.46767796540867645, "grad_norm": 0.04483455792069435, "learning_rate": 9.464195480277445e-06, "loss": 0.0018, "step": 57190 }, { "epoch": 0.4677597415872756, "grad_norm": 0.015491831116378307, "learning_rate": 9.4638740323337e-06, "loss": 0.003, "step": 57200 }, { "epoch": 0.4678415177658748, "grad_norm": 0.10449835658073425, "learning_rate": 9.463552493457211e-06, "loss": 0.0027, "step": 57210 }, { "epoch": 0.46792329394447396, "grad_norm": 0.18699117004871368, "learning_rate": 9.463230863654529e-06, "loss": 0.0021, "step": 57220 }, { "epoch": 0.46800507012307313, "grad_norm": 0.006518847309052944, "learning_rate": 9.462909142932205e-06, "loss": 0.0023, "step": 57230 }, { "epoch": 0.4680868463016723, "grad_norm": 0.1387380212545395, "learning_rate": 9.462587331296798e-06, "loss": 0.0027, "step": 57240 }, { "epoch": 0.4681686224802715, "grad_norm": 0.03847476467490196, "learning_rate": 9.462265428754856e-06, "loss": 0.0014, "step": 57250 }, { "epoch": 0.4682503986588707, "grad_norm": 0.15123054385185242, "learning_rate": 9.461943435312942e-06, "loss": 0.0026, "step": 57260 }, { "epoch": 0.46833217483746986, "grad_norm": 0.22152386605739594, "learning_rate": 9.461621350977612e-06, "loss": 0.0035, "step": 57270 }, { "epoch": 0.46841395101606903, "grad_norm": 0.08514288812875748, "learning_rate": 9.46129917575543e-06, "loss": 0.0024, "step": 57280 }, { "epoch": 0.4684957271946682, "grad_norm": 0.055654335767030716, "learning_rate": 9.460976909652957e-06, "loss": 0.0022, "step": 57290 }, { "epoch": 0.46857750337326737, "grad_norm": 0.12399227917194366, "learning_rate": 9.460654552676758e-06, "loss": 0.0027, "step": 57300 }, { "epoch": 0.46865927955186654, "grad_norm": 0.050608377903699875, "learning_rate": 9.460332104833401e-06, "loss": 0.0023, "step": 57310 }, { "epoch": 0.4687410557304657, "grad_norm": 0.1556631326675415, "learning_rate": 9.460009566129451e-06, "loss": 0.002, "step": 57320 }, { "epoch": 0.4688228319090649, "grad_norm": 0.0600995309650898, "learning_rate": 9.459686936571484e-06, "loss": 0.0035, "step": 57330 }, { "epoch": 0.46890460808766404, "grad_norm": 0.2373771071434021, "learning_rate": 9.459364216166067e-06, "loss": 0.0035, "step": 57340 }, { "epoch": 0.4689863842662632, "grad_norm": 0.04036537930369377, "learning_rate": 9.459041404919776e-06, "loss": 0.0016, "step": 57350 }, { "epoch": 0.46906816044486244, "grad_norm": 0.05539639666676521, "learning_rate": 9.458718502839189e-06, "loss": 0.0021, "step": 57360 }, { "epoch": 0.4691499366234616, "grad_norm": 0.0759129673242569, "learning_rate": 9.45839550993088e-06, "loss": 0.0025, "step": 57370 }, { "epoch": 0.4692317128020608, "grad_norm": 0.04386233538389206, "learning_rate": 9.458072426201433e-06, "loss": 0.0031, "step": 57380 }, { "epoch": 0.46931348898065994, "grad_norm": 0.05332547426223755, "learning_rate": 9.457749251657424e-06, "loss": 0.0033, "step": 57390 }, { "epoch": 0.4693952651592591, "grad_norm": 0.029974132776260376, "learning_rate": 9.45742598630544e-06, "loss": 0.0022, "step": 57400 }, { "epoch": 0.4694770413378583, "grad_norm": 0.10525736212730408, "learning_rate": 9.457102630152067e-06, "loss": 0.0022, "step": 57410 }, { "epoch": 0.46955881751645745, "grad_norm": 0.04862292483448982, "learning_rate": 9.456779183203889e-06, "loss": 0.0033, "step": 57420 }, { "epoch": 0.4696405936950566, "grad_norm": 0.09633568674325943, "learning_rate": 9.456455645467497e-06, "loss": 0.0036, "step": 57430 }, { "epoch": 0.4697223698736558, "grad_norm": 0.10234668105840683, "learning_rate": 9.45613201694948e-06, "loss": 0.002, "step": 57440 }, { "epoch": 0.46980414605225496, "grad_norm": 0.011987977661192417, "learning_rate": 9.455808297656433e-06, "loss": 0.0045, "step": 57450 }, { "epoch": 0.4698859222308541, "grad_norm": 0.055699530988931656, "learning_rate": 9.455484487594949e-06, "loss": 0.0024, "step": 57460 }, { "epoch": 0.46996769840945335, "grad_norm": 0.0755845308303833, "learning_rate": 9.455160586771623e-06, "loss": 0.0016, "step": 57470 }, { "epoch": 0.4700494745880525, "grad_norm": 0.14181432127952576, "learning_rate": 9.454836595193057e-06, "loss": 0.0022, "step": 57480 }, { "epoch": 0.4701312507666517, "grad_norm": 0.07039903849363327, "learning_rate": 9.454512512865846e-06, "loss": 0.0033, "step": 57490 }, { "epoch": 0.47021302694525086, "grad_norm": 0.048384372144937515, "learning_rate": 9.454188339796597e-06, "loss": 0.0034, "step": 57500 }, { "epoch": 0.47029480312385, "grad_norm": 0.05532531812787056, "learning_rate": 9.453864075991909e-06, "loss": 0.0026, "step": 57510 }, { "epoch": 0.4703765793024492, "grad_norm": 0.09848172217607498, "learning_rate": 9.453539721458389e-06, "loss": 0.0031, "step": 57520 }, { "epoch": 0.47045835548104836, "grad_norm": 0.07899555563926697, "learning_rate": 9.453215276202645e-06, "loss": 0.0031, "step": 57530 }, { "epoch": 0.47054013165964753, "grad_norm": 0.021910401061177254, "learning_rate": 9.452890740231288e-06, "loss": 0.0032, "step": 57540 }, { "epoch": 0.4706219078382467, "grad_norm": 0.12697580456733704, "learning_rate": 9.452566113550925e-06, "loss": 0.003, "step": 57550 }, { "epoch": 0.47070368401684587, "grad_norm": 0.07663995772600174, "learning_rate": 9.452241396168173e-06, "loss": 0.0027, "step": 57560 }, { "epoch": 0.4707854601954451, "grad_norm": 0.13156269490718842, "learning_rate": 9.451916588089643e-06, "loss": 0.0026, "step": 57570 }, { "epoch": 0.47086723637404426, "grad_norm": 0.10015285015106201, "learning_rate": 9.451591689321955e-06, "loss": 0.0041, "step": 57580 }, { "epoch": 0.47094901255264343, "grad_norm": 0.11168712377548218, "learning_rate": 9.451266699871724e-06, "loss": 0.0019, "step": 57590 }, { "epoch": 0.4710307887312426, "grad_norm": 0.02505389042198658, "learning_rate": 9.450941619745574e-06, "loss": 0.0023, "step": 57600 }, { "epoch": 0.47111256490984177, "grad_norm": 0.044175952672958374, "learning_rate": 9.450616448950125e-06, "loss": 0.0028, "step": 57610 }, { "epoch": 0.47119434108844094, "grad_norm": 0.2409268319606781, "learning_rate": 9.450291187492e-06, "loss": 0.0028, "step": 57620 }, { "epoch": 0.4712761172670401, "grad_norm": 0.009281880222260952, "learning_rate": 9.449965835377828e-06, "loss": 0.002, "step": 57630 }, { "epoch": 0.4713578934456393, "grad_norm": 0.06430760025978088, "learning_rate": 9.449640392614233e-06, "loss": 0.0016, "step": 57640 }, { "epoch": 0.47143966962423844, "grad_norm": 0.1379036009311676, "learning_rate": 9.449314859207848e-06, "loss": 0.0019, "step": 57650 }, { "epoch": 0.4715214458028376, "grad_norm": 0.1268576979637146, "learning_rate": 9.4489892351653e-06, "loss": 0.0022, "step": 57660 }, { "epoch": 0.4716032219814368, "grad_norm": 0.14855490624904633, "learning_rate": 9.448663520493227e-06, "loss": 0.0032, "step": 57670 }, { "epoch": 0.471684998160036, "grad_norm": 0.03002959117293358, "learning_rate": 9.448337715198263e-06, "loss": 0.0018, "step": 57680 }, { "epoch": 0.4717667743386352, "grad_norm": 0.05882679671049118, "learning_rate": 9.448011819287041e-06, "loss": 0.0028, "step": 57690 }, { "epoch": 0.47184855051723434, "grad_norm": 0.1040283590555191, "learning_rate": 9.447685832766205e-06, "loss": 0.0023, "step": 57700 }, { "epoch": 0.4719303266958335, "grad_norm": 0.08494030684232712, "learning_rate": 9.447359755642393e-06, "loss": 0.0019, "step": 57710 }, { "epoch": 0.4720121028744327, "grad_norm": 0.059327416121959686, "learning_rate": 9.447033587922248e-06, "loss": 0.0016, "step": 57720 }, { "epoch": 0.47209387905303185, "grad_norm": 0.22047622501850128, "learning_rate": 9.446707329612414e-06, "loss": 0.002, "step": 57730 }, { "epoch": 0.472175655231631, "grad_norm": 0.040628768503665924, "learning_rate": 9.446380980719536e-06, "loss": 0.0034, "step": 57740 }, { "epoch": 0.4722574314102302, "grad_norm": 0.022339550778269768, "learning_rate": 9.446054541250265e-06, "loss": 0.0014, "step": 57750 }, { "epoch": 0.47233920758882936, "grad_norm": 0.15008795261383057, "learning_rate": 9.445728011211248e-06, "loss": 0.0037, "step": 57760 }, { "epoch": 0.4724209837674285, "grad_norm": 0.07720671594142914, "learning_rate": 9.445401390609137e-06, "loss": 0.0031, "step": 57770 }, { "epoch": 0.4725027599460277, "grad_norm": 0.2823440730571747, "learning_rate": 9.445074679450588e-06, "loss": 0.0018, "step": 57780 }, { "epoch": 0.4725845361246269, "grad_norm": 0.05025578290224075, "learning_rate": 9.444747877742254e-06, "loss": 0.0029, "step": 57790 }, { "epoch": 0.4726663123032261, "grad_norm": 0.05713862180709839, "learning_rate": 9.444420985490791e-06, "loss": 0.0015, "step": 57800 }, { "epoch": 0.47274808848182526, "grad_norm": 0.05914926901459694, "learning_rate": 9.444094002702862e-06, "loss": 0.0021, "step": 57810 }, { "epoch": 0.4728298646604244, "grad_norm": 0.09796014428138733, "learning_rate": 9.443766929385127e-06, "loss": 0.0023, "step": 57820 }, { "epoch": 0.4729116408390236, "grad_norm": 0.06171972304582596, "learning_rate": 9.443439765544246e-06, "loss": 0.0016, "step": 57830 }, { "epoch": 0.47299341701762276, "grad_norm": 0.28347310423851013, "learning_rate": 9.443112511186885e-06, "loss": 0.0038, "step": 57840 }, { "epoch": 0.47307519319622193, "grad_norm": 0.09354253858327866, "learning_rate": 9.44278516631971e-06, "loss": 0.0024, "step": 57850 }, { "epoch": 0.4731569693748211, "grad_norm": 0.12210322171449661, "learning_rate": 9.44245773094939e-06, "loss": 0.0034, "step": 57860 }, { "epoch": 0.47323874555342027, "grad_norm": 0.08957008272409439, "learning_rate": 9.442130205082595e-06, "loss": 0.0018, "step": 57870 }, { "epoch": 0.47332052173201944, "grad_norm": 0.1149682030081749, "learning_rate": 9.441802588725998e-06, "loss": 0.0019, "step": 57880 }, { "epoch": 0.47340229791061866, "grad_norm": 0.059454236179590225, "learning_rate": 9.441474881886273e-06, "loss": 0.0023, "step": 57890 }, { "epoch": 0.47348407408921783, "grad_norm": 0.1481386125087738, "learning_rate": 9.441147084570092e-06, "loss": 0.0022, "step": 57900 }, { "epoch": 0.473565850267817, "grad_norm": 0.03598017245531082, "learning_rate": 9.440819196784137e-06, "loss": 0.0024, "step": 57910 }, { "epoch": 0.47364762644641617, "grad_norm": 0.04687781259417534, "learning_rate": 9.440491218535084e-06, "loss": 0.0013, "step": 57920 }, { "epoch": 0.47372940262501534, "grad_norm": 0.12040120363235474, "learning_rate": 9.440163149829615e-06, "loss": 0.0017, "step": 57930 }, { "epoch": 0.4738111788036145, "grad_norm": 0.15660062432289124, "learning_rate": 9.439834990674414e-06, "loss": 0.0021, "step": 57940 }, { "epoch": 0.4738929549822137, "grad_norm": 0.019149716943502426, "learning_rate": 9.439506741076166e-06, "loss": 0.0026, "step": 57950 }, { "epoch": 0.47397473116081285, "grad_norm": 0.03880469128489494, "learning_rate": 9.439178401041555e-06, "loss": 0.0023, "step": 57960 }, { "epoch": 0.474056507339412, "grad_norm": 0.11536472290754318, "learning_rate": 9.438849970577272e-06, "loss": 0.0037, "step": 57970 }, { "epoch": 0.4741382835180112, "grad_norm": 0.11423853039741516, "learning_rate": 9.43852144969001e-06, "loss": 0.0048, "step": 57980 }, { "epoch": 0.47422005969661035, "grad_norm": 0.06653346866369247, "learning_rate": 9.438192838386456e-06, "loss": 0.002, "step": 57990 }, { "epoch": 0.4743018358752096, "grad_norm": 0.15753212571144104, "learning_rate": 9.437864136673308e-06, "loss": 0.0029, "step": 58000 }, { "epoch": 0.47438361205380875, "grad_norm": 0.07332460582256317, "learning_rate": 9.437535344557258e-06, "loss": 0.0032, "step": 58010 }, { "epoch": 0.4744653882324079, "grad_norm": 0.016050834208726883, "learning_rate": 9.437206462045007e-06, "loss": 0.0027, "step": 58020 }, { "epoch": 0.4745471644110071, "grad_norm": 0.015469333156943321, "learning_rate": 9.436877489143253e-06, "loss": 0.0059, "step": 58030 }, { "epoch": 0.47462894058960625, "grad_norm": 0.034806523472070694, "learning_rate": 9.436548425858699e-06, "loss": 0.0025, "step": 58040 }, { "epoch": 0.4747107167682054, "grad_norm": 0.09800037741661072, "learning_rate": 9.436219272198047e-06, "loss": 0.0016, "step": 58050 }, { "epoch": 0.4747924929468046, "grad_norm": 0.15481267869472504, "learning_rate": 9.435890028168003e-06, "loss": 0.0015, "step": 58060 }, { "epoch": 0.47487426912540376, "grad_norm": 0.02800498716533184, "learning_rate": 9.435560693775272e-06, "loss": 0.0024, "step": 58070 }, { "epoch": 0.4749560453040029, "grad_norm": 0.15681307017803192, "learning_rate": 9.435231269026565e-06, "loss": 0.0034, "step": 58080 }, { "epoch": 0.4750378214826021, "grad_norm": 0.03366045281291008, "learning_rate": 9.434901753928593e-06, "loss": 0.0041, "step": 58090 }, { "epoch": 0.47511959766120127, "grad_norm": 0.11974582076072693, "learning_rate": 9.434572148488065e-06, "loss": 0.003, "step": 58100 }, { "epoch": 0.4752013738398005, "grad_norm": 0.13202737271785736, "learning_rate": 9.434242452711702e-06, "loss": 0.0036, "step": 58110 }, { "epoch": 0.47528315001839966, "grad_norm": 0.04899997636675835, "learning_rate": 9.433912666606213e-06, "loss": 0.0028, "step": 58120 }, { "epoch": 0.4753649261969988, "grad_norm": 0.10558205097913742, "learning_rate": 9.433582790178318e-06, "loss": 0.0028, "step": 58130 }, { "epoch": 0.475446702375598, "grad_norm": 0.02736429125070572, "learning_rate": 9.433252823434738e-06, "loss": 0.0022, "step": 58140 }, { "epoch": 0.47552847855419716, "grad_norm": 0.0946354940533638, "learning_rate": 9.432922766382196e-06, "loss": 0.0024, "step": 58150 }, { "epoch": 0.47561025473279633, "grad_norm": 0.1421622782945633, "learning_rate": 9.432592619027414e-06, "loss": 0.0013, "step": 58160 }, { "epoch": 0.4756920309113955, "grad_norm": 0.11287149041891098, "learning_rate": 9.432262381377117e-06, "loss": 0.0022, "step": 58170 }, { "epoch": 0.47577380708999467, "grad_norm": 0.03478940948843956, "learning_rate": 9.43193205343803e-06, "loss": 0.0018, "step": 58180 }, { "epoch": 0.47585558326859384, "grad_norm": 0.025289949029684067, "learning_rate": 9.431601635216888e-06, "loss": 0.004, "step": 58190 }, { "epoch": 0.475937359447193, "grad_norm": 0.21414794027805328, "learning_rate": 9.431271126720418e-06, "loss": 0.0035, "step": 58200 }, { "epoch": 0.47601913562579223, "grad_norm": 0.084896519780159, "learning_rate": 9.430940527955352e-06, "loss": 0.003, "step": 58210 }, { "epoch": 0.4761009118043914, "grad_norm": 0.027180859819054604, "learning_rate": 9.430609838928426e-06, "loss": 0.0033, "step": 58220 }, { "epoch": 0.47618268798299057, "grad_norm": 0.1403244584798813, "learning_rate": 9.430279059646377e-06, "loss": 0.0017, "step": 58230 }, { "epoch": 0.47626446416158974, "grad_norm": 0.029455358162522316, "learning_rate": 9.429948190115943e-06, "loss": 0.0012, "step": 58240 }, { "epoch": 0.4763462403401889, "grad_norm": 0.15578211843967438, "learning_rate": 9.429617230343861e-06, "loss": 0.0061, "step": 58250 }, { "epoch": 0.4764280165187881, "grad_norm": 0.07051493227481842, "learning_rate": 9.429286180336878e-06, "loss": 0.0041, "step": 58260 }, { "epoch": 0.47650979269738725, "grad_norm": 0.08272914588451385, "learning_rate": 9.428955040101734e-06, "loss": 0.0017, "step": 58270 }, { "epoch": 0.4765915688759864, "grad_norm": 0.0543193444609642, "learning_rate": 9.428623809645177e-06, "loss": 0.0023, "step": 58280 }, { "epoch": 0.4766733450545856, "grad_norm": 0.04743937775492668, "learning_rate": 9.42829248897395e-06, "loss": 0.0026, "step": 58290 }, { "epoch": 0.47675512123318475, "grad_norm": 0.05131608620285988, "learning_rate": 9.42796107809481e-06, "loss": 0.0026, "step": 58300 }, { "epoch": 0.4768368974117839, "grad_norm": 0.09261079132556915, "learning_rate": 9.4276295770145e-06, "loss": 0.0015, "step": 58310 }, { "epoch": 0.47691867359038315, "grad_norm": 0.043079786002635956, "learning_rate": 9.427297985739778e-06, "loss": 0.0025, "step": 58320 }, { "epoch": 0.4770004497689823, "grad_norm": 0.0648718923330307, "learning_rate": 9.426966304277398e-06, "loss": 0.0032, "step": 58330 }, { "epoch": 0.4770822259475815, "grad_norm": 0.06544636189937592, "learning_rate": 9.426634532634114e-06, "loss": 0.0035, "step": 58340 }, { "epoch": 0.47716400212618065, "grad_norm": 0.06180962920188904, "learning_rate": 9.426302670816688e-06, "loss": 0.0031, "step": 58350 }, { "epoch": 0.4772457783047798, "grad_norm": 0.08363823592662811, "learning_rate": 9.425970718831879e-06, "loss": 0.0016, "step": 58360 }, { "epoch": 0.477327554483379, "grad_norm": 0.007865252904593945, "learning_rate": 9.425638676686448e-06, "loss": 0.0022, "step": 58370 }, { "epoch": 0.47740933066197816, "grad_norm": 0.04831537976861, "learning_rate": 9.42530654438716e-06, "loss": 0.0021, "step": 58380 }, { "epoch": 0.47749110684057733, "grad_norm": 0.022356420755386353, "learning_rate": 9.424974321940782e-06, "loss": 0.0034, "step": 58390 }, { "epoch": 0.4775728830191765, "grad_norm": 0.015277620404958725, "learning_rate": 9.424642009354077e-06, "loss": 0.0013, "step": 58400 }, { "epoch": 0.47765465919777567, "grad_norm": 0.1061374619603157, "learning_rate": 9.42430960663382e-06, "loss": 0.0024, "step": 58410 }, { "epoch": 0.47773643537637484, "grad_norm": 0.05349360778927803, "learning_rate": 9.42397711378678e-06, "loss": 0.003, "step": 58420 }, { "epoch": 0.47781821155497406, "grad_norm": 0.03910503908991814, "learning_rate": 9.423644530819728e-06, "loss": 0.0026, "step": 58430 }, { "epoch": 0.47789998773357323, "grad_norm": 0.10422585904598236, "learning_rate": 9.423311857739444e-06, "loss": 0.0036, "step": 58440 }, { "epoch": 0.4779817639121724, "grad_norm": 0.0928611159324646, "learning_rate": 9.422979094552703e-06, "loss": 0.0019, "step": 58450 }, { "epoch": 0.47806354009077157, "grad_norm": 0.06827688217163086, "learning_rate": 9.422646241266279e-06, "loss": 0.0033, "step": 58460 }, { "epoch": 0.47814531626937073, "grad_norm": 0.05031880363821983, "learning_rate": 9.422313297886958e-06, "loss": 0.0016, "step": 58470 }, { "epoch": 0.4782270924479699, "grad_norm": 0.04172680526971817, "learning_rate": 9.42198026442152e-06, "loss": 0.0034, "step": 58480 }, { "epoch": 0.4783088686265691, "grad_norm": 0.12294602394104004, "learning_rate": 9.421647140876751e-06, "loss": 0.0034, "step": 58490 }, { "epoch": 0.47839064480516824, "grad_norm": 0.17108634114265442, "learning_rate": 9.421313927259435e-06, "loss": 0.0029, "step": 58500 }, { "epoch": 0.4784724209837674, "grad_norm": 0.06788614392280579, "learning_rate": 9.420980623576359e-06, "loss": 0.0025, "step": 58510 }, { "epoch": 0.4785541971623666, "grad_norm": 0.3085208833217621, "learning_rate": 9.420647229834316e-06, "loss": 0.0063, "step": 58520 }, { "epoch": 0.4786359733409658, "grad_norm": 0.08141771703958511, "learning_rate": 9.420313746040095e-06, "loss": 0.0036, "step": 58530 }, { "epoch": 0.478717749519565, "grad_norm": 0.0880434587597847, "learning_rate": 9.419980172200489e-06, "loss": 0.0024, "step": 58540 }, { "epoch": 0.47879952569816414, "grad_norm": 0.07029209285974503, "learning_rate": 9.419646508322296e-06, "loss": 0.0032, "step": 58550 }, { "epoch": 0.4788813018767633, "grad_norm": 0.03484824672341347, "learning_rate": 9.41931275441231e-06, "loss": 0.0029, "step": 58560 }, { "epoch": 0.4789630780553625, "grad_norm": 0.04342436045408249, "learning_rate": 9.418978910477332e-06, "loss": 0.0034, "step": 58570 }, { "epoch": 0.47904485423396165, "grad_norm": 0.04999012127518654, "learning_rate": 9.41864497652416e-06, "loss": 0.0023, "step": 58580 }, { "epoch": 0.4791266304125608, "grad_norm": 0.02672421745955944, "learning_rate": 9.418310952559598e-06, "loss": 0.0018, "step": 58590 }, { "epoch": 0.47920840659116, "grad_norm": 0.11024850606918335, "learning_rate": 9.417976838590453e-06, "loss": 0.0019, "step": 58600 }, { "epoch": 0.47929018276975915, "grad_norm": 0.08119737356901169, "learning_rate": 9.417642634623528e-06, "loss": 0.0025, "step": 58610 }, { "epoch": 0.4793719589483583, "grad_norm": 0.02800360694527626, "learning_rate": 9.417308340665629e-06, "loss": 0.0026, "step": 58620 }, { "epoch": 0.4794537351269575, "grad_norm": 0.0473286397755146, "learning_rate": 9.416973956723571e-06, "loss": 0.0026, "step": 58630 }, { "epoch": 0.4795355113055567, "grad_norm": 0.15336760878562927, "learning_rate": 9.416639482804163e-06, "loss": 0.0042, "step": 58640 }, { "epoch": 0.4796172874841559, "grad_norm": 0.13172489404678345, "learning_rate": 9.416304918914219e-06, "loss": 0.0051, "step": 58650 }, { "epoch": 0.47969906366275505, "grad_norm": 0.03833252936601639, "learning_rate": 9.415970265060552e-06, "loss": 0.0021, "step": 58660 }, { "epoch": 0.4797808398413542, "grad_norm": 0.20842495560646057, "learning_rate": 9.415635521249982e-06, "loss": 0.0043, "step": 58670 }, { "epoch": 0.4798626160199534, "grad_norm": 0.09024810791015625, "learning_rate": 9.415300687489327e-06, "loss": 0.0023, "step": 58680 }, { "epoch": 0.47994439219855256, "grad_norm": 0.07736390084028244, "learning_rate": 9.41496576378541e-06, "loss": 0.0034, "step": 58690 }, { "epoch": 0.48002616837715173, "grad_norm": 0.09414848685264587, "learning_rate": 9.414630750145051e-06, "loss": 0.0052, "step": 58700 }, { "epoch": 0.4801079445557509, "grad_norm": 0.2739401161670685, "learning_rate": 9.414295646575076e-06, "loss": 0.0039, "step": 58710 }, { "epoch": 0.48018972073435007, "grad_norm": 0.07042448967695236, "learning_rate": 9.41396045308231e-06, "loss": 0.0035, "step": 58720 }, { "epoch": 0.48027149691294924, "grad_norm": 0.15496726334095, "learning_rate": 9.413625169673578e-06, "loss": 0.0021, "step": 58730 }, { "epoch": 0.4803532730915484, "grad_norm": 0.19675931334495544, "learning_rate": 9.413289796355718e-06, "loss": 0.0031, "step": 58740 }, { "epoch": 0.48043504927014763, "grad_norm": 0.12628063559532166, "learning_rate": 9.412954333135558e-06, "loss": 0.0023, "step": 58750 }, { "epoch": 0.4805168254487468, "grad_norm": 0.09738168120384216, "learning_rate": 9.412618780019928e-06, "loss": 0.0021, "step": 58760 }, { "epoch": 0.48059860162734597, "grad_norm": 0.08927211910486221, "learning_rate": 9.41228313701567e-06, "loss": 0.0029, "step": 58770 }, { "epoch": 0.48068037780594514, "grad_norm": 0.08714508265256882, "learning_rate": 9.411947404129615e-06, "loss": 0.0024, "step": 58780 }, { "epoch": 0.4807621539845443, "grad_norm": 0.04372129216790199, "learning_rate": 9.411611581368607e-06, "loss": 0.0022, "step": 58790 }, { "epoch": 0.4808439301631435, "grad_norm": 0.13916660845279694, "learning_rate": 9.411275668739485e-06, "loss": 0.0017, "step": 58800 }, { "epoch": 0.48092570634174264, "grad_norm": 0.07589033246040344, "learning_rate": 9.410939666249091e-06, "loss": 0.002, "step": 58810 }, { "epoch": 0.4810074825203418, "grad_norm": 0.04504668340086937, "learning_rate": 9.41060357390427e-06, "loss": 0.0042, "step": 58820 }, { "epoch": 0.481089258698941, "grad_norm": 0.18724210560321808, "learning_rate": 9.410267391711872e-06, "loss": 0.0024, "step": 58830 }, { "epoch": 0.48117103487754015, "grad_norm": 0.24542509019374847, "learning_rate": 9.409931119678741e-06, "loss": 0.0039, "step": 58840 }, { "epoch": 0.4812528110561394, "grad_norm": 0.0476987361907959, "learning_rate": 9.409594757811727e-06, "loss": 0.0042, "step": 58850 }, { "epoch": 0.48133458723473854, "grad_norm": 0.06196524202823639, "learning_rate": 9.409258306117685e-06, "loss": 0.003, "step": 58860 }, { "epoch": 0.4814163634133377, "grad_norm": 0.027752110734581947, "learning_rate": 9.408921764603467e-06, "loss": 0.0016, "step": 58870 }, { "epoch": 0.4814981395919369, "grad_norm": 0.062129613012075424, "learning_rate": 9.408585133275929e-06, "loss": 0.0027, "step": 58880 }, { "epoch": 0.48157991577053605, "grad_norm": 0.03863707557320595, "learning_rate": 9.408248412141928e-06, "loss": 0.0029, "step": 58890 }, { "epoch": 0.4816616919491352, "grad_norm": 0.06943682581186295, "learning_rate": 9.407911601208325e-06, "loss": 0.0013, "step": 58900 }, { "epoch": 0.4817434681277344, "grad_norm": 0.06367553025484085, "learning_rate": 9.407574700481978e-06, "loss": 0.0019, "step": 58910 }, { "epoch": 0.48182524430633356, "grad_norm": 0.18839424848556519, "learning_rate": 9.407237709969752e-06, "loss": 0.0055, "step": 58920 }, { "epoch": 0.4819070204849327, "grad_norm": 0.14619547128677368, "learning_rate": 9.406900629678513e-06, "loss": 0.0031, "step": 58930 }, { "epoch": 0.4819887966635319, "grad_norm": 0.06322646141052246, "learning_rate": 9.406563459615123e-06, "loss": 0.0012, "step": 58940 }, { "epoch": 0.48207057284213106, "grad_norm": 0.08340559154748917, "learning_rate": 9.406226199786457e-06, "loss": 0.0017, "step": 58950 }, { "epoch": 0.4821523490207303, "grad_norm": 0.32609823346138, "learning_rate": 9.40588885019938e-06, "loss": 0.0044, "step": 58960 }, { "epoch": 0.48223412519932946, "grad_norm": 0.07392769306898117, "learning_rate": 9.405551410860764e-06, "loss": 0.0016, "step": 58970 }, { "epoch": 0.4823159013779286, "grad_norm": 0.04321371763944626, "learning_rate": 9.405213881777488e-06, "loss": 0.0014, "step": 58980 }, { "epoch": 0.4823976775565278, "grad_norm": 0.1694372594356537, "learning_rate": 9.404876262956423e-06, "loss": 0.002, "step": 58990 }, { "epoch": 0.48247945373512696, "grad_norm": 0.2014578878879547, "learning_rate": 9.404538554404448e-06, "loss": 0.0035, "step": 59000 }, { "epoch": 0.48256122991372613, "grad_norm": 0.08926431089639664, "learning_rate": 9.404200756128444e-06, "loss": 0.0027, "step": 59010 }, { "epoch": 0.4826430060923253, "grad_norm": 0.1701796054840088, "learning_rate": 9.403862868135288e-06, "loss": 0.0024, "step": 59020 }, { "epoch": 0.48272478227092447, "grad_norm": 0.033693827688694, "learning_rate": 9.403524890431868e-06, "loss": 0.0014, "step": 59030 }, { "epoch": 0.48280655844952364, "grad_norm": 0.04721451178193092, "learning_rate": 9.403186823025066e-06, "loss": 0.0017, "step": 59040 }, { "epoch": 0.4828883346281228, "grad_norm": 0.14446327090263367, "learning_rate": 9.402848665921769e-06, "loss": 0.0014, "step": 59050 }, { "epoch": 0.482970110806722, "grad_norm": 0.08336202800273895, "learning_rate": 9.402510419128866e-06, "loss": 0.0044, "step": 59060 }, { "epoch": 0.4830518869853212, "grad_norm": 0.02990983985364437, "learning_rate": 9.402172082653246e-06, "loss": 0.0024, "step": 59070 }, { "epoch": 0.48313366316392037, "grad_norm": 0.06036437302827835, "learning_rate": 9.401833656501804e-06, "loss": 0.0022, "step": 59080 }, { "epoch": 0.48321543934251954, "grad_norm": 0.052690692245960236, "learning_rate": 9.401495140681431e-06, "loss": 0.0052, "step": 59090 }, { "epoch": 0.4832972155211187, "grad_norm": 0.028986899182200432, "learning_rate": 9.401156535199025e-06, "loss": 0.0038, "step": 59100 }, { "epoch": 0.4833789916997179, "grad_norm": 0.07407808303833008, "learning_rate": 9.400817840061483e-06, "loss": 0.0048, "step": 59110 }, { "epoch": 0.48346076787831704, "grad_norm": 0.0323932021856308, "learning_rate": 9.400479055275707e-06, "loss": 0.0034, "step": 59120 }, { "epoch": 0.4835425440569162, "grad_norm": 0.13354523479938507, "learning_rate": 9.400140180848591e-06, "loss": 0.0018, "step": 59130 }, { "epoch": 0.4836243202355154, "grad_norm": 0.04602542892098427, "learning_rate": 9.399801216787046e-06, "loss": 0.0015, "step": 59140 }, { "epoch": 0.48370609641411455, "grad_norm": 0.09984852373600006, "learning_rate": 9.399462163097973e-06, "loss": 0.0022, "step": 59150 }, { "epoch": 0.4837878725927137, "grad_norm": 0.08978478610515594, "learning_rate": 9.39912301978828e-06, "loss": 0.0035, "step": 59160 }, { "epoch": 0.48386964877131294, "grad_norm": 0.11988920718431473, "learning_rate": 9.398783786864876e-06, "loss": 0.0016, "step": 59170 }, { "epoch": 0.4839514249499121, "grad_norm": 0.11636830866336823, "learning_rate": 9.398444464334668e-06, "loss": 0.0032, "step": 59180 }, { "epoch": 0.4840332011285113, "grad_norm": 0.06787017732858658, "learning_rate": 9.398105052204573e-06, "loss": 0.0022, "step": 59190 }, { "epoch": 0.48411497730711045, "grad_norm": 0.04738561436533928, "learning_rate": 9.397765550481504e-06, "loss": 0.0041, "step": 59200 }, { "epoch": 0.4841967534857096, "grad_norm": 0.11518597602844238, "learning_rate": 9.397425959172375e-06, "loss": 0.0028, "step": 59210 }, { "epoch": 0.4842785296643088, "grad_norm": 0.03920254856348038, "learning_rate": 9.397086278284105e-06, "loss": 0.0013, "step": 59220 }, { "epoch": 0.48436030584290796, "grad_norm": 0.020051131024956703, "learning_rate": 9.396746507823613e-06, "loss": 0.0018, "step": 59230 }, { "epoch": 0.4844420820215071, "grad_norm": 0.052447184920310974, "learning_rate": 9.396406647797822e-06, "loss": 0.0022, "step": 59240 }, { "epoch": 0.4845238582001063, "grad_norm": 0.15782439708709717, "learning_rate": 9.396066698213653e-06, "loss": 0.0037, "step": 59250 }, { "epoch": 0.48460563437870546, "grad_norm": 0.06148945167660713, "learning_rate": 9.39572665907803e-06, "loss": 0.003, "step": 59260 }, { "epoch": 0.48468741055730463, "grad_norm": 0.04355819523334503, "learning_rate": 9.395386530397886e-06, "loss": 0.0051, "step": 59270 }, { "epoch": 0.48476918673590386, "grad_norm": 0.10269343107938766, "learning_rate": 9.395046312180142e-06, "loss": 0.0026, "step": 59280 }, { "epoch": 0.484850962914503, "grad_norm": 0.12534387409687042, "learning_rate": 9.394706004431735e-06, "loss": 0.0037, "step": 59290 }, { "epoch": 0.4849327390931022, "grad_norm": 0.05481807515025139, "learning_rate": 9.394365607159592e-06, "loss": 0.0025, "step": 59300 }, { "epoch": 0.48501451527170136, "grad_norm": 0.18388083577156067, "learning_rate": 9.394025120370651e-06, "loss": 0.0035, "step": 59310 }, { "epoch": 0.48509629145030053, "grad_norm": 0.05608098953962326, "learning_rate": 9.393684544071846e-06, "loss": 0.0026, "step": 59320 }, { "epoch": 0.4851780676288997, "grad_norm": 0.04098426550626755, "learning_rate": 9.393343878270118e-06, "loss": 0.0025, "step": 59330 }, { "epoch": 0.48525984380749887, "grad_norm": 0.025029528886079788, "learning_rate": 9.393003122972402e-06, "loss": 0.0052, "step": 59340 }, { "epoch": 0.48534161998609804, "grad_norm": 0.06530512869358063, "learning_rate": 9.392662278185642e-06, "loss": 0.0023, "step": 59350 }, { "epoch": 0.4854233961646972, "grad_norm": 0.03711284324526787, "learning_rate": 9.39232134391678e-06, "loss": 0.0025, "step": 59360 }, { "epoch": 0.4855051723432964, "grad_norm": 0.03897876292467117, "learning_rate": 9.391980320172761e-06, "loss": 0.0017, "step": 59370 }, { "epoch": 0.48558694852189555, "grad_norm": 0.04996999353170395, "learning_rate": 9.391639206960535e-06, "loss": 0.0023, "step": 59380 }, { "epoch": 0.48566872470049477, "grad_norm": 0.07084043323993683, "learning_rate": 9.391298004287048e-06, "loss": 0.0022, "step": 59390 }, { "epoch": 0.48575050087909394, "grad_norm": 0.0413469597697258, "learning_rate": 9.39095671215925e-06, "loss": 0.0043, "step": 59400 }, { "epoch": 0.4858322770576931, "grad_norm": 0.10748358070850372, "learning_rate": 9.390615330584099e-06, "loss": 0.0022, "step": 59410 }, { "epoch": 0.4859140532362923, "grad_norm": 0.11717411875724792, "learning_rate": 9.390273859568542e-06, "loss": 0.0044, "step": 59420 }, { "epoch": 0.48599582941489144, "grad_norm": 0.5221945643424988, "learning_rate": 9.389932299119537e-06, "loss": 0.0027, "step": 59430 }, { "epoch": 0.4860776055934906, "grad_norm": 0.06606949865818024, "learning_rate": 9.389590649244042e-06, "loss": 0.0036, "step": 59440 }, { "epoch": 0.4861593817720898, "grad_norm": 0.07643967866897583, "learning_rate": 9.389248909949021e-06, "loss": 0.0026, "step": 59450 }, { "epoch": 0.48624115795068895, "grad_norm": 0.2289966195821762, "learning_rate": 9.388907081241429e-06, "loss": 0.0035, "step": 59460 }, { "epoch": 0.4863229341292881, "grad_norm": 0.04098083823919296, "learning_rate": 9.388565163128233e-06, "loss": 0.0032, "step": 59470 }, { "epoch": 0.4864047103078873, "grad_norm": 0.136375293135643, "learning_rate": 9.388223155616397e-06, "loss": 0.0031, "step": 59480 }, { "epoch": 0.4864864864864865, "grad_norm": 0.1429247111082077, "learning_rate": 9.387881058712888e-06, "loss": 0.0037, "step": 59490 }, { "epoch": 0.4865682626650857, "grad_norm": 0.07926197350025177, "learning_rate": 9.387538872424678e-06, "loss": 0.0018, "step": 59500 }, { "epoch": 0.48665003884368485, "grad_norm": 0.09242890030145645, "learning_rate": 9.387196596758733e-06, "loss": 0.0025, "step": 59510 }, { "epoch": 0.486731815022284, "grad_norm": 0.16437332332134247, "learning_rate": 9.386854231722027e-06, "loss": 0.002, "step": 59520 }, { "epoch": 0.4868135912008832, "grad_norm": 0.008344443514943123, "learning_rate": 9.386511777321533e-06, "loss": 0.0039, "step": 59530 }, { "epoch": 0.48689536737948236, "grad_norm": 0.055327750742435455, "learning_rate": 9.386169233564231e-06, "loss": 0.0019, "step": 59540 }, { "epoch": 0.4869771435580815, "grad_norm": 0.06570419669151306, "learning_rate": 9.385826600457094e-06, "loss": 0.0017, "step": 59550 }, { "epoch": 0.4870589197366807, "grad_norm": 0.06625238060951233, "learning_rate": 9.385483878007106e-06, "loss": 0.003, "step": 59560 }, { "epoch": 0.48714069591527986, "grad_norm": 0.006767235696315765, "learning_rate": 9.385141066221249e-06, "loss": 0.0016, "step": 59570 }, { "epoch": 0.48722247209387903, "grad_norm": 0.025128643959760666, "learning_rate": 9.3847981651065e-06, "loss": 0.005, "step": 59580 }, { "epoch": 0.4873042482724782, "grad_norm": 0.06304896622896194, "learning_rate": 9.38445517466985e-06, "loss": 0.0029, "step": 59590 }, { "epoch": 0.4873860244510774, "grad_norm": 0.12773801386356354, "learning_rate": 9.384112094918283e-06, "loss": 0.0016, "step": 59600 }, { "epoch": 0.4874678006296766, "grad_norm": 0.13732869923114777, "learning_rate": 9.383768925858791e-06, "loss": 0.0015, "step": 59610 }, { "epoch": 0.48754957680827576, "grad_norm": 0.29208120703697205, "learning_rate": 9.383425667498362e-06, "loss": 0.0024, "step": 59620 }, { "epoch": 0.48763135298687493, "grad_norm": 0.07790271937847137, "learning_rate": 9.383082319843989e-06, "loss": 0.0024, "step": 59630 }, { "epoch": 0.4877131291654741, "grad_norm": 0.08241838961839676, "learning_rate": 9.382738882902666e-06, "loss": 0.0024, "step": 59640 }, { "epoch": 0.48779490534407327, "grad_norm": 0.06454329937696457, "learning_rate": 9.38239535668139e-06, "loss": 0.0027, "step": 59650 }, { "epoch": 0.48787668152267244, "grad_norm": 0.06303489208221436, "learning_rate": 9.382051741187157e-06, "loss": 0.0033, "step": 59660 }, { "epoch": 0.4879584577012716, "grad_norm": 0.11999116837978363, "learning_rate": 9.38170803642697e-06, "loss": 0.0023, "step": 59670 }, { "epoch": 0.4880402338798708, "grad_norm": 0.12117689847946167, "learning_rate": 9.381364242407827e-06, "loss": 0.0028, "step": 59680 }, { "epoch": 0.48812201005846995, "grad_norm": 0.11918973177671432, "learning_rate": 9.381020359136735e-06, "loss": 0.0032, "step": 59690 }, { "epoch": 0.4882037862370691, "grad_norm": 0.10829992592334747, "learning_rate": 9.380676386620695e-06, "loss": 0.0022, "step": 59700 }, { "epoch": 0.48828556241566834, "grad_norm": 0.18866142630577087, "learning_rate": 9.380332324866719e-06, "loss": 0.0013, "step": 59710 }, { "epoch": 0.4883673385942675, "grad_norm": 0.10856427252292633, "learning_rate": 9.37998817388181e-06, "loss": 0.002, "step": 59720 }, { "epoch": 0.4884491147728667, "grad_norm": 0.049368612468242645, "learning_rate": 9.379643933672985e-06, "loss": 0.0016, "step": 59730 }, { "epoch": 0.48853089095146585, "grad_norm": 0.029880426824092865, "learning_rate": 9.379299604247249e-06, "loss": 0.001, "step": 59740 }, { "epoch": 0.488612667130065, "grad_norm": 0.10141783207654953, "learning_rate": 9.378955185611624e-06, "loss": 0.0027, "step": 59750 }, { "epoch": 0.4886944433086642, "grad_norm": 0.043784163892269135, "learning_rate": 9.37861067777312e-06, "loss": 0.0018, "step": 59760 }, { "epoch": 0.48877621948726335, "grad_norm": 0.03734033927321434, "learning_rate": 9.378266080738759e-06, "loss": 0.0025, "step": 59770 }, { "epoch": 0.4888579956658625, "grad_norm": 0.05777325481176376, "learning_rate": 9.37792139451556e-06, "loss": 0.0019, "step": 59780 }, { "epoch": 0.4889397718444617, "grad_norm": 0.01996167190372944, "learning_rate": 9.37757661911054e-06, "loss": 0.0042, "step": 59790 }, { "epoch": 0.48902154802306086, "grad_norm": 0.07620869576931, "learning_rate": 9.37723175453073e-06, "loss": 0.0019, "step": 59800 }, { "epoch": 0.4891033242016601, "grad_norm": 0.03683283552527428, "learning_rate": 9.376886800783148e-06, "loss": 0.0026, "step": 59810 }, { "epoch": 0.48918510038025925, "grad_norm": 0.016863446682691574, "learning_rate": 9.376541757874825e-06, "loss": 0.002, "step": 59820 }, { "epoch": 0.4892668765588584, "grad_norm": 0.3469472825527191, "learning_rate": 9.37619662581279e-06, "loss": 0.0048, "step": 59830 }, { "epoch": 0.4893486527374576, "grad_norm": 0.058540090918540955, "learning_rate": 9.375851404604071e-06, "loss": 0.0022, "step": 59840 }, { "epoch": 0.48943042891605676, "grad_norm": 0.04053940623998642, "learning_rate": 9.375506094255704e-06, "loss": 0.0028, "step": 59850 }, { "epoch": 0.48951220509465593, "grad_norm": 0.09495624899864197, "learning_rate": 9.37516069477472e-06, "loss": 0.002, "step": 59860 }, { "epoch": 0.4895939812732551, "grad_norm": 0.049106862396001816, "learning_rate": 9.374815206168156e-06, "loss": 0.0017, "step": 59870 }, { "epoch": 0.48967575745185427, "grad_norm": 0.11639729887247086, "learning_rate": 9.374469628443051e-06, "loss": 0.0024, "step": 59880 }, { "epoch": 0.48975753363045343, "grad_norm": 0.387176513671875, "learning_rate": 9.374123961606444e-06, "loss": 0.0021, "step": 59890 }, { "epoch": 0.4898393098090526, "grad_norm": 0.02150678262114525, "learning_rate": 9.373778205665376e-06, "loss": 0.0028, "step": 59900 }, { "epoch": 0.48992108598765177, "grad_norm": 0.03412863239645958, "learning_rate": 9.373432360626891e-06, "loss": 0.0019, "step": 59910 }, { "epoch": 0.490002862166251, "grad_norm": 0.09135202318429947, "learning_rate": 9.373086426498035e-06, "loss": 0.002, "step": 59920 }, { "epoch": 0.49008463834485017, "grad_norm": 0.4184563159942627, "learning_rate": 9.372740403285851e-06, "loss": 0.0054, "step": 59930 }, { "epoch": 0.49016641452344933, "grad_norm": 0.12235680967569351, "learning_rate": 9.372394290997394e-06, "loss": 0.003, "step": 59940 }, { "epoch": 0.4902481907020485, "grad_norm": 0.08605843037366867, "learning_rate": 9.372048089639709e-06, "loss": 0.0021, "step": 59950 }, { "epoch": 0.49032996688064767, "grad_norm": 0.04970422759652138, "learning_rate": 9.371701799219851e-06, "loss": 0.0019, "step": 59960 }, { "epoch": 0.49041174305924684, "grad_norm": 0.13398684561252594, "learning_rate": 9.371355419744875e-06, "loss": 0.0018, "step": 59970 }, { "epoch": 0.490493519237846, "grad_norm": 0.1475665271282196, "learning_rate": 9.371008951221834e-06, "loss": 0.0018, "step": 59980 }, { "epoch": 0.4905752954164452, "grad_norm": 0.15475943684577942, "learning_rate": 9.370662393657789e-06, "loss": 0.0017, "step": 59990 }, { "epoch": 0.49065707159504435, "grad_norm": 0.22280634939670563, "learning_rate": 9.370315747059797e-06, "loss": 0.003, "step": 60000 }, { "epoch": 0.4907388477736435, "grad_norm": 0.09851637482643127, "learning_rate": 9.369969011434923e-06, "loss": 0.0043, "step": 60010 }, { "epoch": 0.4908206239522427, "grad_norm": 0.07715827971696854, "learning_rate": 9.369622186790227e-06, "loss": 0.0018, "step": 60020 }, { "epoch": 0.4909024001308419, "grad_norm": 0.018567387014627457, "learning_rate": 9.369275273132776e-06, "loss": 0.0016, "step": 60030 }, { "epoch": 0.4909841763094411, "grad_norm": 0.11289888620376587, "learning_rate": 9.368928270469635e-06, "loss": 0.002, "step": 60040 }, { "epoch": 0.49106595248804025, "grad_norm": 0.0708550214767456, "learning_rate": 9.368581178807874e-06, "loss": 0.002, "step": 60050 }, { "epoch": 0.4911477286666394, "grad_norm": 0.0365125946700573, "learning_rate": 9.368233998154563e-06, "loss": 0.0026, "step": 60060 }, { "epoch": 0.4912295048452386, "grad_norm": 0.21157845854759216, "learning_rate": 9.367886728516775e-06, "loss": 0.0054, "step": 60070 }, { "epoch": 0.49131128102383775, "grad_norm": 0.07859403640031815, "learning_rate": 9.367539369901587e-06, "loss": 0.0027, "step": 60080 }, { "epoch": 0.4913930572024369, "grad_norm": 0.040879026055336, "learning_rate": 9.367191922316068e-06, "loss": 0.0034, "step": 60090 }, { "epoch": 0.4914748333810361, "grad_norm": 0.011194438673555851, "learning_rate": 9.366844385767302e-06, "loss": 0.0016, "step": 60100 }, { "epoch": 0.49155660955963526, "grad_norm": 0.08352098613977432, "learning_rate": 9.366496760262365e-06, "loss": 0.003, "step": 60110 }, { "epoch": 0.49163838573823443, "grad_norm": 0.09487888216972351, "learning_rate": 9.36614904580834e-06, "loss": 0.0019, "step": 60120 }, { "epoch": 0.49172016191683365, "grad_norm": 0.05343140661716461, "learning_rate": 9.36580124241231e-06, "loss": 0.003, "step": 60130 }, { "epoch": 0.4918019380954328, "grad_norm": 0.03604673594236374, "learning_rate": 9.365453350081362e-06, "loss": 0.0026, "step": 60140 }, { "epoch": 0.491883714274032, "grad_norm": 0.11389950662851334, "learning_rate": 9.365105368822579e-06, "loss": 0.0019, "step": 60150 }, { "epoch": 0.49196549045263116, "grad_norm": 0.025969089940190315, "learning_rate": 9.364757298643051e-06, "loss": 0.0027, "step": 60160 }, { "epoch": 0.49204726663123033, "grad_norm": 0.005674608517438173, "learning_rate": 9.364409139549868e-06, "loss": 0.0022, "step": 60170 }, { "epoch": 0.4921290428098295, "grad_norm": 0.15320713818073273, "learning_rate": 9.364060891550125e-06, "loss": 0.0035, "step": 60180 }, { "epoch": 0.49221081898842867, "grad_norm": 0.1560312956571579, "learning_rate": 9.363712554650914e-06, "loss": 0.0027, "step": 60190 }, { "epoch": 0.49229259516702784, "grad_norm": 0.06415663659572601, "learning_rate": 9.363364128859331e-06, "loss": 0.0024, "step": 60200 }, { "epoch": 0.492374371345627, "grad_norm": 0.07316119968891144, "learning_rate": 9.363015614182476e-06, "loss": 0.0024, "step": 60210 }, { "epoch": 0.4924561475242262, "grad_norm": 0.20456728339195251, "learning_rate": 9.362667010627444e-06, "loss": 0.0032, "step": 60220 }, { "epoch": 0.49253792370282534, "grad_norm": 0.1052178218960762, "learning_rate": 9.36231831820134e-06, "loss": 0.0042, "step": 60230 }, { "epoch": 0.49261969988142457, "grad_norm": 0.17608392238616943, "learning_rate": 9.361969536911266e-06, "loss": 0.0018, "step": 60240 }, { "epoch": 0.49270147606002374, "grad_norm": 0.04582073166966438, "learning_rate": 9.361620666764326e-06, "loss": 0.0024, "step": 60250 }, { "epoch": 0.4927832522386229, "grad_norm": 0.034453459084033966, "learning_rate": 9.361271707767627e-06, "loss": 0.001, "step": 60260 }, { "epoch": 0.4928650284172221, "grad_norm": 0.09309881180524826, "learning_rate": 9.36092265992828e-06, "loss": 0.0021, "step": 60270 }, { "epoch": 0.49294680459582124, "grad_norm": 0.0657045915722847, "learning_rate": 9.360573523253393e-06, "loss": 0.0028, "step": 60280 }, { "epoch": 0.4930285807744204, "grad_norm": 0.06344819813966751, "learning_rate": 9.36022429775008e-06, "loss": 0.0038, "step": 60290 }, { "epoch": 0.4931103569530196, "grad_norm": 0.06152265518903732, "learning_rate": 9.359874983425454e-06, "loss": 0.002, "step": 60300 }, { "epoch": 0.49319213313161875, "grad_norm": 0.08097929507493973, "learning_rate": 9.359525580286628e-06, "loss": 0.0038, "step": 60310 }, { "epoch": 0.4932739093102179, "grad_norm": 0.03203972429037094, "learning_rate": 9.359176088340725e-06, "loss": 0.0024, "step": 60320 }, { "epoch": 0.4933556854888171, "grad_norm": 0.13092203438282013, "learning_rate": 9.35882650759486e-06, "loss": 0.0034, "step": 60330 }, { "epoch": 0.49343746166741626, "grad_norm": 0.13278083503246307, "learning_rate": 9.358476838056158e-06, "loss": 0.0034, "step": 60340 }, { "epoch": 0.4935192378460155, "grad_norm": 0.07775045186281204, "learning_rate": 9.358127079731738e-06, "loss": 0.0017, "step": 60350 }, { "epoch": 0.49360101402461465, "grad_norm": 0.10415290296077728, "learning_rate": 9.357777232628728e-06, "loss": 0.0021, "step": 60360 }, { "epoch": 0.4936827902032138, "grad_norm": 0.11958199739456177, "learning_rate": 9.357427296754253e-06, "loss": 0.0022, "step": 60370 }, { "epoch": 0.493764566381813, "grad_norm": 0.053685545921325684, "learning_rate": 9.357077272115442e-06, "loss": 0.0016, "step": 60380 }, { "epoch": 0.49384634256041215, "grad_norm": 0.08944912254810333, "learning_rate": 9.356727158719428e-06, "loss": 0.0017, "step": 60390 }, { "epoch": 0.4939281187390113, "grad_norm": 0.24344946444034576, "learning_rate": 9.356376956573337e-06, "loss": 0.0019, "step": 60400 }, { "epoch": 0.4940098949176105, "grad_norm": 0.1315242499113083, "learning_rate": 9.356026665684307e-06, "loss": 0.002, "step": 60410 }, { "epoch": 0.49409167109620966, "grad_norm": 0.17947956919670105, "learning_rate": 9.355676286059474e-06, "loss": 0.0024, "step": 60420 }, { "epoch": 0.49417344727480883, "grad_norm": 0.08066508173942566, "learning_rate": 9.355325817705975e-06, "loss": 0.0025, "step": 60430 }, { "epoch": 0.494255223453408, "grad_norm": 0.1368550956249237, "learning_rate": 9.354975260630948e-06, "loss": 0.0023, "step": 60440 }, { "epoch": 0.4943369996320072, "grad_norm": 0.04827388375997543, "learning_rate": 9.354624614841536e-06, "loss": 0.0029, "step": 60450 }, { "epoch": 0.4944187758106064, "grad_norm": 0.030345043167471886, "learning_rate": 9.35427388034488e-06, "loss": 0.0028, "step": 60460 }, { "epoch": 0.49450055198920556, "grad_norm": 0.04590597748756409, "learning_rate": 9.353923057148125e-06, "loss": 0.0021, "step": 60470 }, { "epoch": 0.49458232816780473, "grad_norm": 0.060003962367773056, "learning_rate": 9.35357214525842e-06, "loss": 0.0019, "step": 60480 }, { "epoch": 0.4946641043464039, "grad_norm": 0.05681748688220978, "learning_rate": 9.35322114468291e-06, "loss": 0.0023, "step": 60490 }, { "epoch": 0.49474588052500307, "grad_norm": 0.1410062611103058, "learning_rate": 9.35287005542875e-06, "loss": 0.0017, "step": 60500 }, { "epoch": 0.49482765670360224, "grad_norm": 0.060960106551647186, "learning_rate": 9.352518877503085e-06, "loss": 0.0027, "step": 60510 }, { "epoch": 0.4949094328822014, "grad_norm": 0.041331157088279724, "learning_rate": 9.352167610913074e-06, "loss": 0.0024, "step": 60520 }, { "epoch": 0.4949912090608006, "grad_norm": 0.07080575823783875, "learning_rate": 9.351816255665871e-06, "loss": 0.0021, "step": 60530 }, { "epoch": 0.49507298523939974, "grad_norm": 0.149513378739357, "learning_rate": 9.351464811768633e-06, "loss": 0.0039, "step": 60540 }, { "epoch": 0.4951547614179989, "grad_norm": 0.10515744984149933, "learning_rate": 9.35111327922852e-06, "loss": 0.0021, "step": 60550 }, { "epoch": 0.49523653759659814, "grad_norm": 0.0670396089553833, "learning_rate": 9.350761658052694e-06, "loss": 0.0026, "step": 60560 }, { "epoch": 0.4953183137751973, "grad_norm": 0.09587227553129196, "learning_rate": 9.350409948248317e-06, "loss": 0.0022, "step": 60570 }, { "epoch": 0.4954000899537965, "grad_norm": 0.11509011685848236, "learning_rate": 9.350058149822551e-06, "loss": 0.0015, "step": 60580 }, { "epoch": 0.49548186613239564, "grad_norm": 0.0831063836812973, "learning_rate": 9.349706262782566e-06, "loss": 0.0021, "step": 60590 }, { "epoch": 0.4955636423109948, "grad_norm": 0.16045233607292175, "learning_rate": 9.349354287135527e-06, "loss": 0.0018, "step": 60600 }, { "epoch": 0.495645418489594, "grad_norm": 0.19549477100372314, "learning_rate": 9.349002222888607e-06, "loss": 0.0028, "step": 60610 }, { "epoch": 0.49572719466819315, "grad_norm": 0.0380113422870636, "learning_rate": 9.348650070048977e-06, "loss": 0.0038, "step": 60620 }, { "epoch": 0.4958089708467923, "grad_norm": 0.05244797095656395, "learning_rate": 9.34829782862381e-06, "loss": 0.0023, "step": 60630 }, { "epoch": 0.4958907470253915, "grad_norm": 0.025089548900723457, "learning_rate": 9.347945498620282e-06, "loss": 0.0024, "step": 60640 }, { "epoch": 0.49597252320399066, "grad_norm": 0.012987211346626282, "learning_rate": 9.34759308004557e-06, "loss": 0.0034, "step": 60650 }, { "epoch": 0.4960542993825898, "grad_norm": 0.04081115871667862, "learning_rate": 9.347240572906853e-06, "loss": 0.0013, "step": 60660 }, { "epoch": 0.49613607556118905, "grad_norm": 0.07248694449663162, "learning_rate": 9.346887977211311e-06, "loss": 0.0024, "step": 60670 }, { "epoch": 0.4962178517397882, "grad_norm": 0.020014775916934013, "learning_rate": 9.346535292966128e-06, "loss": 0.0016, "step": 60680 }, { "epoch": 0.4962996279183874, "grad_norm": 0.22455380856990814, "learning_rate": 9.346182520178489e-06, "loss": 0.0019, "step": 60690 }, { "epoch": 0.49638140409698656, "grad_norm": 0.08019594848155975, "learning_rate": 9.345829658855578e-06, "loss": 0.0052, "step": 60700 }, { "epoch": 0.4964631802755857, "grad_norm": 0.23305493593215942, "learning_rate": 9.345476709004587e-06, "loss": 0.0021, "step": 60710 }, { "epoch": 0.4965449564541849, "grad_norm": 0.045212700963020325, "learning_rate": 9.3451236706327e-06, "loss": 0.0013, "step": 60720 }, { "epoch": 0.49662673263278406, "grad_norm": 0.05925562232732773, "learning_rate": 9.344770543747114e-06, "loss": 0.0023, "step": 60730 }, { "epoch": 0.49670850881138323, "grad_norm": 0.12091103941202164, "learning_rate": 9.34441732835502e-06, "loss": 0.0016, "step": 60740 }, { "epoch": 0.4967902849899824, "grad_norm": 0.05490148067474365, "learning_rate": 9.344064024463613e-06, "loss": 0.0011, "step": 60750 }, { "epoch": 0.49687206116858157, "grad_norm": 0.12632302939891815, "learning_rate": 9.34371063208009e-06, "loss": 0.0028, "step": 60760 }, { "epoch": 0.4969538373471808, "grad_norm": 0.04279378429055214, "learning_rate": 9.343357151211653e-06, "loss": 0.0026, "step": 60770 }, { "epoch": 0.49703561352577996, "grad_norm": 0.07218111306428909, "learning_rate": 9.3430035818655e-06, "loss": 0.0015, "step": 60780 }, { "epoch": 0.49711738970437913, "grad_norm": 0.04242740198969841, "learning_rate": 9.342649924048832e-06, "loss": 0.0017, "step": 60790 }, { "epoch": 0.4971991658829783, "grad_norm": 0.02525600604712963, "learning_rate": 9.342296177768857e-06, "loss": 0.0019, "step": 60800 }, { "epoch": 0.49728094206157747, "grad_norm": 0.08103889971971512, "learning_rate": 9.341942343032778e-06, "loss": 0.0022, "step": 60810 }, { "epoch": 0.49736271824017664, "grad_norm": 0.07869108766317368, "learning_rate": 9.341588419847804e-06, "loss": 0.0017, "step": 60820 }, { "epoch": 0.4974444944187758, "grad_norm": 0.1585518717765808, "learning_rate": 9.341234408221147e-06, "loss": 0.0023, "step": 60830 }, { "epoch": 0.497526270597375, "grad_norm": 0.17935454845428467, "learning_rate": 9.340880308160015e-06, "loss": 0.0027, "step": 60840 }, { "epoch": 0.49760804677597414, "grad_norm": 0.04569842293858528, "learning_rate": 9.340526119671623e-06, "loss": 0.0017, "step": 60850 }, { "epoch": 0.4976898229545733, "grad_norm": 0.222245454788208, "learning_rate": 9.340171842763184e-06, "loss": 0.0036, "step": 60860 }, { "epoch": 0.4977715991331725, "grad_norm": 0.053579237312078476, "learning_rate": 9.339817477441918e-06, "loss": 0.0026, "step": 60870 }, { "epoch": 0.4978533753117717, "grad_norm": 0.07031276077032089, "learning_rate": 9.339463023715044e-06, "loss": 0.0014, "step": 60880 }, { "epoch": 0.4979351514903709, "grad_norm": 0.06479164958000183, "learning_rate": 9.339108481589778e-06, "loss": 0.0046, "step": 60890 }, { "epoch": 0.49801692766897004, "grad_norm": 0.03853866085410118, "learning_rate": 9.338753851073346e-06, "loss": 0.0025, "step": 60900 }, { "epoch": 0.4980987038475692, "grad_norm": 0.2409675419330597, "learning_rate": 9.338399132172974e-06, "loss": 0.0036, "step": 60910 }, { "epoch": 0.4981804800261684, "grad_norm": 0.22654880583286285, "learning_rate": 9.338044324895884e-06, "loss": 0.002, "step": 60920 }, { "epoch": 0.49826225620476755, "grad_norm": 0.09952007234096527, "learning_rate": 9.337689429249303e-06, "loss": 0.0036, "step": 60930 }, { "epoch": 0.4983440323833667, "grad_norm": 0.0694006085395813, "learning_rate": 9.337334445240462e-06, "loss": 0.0026, "step": 60940 }, { "epoch": 0.4984258085619659, "grad_norm": 0.19635622203350067, "learning_rate": 9.336979372876595e-06, "loss": 0.0026, "step": 60950 }, { "epoch": 0.49850758474056506, "grad_norm": 0.014818305149674416, "learning_rate": 9.336624212164933e-06, "loss": 0.0012, "step": 60960 }, { "epoch": 0.4985893609191642, "grad_norm": 0.06060894578695297, "learning_rate": 9.336268963112709e-06, "loss": 0.0012, "step": 60970 }, { "epoch": 0.4986711370977634, "grad_norm": 0.04676918312907219, "learning_rate": 9.335913625727164e-06, "loss": 0.0015, "step": 60980 }, { "epoch": 0.4987529132763626, "grad_norm": 0.12333877384662628, "learning_rate": 9.335558200015532e-06, "loss": 0.0035, "step": 60990 }, { "epoch": 0.4988346894549618, "grad_norm": 0.06603579223155975, "learning_rate": 9.335202685985056e-06, "loss": 0.0016, "step": 61000 }, { "epoch": 0.4988346894549618, "eval_loss": 0.0022704475559294224, "eval_runtime": 5.3641, "eval_samples_per_second": 37.285, "eval_steps_per_second": 9.321, "step": 61000 }, { "epoch": 0.49891646563356096, "grad_norm": 0.04703107848763466, "learning_rate": 9.334847083642977e-06, "loss": 0.0027, "step": 61010 }, { "epoch": 0.4989982418121601, "grad_norm": 0.20823051035404205, "learning_rate": 9.33449139299654e-06, "loss": 0.0021, "step": 61020 }, { "epoch": 0.4990800179907593, "grad_norm": 0.19887855648994446, "learning_rate": 9.334135614052991e-06, "loss": 0.004, "step": 61030 }, { "epoch": 0.49916179416935846, "grad_norm": 0.10340926796197891, "learning_rate": 9.333779746819576e-06, "loss": 0.0023, "step": 61040 }, { "epoch": 0.49924357034795763, "grad_norm": 0.10438122600317001, "learning_rate": 9.333423791303543e-06, "loss": 0.0022, "step": 61050 }, { "epoch": 0.4993253465265568, "grad_norm": 0.06288120895624161, "learning_rate": 9.333067747512148e-06, "loss": 0.0028, "step": 61060 }, { "epoch": 0.49940712270515597, "grad_norm": 0.1389717310667038, "learning_rate": 9.332711615452638e-06, "loss": 0.003, "step": 61070 }, { "epoch": 0.49948889888375514, "grad_norm": 0.05118778347969055, "learning_rate": 9.332355395132272e-06, "loss": 0.0037, "step": 61080 }, { "epoch": 0.49957067506235436, "grad_norm": 0.04076768085360527, "learning_rate": 9.331999086558306e-06, "loss": 0.0031, "step": 61090 }, { "epoch": 0.49965245124095353, "grad_norm": 0.03454342111945152, "learning_rate": 9.331642689737996e-06, "loss": 0.0016, "step": 61100 }, { "epoch": 0.4997342274195527, "grad_norm": 0.030599188059568405, "learning_rate": 9.331286204678603e-06, "loss": 0.003, "step": 61110 }, { "epoch": 0.49981600359815187, "grad_norm": 0.11130572110414505, "learning_rate": 9.330929631387392e-06, "loss": 0.0027, "step": 61120 }, { "epoch": 0.49989777977675104, "grad_norm": 0.06431176513433456, "learning_rate": 9.33057296987162e-06, "loss": 0.004, "step": 61130 }, { "epoch": 0.4999795559553502, "grad_norm": 0.016633454710245132, "learning_rate": 9.330216220138559e-06, "loss": 0.0025, "step": 61140 }, { "epoch": 0.5000613321339494, "grad_norm": 0.053509198129177094, "learning_rate": 9.329859382195474e-06, "loss": 0.0038, "step": 61150 }, { "epoch": 0.5001431083125486, "grad_norm": 0.004371039569377899, "learning_rate": 9.329502456049633e-06, "loss": 0.0021, "step": 61160 }, { "epoch": 0.5002248844911478, "grad_norm": 0.0911414846777916, "learning_rate": 9.329145441708309e-06, "loss": 0.0029, "step": 61170 }, { "epoch": 0.5003066606697469, "grad_norm": 0.034620486199855804, "learning_rate": 9.328788339178772e-06, "loss": 0.0022, "step": 61180 }, { "epoch": 0.5003884368483461, "grad_norm": 0.19112837314605713, "learning_rate": 9.3284311484683e-06, "loss": 0.0026, "step": 61190 }, { "epoch": 0.5004702130269453, "grad_norm": 0.03258804231882095, "learning_rate": 9.328073869584164e-06, "loss": 0.0021, "step": 61200 }, { "epoch": 0.5005519892055444, "grad_norm": 0.10543493181467056, "learning_rate": 9.327716502533647e-06, "loss": 0.0031, "step": 61210 }, { "epoch": 0.5006337653841436, "grad_norm": 0.07660693675279617, "learning_rate": 9.327359047324028e-06, "loss": 0.0016, "step": 61220 }, { "epoch": 0.5007155415627428, "grad_norm": 0.18934328854084015, "learning_rate": 9.327001503962588e-06, "loss": 0.007, "step": 61230 }, { "epoch": 0.500797317741342, "grad_norm": 0.07138071209192276, "learning_rate": 9.326643872456608e-06, "loss": 0.0018, "step": 61240 }, { "epoch": 0.5008790939199411, "grad_norm": 0.37821006774902344, "learning_rate": 9.326286152813377e-06, "loss": 0.0034, "step": 61250 }, { "epoch": 0.5009608700985403, "grad_norm": 0.053057897835969925, "learning_rate": 9.32592834504018e-06, "loss": 0.0018, "step": 61260 }, { "epoch": 0.5010426462771395, "grad_norm": 0.02020830288529396, "learning_rate": 9.325570449144305e-06, "loss": 0.0025, "step": 61270 }, { "epoch": 0.5011244224557386, "grad_norm": 0.024750856682658195, "learning_rate": 9.325212465133047e-06, "loss": 0.0015, "step": 61280 }, { "epoch": 0.5012061986343378, "grad_norm": 0.2541402578353882, "learning_rate": 9.324854393013694e-06, "loss": 0.0028, "step": 61290 }, { "epoch": 0.501287974812937, "grad_norm": 0.04376402124762535, "learning_rate": 9.32449623279354e-06, "loss": 0.003, "step": 61300 }, { "epoch": 0.5013697509915361, "grad_norm": 0.19424398243427277, "learning_rate": 9.324137984479885e-06, "loss": 0.0041, "step": 61310 }, { "epoch": 0.5014515271701353, "grad_norm": 0.08007463067770004, "learning_rate": 9.323779648080022e-06, "loss": 0.0018, "step": 61320 }, { "epoch": 0.5015333033487345, "grad_norm": 0.005173862911760807, "learning_rate": 9.323421223601255e-06, "loss": 0.0031, "step": 61330 }, { "epoch": 0.5016150795273336, "grad_norm": 0.24209646880626678, "learning_rate": 9.323062711050883e-06, "loss": 0.0023, "step": 61340 }, { "epoch": 0.5016968557059328, "grad_norm": 0.057029690593481064, "learning_rate": 9.322704110436209e-06, "loss": 0.0016, "step": 61350 }, { "epoch": 0.5017786318845321, "grad_norm": 0.02428368292748928, "learning_rate": 9.322345421764539e-06, "loss": 0.002, "step": 61360 }, { "epoch": 0.5018604080631313, "grad_norm": 0.046728942543268204, "learning_rate": 9.321986645043179e-06, "loss": 0.0011, "step": 61370 }, { "epoch": 0.5019421842417304, "grad_norm": 0.09047725796699524, "learning_rate": 9.321627780279439e-06, "loss": 0.0036, "step": 61380 }, { "epoch": 0.5020239604203296, "grad_norm": 0.07804640382528305, "learning_rate": 9.321268827480627e-06, "loss": 0.0027, "step": 61390 }, { "epoch": 0.5021057365989288, "grad_norm": 0.04125697910785675, "learning_rate": 9.320909786654057e-06, "loss": 0.0021, "step": 61400 }, { "epoch": 0.5021875127775279, "grad_norm": 0.031061476096510887, "learning_rate": 9.320550657807042e-06, "loss": 0.0032, "step": 61410 }, { "epoch": 0.5022692889561271, "grad_norm": 0.08653751015663147, "learning_rate": 9.3201914409469e-06, "loss": 0.003, "step": 61420 }, { "epoch": 0.5023510651347263, "grad_norm": 0.04498846456408501, "learning_rate": 9.319832136080945e-06, "loss": 0.0018, "step": 61430 }, { "epoch": 0.5024328413133254, "grad_norm": 0.13818494975566864, "learning_rate": 9.319472743216498e-06, "loss": 0.0023, "step": 61440 }, { "epoch": 0.5025146174919246, "grad_norm": 0.035331640392541885, "learning_rate": 9.319113262360882e-06, "loss": 0.0031, "step": 61450 }, { "epoch": 0.5025963936705238, "grad_norm": 0.07260430604219437, "learning_rate": 9.318753693521417e-06, "loss": 0.0015, "step": 61460 }, { "epoch": 0.502678169849123, "grad_norm": 0.04570554569363594, "learning_rate": 9.318394036705426e-06, "loss": 0.0024, "step": 61470 }, { "epoch": 0.5027599460277221, "grad_norm": 0.18610858917236328, "learning_rate": 9.318034291920242e-06, "loss": 0.0029, "step": 61480 }, { "epoch": 0.5028417222063213, "grad_norm": 0.03689602017402649, "learning_rate": 9.317674459173189e-06, "loss": 0.0031, "step": 61490 }, { "epoch": 0.5029234983849205, "grad_norm": 0.18582840263843536, "learning_rate": 9.317314538471595e-06, "loss": 0.0018, "step": 61500 }, { "epoch": 0.5030052745635196, "grad_norm": 0.020924577489495277, "learning_rate": 9.316954529822798e-06, "loss": 0.0014, "step": 61510 }, { "epoch": 0.5030870507421188, "grad_norm": 0.03602088987827301, "learning_rate": 9.316594433234127e-06, "loss": 0.002, "step": 61520 }, { "epoch": 0.503168826920718, "grad_norm": 0.20321518182754517, "learning_rate": 9.31623424871292e-06, "loss": 0.0029, "step": 61530 }, { "epoch": 0.5032506030993171, "grad_norm": 0.24819424748420715, "learning_rate": 9.31587397626651e-06, "loss": 0.0036, "step": 61540 }, { "epoch": 0.5033323792779163, "grad_norm": 0.16031314432621002, "learning_rate": 9.31551361590224e-06, "loss": 0.0027, "step": 61550 }, { "epoch": 0.5034141554565155, "grad_norm": 0.11349499970674515, "learning_rate": 9.315153167627451e-06, "loss": 0.0031, "step": 61560 }, { "epoch": 0.5034959316351147, "grad_norm": 0.029245054349303246, "learning_rate": 9.314792631449483e-06, "loss": 0.0025, "step": 61570 }, { "epoch": 0.5035777078137139, "grad_norm": 0.029471011832356453, "learning_rate": 9.314432007375684e-06, "loss": 0.0019, "step": 61580 }, { "epoch": 0.5036594839923131, "grad_norm": 0.05699244886636734, "learning_rate": 9.314071295413396e-06, "loss": 0.0039, "step": 61590 }, { "epoch": 0.5037412601709123, "grad_norm": 0.029976654797792435, "learning_rate": 9.31371049556997e-06, "loss": 0.0019, "step": 61600 }, { "epoch": 0.5038230363495114, "grad_norm": 0.07200939208269119, "learning_rate": 9.313349607852756e-06, "loss": 0.004, "step": 61610 }, { "epoch": 0.5039048125281106, "grad_norm": 0.16930228471755981, "learning_rate": 9.312988632269101e-06, "loss": 0.0032, "step": 61620 }, { "epoch": 0.5039865887067098, "grad_norm": 0.049855105578899384, "learning_rate": 9.312627568826365e-06, "loss": 0.0029, "step": 61630 }, { "epoch": 0.5040683648853089, "grad_norm": 0.1648932695388794, "learning_rate": 9.312266417531899e-06, "loss": 0.0016, "step": 61640 }, { "epoch": 0.5041501410639081, "grad_norm": 0.05884020775556564, "learning_rate": 9.311905178393059e-06, "loss": 0.0023, "step": 61650 }, { "epoch": 0.5042319172425073, "grad_norm": 0.1611117273569107, "learning_rate": 9.311543851417208e-06, "loss": 0.002, "step": 61660 }, { "epoch": 0.5043136934211064, "grad_norm": 0.019473062828183174, "learning_rate": 9.311182436611704e-06, "loss": 0.003, "step": 61670 }, { "epoch": 0.5043954695997056, "grad_norm": 0.2399250566959381, "learning_rate": 9.310820933983909e-06, "loss": 0.0041, "step": 61680 }, { "epoch": 0.5044772457783048, "grad_norm": 0.06376124173402786, "learning_rate": 9.310459343541188e-06, "loss": 0.0042, "step": 61690 }, { "epoch": 0.5045590219569039, "grad_norm": 0.0628475770354271, "learning_rate": 9.310097665290904e-06, "loss": 0.0017, "step": 61700 }, { "epoch": 0.5046407981355031, "grad_norm": 0.048955287784338, "learning_rate": 9.30973589924043e-06, "loss": 0.0008, "step": 61710 }, { "epoch": 0.5047225743141023, "grad_norm": 0.08938673883676529, "learning_rate": 9.309374045397132e-06, "loss": 0.0025, "step": 61720 }, { "epoch": 0.5048043504927014, "grad_norm": 0.16989143192768097, "learning_rate": 9.309012103768381e-06, "loss": 0.0027, "step": 61730 }, { "epoch": 0.5048861266713006, "grad_norm": 0.060782406479120255, "learning_rate": 9.308650074361552e-06, "loss": 0.0025, "step": 61740 }, { "epoch": 0.5049679028498998, "grad_norm": 0.03319429233670235, "learning_rate": 9.308287957184017e-06, "loss": 0.0023, "step": 61750 }, { "epoch": 0.505049679028499, "grad_norm": 0.12099123746156693, "learning_rate": 9.307925752243157e-06, "loss": 0.0024, "step": 61760 }, { "epoch": 0.5051314552070981, "grad_norm": 0.04815280809998512, "learning_rate": 9.307563459546346e-06, "loss": 0.0018, "step": 61770 }, { "epoch": 0.5052132313856973, "grad_norm": 0.07332134246826172, "learning_rate": 9.307201079100965e-06, "loss": 0.0016, "step": 61780 }, { "epoch": 0.5052950075642966, "grad_norm": 0.1290506273508072, "learning_rate": 9.3068386109144e-06, "loss": 0.0017, "step": 61790 }, { "epoch": 0.5053767837428957, "grad_norm": 0.043423354625701904, "learning_rate": 9.30647605499403e-06, "loss": 0.0026, "step": 61800 }, { "epoch": 0.5054585599214949, "grad_norm": 0.0031994490418583155, "learning_rate": 9.30611341134724e-06, "loss": 0.0035, "step": 61810 }, { "epoch": 0.5055403361000941, "grad_norm": 0.0073824692517519, "learning_rate": 9.305750679981423e-06, "loss": 0.0031, "step": 61820 }, { "epoch": 0.5056221122786932, "grad_norm": 0.025498829782009125, "learning_rate": 9.305387860903962e-06, "loss": 0.0015, "step": 61830 }, { "epoch": 0.5057038884572924, "grad_norm": 0.06967882066965103, "learning_rate": 9.305024954122253e-06, "loss": 0.0024, "step": 61840 }, { "epoch": 0.5057856646358916, "grad_norm": 0.18681085109710693, "learning_rate": 9.304661959643686e-06, "loss": 0.0029, "step": 61850 }, { "epoch": 0.5058674408144908, "grad_norm": 0.09231079369783401, "learning_rate": 9.304298877475655e-06, "loss": 0.0034, "step": 61860 }, { "epoch": 0.5059492169930899, "grad_norm": 0.06506211310625076, "learning_rate": 9.303935707625558e-06, "loss": 0.0018, "step": 61870 }, { "epoch": 0.5060309931716891, "grad_norm": 0.09582589566707611, "learning_rate": 9.303572450100791e-06, "loss": 0.0015, "step": 61880 }, { "epoch": 0.5061127693502883, "grad_norm": 0.050016630440950394, "learning_rate": 9.303209104908756e-06, "loss": 0.0016, "step": 61890 }, { "epoch": 0.5061945455288874, "grad_norm": 0.07691926509141922, "learning_rate": 9.302845672056855e-06, "loss": 0.0024, "step": 61900 }, { "epoch": 0.5062763217074866, "grad_norm": 0.2478989213705063, "learning_rate": 9.302482151552487e-06, "loss": 0.0028, "step": 61910 }, { "epoch": 0.5063580978860858, "grad_norm": 0.18228234350681305, "learning_rate": 9.302118543403062e-06, "loss": 0.0022, "step": 61920 }, { "epoch": 0.5064398740646849, "grad_norm": 0.078194759786129, "learning_rate": 9.301754847615988e-06, "loss": 0.0024, "step": 61930 }, { "epoch": 0.5065216502432841, "grad_norm": 0.09174741059541702, "learning_rate": 9.301391064198668e-06, "loss": 0.0031, "step": 61940 }, { "epoch": 0.5066034264218833, "grad_norm": 0.013078360818326473, "learning_rate": 9.301027193158517e-06, "loss": 0.0008, "step": 61950 }, { "epoch": 0.5066852026004824, "grad_norm": 0.09129685908555984, "learning_rate": 9.300663234502943e-06, "loss": 0.0024, "step": 61960 }, { "epoch": 0.5067669787790816, "grad_norm": 0.0916978120803833, "learning_rate": 9.300299188239366e-06, "loss": 0.0025, "step": 61970 }, { "epoch": 0.5068487549576808, "grad_norm": 0.047959525138139725, "learning_rate": 9.2999350543752e-06, "loss": 0.0022, "step": 61980 }, { "epoch": 0.50693053113628, "grad_norm": 0.023433709517121315, "learning_rate": 9.29957083291786e-06, "loss": 0.003, "step": 61990 }, { "epoch": 0.5070123073148792, "grad_norm": 0.040286604315042496, "learning_rate": 9.299206523874767e-06, "loss": 0.0019, "step": 62000 }, { "epoch": 0.5070940834934784, "grad_norm": 0.024323660880327225, "learning_rate": 9.298842127253342e-06, "loss": 0.0034, "step": 62010 }, { "epoch": 0.5071758596720776, "grad_norm": 0.03066425956785679, "learning_rate": 9.29847764306101e-06, "loss": 0.0016, "step": 62020 }, { "epoch": 0.5072576358506767, "grad_norm": 0.039999283850193024, "learning_rate": 9.298113071305194e-06, "loss": 0.0017, "step": 62030 }, { "epoch": 0.5073394120292759, "grad_norm": 0.0312836728990078, "learning_rate": 9.297748411993321e-06, "loss": 0.002, "step": 62040 }, { "epoch": 0.5074211882078751, "grad_norm": 0.08986599743366241, "learning_rate": 9.29738366513282e-06, "loss": 0.0018, "step": 62050 }, { "epoch": 0.5075029643864742, "grad_norm": 0.031776342540979385, "learning_rate": 9.29701883073112e-06, "loss": 0.0036, "step": 62060 }, { "epoch": 0.5075847405650734, "grad_norm": 0.07160695642232895, "learning_rate": 9.296653908795654e-06, "loss": 0.0016, "step": 62070 }, { "epoch": 0.5076665167436726, "grad_norm": 0.10162688791751862, "learning_rate": 9.296288899333857e-06, "loss": 0.0022, "step": 62080 }, { "epoch": 0.5077482929222717, "grad_norm": 0.09613917022943497, "learning_rate": 9.29592380235316e-06, "loss": 0.0019, "step": 62090 }, { "epoch": 0.5078300691008709, "grad_norm": 0.08892033249139786, "learning_rate": 9.295558617861005e-06, "loss": 0.0011, "step": 62100 }, { "epoch": 0.5079118452794701, "grad_norm": 0.06997007876634598, "learning_rate": 9.295193345864828e-06, "loss": 0.0036, "step": 62110 }, { "epoch": 0.5079936214580693, "grad_norm": 0.050338853150606155, "learning_rate": 9.294827986372073e-06, "loss": 0.0029, "step": 62120 }, { "epoch": 0.5080753976366684, "grad_norm": 0.04377369210124016, "learning_rate": 9.29446253939018e-06, "loss": 0.0027, "step": 62130 }, { "epoch": 0.5081571738152676, "grad_norm": 0.029201284050941467, "learning_rate": 9.294097004926594e-06, "loss": 0.003, "step": 62140 }, { "epoch": 0.5082389499938668, "grad_norm": 0.02408020943403244, "learning_rate": 9.293731382988763e-06, "loss": 0.0019, "step": 62150 }, { "epoch": 0.5083207261724659, "grad_norm": 0.11199527233839035, "learning_rate": 9.293365673584131e-06, "loss": 0.0034, "step": 62160 }, { "epoch": 0.5084025023510651, "grad_norm": 0.08371167629957199, "learning_rate": 9.292999876720152e-06, "loss": 0.0013, "step": 62170 }, { "epoch": 0.5084842785296643, "grad_norm": 0.12065865844488144, "learning_rate": 9.292633992404276e-06, "loss": 0.003, "step": 62180 }, { "epoch": 0.5085660547082634, "grad_norm": 0.07544991374015808, "learning_rate": 9.292268020643955e-06, "loss": 0.0017, "step": 62190 }, { "epoch": 0.5086478308868626, "grad_norm": 0.04696563631296158, "learning_rate": 9.291901961446647e-06, "loss": 0.0034, "step": 62200 }, { "epoch": 0.5087296070654619, "grad_norm": 0.35632458329200745, "learning_rate": 9.291535814819808e-06, "loss": 0.0033, "step": 62210 }, { "epoch": 0.508811383244061, "grad_norm": 0.08028219640254974, "learning_rate": 9.291169580770895e-06, "loss": 0.0025, "step": 62220 }, { "epoch": 0.5088931594226602, "grad_norm": 0.04570874944329262, "learning_rate": 9.29080325930737e-06, "loss": 0.0012, "step": 62230 }, { "epoch": 0.5089749356012594, "grad_norm": 0.07991156727075577, "learning_rate": 9.290436850436695e-06, "loss": 0.0027, "step": 62240 }, { "epoch": 0.5090567117798586, "grad_norm": 0.040592364966869354, "learning_rate": 9.290070354166334e-06, "loss": 0.0023, "step": 62250 }, { "epoch": 0.5091384879584577, "grad_norm": 0.029013244435191154, "learning_rate": 9.289703770503751e-06, "loss": 0.0017, "step": 62260 }, { "epoch": 0.5092202641370569, "grad_norm": 0.15016482770442963, "learning_rate": 9.289337099456417e-06, "loss": 0.002, "step": 62270 }, { "epoch": 0.5093020403156561, "grad_norm": 0.07596410065889359, "learning_rate": 9.2889703410318e-06, "loss": 0.0019, "step": 62280 }, { "epoch": 0.5093838164942552, "grad_norm": 0.06094176322221756, "learning_rate": 9.28860349523737e-06, "loss": 0.002, "step": 62290 }, { "epoch": 0.5094655926728544, "grad_norm": 0.009710441343486309, "learning_rate": 9.288236562080602e-06, "loss": 0.0018, "step": 62300 }, { "epoch": 0.5095473688514536, "grad_norm": 0.11024066060781479, "learning_rate": 9.287869541568968e-06, "loss": 0.0042, "step": 62310 }, { "epoch": 0.5096291450300527, "grad_norm": 0.052830860018730164, "learning_rate": 9.287502433709947e-06, "loss": 0.0017, "step": 62320 }, { "epoch": 0.5097109212086519, "grad_norm": 0.05244878679513931, "learning_rate": 9.287135238511016e-06, "loss": 0.0018, "step": 62330 }, { "epoch": 0.5097926973872511, "grad_norm": 0.0420977920293808, "learning_rate": 9.286767955979657e-06, "loss": 0.0021, "step": 62340 }, { "epoch": 0.5098744735658503, "grad_norm": 0.1741432398557663, "learning_rate": 9.286400586123348e-06, "loss": 0.0019, "step": 62350 }, { "epoch": 0.5099562497444494, "grad_norm": 0.032660868018865585, "learning_rate": 9.286033128949576e-06, "loss": 0.004, "step": 62360 }, { "epoch": 0.5100380259230486, "grad_norm": 0.13218972086906433, "learning_rate": 9.285665584465824e-06, "loss": 0.0017, "step": 62370 }, { "epoch": 0.5101198021016478, "grad_norm": 0.038115717470645905, "learning_rate": 9.285297952679581e-06, "loss": 0.0016, "step": 62380 }, { "epoch": 0.5102015782802469, "grad_norm": 0.05222046375274658, "learning_rate": 9.284930233598338e-06, "loss": 0.004, "step": 62390 }, { "epoch": 0.5102833544588461, "grad_norm": 0.08615869283676147, "learning_rate": 9.28456242722958e-06, "loss": 0.0032, "step": 62400 }, { "epoch": 0.5103651306374453, "grad_norm": 0.0806642398238182, "learning_rate": 9.284194533580804e-06, "loss": 0.0028, "step": 62410 }, { "epoch": 0.5104469068160444, "grad_norm": 0.12095718830823898, "learning_rate": 9.283826552659503e-06, "loss": 0.0023, "step": 62420 }, { "epoch": 0.5105286829946437, "grad_norm": 0.18670956790447235, "learning_rate": 9.283458484473173e-06, "loss": 0.0025, "step": 62430 }, { "epoch": 0.5106104591732429, "grad_norm": 0.10657890141010284, "learning_rate": 9.283090329029312e-06, "loss": 0.0023, "step": 62440 }, { "epoch": 0.510692235351842, "grad_norm": 0.15143360197544098, "learning_rate": 9.282722086335419e-06, "loss": 0.0014, "step": 62450 }, { "epoch": 0.5107740115304412, "grad_norm": 0.11057086288928986, "learning_rate": 9.282353756398996e-06, "loss": 0.0018, "step": 62460 }, { "epoch": 0.5108557877090404, "grad_norm": 0.12180095911026001, "learning_rate": 9.281985339227547e-06, "loss": 0.0019, "step": 62470 }, { "epoch": 0.5109375638876396, "grad_norm": 0.4717957675457001, "learning_rate": 9.281616834828574e-06, "loss": 0.0023, "step": 62480 }, { "epoch": 0.5110193400662387, "grad_norm": 0.046247150748968124, "learning_rate": 9.281248243209588e-06, "loss": 0.0021, "step": 62490 }, { "epoch": 0.5111011162448379, "grad_norm": 0.014191427268087864, "learning_rate": 9.280879564378096e-06, "loss": 0.0033, "step": 62500 }, { "epoch": 0.5111828924234371, "grad_norm": 0.09344442933797836, "learning_rate": 9.280510798341605e-06, "loss": 0.0029, "step": 62510 }, { "epoch": 0.5112646686020362, "grad_norm": 0.09557056427001953, "learning_rate": 9.280141945107633e-06, "loss": 0.0028, "step": 62520 }, { "epoch": 0.5113464447806354, "grad_norm": 0.12308879941701889, "learning_rate": 9.279773004683687e-06, "loss": 0.0026, "step": 62530 }, { "epoch": 0.5114282209592346, "grad_norm": 0.0780276209115982, "learning_rate": 9.279403977077288e-06, "loss": 0.0025, "step": 62540 }, { "epoch": 0.5115099971378337, "grad_norm": 0.33072471618652344, "learning_rate": 9.27903486229595e-06, "loss": 0.0036, "step": 62550 }, { "epoch": 0.5115917733164329, "grad_norm": 0.136443093419075, "learning_rate": 9.278665660347195e-06, "loss": 0.0031, "step": 62560 }, { "epoch": 0.5116735494950321, "grad_norm": 0.04000743106007576, "learning_rate": 9.278296371238542e-06, "loss": 0.002, "step": 62570 }, { "epoch": 0.5117553256736312, "grad_norm": 0.062258780002593994, "learning_rate": 9.277926994977515e-06, "loss": 0.0027, "step": 62580 }, { "epoch": 0.5118371018522304, "grad_norm": 0.13359735906124115, "learning_rate": 9.277557531571636e-06, "loss": 0.0026, "step": 62590 }, { "epoch": 0.5119188780308296, "grad_norm": 0.09374859184026718, "learning_rate": 9.277187981028436e-06, "loss": 0.0013, "step": 62600 }, { "epoch": 0.5120006542094288, "grad_norm": 0.023215554654598236, "learning_rate": 9.276818343355439e-06, "loss": 0.0036, "step": 62610 }, { "epoch": 0.5120824303880279, "grad_norm": 0.04306026175618172, "learning_rate": 9.276448618560175e-06, "loss": 0.0016, "step": 62620 }, { "epoch": 0.5121642065666271, "grad_norm": 0.11573146283626556, "learning_rate": 9.276078806650178e-06, "loss": 0.0026, "step": 62630 }, { "epoch": 0.5122459827452264, "grad_norm": 0.15795224905014038, "learning_rate": 9.27570890763298e-06, "loss": 0.0023, "step": 62640 }, { "epoch": 0.5123277589238255, "grad_norm": 0.0869690477848053, "learning_rate": 9.275338921516115e-06, "loss": 0.0023, "step": 62650 }, { "epoch": 0.5124095351024247, "grad_norm": 0.07812368124723434, "learning_rate": 9.27496884830712e-06, "loss": 0.0026, "step": 62660 }, { "epoch": 0.5124913112810239, "grad_norm": 0.1044335663318634, "learning_rate": 9.274598688013537e-06, "loss": 0.0044, "step": 62670 }, { "epoch": 0.512573087459623, "grad_norm": 0.13049013912677765, "learning_rate": 9.274228440642902e-06, "loss": 0.0029, "step": 62680 }, { "epoch": 0.5126548636382222, "grad_norm": 0.11863166093826294, "learning_rate": 9.27385810620276e-06, "loss": 0.0033, "step": 62690 }, { "epoch": 0.5127366398168214, "grad_norm": 0.04230772703886032, "learning_rate": 9.273487684700656e-06, "loss": 0.0027, "step": 62700 }, { "epoch": 0.5128184159954206, "grad_norm": 0.08837166428565979, "learning_rate": 9.273117176144133e-06, "loss": 0.0025, "step": 62710 }, { "epoch": 0.5129001921740197, "grad_norm": 0.07236456125974655, "learning_rate": 9.27274658054074e-06, "loss": 0.0015, "step": 62720 }, { "epoch": 0.5129819683526189, "grad_norm": 0.03916643559932709, "learning_rate": 9.272375897898026e-06, "loss": 0.0015, "step": 62730 }, { "epoch": 0.5130637445312181, "grad_norm": 0.07550806552171707, "learning_rate": 9.272005128223542e-06, "loss": 0.0022, "step": 62740 }, { "epoch": 0.5131455207098172, "grad_norm": 0.007432656362652779, "learning_rate": 9.27163427152484e-06, "loss": 0.0017, "step": 62750 }, { "epoch": 0.5132272968884164, "grad_norm": 0.016989631578326225, "learning_rate": 9.271263327809478e-06, "loss": 0.0026, "step": 62760 }, { "epoch": 0.5133090730670156, "grad_norm": 0.40874359011650085, "learning_rate": 9.27089229708501e-06, "loss": 0.0023, "step": 62770 }, { "epoch": 0.5133908492456147, "grad_norm": 0.03549632057547569, "learning_rate": 9.270521179358994e-06, "loss": 0.0022, "step": 62780 }, { "epoch": 0.5134726254242139, "grad_norm": 0.09335017204284668, "learning_rate": 9.27014997463899e-06, "loss": 0.0048, "step": 62790 }, { "epoch": 0.5135544016028131, "grad_norm": 0.07814132422208786, "learning_rate": 9.269778682932559e-06, "loss": 0.0024, "step": 62800 }, { "epoch": 0.5136361777814122, "grad_norm": 0.03756922483444214, "learning_rate": 9.269407304247266e-06, "loss": 0.0012, "step": 62810 }, { "epoch": 0.5137179539600114, "grad_norm": 0.08115248382091522, "learning_rate": 9.269035838590676e-06, "loss": 0.0028, "step": 62820 }, { "epoch": 0.5137997301386106, "grad_norm": 0.01312555093318224, "learning_rate": 9.268664285970357e-06, "loss": 0.0019, "step": 62830 }, { "epoch": 0.5138815063172097, "grad_norm": 0.04918224364519119, "learning_rate": 9.268292646393877e-06, "loss": 0.0043, "step": 62840 }, { "epoch": 0.513963282495809, "grad_norm": 0.0072381142526865005, "learning_rate": 9.267920919868805e-06, "loss": 0.0018, "step": 62850 }, { "epoch": 0.5140450586744082, "grad_norm": 0.014616785570979118, "learning_rate": 9.267549106402715e-06, "loss": 0.0021, "step": 62860 }, { "epoch": 0.5141268348530074, "grad_norm": 0.048046935349702835, "learning_rate": 9.26717720600318e-06, "loss": 0.0021, "step": 62870 }, { "epoch": 0.5142086110316065, "grad_norm": 0.033717960119247437, "learning_rate": 9.266805218677777e-06, "loss": 0.0014, "step": 62880 }, { "epoch": 0.5142903872102057, "grad_norm": 0.04816718026995659, "learning_rate": 9.266433144434085e-06, "loss": 0.0014, "step": 62890 }, { "epoch": 0.5143721633888049, "grad_norm": 0.06388065218925476, "learning_rate": 9.26606098327968e-06, "loss": 0.0032, "step": 62900 }, { "epoch": 0.514453939567404, "grad_norm": 0.014248436316847801, "learning_rate": 9.265688735222147e-06, "loss": 0.002, "step": 62910 }, { "epoch": 0.5145357157460032, "grad_norm": 0.27396705746650696, "learning_rate": 9.265316400269066e-06, "loss": 0.0023, "step": 62920 }, { "epoch": 0.5146174919246024, "grad_norm": 0.030717026442289352, "learning_rate": 9.264943978428024e-06, "loss": 0.0022, "step": 62930 }, { "epoch": 0.5146992681032015, "grad_norm": 0.311739981174469, "learning_rate": 9.264571469706605e-06, "loss": 0.0024, "step": 62940 }, { "epoch": 0.5147810442818007, "grad_norm": 0.04406246542930603, "learning_rate": 9.2641988741124e-06, "loss": 0.0026, "step": 62950 }, { "epoch": 0.5148628204603999, "grad_norm": 0.26276373863220215, "learning_rate": 9.263826191652999e-06, "loss": 0.0029, "step": 62960 }, { "epoch": 0.514944596638999, "grad_norm": 0.23491668701171875, "learning_rate": 9.26345342233599e-06, "loss": 0.0028, "step": 62970 }, { "epoch": 0.5150263728175982, "grad_norm": 0.06149420887231827, "learning_rate": 9.26308056616897e-06, "loss": 0.0029, "step": 62980 }, { "epoch": 0.5151081489961974, "grad_norm": 0.10908447206020355, "learning_rate": 9.262707623159533e-06, "loss": 0.0022, "step": 62990 }, { "epoch": 0.5151899251747966, "grad_norm": 0.08607693761587143, "learning_rate": 9.262334593315279e-06, "loss": 0.0015, "step": 63000 }, { "epoch": 0.5152717013533957, "grad_norm": 0.17886988818645477, "learning_rate": 9.261961476643802e-06, "loss": 0.0031, "step": 63010 }, { "epoch": 0.5153534775319949, "grad_norm": 0.0751158595085144, "learning_rate": 9.261588273152708e-06, "loss": 0.0024, "step": 63020 }, { "epoch": 0.5154352537105941, "grad_norm": 0.02606881968677044, "learning_rate": 9.261214982849595e-06, "loss": 0.002, "step": 63030 }, { "epoch": 0.5155170298891932, "grad_norm": 0.46233850717544556, "learning_rate": 9.26084160574207e-06, "loss": 0.0023, "step": 63040 }, { "epoch": 0.5155988060677924, "grad_norm": 0.07115592062473297, "learning_rate": 9.260468141837737e-06, "loss": 0.0027, "step": 63050 }, { "epoch": 0.5156805822463916, "grad_norm": 0.13257066905498505, "learning_rate": 9.260094591144206e-06, "loss": 0.0027, "step": 63060 }, { "epoch": 0.5157623584249909, "grad_norm": 0.06634924560785294, "learning_rate": 9.259720953669086e-06, "loss": 0.0039, "step": 63070 }, { "epoch": 0.51584413460359, "grad_norm": 0.03800394386053085, "learning_rate": 9.259347229419987e-06, "loss": 0.0014, "step": 63080 }, { "epoch": 0.5159259107821892, "grad_norm": 0.040623739361763, "learning_rate": 9.258973418404521e-06, "loss": 0.0016, "step": 63090 }, { "epoch": 0.5160076869607884, "grad_norm": 0.13938142359256744, "learning_rate": 9.258599520630307e-06, "loss": 0.003, "step": 63100 }, { "epoch": 0.5160894631393875, "grad_norm": 0.033443015068769455, "learning_rate": 9.25822553610496e-06, "loss": 0.0013, "step": 63110 }, { "epoch": 0.5161712393179867, "grad_norm": 0.15944841504096985, "learning_rate": 9.257851464836095e-06, "loss": 0.0038, "step": 63120 }, { "epoch": 0.5162530154965859, "grad_norm": 0.05843399092555046, "learning_rate": 9.257477306831335e-06, "loss": 0.0025, "step": 63130 }, { "epoch": 0.516334791675185, "grad_norm": 0.1636328548192978, "learning_rate": 9.257103062098303e-06, "loss": 0.0035, "step": 63140 }, { "epoch": 0.5164165678537842, "grad_norm": 0.05676531419157982, "learning_rate": 9.25672873064462e-06, "loss": 0.0026, "step": 63150 }, { "epoch": 0.5164983440323834, "grad_norm": 0.22594639658927917, "learning_rate": 9.256354312477914e-06, "loss": 0.0027, "step": 63160 }, { "epoch": 0.5165801202109825, "grad_norm": 0.1610058844089508, "learning_rate": 9.25597980760581e-06, "loss": 0.0011, "step": 63170 }, { "epoch": 0.5166618963895817, "grad_norm": 0.15529616177082062, "learning_rate": 9.255605216035938e-06, "loss": 0.0022, "step": 63180 }, { "epoch": 0.5167436725681809, "grad_norm": 0.039664898067712784, "learning_rate": 9.25523053777593e-06, "loss": 0.0023, "step": 63190 }, { "epoch": 0.51682544874678, "grad_norm": 0.036893319338560104, "learning_rate": 9.254855772833415e-06, "loss": 0.0031, "step": 63200 }, { "epoch": 0.5169072249253792, "grad_norm": 0.028065718710422516, "learning_rate": 9.25448092121603e-06, "loss": 0.003, "step": 63210 }, { "epoch": 0.5169890011039784, "grad_norm": 0.023560184985399246, "learning_rate": 9.254105982931412e-06, "loss": 0.0026, "step": 63220 }, { "epoch": 0.5170707772825776, "grad_norm": 0.022030968219041824, "learning_rate": 9.253730957987195e-06, "loss": 0.0048, "step": 63230 }, { "epoch": 0.5171525534611767, "grad_norm": 0.018833167850971222, "learning_rate": 9.25335584639102e-06, "loss": 0.0032, "step": 63240 }, { "epoch": 0.5172343296397759, "grad_norm": 0.038947563618421555, "learning_rate": 9.252980648150532e-06, "loss": 0.0031, "step": 63250 }, { "epoch": 0.5173161058183751, "grad_norm": 0.1325490027666092, "learning_rate": 9.25260536327337e-06, "loss": 0.0021, "step": 63260 }, { "epoch": 0.5173978819969742, "grad_norm": 0.047081395983695984, "learning_rate": 9.252229991767182e-06, "loss": 0.0023, "step": 63270 }, { "epoch": 0.5174796581755735, "grad_norm": 0.02583506889641285, "learning_rate": 9.25185453363961e-06, "loss": 0.0022, "step": 63280 }, { "epoch": 0.5175614343541727, "grad_norm": 0.2851022481918335, "learning_rate": 9.251478988898305e-06, "loss": 0.0029, "step": 63290 }, { "epoch": 0.5176432105327718, "grad_norm": 0.08722355216741562, "learning_rate": 9.251103357550918e-06, "loss": 0.0014, "step": 63300 }, { "epoch": 0.517724986711371, "grad_norm": 0.02338012494146824, "learning_rate": 9.250727639605099e-06, "loss": 0.0015, "step": 63310 }, { "epoch": 0.5178067628899702, "grad_norm": 0.10179328173398972, "learning_rate": 9.250351835068505e-06, "loss": 0.0025, "step": 63320 }, { "epoch": 0.5178885390685694, "grad_norm": 0.04572933167219162, "learning_rate": 9.249975943948788e-06, "loss": 0.0051, "step": 63330 }, { "epoch": 0.5179703152471685, "grad_norm": 0.016991088166832924, "learning_rate": 9.249599966253606e-06, "loss": 0.0023, "step": 63340 }, { "epoch": 0.5180520914257677, "grad_norm": 0.0444812998175621, "learning_rate": 9.24922390199062e-06, "loss": 0.0029, "step": 63350 }, { "epoch": 0.5181338676043669, "grad_norm": 0.09276220947504044, "learning_rate": 9.248847751167488e-06, "loss": 0.0033, "step": 63360 }, { "epoch": 0.518215643782966, "grad_norm": 0.03801963850855827, "learning_rate": 9.248471513791875e-06, "loss": 0.0015, "step": 63370 }, { "epoch": 0.5182974199615652, "grad_norm": 0.025160182267427444, "learning_rate": 9.248095189871443e-06, "loss": 0.0029, "step": 63380 }, { "epoch": 0.5183791961401644, "grad_norm": 0.10692094266414642, "learning_rate": 9.247718779413859e-06, "loss": 0.0012, "step": 63390 }, { "epoch": 0.5184609723187635, "grad_norm": 0.2366531789302826, "learning_rate": 9.24734228242679e-06, "loss": 0.0036, "step": 63400 }, { "epoch": 0.5185427484973627, "grad_norm": 0.09054814279079437, "learning_rate": 9.246965698917907e-06, "loss": 0.0021, "step": 63410 }, { "epoch": 0.5186245246759619, "grad_norm": 0.04033394157886505, "learning_rate": 9.24658902889488e-06, "loss": 0.0019, "step": 63420 }, { "epoch": 0.518706300854561, "grad_norm": 0.07839624583721161, "learning_rate": 9.246212272365383e-06, "loss": 0.003, "step": 63430 }, { "epoch": 0.5187880770331602, "grad_norm": 0.10091988742351532, "learning_rate": 9.24583542933709e-06, "loss": 0.0016, "step": 63440 }, { "epoch": 0.5188698532117594, "grad_norm": 0.03245239704847336, "learning_rate": 9.24545849981768e-06, "loss": 0.0011, "step": 63450 }, { "epoch": 0.5189516293903585, "grad_norm": 0.17032353579998016, "learning_rate": 9.245081483814828e-06, "loss": 0.0031, "step": 63460 }, { "epoch": 0.5190334055689577, "grad_norm": 0.0191323384642601, "learning_rate": 9.244704381336216e-06, "loss": 0.0006, "step": 63470 }, { "epoch": 0.5191151817475569, "grad_norm": 0.07248269766569138, "learning_rate": 9.244327192389527e-06, "loss": 0.0019, "step": 63480 }, { "epoch": 0.519196957926156, "grad_norm": 0.18977099657058716, "learning_rate": 9.24394991698244e-06, "loss": 0.003, "step": 63490 }, { "epoch": 0.5192787341047553, "grad_norm": 0.09341208636760712, "learning_rate": 9.243572555122645e-06, "loss": 0.0018, "step": 63500 }, { "epoch": 0.5193605102833545, "grad_norm": 0.062107134610414505, "learning_rate": 9.243195106817829e-06, "loss": 0.0021, "step": 63510 }, { "epoch": 0.5194422864619537, "grad_norm": 0.06873714178800583, "learning_rate": 9.242817572075677e-06, "loss": 0.0015, "step": 63520 }, { "epoch": 0.5195240626405528, "grad_norm": 0.06460690498352051, "learning_rate": 9.242439950903883e-06, "loss": 0.0014, "step": 63530 }, { "epoch": 0.519605838819152, "grad_norm": 0.03836022689938545, "learning_rate": 9.24206224331014e-06, "loss": 0.0026, "step": 63540 }, { "epoch": 0.5196876149977512, "grad_norm": 0.0672144740819931, "learning_rate": 9.241684449302142e-06, "loss": 0.0039, "step": 63550 }, { "epoch": 0.5197693911763503, "grad_norm": 0.0445878729224205, "learning_rate": 9.24130656888758e-06, "loss": 0.0027, "step": 63560 }, { "epoch": 0.5198511673549495, "grad_norm": 0.0488247387111187, "learning_rate": 9.24092860207416e-06, "loss": 0.0018, "step": 63570 }, { "epoch": 0.5199329435335487, "grad_norm": 0.6600876450538635, "learning_rate": 9.240550548869573e-06, "loss": 0.0014, "step": 63580 }, { "epoch": 0.5200147197121479, "grad_norm": 0.12675003707408905, "learning_rate": 9.240172409281528e-06, "loss": 0.0021, "step": 63590 }, { "epoch": 0.520096495890747, "grad_norm": 0.04517783224582672, "learning_rate": 9.239794183317723e-06, "loss": 0.0043, "step": 63600 }, { "epoch": 0.5201782720693462, "grad_norm": 0.07043185830116272, "learning_rate": 9.239415870985864e-06, "loss": 0.0032, "step": 63610 }, { "epoch": 0.5202600482479454, "grad_norm": 0.10859231650829315, "learning_rate": 9.239037472293657e-06, "loss": 0.0022, "step": 63620 }, { "epoch": 0.5203418244265445, "grad_norm": 0.06657502800226212, "learning_rate": 9.238658987248815e-06, "loss": 0.0019, "step": 63630 }, { "epoch": 0.5204236006051437, "grad_norm": 0.09894605726003647, "learning_rate": 9.23828041585904e-06, "loss": 0.0028, "step": 63640 }, { "epoch": 0.5205053767837429, "grad_norm": 0.16490906476974487, "learning_rate": 9.237901758132048e-06, "loss": 0.0031, "step": 63650 }, { "epoch": 0.520587152962342, "grad_norm": 0.19269847869873047, "learning_rate": 9.237523014075554e-06, "loss": 0.002, "step": 63660 }, { "epoch": 0.5206689291409412, "grad_norm": 0.05067610740661621, "learning_rate": 9.23714418369727e-06, "loss": 0.0027, "step": 63670 }, { "epoch": 0.5207507053195404, "grad_norm": 0.0074619147926568985, "learning_rate": 9.236765267004918e-06, "loss": 0.0028, "step": 63680 }, { "epoch": 0.5208324814981395, "grad_norm": 0.0843532383441925, "learning_rate": 9.236386264006212e-06, "loss": 0.0011, "step": 63690 }, { "epoch": 0.5209142576767387, "grad_norm": 0.011477598920464516, "learning_rate": 9.236007174708874e-06, "loss": 0.0024, "step": 63700 }, { "epoch": 0.520996033855338, "grad_norm": 0.08743118494749069, "learning_rate": 9.235627999120627e-06, "loss": 0.0027, "step": 63710 }, { "epoch": 0.5210778100339372, "grad_norm": 0.06646837294101715, "learning_rate": 9.235248737249194e-06, "loss": 0.0017, "step": 63720 }, { "epoch": 0.5211595862125363, "grad_norm": 0.10452098399400711, "learning_rate": 9.234869389102301e-06, "loss": 0.0024, "step": 63730 }, { "epoch": 0.5212413623911355, "grad_norm": 0.06961427628993988, "learning_rate": 9.234489954687678e-06, "loss": 0.0021, "step": 63740 }, { "epoch": 0.5213231385697347, "grad_norm": 0.11072377860546112, "learning_rate": 9.234110434013053e-06, "loss": 0.0027, "step": 63750 }, { "epoch": 0.5214049147483338, "grad_norm": 0.055788855999708176, "learning_rate": 9.233730827086155e-06, "loss": 0.0017, "step": 63760 }, { "epoch": 0.521486690926933, "grad_norm": 0.2196718007326126, "learning_rate": 9.23335113391472e-06, "loss": 0.002, "step": 63770 }, { "epoch": 0.5215684671055322, "grad_norm": 0.08756954222917557, "learning_rate": 9.232971354506482e-06, "loss": 0.0031, "step": 63780 }, { "epoch": 0.5216502432841313, "grad_norm": 0.22474469244480133, "learning_rate": 9.232591488869174e-06, "loss": 0.0019, "step": 63790 }, { "epoch": 0.5217320194627305, "grad_norm": 0.03638609126210213, "learning_rate": 9.232211537010538e-06, "loss": 0.0013, "step": 63800 }, { "epoch": 0.5218137956413297, "grad_norm": 0.03294985368847847, "learning_rate": 9.231831498938315e-06, "loss": 0.002, "step": 63810 }, { "epoch": 0.5218955718199288, "grad_norm": 0.10244030505418777, "learning_rate": 9.231451374660242e-06, "loss": 0.003, "step": 63820 }, { "epoch": 0.521977347998528, "grad_norm": 0.12830817699432373, "learning_rate": 9.231071164184067e-06, "loss": 0.0019, "step": 63830 }, { "epoch": 0.5220591241771272, "grad_norm": 0.01831524260342121, "learning_rate": 9.230690867517532e-06, "loss": 0.0016, "step": 63840 }, { "epoch": 0.5221409003557264, "grad_norm": 0.06695520132780075, "learning_rate": 9.230310484668386e-06, "loss": 0.0035, "step": 63850 }, { "epoch": 0.5222226765343255, "grad_norm": 0.029489601030945778, "learning_rate": 9.229930015644375e-06, "loss": 0.0029, "step": 63860 }, { "epoch": 0.5223044527129247, "grad_norm": 0.03972157835960388, "learning_rate": 9.229549460453254e-06, "loss": 0.0018, "step": 63870 }, { "epoch": 0.5223862288915239, "grad_norm": 0.01573432981967926, "learning_rate": 9.22916881910277e-06, "loss": 0.0022, "step": 63880 }, { "epoch": 0.522468005070123, "grad_norm": 0.06568566709756851, "learning_rate": 9.228788091600682e-06, "loss": 0.002, "step": 63890 }, { "epoch": 0.5225497812487222, "grad_norm": 0.08525635302066803, "learning_rate": 9.228407277954741e-06, "loss": 0.002, "step": 63900 }, { "epoch": 0.5226315574273214, "grad_norm": 0.22842498123645782, "learning_rate": 9.228026378172708e-06, "loss": 0.0017, "step": 63910 }, { "epoch": 0.5227133336059206, "grad_norm": 0.014469996094703674, "learning_rate": 9.22764539226234e-06, "loss": 0.0018, "step": 63920 }, { "epoch": 0.5227951097845198, "grad_norm": 0.17888161540031433, "learning_rate": 9.227264320231399e-06, "loss": 0.0023, "step": 63930 }, { "epoch": 0.522876885963119, "grad_norm": 0.18303753435611725, "learning_rate": 9.22688316208765e-06, "loss": 0.0027, "step": 63940 }, { "epoch": 0.5229586621417182, "grad_norm": 0.03436168655753136, "learning_rate": 9.226501917838852e-06, "loss": 0.0024, "step": 63950 }, { "epoch": 0.5230404383203173, "grad_norm": 0.2205488085746765, "learning_rate": 9.226120587492777e-06, "loss": 0.0028, "step": 63960 }, { "epoch": 0.5231222144989165, "grad_norm": 0.07018471509218216, "learning_rate": 9.22573917105719e-06, "loss": 0.0023, "step": 63970 }, { "epoch": 0.5232039906775157, "grad_norm": 0.02622230164706707, "learning_rate": 9.22535766853986e-06, "loss": 0.0026, "step": 63980 }, { "epoch": 0.5232857668561148, "grad_norm": 0.0362451933324337, "learning_rate": 9.22497607994856e-06, "loss": 0.0013, "step": 63990 }, { "epoch": 0.523367543034714, "grad_norm": 0.024528248235583305, "learning_rate": 9.224594405291062e-06, "loss": 0.0023, "step": 64000 }, { "epoch": 0.5234493192133132, "grad_norm": 0.10024603456258774, "learning_rate": 9.224212644575144e-06, "loss": 0.0026, "step": 64010 }, { "epoch": 0.5235310953919123, "grad_norm": 0.0689091607928276, "learning_rate": 9.223830797808581e-06, "loss": 0.0022, "step": 64020 }, { "epoch": 0.5236128715705115, "grad_norm": 0.18789024651050568, "learning_rate": 9.22344886499915e-06, "loss": 0.002, "step": 64030 }, { "epoch": 0.5236946477491107, "grad_norm": 0.24698425829410553, "learning_rate": 9.223066846154634e-06, "loss": 0.0029, "step": 64040 }, { "epoch": 0.5237764239277098, "grad_norm": 0.06594619154930115, "learning_rate": 9.222684741282812e-06, "loss": 0.0016, "step": 64050 }, { "epoch": 0.523858200106309, "grad_norm": 0.19528087973594666, "learning_rate": 9.22230255039147e-06, "loss": 0.002, "step": 64060 }, { "epoch": 0.5239399762849082, "grad_norm": 0.021979698911309242, "learning_rate": 9.221920273488394e-06, "loss": 0.0021, "step": 64070 }, { "epoch": 0.5240217524635074, "grad_norm": 0.0026602863799780607, "learning_rate": 9.221537910581371e-06, "loss": 0.0018, "step": 64080 }, { "epoch": 0.5241035286421065, "grad_norm": 0.06738393753767014, "learning_rate": 9.221155461678185e-06, "loss": 0.0059, "step": 64090 }, { "epoch": 0.5241853048207057, "grad_norm": 0.12354229390621185, "learning_rate": 9.220772926786637e-06, "loss": 0.0024, "step": 64100 }, { "epoch": 0.5242670809993049, "grad_norm": 0.0554836131632328, "learning_rate": 9.220390305914509e-06, "loss": 0.002, "step": 64110 }, { "epoch": 0.524348857177904, "grad_norm": 0.05801834911108017, "learning_rate": 9.220007599069602e-06, "loss": 0.0062, "step": 64120 }, { "epoch": 0.5244306333565032, "grad_norm": 0.0752016007900238, "learning_rate": 9.21962480625971e-06, "loss": 0.0012, "step": 64130 }, { "epoch": 0.5245124095351025, "grad_norm": 0.03538427874445915, "learning_rate": 9.219241927492631e-06, "loss": 0.0025, "step": 64140 }, { "epoch": 0.5245941857137016, "grad_norm": 0.026090268045663834, "learning_rate": 9.218858962776163e-06, "loss": 0.0027, "step": 64150 }, { "epoch": 0.5246759618923008, "grad_norm": 0.048607345670461655, "learning_rate": 9.21847591211811e-06, "loss": 0.0028, "step": 64160 }, { "epoch": 0.5247577380709, "grad_norm": 0.2159067690372467, "learning_rate": 9.218092775526272e-06, "loss": 0.0027, "step": 64170 }, { "epoch": 0.5248395142494992, "grad_norm": 0.2414897382259369, "learning_rate": 9.217709553008458e-06, "loss": 0.002, "step": 64180 }, { "epoch": 0.5249212904280983, "grad_norm": 0.13389909267425537, "learning_rate": 9.217326244572471e-06, "loss": 0.0023, "step": 64190 }, { "epoch": 0.5250030666066975, "grad_norm": 0.046816159039735794, "learning_rate": 9.21694285022612e-06, "loss": 0.0021, "step": 64200 }, { "epoch": 0.5250848427852967, "grad_norm": 0.1451222151517868, "learning_rate": 9.216559369977215e-06, "loss": 0.0016, "step": 64210 }, { "epoch": 0.5251666189638958, "grad_norm": 0.018788188695907593, "learning_rate": 9.216175803833569e-06, "loss": 0.0022, "step": 64220 }, { "epoch": 0.525248395142495, "grad_norm": 0.7808581590652466, "learning_rate": 9.215792151802996e-06, "loss": 0.0029, "step": 64230 }, { "epoch": 0.5253301713210942, "grad_norm": 0.0339556559920311, "learning_rate": 9.215408413893307e-06, "loss": 0.007, "step": 64240 }, { "epoch": 0.5254119474996933, "grad_norm": 0.052319906651973724, "learning_rate": 9.215024590112325e-06, "loss": 0.0018, "step": 64250 }, { "epoch": 0.5254937236782925, "grad_norm": 0.14608506858348846, "learning_rate": 9.214640680467865e-06, "loss": 0.0023, "step": 64260 }, { "epoch": 0.5255754998568917, "grad_norm": 0.057491715997457504, "learning_rate": 9.214256684967749e-06, "loss": 0.002, "step": 64270 }, { "epoch": 0.5256572760354908, "grad_norm": 0.03814053162932396, "learning_rate": 9.213872603619798e-06, "loss": 0.0017, "step": 64280 }, { "epoch": 0.52573905221409, "grad_norm": 0.04701860249042511, "learning_rate": 9.213488436431837e-06, "loss": 0.0023, "step": 64290 }, { "epoch": 0.5258208283926892, "grad_norm": 0.03891034796833992, "learning_rate": 9.213104183411692e-06, "loss": 0.0029, "step": 64300 }, { "epoch": 0.5259026045712883, "grad_norm": 0.029772480949759483, "learning_rate": 9.212719844567188e-06, "loss": 0.0028, "step": 64310 }, { "epoch": 0.5259843807498875, "grad_norm": 0.08371812850236893, "learning_rate": 9.21233541990616e-06, "loss": 0.0021, "step": 64320 }, { "epoch": 0.5260661569284867, "grad_norm": 0.11383549124002457, "learning_rate": 9.211950909436433e-06, "loss": 0.0078, "step": 64330 }, { "epoch": 0.5261479331070859, "grad_norm": 0.1664186418056488, "learning_rate": 9.211566313165843e-06, "loss": 0.0067, "step": 64340 }, { "epoch": 0.5262297092856851, "grad_norm": 0.02190183289349079, "learning_rate": 9.211181631102225e-06, "loss": 0.0018, "step": 64350 }, { "epoch": 0.5263114854642843, "grad_norm": 0.04015928506851196, "learning_rate": 9.210796863253412e-06, "loss": 0.0033, "step": 64360 }, { "epoch": 0.5263932616428835, "grad_norm": 0.048469215631484985, "learning_rate": 9.210412009627246e-06, "loss": 0.0021, "step": 64370 }, { "epoch": 0.5264750378214826, "grad_norm": 0.14772842824459076, "learning_rate": 9.210027070231564e-06, "loss": 0.0027, "step": 64380 }, { "epoch": 0.5265568140000818, "grad_norm": 0.025140048936009407, "learning_rate": 9.20964204507421e-06, "loss": 0.0025, "step": 64390 }, { "epoch": 0.526638590178681, "grad_norm": 0.1165585145354271, "learning_rate": 9.209256934163025e-06, "loss": 0.0019, "step": 64400 }, { "epoch": 0.5267203663572801, "grad_norm": 0.310452401638031, "learning_rate": 9.208871737505854e-06, "loss": 0.002, "step": 64410 }, { "epoch": 0.5268021425358793, "grad_norm": 0.12957310676574707, "learning_rate": 9.208486455110545e-06, "loss": 0.0027, "step": 64420 }, { "epoch": 0.5268839187144785, "grad_norm": 0.07585112750530243, "learning_rate": 9.208101086984946e-06, "loss": 0.003, "step": 64430 }, { "epoch": 0.5269656948930777, "grad_norm": 0.008759971708059311, "learning_rate": 9.207715633136909e-06, "loss": 0.0016, "step": 64440 }, { "epoch": 0.5270474710716768, "grad_norm": 0.010907606221735477, "learning_rate": 9.207330093574282e-06, "loss": 0.0035, "step": 64450 }, { "epoch": 0.527129247250276, "grad_norm": 0.017878299579024315, "learning_rate": 9.206944468304922e-06, "loss": 0.001, "step": 64460 }, { "epoch": 0.5272110234288752, "grad_norm": 0.059305232018232346, "learning_rate": 9.206558757336684e-06, "loss": 0.002, "step": 64470 }, { "epoch": 0.5272927996074743, "grad_norm": 0.12228608876466751, "learning_rate": 9.206172960677425e-06, "loss": 0.0012, "step": 64480 }, { "epoch": 0.5273745757860735, "grad_norm": 0.07520660012960434, "learning_rate": 9.205787078335001e-06, "loss": 0.0022, "step": 64490 }, { "epoch": 0.5274563519646727, "grad_norm": 0.1052316278219223, "learning_rate": 9.205401110317279e-06, "loss": 0.0018, "step": 64500 }, { "epoch": 0.5275381281432718, "grad_norm": 0.10964615643024445, "learning_rate": 9.205015056632116e-06, "loss": 0.0038, "step": 64510 }, { "epoch": 0.527619904321871, "grad_norm": 0.06632760167121887, "learning_rate": 9.20462891728738e-06, "loss": 0.0023, "step": 64520 }, { "epoch": 0.5277016805004702, "grad_norm": 0.03873801603913307, "learning_rate": 9.204242692290935e-06, "loss": 0.0015, "step": 64530 }, { "epoch": 0.5277834566790693, "grad_norm": 0.047112785279750824, "learning_rate": 9.203856381650649e-06, "loss": 0.0019, "step": 64540 }, { "epoch": 0.5278652328576685, "grad_norm": 0.10789304226636887, "learning_rate": 9.20346998537439e-06, "loss": 0.0028, "step": 64550 }, { "epoch": 0.5279470090362678, "grad_norm": 0.047441817820072174, "learning_rate": 9.203083503470031e-06, "loss": 0.0021, "step": 64560 }, { "epoch": 0.528028785214867, "grad_norm": 0.2402884066104889, "learning_rate": 9.202696935945445e-06, "loss": 0.0038, "step": 64570 }, { "epoch": 0.5281105613934661, "grad_norm": 0.05088847875595093, "learning_rate": 9.202310282808506e-06, "loss": 0.0017, "step": 64580 }, { "epoch": 0.5281923375720653, "grad_norm": 0.08217155933380127, "learning_rate": 9.201923544067092e-06, "loss": 0.0025, "step": 64590 }, { "epoch": 0.5282741137506645, "grad_norm": 0.10383359342813492, "learning_rate": 9.20153671972908e-06, "loss": 0.0027, "step": 64600 }, { "epoch": 0.5283558899292636, "grad_norm": 0.13357260823249817, "learning_rate": 9.201149809802347e-06, "loss": 0.0032, "step": 64610 }, { "epoch": 0.5284376661078628, "grad_norm": 0.14202719926834106, "learning_rate": 9.200762814294781e-06, "loss": 0.0018, "step": 64620 }, { "epoch": 0.528519442286462, "grad_norm": 0.006103757303208113, "learning_rate": 9.20037573321426e-06, "loss": 0.0016, "step": 64630 }, { "epoch": 0.5286012184650611, "grad_norm": 0.18839319050312042, "learning_rate": 9.19998856656867e-06, "loss": 0.0025, "step": 64640 }, { "epoch": 0.5286829946436603, "grad_norm": 0.1453801691532135, "learning_rate": 9.1996013143659e-06, "loss": 0.0023, "step": 64650 }, { "epoch": 0.5287647708222595, "grad_norm": 0.11194813251495361, "learning_rate": 9.199213976613839e-06, "loss": 0.0025, "step": 64660 }, { "epoch": 0.5288465470008586, "grad_norm": 0.08026955276727676, "learning_rate": 9.198826553320375e-06, "loss": 0.0033, "step": 64670 }, { "epoch": 0.5289283231794578, "grad_norm": 0.08445575088262558, "learning_rate": 9.1984390444934e-06, "loss": 0.004, "step": 64680 }, { "epoch": 0.529010099358057, "grad_norm": 0.042244356125593185, "learning_rate": 9.198051450140812e-06, "loss": 0.0022, "step": 64690 }, { "epoch": 0.5290918755366562, "grad_norm": 0.06280293315649033, "learning_rate": 9.197663770270502e-06, "loss": 0.0027, "step": 64700 }, { "epoch": 0.5291736517152553, "grad_norm": 0.021295085549354553, "learning_rate": 9.197276004890367e-06, "loss": 0.0011, "step": 64710 }, { "epoch": 0.5292554278938545, "grad_norm": 0.05506760999560356, "learning_rate": 9.19688815400831e-06, "loss": 0.0024, "step": 64720 }, { "epoch": 0.5293372040724537, "grad_norm": 0.046544816344976425, "learning_rate": 9.19650021763223e-06, "loss": 0.0031, "step": 64730 }, { "epoch": 0.5294189802510528, "grad_norm": 0.09034819900989532, "learning_rate": 9.19611219577003e-06, "loss": 0.0028, "step": 64740 }, { "epoch": 0.529500756429652, "grad_norm": 0.08796774595975876, "learning_rate": 9.195724088429611e-06, "loss": 0.0033, "step": 64750 }, { "epoch": 0.5295825326082512, "grad_norm": 0.057231005281209946, "learning_rate": 9.195335895618885e-06, "loss": 0.0016, "step": 64760 }, { "epoch": 0.5296643087868503, "grad_norm": 0.04071984440088272, "learning_rate": 9.194947617345755e-06, "loss": 0.0024, "step": 64770 }, { "epoch": 0.5297460849654496, "grad_norm": 0.10103783011436462, "learning_rate": 9.194559253618134e-06, "loss": 0.0019, "step": 64780 }, { "epoch": 0.5298278611440488, "grad_norm": 0.046851325780153275, "learning_rate": 9.19417080444393e-06, "loss": 0.0028, "step": 64790 }, { "epoch": 0.529909637322648, "grad_norm": 0.03378884494304657, "learning_rate": 9.193782269831057e-06, "loss": 0.0021, "step": 64800 }, { "epoch": 0.5299914135012471, "grad_norm": 0.0762103796005249, "learning_rate": 9.19339364978743e-06, "loss": 0.0022, "step": 64810 }, { "epoch": 0.5300731896798463, "grad_norm": 0.1486818790435791, "learning_rate": 9.193004944320967e-06, "loss": 0.0045, "step": 64820 }, { "epoch": 0.5301549658584455, "grad_norm": 0.08817214518785477, "learning_rate": 9.192616153439586e-06, "loss": 0.0042, "step": 64830 }, { "epoch": 0.5302367420370446, "grad_norm": 0.06999730318784714, "learning_rate": 9.192227277151205e-06, "loss": 0.0034, "step": 64840 }, { "epoch": 0.5303185182156438, "grad_norm": 0.035030756145715714, "learning_rate": 9.191838315463746e-06, "loss": 0.0011, "step": 64850 }, { "epoch": 0.530400294394243, "grad_norm": 0.052671920508146286, "learning_rate": 9.191449268385133e-06, "loss": 0.0015, "step": 64860 }, { "epoch": 0.5304820705728421, "grad_norm": 0.13155506551265717, "learning_rate": 9.191060135923294e-06, "loss": 0.003, "step": 64870 }, { "epoch": 0.5305638467514413, "grad_norm": 0.06249304115772247, "learning_rate": 9.190670918086151e-06, "loss": 0.0024, "step": 64880 }, { "epoch": 0.5306456229300405, "grad_norm": 0.8087536692619324, "learning_rate": 9.190281614881635e-06, "loss": 0.0054, "step": 64890 }, { "epoch": 0.5307273991086396, "grad_norm": 0.20245139300823212, "learning_rate": 9.189892226317677e-06, "loss": 0.0033, "step": 64900 }, { "epoch": 0.5308091752872388, "grad_norm": 0.052993293851614, "learning_rate": 9.189502752402209e-06, "loss": 0.0016, "step": 64910 }, { "epoch": 0.530890951465838, "grad_norm": 0.16696588695049286, "learning_rate": 9.189113193143164e-06, "loss": 0.0027, "step": 64920 }, { "epoch": 0.5309727276444371, "grad_norm": 0.050430554896593094, "learning_rate": 9.18872354854848e-06, "loss": 0.0031, "step": 64930 }, { "epoch": 0.5310545038230363, "grad_norm": 0.08263368159532547, "learning_rate": 9.18833381862609e-06, "loss": 0.0031, "step": 64940 }, { "epoch": 0.5311362800016355, "grad_norm": 0.08450644463300705, "learning_rate": 9.187944003383937e-06, "loss": 0.0031, "step": 64950 }, { "epoch": 0.5312180561802347, "grad_norm": 0.18067213892936707, "learning_rate": 9.18755410282996e-06, "loss": 0.0027, "step": 64960 }, { "epoch": 0.5312998323588338, "grad_norm": 0.03376920893788338, "learning_rate": 9.187164116972103e-06, "loss": 0.0013, "step": 64970 }, { "epoch": 0.531381608537433, "grad_norm": 0.062217116355895996, "learning_rate": 9.186774045818308e-06, "loss": 0.0031, "step": 64980 }, { "epoch": 0.5314633847160323, "grad_norm": 0.012749098241329193, "learning_rate": 9.186383889376522e-06, "loss": 0.0014, "step": 64990 }, { "epoch": 0.5315451608946314, "grad_norm": 0.023598788306117058, "learning_rate": 9.185993647654696e-06, "loss": 0.0022, "step": 65000 }, { "epoch": 0.5316269370732306, "grad_norm": 0.03078646771609783, "learning_rate": 9.185603320660774e-06, "loss": 0.0026, "step": 65010 }, { "epoch": 0.5317087132518298, "grad_norm": 0.1331949234008789, "learning_rate": 9.185212908402711e-06, "loss": 0.0021, "step": 65020 }, { "epoch": 0.531790489430429, "grad_norm": 0.13642840087413788, "learning_rate": 9.184822410888462e-06, "loss": 0.0025, "step": 65030 }, { "epoch": 0.5318722656090281, "grad_norm": 0.02368093840777874, "learning_rate": 9.184431828125975e-06, "loss": 0.0021, "step": 65040 }, { "epoch": 0.5319540417876273, "grad_norm": 0.10153457522392273, "learning_rate": 9.184041160123211e-06, "loss": 0.0031, "step": 65050 }, { "epoch": 0.5320358179662265, "grad_norm": 0.009790627285838127, "learning_rate": 9.183650406888129e-06, "loss": 0.0048, "step": 65060 }, { "epoch": 0.5321175941448256, "grad_norm": 0.2300306111574173, "learning_rate": 9.183259568428688e-06, "loss": 0.0022, "step": 65070 }, { "epoch": 0.5321993703234248, "grad_norm": 0.01466164831072092, "learning_rate": 9.182868644752849e-06, "loss": 0.0027, "step": 65080 }, { "epoch": 0.532281146502024, "grad_norm": 0.12826277315616608, "learning_rate": 9.182477635868575e-06, "loss": 0.0019, "step": 65090 }, { "epoch": 0.5323629226806231, "grad_norm": 0.16070914268493652, "learning_rate": 9.182086541783832e-06, "loss": 0.0026, "step": 65100 }, { "epoch": 0.5324446988592223, "grad_norm": 0.1025218740105629, "learning_rate": 9.181695362506586e-06, "loss": 0.0013, "step": 65110 }, { "epoch": 0.5325264750378215, "grad_norm": 0.08626164495944977, "learning_rate": 9.181304098044808e-06, "loss": 0.0025, "step": 65120 }, { "epoch": 0.5326082512164206, "grad_norm": 0.017301514744758606, "learning_rate": 9.180912748406465e-06, "loss": 0.0022, "step": 65130 }, { "epoch": 0.5326900273950198, "grad_norm": 0.04540606215596199, "learning_rate": 9.180521313599533e-06, "loss": 0.0044, "step": 65140 }, { "epoch": 0.532771803573619, "grad_norm": 0.05651158094406128, "learning_rate": 9.180129793631982e-06, "loss": 0.0029, "step": 65150 }, { "epoch": 0.5328535797522181, "grad_norm": 0.050385862588882446, "learning_rate": 9.179738188511789e-06, "loss": 0.0022, "step": 65160 }, { "epoch": 0.5329353559308173, "grad_norm": 0.03165263310074806, "learning_rate": 9.179346498246932e-06, "loss": 0.0021, "step": 65170 }, { "epoch": 0.5330171321094165, "grad_norm": 0.03592158108949661, "learning_rate": 9.17895472284539e-06, "loss": 0.0015, "step": 65180 }, { "epoch": 0.5330989082880156, "grad_norm": 0.06221812963485718, "learning_rate": 9.178562862315144e-06, "loss": 0.0019, "step": 65190 }, { "epoch": 0.5331806844666149, "grad_norm": 0.06715847551822662, "learning_rate": 9.178170916664176e-06, "loss": 0.0013, "step": 65200 }, { "epoch": 0.5332624606452141, "grad_norm": 0.03436613082885742, "learning_rate": 9.17777888590047e-06, "loss": 0.0012, "step": 65210 }, { "epoch": 0.5333442368238133, "grad_norm": 0.08464542776346207, "learning_rate": 9.177386770032012e-06, "loss": 0.0022, "step": 65220 }, { "epoch": 0.5334260130024124, "grad_norm": 0.10487540811300278, "learning_rate": 9.17699456906679e-06, "loss": 0.0023, "step": 65230 }, { "epoch": 0.5335077891810116, "grad_norm": 0.16683630645275116, "learning_rate": 9.176602283012795e-06, "loss": 0.0032, "step": 65240 }, { "epoch": 0.5335895653596108, "grad_norm": 0.05959540978074074, "learning_rate": 9.176209911878014e-06, "loss": 0.0023, "step": 65250 }, { "epoch": 0.5336713415382099, "grad_norm": 0.040328141301870346, "learning_rate": 9.175817455670443e-06, "loss": 0.0018, "step": 65260 }, { "epoch": 0.5337531177168091, "grad_norm": 0.05474057048559189, "learning_rate": 9.175424914398076e-06, "loss": 0.0019, "step": 65270 }, { "epoch": 0.5338348938954083, "grad_norm": 0.05158194154500961, "learning_rate": 9.17503228806891e-06, "loss": 0.0046, "step": 65280 }, { "epoch": 0.5339166700740074, "grad_norm": 0.047352101653814316, "learning_rate": 9.174639576690944e-06, "loss": 0.0019, "step": 65290 }, { "epoch": 0.5339984462526066, "grad_norm": 0.026397820562124252, "learning_rate": 9.174246780272175e-06, "loss": 0.0016, "step": 65300 }, { "epoch": 0.5340802224312058, "grad_norm": 0.03832346573472023, "learning_rate": 9.173853898820608e-06, "loss": 0.0023, "step": 65310 }, { "epoch": 0.534161998609805, "grad_norm": 0.18666738271713257, "learning_rate": 9.173460932344242e-06, "loss": 0.0025, "step": 65320 }, { "epoch": 0.5342437747884041, "grad_norm": 0.06135554984211922, "learning_rate": 9.173067880851085e-06, "loss": 0.003, "step": 65330 }, { "epoch": 0.5343255509670033, "grad_norm": 0.029836170375347137, "learning_rate": 9.172674744349144e-06, "loss": 0.0026, "step": 65340 }, { "epoch": 0.5344073271456025, "grad_norm": 0.04374982789158821, "learning_rate": 9.172281522846426e-06, "loss": 0.0031, "step": 65350 }, { "epoch": 0.5344891033242016, "grad_norm": 0.10315296053886414, "learning_rate": 9.17188821635094e-06, "loss": 0.0023, "step": 65360 }, { "epoch": 0.5345708795028008, "grad_norm": 0.08000815659761429, "learning_rate": 9.171494824870703e-06, "loss": 0.0051, "step": 65370 }, { "epoch": 0.5346526556814, "grad_norm": 0.05874977633357048, "learning_rate": 9.171101348413727e-06, "loss": 0.0013, "step": 65380 }, { "epoch": 0.5347344318599991, "grad_norm": 0.06715237349271774, "learning_rate": 9.170707786988023e-06, "loss": 0.002, "step": 65390 }, { "epoch": 0.5348162080385983, "grad_norm": 0.026654750108718872, "learning_rate": 9.170314140601612e-06, "loss": 0.0023, "step": 65400 }, { "epoch": 0.5348979842171975, "grad_norm": 0.02459220588207245, "learning_rate": 9.169920409262513e-06, "loss": 0.0032, "step": 65410 }, { "epoch": 0.5349797603957968, "grad_norm": 0.059397608041763306, "learning_rate": 9.169526592978746e-06, "loss": 0.0025, "step": 65420 }, { "epoch": 0.5350615365743959, "grad_norm": 0.04848727583885193, "learning_rate": 9.169132691758331e-06, "loss": 0.0014, "step": 65430 }, { "epoch": 0.5351433127529951, "grad_norm": 0.09070881456136703, "learning_rate": 9.168738705609295e-06, "loss": 0.007, "step": 65440 }, { "epoch": 0.5352250889315943, "grad_norm": 0.03213215991854668, "learning_rate": 9.168344634539664e-06, "loss": 0.0018, "step": 65450 }, { "epoch": 0.5353068651101934, "grad_norm": 0.06838270276784897, "learning_rate": 9.167950478557465e-06, "loss": 0.0021, "step": 65460 }, { "epoch": 0.5353886412887926, "grad_norm": 0.032239627093076706, "learning_rate": 9.167556237670724e-06, "loss": 0.0031, "step": 65470 }, { "epoch": 0.5354704174673918, "grad_norm": 0.027235310524702072, "learning_rate": 9.16716191188748e-06, "loss": 0.0013, "step": 65480 }, { "epoch": 0.5355521936459909, "grad_norm": 0.027257315814495087, "learning_rate": 9.166767501215756e-06, "loss": 0.0023, "step": 65490 }, { "epoch": 0.5356339698245901, "grad_norm": 0.08589950948953629, "learning_rate": 9.166373005663593e-06, "loss": 0.0028, "step": 65500 }, { "epoch": 0.5357157460031893, "grad_norm": 0.050079766660928726, "learning_rate": 9.165978425239023e-06, "loss": 0.0011, "step": 65510 }, { "epoch": 0.5357975221817884, "grad_norm": 0.0723898857831955, "learning_rate": 9.165583759950089e-06, "loss": 0.0018, "step": 65520 }, { "epoch": 0.5358792983603876, "grad_norm": 0.010183609090745449, "learning_rate": 9.165189009804826e-06, "loss": 0.0014, "step": 65530 }, { "epoch": 0.5359610745389868, "grad_norm": 0.05009915679693222, "learning_rate": 9.16479417481128e-06, "loss": 0.0022, "step": 65540 }, { "epoch": 0.536042850717586, "grad_norm": 0.09354805201292038, "learning_rate": 9.16439925497749e-06, "loss": 0.0027, "step": 65550 }, { "epoch": 0.5361246268961851, "grad_norm": 0.04503766447305679, "learning_rate": 9.1640042503115e-06, "loss": 0.0038, "step": 65560 }, { "epoch": 0.5362064030747843, "grad_norm": 0.11482297629117966, "learning_rate": 9.163609160821361e-06, "loss": 0.0026, "step": 65570 }, { "epoch": 0.5362881792533835, "grad_norm": 0.05529123917222023, "learning_rate": 9.163213986515117e-06, "loss": 0.0017, "step": 65580 }, { "epoch": 0.5363699554319826, "grad_norm": 0.07665914297103882, "learning_rate": 9.162818727400823e-06, "loss": 0.0019, "step": 65590 }, { "epoch": 0.5364517316105818, "grad_norm": 0.049933310598134995, "learning_rate": 9.162423383486527e-06, "loss": 0.0023, "step": 65600 }, { "epoch": 0.536533507789181, "grad_norm": 0.02193553000688553, "learning_rate": 9.162027954780282e-06, "loss": 0.0018, "step": 65610 }, { "epoch": 0.5366152839677801, "grad_norm": 0.05246903747320175, "learning_rate": 9.161632441290145e-06, "loss": 0.0017, "step": 65620 }, { "epoch": 0.5366970601463794, "grad_norm": 0.04403211176395416, "learning_rate": 9.161236843024172e-06, "loss": 0.0037, "step": 65630 }, { "epoch": 0.5367788363249786, "grad_norm": 0.03432781249284744, "learning_rate": 9.160841159990423e-06, "loss": 0.0017, "step": 65640 }, { "epoch": 0.5368606125035777, "grad_norm": 0.06046394631266594, "learning_rate": 9.160445392196957e-06, "loss": 0.0023, "step": 65650 }, { "epoch": 0.5369423886821769, "grad_norm": 0.10205866396427155, "learning_rate": 9.160049539651837e-06, "loss": 0.0017, "step": 65660 }, { "epoch": 0.5370241648607761, "grad_norm": 0.1807798445224762, "learning_rate": 9.159653602363126e-06, "loss": 0.0023, "step": 65670 }, { "epoch": 0.5371059410393753, "grad_norm": 0.2562114894390106, "learning_rate": 9.15925758033889e-06, "loss": 0.0018, "step": 65680 }, { "epoch": 0.5371877172179744, "grad_norm": 0.07069186121225357, "learning_rate": 9.158861473587198e-06, "loss": 0.0019, "step": 65690 }, { "epoch": 0.5372694933965736, "grad_norm": 0.15013685822486877, "learning_rate": 9.158465282116115e-06, "loss": 0.0019, "step": 65700 }, { "epoch": 0.5373512695751728, "grad_norm": 0.1033952459692955, "learning_rate": 9.158069005933715e-06, "loss": 0.0018, "step": 65710 }, { "epoch": 0.5374330457537719, "grad_norm": 0.08051280677318573, "learning_rate": 9.157672645048068e-06, "loss": 0.0023, "step": 65720 }, { "epoch": 0.5375148219323711, "grad_norm": 0.03345466032624245, "learning_rate": 9.15727619946725e-06, "loss": 0.0028, "step": 65730 }, { "epoch": 0.5375965981109703, "grad_norm": 0.04806065186858177, "learning_rate": 9.156879669199338e-06, "loss": 0.0024, "step": 65740 }, { "epoch": 0.5376783742895694, "grad_norm": 0.0717066302895546, "learning_rate": 9.156483054252409e-06, "loss": 0.003, "step": 65750 }, { "epoch": 0.5377601504681686, "grad_norm": 0.018137959763407707, "learning_rate": 9.15608635463454e-06, "loss": 0.0018, "step": 65760 }, { "epoch": 0.5378419266467678, "grad_norm": 0.034959353506565094, "learning_rate": 9.155689570353814e-06, "loss": 0.0028, "step": 65770 }, { "epoch": 0.537923702825367, "grad_norm": 0.010243995115160942, "learning_rate": 9.155292701418314e-06, "loss": 0.0029, "step": 65780 }, { "epoch": 0.5380054790039661, "grad_norm": 0.01474562007933855, "learning_rate": 9.154895747836123e-06, "loss": 0.0022, "step": 65790 }, { "epoch": 0.5380872551825653, "grad_norm": 0.0382457971572876, "learning_rate": 9.15449870961533e-06, "loss": 0.0013, "step": 65800 }, { "epoch": 0.5381690313611645, "grad_norm": 0.037701722234487534, "learning_rate": 9.15410158676402e-06, "loss": 0.0018, "step": 65810 }, { "epoch": 0.5382508075397636, "grad_norm": 0.043426234275102615, "learning_rate": 9.153704379290284e-06, "loss": 0.0022, "step": 65820 }, { "epoch": 0.5383325837183628, "grad_norm": 0.0610942542552948, "learning_rate": 9.153307087202215e-06, "loss": 0.0028, "step": 65830 }, { "epoch": 0.5384143598969621, "grad_norm": 0.033201396465301514, "learning_rate": 9.152909710507902e-06, "loss": 0.0022, "step": 65840 }, { "epoch": 0.5384961360755612, "grad_norm": 0.08329179883003235, "learning_rate": 9.152512249215445e-06, "loss": 0.0027, "step": 65850 }, { "epoch": 0.5385779122541604, "grad_norm": 0.053096953779459, "learning_rate": 9.152114703332937e-06, "loss": 0.0024, "step": 65860 }, { "epoch": 0.5386596884327596, "grad_norm": 0.12101881951093674, "learning_rate": 9.151717072868478e-06, "loss": 0.0021, "step": 65870 }, { "epoch": 0.5387414646113587, "grad_norm": 0.06449580937623978, "learning_rate": 9.151319357830167e-06, "loss": 0.0014, "step": 65880 }, { "epoch": 0.5388232407899579, "grad_norm": 0.0800570696592331, "learning_rate": 9.150921558226107e-06, "loss": 0.0033, "step": 65890 }, { "epoch": 0.5389050169685571, "grad_norm": 0.04964981973171234, "learning_rate": 9.1505236740644e-06, "loss": 0.003, "step": 65900 }, { "epoch": 0.5389867931471563, "grad_norm": 0.10027100145816803, "learning_rate": 9.150125705353154e-06, "loss": 0.0022, "step": 65910 }, { "epoch": 0.5390685693257554, "grad_norm": 0.11663414537906647, "learning_rate": 9.149727652100471e-06, "loss": 0.0025, "step": 65920 }, { "epoch": 0.5391503455043546, "grad_norm": 0.025113224983215332, "learning_rate": 9.149329514314465e-06, "loss": 0.0034, "step": 65930 }, { "epoch": 0.5392321216829538, "grad_norm": 0.014889777638018131, "learning_rate": 9.148931292003243e-06, "loss": 0.0017, "step": 65940 }, { "epoch": 0.5393138978615529, "grad_norm": 0.07343071699142456, "learning_rate": 9.148532985174919e-06, "loss": 0.0032, "step": 65950 }, { "epoch": 0.5393956740401521, "grad_norm": 0.0668790191411972, "learning_rate": 9.148134593837606e-06, "loss": 0.0021, "step": 65960 }, { "epoch": 0.5394774502187513, "grad_norm": 0.4206247627735138, "learning_rate": 9.147736117999417e-06, "loss": 0.0034, "step": 65970 }, { "epoch": 0.5395592263973504, "grad_norm": 0.06158139929175377, "learning_rate": 9.147337557668474e-06, "loss": 0.0033, "step": 65980 }, { "epoch": 0.5396410025759496, "grad_norm": 0.04682057350873947, "learning_rate": 9.146938912852894e-06, "loss": 0.0027, "step": 65990 }, { "epoch": 0.5397227787545488, "grad_norm": 0.08524491637945175, "learning_rate": 9.146540183560797e-06, "loss": 0.0017, "step": 66000 }, { "epoch": 0.5398045549331479, "grad_norm": 0.10903409868478775, "learning_rate": 9.146141369800306e-06, "loss": 0.0029, "step": 66010 }, { "epoch": 0.5398863311117471, "grad_norm": 0.055986884981393814, "learning_rate": 9.145742471579547e-06, "loss": 0.0026, "step": 66020 }, { "epoch": 0.5399681072903463, "grad_norm": 0.004491949453949928, "learning_rate": 9.145343488906641e-06, "loss": 0.002, "step": 66030 }, { "epoch": 0.5400498834689454, "grad_norm": 0.0739419236779213, "learning_rate": 9.14494442178972e-06, "loss": 0.0018, "step": 66040 }, { "epoch": 0.5401316596475446, "grad_norm": 0.05095498263835907, "learning_rate": 9.14454527023691e-06, "loss": 0.0033, "step": 66050 }, { "epoch": 0.5402134358261439, "grad_norm": 0.044983260333538055, "learning_rate": 9.144146034256347e-06, "loss": 0.0021, "step": 66060 }, { "epoch": 0.5402952120047431, "grad_norm": 0.11703068017959595, "learning_rate": 9.14374671385616e-06, "loss": 0.0015, "step": 66070 }, { "epoch": 0.5403769881833422, "grad_norm": 0.03351101279258728, "learning_rate": 9.143347309044483e-06, "loss": 0.0019, "step": 66080 }, { "epoch": 0.5404587643619414, "grad_norm": 0.030831148847937584, "learning_rate": 9.142947819829455e-06, "loss": 0.0014, "step": 66090 }, { "epoch": 0.5405405405405406, "grad_norm": 0.18746067583560944, "learning_rate": 9.142548246219212e-06, "loss": 0.0013, "step": 66100 }, { "epoch": 0.5406223167191397, "grad_norm": 0.04922495409846306, "learning_rate": 9.142148588221893e-06, "loss": 0.0019, "step": 66110 }, { "epoch": 0.5407040928977389, "grad_norm": 0.1235910952091217, "learning_rate": 9.14174884584564e-06, "loss": 0.0017, "step": 66120 }, { "epoch": 0.5407858690763381, "grad_norm": 0.19007451832294464, "learning_rate": 9.141349019098598e-06, "loss": 0.0028, "step": 66130 }, { "epoch": 0.5408676452549372, "grad_norm": 0.0027012357022613287, "learning_rate": 9.14094910798891e-06, "loss": 0.0013, "step": 66140 }, { "epoch": 0.5409494214335364, "grad_norm": 0.19088050723075867, "learning_rate": 9.140549112524721e-06, "loss": 0.0034, "step": 66150 }, { "epoch": 0.5410311976121356, "grad_norm": 0.07386212050914764, "learning_rate": 9.140149032714183e-06, "loss": 0.0029, "step": 66160 }, { "epoch": 0.5411129737907348, "grad_norm": 0.12394576519727707, "learning_rate": 9.139748868565444e-06, "loss": 0.0023, "step": 66170 }, { "epoch": 0.5411947499693339, "grad_norm": 0.21348336338996887, "learning_rate": 9.139348620086655e-06, "loss": 0.0018, "step": 66180 }, { "epoch": 0.5412765261479331, "grad_norm": 0.06776224821805954, "learning_rate": 9.138948287285969e-06, "loss": 0.0027, "step": 66190 }, { "epoch": 0.5413583023265323, "grad_norm": 0.1522240787744522, "learning_rate": 9.138547870171544e-06, "loss": 0.0023, "step": 66200 }, { "epoch": 0.5414400785051314, "grad_norm": 0.037153325974941254, "learning_rate": 9.138147368751533e-06, "loss": 0.003, "step": 66210 }, { "epoch": 0.5415218546837306, "grad_norm": 0.21865512430667877, "learning_rate": 9.137746783034097e-06, "loss": 0.0029, "step": 66220 }, { "epoch": 0.5416036308623298, "grad_norm": 0.06656045466661453, "learning_rate": 9.137346113027396e-06, "loss": 0.0022, "step": 66230 }, { "epoch": 0.5416854070409289, "grad_norm": 0.10622875392436981, "learning_rate": 9.13694535873959e-06, "loss": 0.0015, "step": 66240 }, { "epoch": 0.5417671832195281, "grad_norm": 0.034031905233860016, "learning_rate": 9.136544520178847e-06, "loss": 0.0024, "step": 66250 }, { "epoch": 0.5418489593981273, "grad_norm": 0.08991501480340958, "learning_rate": 9.136143597353328e-06, "loss": 0.0027, "step": 66260 }, { "epoch": 0.5419307355767266, "grad_norm": 0.04360980540513992, "learning_rate": 9.135742590271204e-06, "loss": 0.0019, "step": 66270 }, { "epoch": 0.5420125117553257, "grad_norm": 0.06434899568557739, "learning_rate": 9.13534149894064e-06, "loss": 0.0018, "step": 66280 }, { "epoch": 0.5420942879339249, "grad_norm": 0.07231833040714264, "learning_rate": 9.134940323369809e-06, "loss": 0.0024, "step": 66290 }, { "epoch": 0.5421760641125241, "grad_norm": 0.18173007667064667, "learning_rate": 9.134539063566882e-06, "loss": 0.0032, "step": 66300 }, { "epoch": 0.5422578402911232, "grad_norm": 0.04795807972550392, "learning_rate": 9.134137719540034e-06, "loss": 0.0036, "step": 66310 }, { "epoch": 0.5423396164697224, "grad_norm": 0.06343245506286621, "learning_rate": 9.13373629129744e-06, "loss": 0.0016, "step": 66320 }, { "epoch": 0.5424213926483216, "grad_norm": 0.028824470937252045, "learning_rate": 9.133334778847277e-06, "loss": 0.0013, "step": 66330 }, { "epoch": 0.5425031688269207, "grad_norm": 0.0889776423573494, "learning_rate": 9.132933182197726e-06, "loss": 0.0017, "step": 66340 }, { "epoch": 0.5425849450055199, "grad_norm": 0.09898730367422104, "learning_rate": 9.132531501356966e-06, "loss": 0.0023, "step": 66350 }, { "epoch": 0.5426667211841191, "grad_norm": 0.04060090705752373, "learning_rate": 9.13212973633318e-06, "loss": 0.0015, "step": 66360 }, { "epoch": 0.5427484973627182, "grad_norm": 0.15478742122650146, "learning_rate": 9.131727887134553e-06, "loss": 0.002, "step": 66370 }, { "epoch": 0.5428302735413174, "grad_norm": 0.11635614186525345, "learning_rate": 9.13132595376927e-06, "loss": 0.002, "step": 66380 }, { "epoch": 0.5429120497199166, "grad_norm": 0.062475431710481644, "learning_rate": 9.130923936245519e-06, "loss": 0.0033, "step": 66390 }, { "epoch": 0.5429938258985157, "grad_norm": 0.06428876519203186, "learning_rate": 9.130521834571491e-06, "loss": 0.0027, "step": 66400 }, { "epoch": 0.5430756020771149, "grad_norm": 0.03516978770494461, "learning_rate": 9.130119648755375e-06, "loss": 0.002, "step": 66410 }, { "epoch": 0.5431573782557141, "grad_norm": 0.17074276506900787, "learning_rate": 9.129717378805365e-06, "loss": 0.0013, "step": 66420 }, { "epoch": 0.5432391544343133, "grad_norm": 0.03561082109808922, "learning_rate": 9.129315024729654e-06, "loss": 0.0016, "step": 66430 }, { "epoch": 0.5433209306129124, "grad_norm": 0.06138809770345688, "learning_rate": 9.128912586536441e-06, "loss": 0.0013, "step": 66440 }, { "epoch": 0.5434027067915116, "grad_norm": 0.10511525720357895, "learning_rate": 9.128510064233922e-06, "loss": 0.0023, "step": 66450 }, { "epoch": 0.5434844829701108, "grad_norm": 0.07336582243442535, "learning_rate": 9.128107457830297e-06, "loss": 0.0032, "step": 66460 }, { "epoch": 0.5435662591487099, "grad_norm": 0.09772311896085739, "learning_rate": 9.127704767333767e-06, "loss": 0.002, "step": 66470 }, { "epoch": 0.5436480353273092, "grad_norm": 0.09000618755817413, "learning_rate": 9.127301992752536e-06, "loss": 0.0024, "step": 66480 }, { "epoch": 0.5437298115059084, "grad_norm": 0.22008883953094482, "learning_rate": 9.126899134094809e-06, "loss": 0.0011, "step": 66490 }, { "epoch": 0.5438115876845075, "grad_norm": 0.027295595034956932, "learning_rate": 9.126496191368792e-06, "loss": 0.0009, "step": 66500 }, { "epoch": 0.5438933638631067, "grad_norm": 0.05664120614528656, "learning_rate": 9.126093164582693e-06, "loss": 0.0013, "step": 66510 }, { "epoch": 0.5439751400417059, "grad_norm": 0.0818963423371315, "learning_rate": 9.125690053744722e-06, "loss": 0.0013, "step": 66520 }, { "epoch": 0.544056916220305, "grad_norm": 0.12938375771045685, "learning_rate": 9.125286858863093e-06, "loss": 0.0025, "step": 66530 }, { "epoch": 0.5441386923989042, "grad_norm": 0.03189542144536972, "learning_rate": 9.124883579946015e-06, "loss": 0.002, "step": 66540 }, { "epoch": 0.5442204685775034, "grad_norm": 0.18389815092086792, "learning_rate": 9.124480217001707e-06, "loss": 0.0017, "step": 66550 }, { "epoch": 0.5443022447561026, "grad_norm": 0.01719074323773384, "learning_rate": 9.124076770038387e-06, "loss": 0.0022, "step": 66560 }, { "epoch": 0.5443840209347017, "grad_norm": 0.04233711212873459, "learning_rate": 9.123673239064267e-06, "loss": 0.0013, "step": 66570 }, { "epoch": 0.5444657971133009, "grad_norm": 0.07950034737586975, "learning_rate": 9.123269624087572e-06, "loss": 0.0021, "step": 66580 }, { "epoch": 0.5445475732919001, "grad_norm": 0.17062677443027496, "learning_rate": 9.122865925116525e-06, "loss": 0.0033, "step": 66590 }, { "epoch": 0.5446293494704992, "grad_norm": 0.023345299065113068, "learning_rate": 9.122462142159346e-06, "loss": 0.0036, "step": 66600 }, { "epoch": 0.5447111256490984, "grad_norm": 0.06310781091451645, "learning_rate": 9.122058275224261e-06, "loss": 0.0028, "step": 66610 }, { "epoch": 0.5447929018276976, "grad_norm": 0.05970331281423569, "learning_rate": 9.121654324319502e-06, "loss": 0.0024, "step": 66620 }, { "epoch": 0.5448746780062967, "grad_norm": 0.12234412133693695, "learning_rate": 9.121250289453291e-06, "loss": 0.0026, "step": 66630 }, { "epoch": 0.5449564541848959, "grad_norm": 0.005987443961203098, "learning_rate": 9.120846170633864e-06, "loss": 0.0018, "step": 66640 }, { "epoch": 0.5450382303634951, "grad_norm": 0.09069192409515381, "learning_rate": 9.120441967869448e-06, "loss": 0.0031, "step": 66650 }, { "epoch": 0.5451200065420942, "grad_norm": 0.17164134979248047, "learning_rate": 9.120037681168284e-06, "loss": 0.0023, "step": 66660 }, { "epoch": 0.5452017827206934, "grad_norm": 0.11111027747392654, "learning_rate": 9.119633310538601e-06, "loss": 0.0022, "step": 66670 }, { "epoch": 0.5452835588992926, "grad_norm": 0.08247888833284378, "learning_rate": 9.119228855988637e-06, "loss": 0.0032, "step": 66680 }, { "epoch": 0.5453653350778918, "grad_norm": 0.13144943118095398, "learning_rate": 9.118824317526637e-06, "loss": 0.004, "step": 66690 }, { "epoch": 0.545447111256491, "grad_norm": 0.007953595370054245, "learning_rate": 9.118419695160837e-06, "loss": 0.0024, "step": 66700 }, { "epoch": 0.5455288874350902, "grad_norm": 0.06275010854005814, "learning_rate": 9.118014988899477e-06, "loss": 0.0026, "step": 66710 }, { "epoch": 0.5456106636136894, "grad_norm": 0.05640598013997078, "learning_rate": 9.117610198750805e-06, "loss": 0.0018, "step": 66720 }, { "epoch": 0.5456924397922885, "grad_norm": 0.08080883324146271, "learning_rate": 9.117205324723069e-06, "loss": 0.0031, "step": 66730 }, { "epoch": 0.5457742159708877, "grad_norm": 0.07619205117225647, "learning_rate": 9.116800366824512e-06, "loss": 0.0022, "step": 66740 }, { "epoch": 0.5458559921494869, "grad_norm": 0.12196645885705948, "learning_rate": 9.116395325063386e-06, "loss": 0.0032, "step": 66750 }, { "epoch": 0.545937768328086, "grad_norm": 0.08015888184309006, "learning_rate": 9.11599019944794e-06, "loss": 0.0027, "step": 66760 }, { "epoch": 0.5460195445066852, "grad_norm": 0.04031319543719292, "learning_rate": 9.115584989986431e-06, "loss": 0.003, "step": 66770 }, { "epoch": 0.5461013206852844, "grad_norm": 0.06499571353197098, "learning_rate": 9.115179696687108e-06, "loss": 0.0017, "step": 66780 }, { "epoch": 0.5461830968638836, "grad_norm": 0.05272664502263069, "learning_rate": 9.114774319558231e-06, "loss": 0.0014, "step": 66790 }, { "epoch": 0.5462648730424827, "grad_norm": 0.056683216243982315, "learning_rate": 9.114368858608053e-06, "loss": 0.0018, "step": 66800 }, { "epoch": 0.5463466492210819, "grad_norm": 0.039216551929712296, "learning_rate": 9.11396331384484e-06, "loss": 0.0017, "step": 66810 }, { "epoch": 0.5464284253996811, "grad_norm": 0.11789465695619583, "learning_rate": 9.11355768527685e-06, "loss": 0.0025, "step": 66820 }, { "epoch": 0.5465102015782802, "grad_norm": 0.08407355099916458, "learning_rate": 9.113151972912347e-06, "loss": 0.0011, "step": 66830 }, { "epoch": 0.5465919777568794, "grad_norm": 0.03779945895075798, "learning_rate": 9.112746176759594e-06, "loss": 0.0019, "step": 66840 }, { "epoch": 0.5466737539354786, "grad_norm": 0.12470383197069168, "learning_rate": 9.112340296826859e-06, "loss": 0.002, "step": 66850 }, { "epoch": 0.5467555301140777, "grad_norm": 0.06103675067424774, "learning_rate": 9.111934333122408e-06, "loss": 0.0019, "step": 66860 }, { "epoch": 0.5468373062926769, "grad_norm": 0.061578359454870224, "learning_rate": 9.111528285654513e-06, "loss": 0.0039, "step": 66870 }, { "epoch": 0.5469190824712761, "grad_norm": 0.15764157474040985, "learning_rate": 9.111122154431445e-06, "loss": 0.0014, "step": 66880 }, { "epoch": 0.5470008586498752, "grad_norm": 0.06651151925325394, "learning_rate": 9.110715939461477e-06, "loss": 0.002, "step": 66890 }, { "epoch": 0.5470826348284744, "grad_norm": 0.05019589513540268, "learning_rate": 9.110309640752884e-06, "loss": 0.0021, "step": 66900 }, { "epoch": 0.5471644110070737, "grad_norm": 0.05225800722837448, "learning_rate": 9.109903258313943e-06, "loss": 0.0015, "step": 66910 }, { "epoch": 0.5472461871856729, "grad_norm": 0.02124633453786373, "learning_rate": 9.109496792152933e-06, "loss": 0.002, "step": 66920 }, { "epoch": 0.547327963364272, "grad_norm": 0.02869619056582451, "learning_rate": 9.10909024227813e-06, "loss": 0.0023, "step": 66930 }, { "epoch": 0.5474097395428712, "grad_norm": 0.11104375869035721, "learning_rate": 9.10868360869782e-06, "loss": 0.0012, "step": 66940 }, { "epoch": 0.5474915157214704, "grad_norm": 0.08928213268518448, "learning_rate": 9.108276891420284e-06, "loss": 0.0021, "step": 66950 }, { "epoch": 0.5475732919000695, "grad_norm": 0.1272350251674652, "learning_rate": 9.10787009045381e-06, "loss": 0.0017, "step": 66960 }, { "epoch": 0.5476550680786687, "grad_norm": 0.019539576023817062, "learning_rate": 9.107463205806684e-06, "loss": 0.0017, "step": 66970 }, { "epoch": 0.5477368442572679, "grad_norm": 0.02322680875658989, "learning_rate": 9.107056237487192e-06, "loss": 0.0012, "step": 66980 }, { "epoch": 0.547818620435867, "grad_norm": 0.06296521425247192, "learning_rate": 9.106649185503627e-06, "loss": 0.0026, "step": 66990 }, { "epoch": 0.5479003966144662, "grad_norm": 0.0343891941010952, "learning_rate": 9.10624204986428e-06, "loss": 0.0018, "step": 67000 }, { "epoch": 0.5479821727930654, "grad_norm": 0.0301381703466177, "learning_rate": 9.105834830577445e-06, "loss": 0.0018, "step": 67010 }, { "epoch": 0.5480639489716645, "grad_norm": 0.0596780963242054, "learning_rate": 9.105427527651415e-06, "loss": 0.0017, "step": 67020 }, { "epoch": 0.5481457251502637, "grad_norm": 0.03136608004570007, "learning_rate": 9.105020141094491e-06, "loss": 0.0017, "step": 67030 }, { "epoch": 0.5482275013288629, "grad_norm": 0.21281331777572632, "learning_rate": 9.104612670914972e-06, "loss": 0.0029, "step": 67040 }, { "epoch": 0.5483092775074621, "grad_norm": 0.0412740595638752, "learning_rate": 9.104205117121153e-06, "loss": 0.001, "step": 67050 }, { "epoch": 0.5483910536860612, "grad_norm": 0.04204731062054634, "learning_rate": 9.10379747972134e-06, "loss": 0.0023, "step": 67060 }, { "epoch": 0.5484728298646604, "grad_norm": 0.030693860724568367, "learning_rate": 9.10338975872384e-06, "loss": 0.0022, "step": 67070 }, { "epoch": 0.5485546060432596, "grad_norm": 0.05156579986214638, "learning_rate": 9.102981954136952e-06, "loss": 0.0018, "step": 67080 }, { "epoch": 0.5486363822218587, "grad_norm": 0.025889676064252853, "learning_rate": 9.102574065968988e-06, "loss": 0.0016, "step": 67090 }, { "epoch": 0.5487181584004579, "grad_norm": 0.02190488763153553, "learning_rate": 9.102166094228254e-06, "loss": 0.0019, "step": 67100 }, { "epoch": 0.5487999345790571, "grad_norm": 0.1442403346300125, "learning_rate": 9.101758038923063e-06, "loss": 0.003, "step": 67110 }, { "epoch": 0.5488817107576563, "grad_norm": 0.09923823922872543, "learning_rate": 9.101349900061727e-06, "loss": 0.0019, "step": 67120 }, { "epoch": 0.5489634869362555, "grad_norm": 0.10044442862272263, "learning_rate": 9.10094167765256e-06, "loss": 0.0017, "step": 67130 }, { "epoch": 0.5490452631148547, "grad_norm": 0.1324429214000702, "learning_rate": 9.100533371703877e-06, "loss": 0.0031, "step": 67140 }, { "epoch": 0.5491270392934539, "grad_norm": 0.0412389375269413, "learning_rate": 9.100124982223997e-06, "loss": 0.0017, "step": 67150 }, { "epoch": 0.549208815472053, "grad_norm": 0.2122369408607483, "learning_rate": 9.099716509221236e-06, "loss": 0.002, "step": 67160 }, { "epoch": 0.5492905916506522, "grad_norm": 0.006785953417420387, "learning_rate": 9.09930795270392e-06, "loss": 0.0035, "step": 67170 }, { "epoch": 0.5493723678292514, "grad_norm": 0.15813234448432922, "learning_rate": 9.098899312680368e-06, "loss": 0.0037, "step": 67180 }, { "epoch": 0.5494541440078505, "grad_norm": 0.013257681392133236, "learning_rate": 9.098490589158907e-06, "loss": 0.0015, "step": 67190 }, { "epoch": 0.5495359201864497, "grad_norm": 0.08480477333068848, "learning_rate": 9.098081782147858e-06, "loss": 0.0029, "step": 67200 }, { "epoch": 0.5496176963650489, "grad_norm": 0.12552575767040253, "learning_rate": 9.097672891655554e-06, "loss": 0.0018, "step": 67210 }, { "epoch": 0.549699472543648, "grad_norm": 0.09660855680704117, "learning_rate": 9.09726391769032e-06, "loss": 0.0017, "step": 67220 }, { "epoch": 0.5497812487222472, "grad_norm": 0.1468333601951599, "learning_rate": 9.096854860260493e-06, "loss": 0.0038, "step": 67230 }, { "epoch": 0.5498630249008464, "grad_norm": 0.054496221244335175, "learning_rate": 9.0964457193744e-06, "loss": 0.0016, "step": 67240 }, { "epoch": 0.5499448010794455, "grad_norm": 0.09125807136297226, "learning_rate": 9.096036495040379e-06, "loss": 0.0015, "step": 67250 }, { "epoch": 0.5500265772580447, "grad_norm": 0.05184420943260193, "learning_rate": 9.095627187266764e-06, "loss": 0.0023, "step": 67260 }, { "epoch": 0.5501083534366439, "grad_norm": 0.12207689881324768, "learning_rate": 9.095217796061896e-06, "loss": 0.0026, "step": 67270 }, { "epoch": 0.550190129615243, "grad_norm": 0.05721722170710564, "learning_rate": 9.094808321434113e-06, "loss": 0.0018, "step": 67280 }, { "epoch": 0.5502719057938422, "grad_norm": 0.1019635871052742, "learning_rate": 9.094398763391754e-06, "loss": 0.0016, "step": 67290 }, { "epoch": 0.5503536819724414, "grad_norm": 0.17940731346607208, "learning_rate": 9.093989121943166e-06, "loss": 0.0029, "step": 67300 }, { "epoch": 0.5504354581510406, "grad_norm": 0.13112366199493408, "learning_rate": 9.09357939709669e-06, "loss": 0.0021, "step": 67310 }, { "epoch": 0.5505172343296397, "grad_norm": 0.05099710077047348, "learning_rate": 9.093169588860676e-06, "loss": 0.0022, "step": 67320 }, { "epoch": 0.5505990105082389, "grad_norm": 0.1432144045829773, "learning_rate": 9.09275969724347e-06, "loss": 0.0025, "step": 67330 }, { "epoch": 0.5506807866868382, "grad_norm": 0.00763074541464448, "learning_rate": 9.09234972225342e-06, "loss": 0.0021, "step": 67340 }, { "epoch": 0.5507625628654373, "grad_norm": 0.00231782253831625, "learning_rate": 9.091939663898881e-06, "loss": 0.0026, "step": 67350 }, { "epoch": 0.5508443390440365, "grad_norm": 0.029535306617617607, "learning_rate": 9.091529522188206e-06, "loss": 0.0018, "step": 67360 }, { "epoch": 0.5509261152226357, "grad_norm": 0.04984845966100693, "learning_rate": 9.091119297129748e-06, "loss": 0.002, "step": 67370 }, { "epoch": 0.5510078914012349, "grad_norm": 0.029210686683654785, "learning_rate": 9.090708988731865e-06, "loss": 0.0072, "step": 67380 }, { "epoch": 0.551089667579834, "grad_norm": 0.043911658227443695, "learning_rate": 9.090298597002914e-06, "loss": 0.0028, "step": 67390 }, { "epoch": 0.5511714437584332, "grad_norm": 0.20260730385780334, "learning_rate": 9.089888121951257e-06, "loss": 0.0028, "step": 67400 }, { "epoch": 0.5512532199370324, "grad_norm": 0.05972107872366905, "learning_rate": 9.089477563585254e-06, "loss": 0.0014, "step": 67410 }, { "epoch": 0.5513349961156315, "grad_norm": 0.034909822046756744, "learning_rate": 9.089066921913268e-06, "loss": 0.0023, "step": 67420 }, { "epoch": 0.5514167722942307, "grad_norm": 0.0564190074801445, "learning_rate": 9.088656196943666e-06, "loss": 0.0019, "step": 67430 }, { "epoch": 0.5514985484728299, "grad_norm": 0.18146930634975433, "learning_rate": 9.088245388684814e-06, "loss": 0.0027, "step": 67440 }, { "epoch": 0.551580324651429, "grad_norm": 0.03574379161000252, "learning_rate": 9.087834497145081e-06, "loss": 0.0023, "step": 67450 }, { "epoch": 0.5516621008300282, "grad_norm": 0.30848920345306396, "learning_rate": 9.087423522332835e-06, "loss": 0.0027, "step": 67460 }, { "epoch": 0.5517438770086274, "grad_norm": 0.17598995566368103, "learning_rate": 9.087012464256448e-06, "loss": 0.003, "step": 67470 }, { "epoch": 0.5518256531872265, "grad_norm": 0.1651044338941574, "learning_rate": 9.086601322924297e-06, "loss": 0.0049, "step": 67480 }, { "epoch": 0.5519074293658257, "grad_norm": 0.06747327744960785, "learning_rate": 9.086190098344756e-06, "loss": 0.0017, "step": 67490 }, { "epoch": 0.5519892055444249, "grad_norm": 0.0625709518790245, "learning_rate": 9.0857787905262e-06, "loss": 0.0017, "step": 67500 }, { "epoch": 0.552070981723024, "grad_norm": 0.1048995777964592, "learning_rate": 9.08536739947701e-06, "loss": 0.0035, "step": 67510 }, { "epoch": 0.5521527579016232, "grad_norm": 0.12843051552772522, "learning_rate": 9.084955925205566e-06, "loss": 0.0024, "step": 67520 }, { "epoch": 0.5522345340802224, "grad_norm": 0.05602239444851875, "learning_rate": 9.084544367720248e-06, "loss": 0.0021, "step": 67530 }, { "epoch": 0.5523163102588216, "grad_norm": 0.09052055329084396, "learning_rate": 9.084132727029442e-06, "loss": 0.0084, "step": 67540 }, { "epoch": 0.5523980864374208, "grad_norm": 0.059452202171087265, "learning_rate": 9.08372100314153e-06, "loss": 0.0033, "step": 67550 }, { "epoch": 0.55247986261602, "grad_norm": 0.04194915294647217, "learning_rate": 9.083309196064905e-06, "loss": 0.0016, "step": 67560 }, { "epoch": 0.5525616387946192, "grad_norm": 0.20089222490787506, "learning_rate": 9.08289730580795e-06, "loss": 0.0039, "step": 67570 }, { "epoch": 0.5526434149732183, "grad_norm": 0.05243202671408653, "learning_rate": 9.082485332379061e-06, "loss": 0.0013, "step": 67580 }, { "epoch": 0.5527251911518175, "grad_norm": 0.04306665062904358, "learning_rate": 9.082073275786625e-06, "loss": 0.0014, "step": 67590 }, { "epoch": 0.5528069673304167, "grad_norm": 0.04065326601266861, "learning_rate": 9.08166113603904e-06, "loss": 0.0023, "step": 67600 }, { "epoch": 0.5528887435090158, "grad_norm": 0.07711208611726761, "learning_rate": 9.081248913144697e-06, "loss": 0.0036, "step": 67610 }, { "epoch": 0.552970519687615, "grad_norm": 0.05760432407259941, "learning_rate": 9.080836607112e-06, "loss": 0.0015, "step": 67620 }, { "epoch": 0.5530522958662142, "grad_norm": 0.11785125732421875, "learning_rate": 9.080424217949342e-06, "loss": 0.004, "step": 67630 }, { "epoch": 0.5531340720448134, "grad_norm": 0.05837870389223099, "learning_rate": 9.080011745665126e-06, "loss": 0.0015, "step": 67640 }, { "epoch": 0.5532158482234125, "grad_norm": 0.08468335121870041, "learning_rate": 9.079599190267753e-06, "loss": 0.0019, "step": 67650 }, { "epoch": 0.5532976244020117, "grad_norm": 0.01714423857629299, "learning_rate": 9.079186551765632e-06, "loss": 0.0012, "step": 67660 }, { "epoch": 0.5533794005806109, "grad_norm": 0.05666837468743324, "learning_rate": 9.078773830167162e-06, "loss": 0.0023, "step": 67670 }, { "epoch": 0.55346117675921, "grad_norm": 0.08731504529714584, "learning_rate": 9.078361025480756e-06, "loss": 0.0021, "step": 67680 }, { "epoch": 0.5535429529378092, "grad_norm": 0.34432077407836914, "learning_rate": 9.07794813771482e-06, "loss": 0.0023, "step": 67690 }, { "epoch": 0.5536247291164084, "grad_norm": 0.06269155442714691, "learning_rate": 9.077535166877766e-06, "loss": 0.002, "step": 67700 }, { "epoch": 0.5537065052950075, "grad_norm": 0.03330637514591217, "learning_rate": 9.077122112978005e-06, "loss": 0.0027, "step": 67710 }, { "epoch": 0.5537882814736067, "grad_norm": 0.031398504972457886, "learning_rate": 9.076708976023953e-06, "loss": 0.0024, "step": 67720 }, { "epoch": 0.5538700576522059, "grad_norm": 0.07592868059873581, "learning_rate": 9.076295756024027e-06, "loss": 0.0018, "step": 67730 }, { "epoch": 0.553951833830805, "grad_norm": 0.047583695501089096, "learning_rate": 9.075882452986642e-06, "loss": 0.0024, "step": 67740 }, { "epoch": 0.5540336100094042, "grad_norm": 0.1039453074336052, "learning_rate": 9.075469066920219e-06, "loss": 0.0057, "step": 67750 }, { "epoch": 0.5541153861880035, "grad_norm": 0.06431211531162262, "learning_rate": 9.075055597833178e-06, "loss": 0.0021, "step": 67760 }, { "epoch": 0.5541971623666027, "grad_norm": 0.07654350996017456, "learning_rate": 9.07464204573394e-06, "loss": 0.002, "step": 67770 }, { "epoch": 0.5542789385452018, "grad_norm": 0.0846766009926796, "learning_rate": 9.074228410630934e-06, "loss": 0.0023, "step": 67780 }, { "epoch": 0.554360714723801, "grad_norm": 0.07830631732940674, "learning_rate": 9.073814692532584e-06, "loss": 0.0015, "step": 67790 }, { "epoch": 0.5544424909024002, "grad_norm": 0.09892970323562622, "learning_rate": 9.073400891447315e-06, "loss": 0.0017, "step": 67800 }, { "epoch": 0.5545242670809993, "grad_norm": 0.34184181690216064, "learning_rate": 9.07298700738356e-06, "loss": 0.0026, "step": 67810 }, { "epoch": 0.5546060432595985, "grad_norm": 0.17051300406455994, "learning_rate": 9.072573040349749e-06, "loss": 0.0023, "step": 67820 }, { "epoch": 0.5546878194381977, "grad_norm": 0.0812617763876915, "learning_rate": 9.072158990354315e-06, "loss": 0.0023, "step": 67830 }, { "epoch": 0.5547695956167968, "grad_norm": 0.06648764759302139, "learning_rate": 9.071744857405693e-06, "loss": 0.0031, "step": 67840 }, { "epoch": 0.554851371795396, "grad_norm": 0.10314814001321793, "learning_rate": 9.071330641512316e-06, "loss": 0.0023, "step": 67850 }, { "epoch": 0.5549331479739952, "grad_norm": 0.07322869449853897, "learning_rate": 9.070916342682627e-06, "loss": 0.0021, "step": 67860 }, { "epoch": 0.5550149241525943, "grad_norm": 0.0823969841003418, "learning_rate": 9.070501960925062e-06, "loss": 0.0026, "step": 67870 }, { "epoch": 0.5550967003311935, "grad_norm": 0.0522763654589653, "learning_rate": 9.070087496248065e-06, "loss": 0.0021, "step": 67880 }, { "epoch": 0.5551784765097927, "grad_norm": 0.054034844040870667, "learning_rate": 9.069672948660076e-06, "loss": 0.0016, "step": 67890 }, { "epoch": 0.5552602526883919, "grad_norm": 0.12253743410110474, "learning_rate": 9.06925831816954e-06, "loss": 0.0042, "step": 67900 }, { "epoch": 0.555342028866991, "grad_norm": 0.03320955112576485, "learning_rate": 9.068843604784905e-06, "loss": 0.0022, "step": 67910 }, { "epoch": 0.5554238050455902, "grad_norm": 0.02443341165781021, "learning_rate": 9.06842880851462e-06, "loss": 0.0036, "step": 67920 }, { "epoch": 0.5555055812241894, "grad_norm": 0.12391622364521027, "learning_rate": 9.068013929367132e-06, "loss": 0.0014, "step": 67930 }, { "epoch": 0.5555873574027885, "grad_norm": 0.05847570300102234, "learning_rate": 9.067598967350895e-06, "loss": 0.0022, "step": 67940 }, { "epoch": 0.5556691335813877, "grad_norm": 0.06651293486356735, "learning_rate": 9.067183922474359e-06, "loss": 0.002, "step": 67950 }, { "epoch": 0.5557509097599869, "grad_norm": 0.173912912607193, "learning_rate": 9.06676879474598e-06, "loss": 0.0019, "step": 67960 }, { "epoch": 0.555832685938586, "grad_norm": 0.05161167308688164, "learning_rate": 9.066353584174217e-06, "loss": 0.0021, "step": 67970 }, { "epoch": 0.5559144621171853, "grad_norm": 0.05640871822834015, "learning_rate": 9.065938290767525e-06, "loss": 0.0021, "step": 67980 }, { "epoch": 0.5559962382957845, "grad_norm": 0.08457593619823456, "learning_rate": 9.065522914534365e-06, "loss": 0.0013, "step": 67990 }, { "epoch": 0.5560780144743837, "grad_norm": 0.07442457973957062, "learning_rate": 9.065107455483199e-06, "loss": 0.0014, "step": 68000 }, { "epoch": 0.5561597906529828, "grad_norm": 0.07223289459943771, "learning_rate": 9.06469191362249e-06, "loss": 0.0041, "step": 68010 }, { "epoch": 0.556241566831582, "grad_norm": 0.07177002727985382, "learning_rate": 9.064276288960702e-06, "loss": 0.0023, "step": 68020 }, { "epoch": 0.5563233430101812, "grad_norm": 0.05087200552225113, "learning_rate": 9.063860581506303e-06, "loss": 0.0027, "step": 68030 }, { "epoch": 0.5564051191887803, "grad_norm": 0.051574911922216415, "learning_rate": 9.06344479126776e-06, "loss": 0.0034, "step": 68040 }, { "epoch": 0.5564868953673795, "grad_norm": 0.05819510668516159, "learning_rate": 9.063028918253545e-06, "loss": 0.002, "step": 68050 }, { "epoch": 0.5565686715459787, "grad_norm": 0.19213511049747467, "learning_rate": 9.062612962472126e-06, "loss": 0.0023, "step": 68060 }, { "epoch": 0.5566504477245778, "grad_norm": 0.032861243933439255, "learning_rate": 9.06219692393198e-06, "loss": 0.0022, "step": 68070 }, { "epoch": 0.556732223903177, "grad_norm": 0.11845710873603821, "learning_rate": 9.061780802641582e-06, "loss": 0.0028, "step": 68080 }, { "epoch": 0.5568140000817762, "grad_norm": 0.06483355909585953, "learning_rate": 9.061364598609406e-06, "loss": 0.0017, "step": 68090 }, { "epoch": 0.5568957762603753, "grad_norm": 0.3899328112602234, "learning_rate": 9.060948311843934e-06, "loss": 0.0039, "step": 68100 }, { "epoch": 0.5569775524389745, "grad_norm": 0.22750462591648102, "learning_rate": 9.060531942353641e-06, "loss": 0.0018, "step": 68110 }, { "epoch": 0.5570593286175737, "grad_norm": 0.03421682491898537, "learning_rate": 9.060115490147014e-06, "loss": 0.0011, "step": 68120 }, { "epoch": 0.5571411047961728, "grad_norm": 0.08042781800031662, "learning_rate": 9.059698955232531e-06, "loss": 0.0019, "step": 68130 }, { "epoch": 0.557222880974772, "grad_norm": 0.0723414346575737, "learning_rate": 9.059282337618685e-06, "loss": 0.0028, "step": 68140 }, { "epoch": 0.5573046571533712, "grad_norm": 0.07248701900243759, "learning_rate": 9.058865637313956e-06, "loss": 0.0019, "step": 68150 }, { "epoch": 0.5573864333319704, "grad_norm": 0.06840439140796661, "learning_rate": 9.058448854326834e-06, "loss": 0.0014, "step": 68160 }, { "epoch": 0.5574682095105695, "grad_norm": 0.08560628443956375, "learning_rate": 9.058031988665811e-06, "loss": 0.0027, "step": 68170 }, { "epoch": 0.5575499856891687, "grad_norm": 0.09709228575229645, "learning_rate": 9.057615040339379e-06, "loss": 0.0016, "step": 68180 }, { "epoch": 0.557631761867768, "grad_norm": 0.037010207772254944, "learning_rate": 9.057198009356028e-06, "loss": 0.0018, "step": 68190 }, { "epoch": 0.5577135380463671, "grad_norm": 0.09914934635162354, "learning_rate": 9.056780895724255e-06, "loss": 0.0034, "step": 68200 }, { "epoch": 0.5577953142249663, "grad_norm": 0.11038591712713242, "learning_rate": 9.056363699452561e-06, "loss": 0.0017, "step": 68210 }, { "epoch": 0.5578770904035655, "grad_norm": 0.12061439454555511, "learning_rate": 9.05594642054944e-06, "loss": 0.0015, "step": 68220 }, { "epoch": 0.5579588665821646, "grad_norm": 0.05460793524980545, "learning_rate": 9.055529059023391e-06, "loss": 0.0024, "step": 68230 }, { "epoch": 0.5580406427607638, "grad_norm": 0.0443454347550869, "learning_rate": 9.055111614882922e-06, "loss": 0.0016, "step": 68240 }, { "epoch": 0.558122418939363, "grad_norm": 0.5523456335067749, "learning_rate": 9.054694088136531e-06, "loss": 0.0047, "step": 68250 }, { "epoch": 0.5582041951179622, "grad_norm": 0.059193264693021774, "learning_rate": 9.054276478792727e-06, "loss": 0.0016, "step": 68260 }, { "epoch": 0.5582859712965613, "grad_norm": 0.13398855924606323, "learning_rate": 9.053858786860015e-06, "loss": 0.0021, "step": 68270 }, { "epoch": 0.5583677474751605, "grad_norm": 0.06672818958759308, "learning_rate": 9.053441012346904e-06, "loss": 0.003, "step": 68280 }, { "epoch": 0.5584495236537597, "grad_norm": 0.1427423059940338, "learning_rate": 9.053023155261906e-06, "loss": 0.003, "step": 68290 }, { "epoch": 0.5585312998323588, "grad_norm": 0.09989653527736664, "learning_rate": 9.052605215613532e-06, "loss": 0.003, "step": 68300 }, { "epoch": 0.558613076010958, "grad_norm": 0.021813765168190002, "learning_rate": 9.052187193410294e-06, "loss": 0.0015, "step": 68310 }, { "epoch": 0.5586948521895572, "grad_norm": 0.007392542436718941, "learning_rate": 9.05176908866071e-06, "loss": 0.0023, "step": 68320 }, { "epoch": 0.5587766283681563, "grad_norm": 0.11059147119522095, "learning_rate": 9.051350901373297e-06, "loss": 0.0012, "step": 68330 }, { "epoch": 0.5588584045467555, "grad_norm": 0.04240476340055466, "learning_rate": 9.050932631556572e-06, "loss": 0.0017, "step": 68340 }, { "epoch": 0.5589401807253547, "grad_norm": 0.09980455040931702, "learning_rate": 9.050514279219055e-06, "loss": 0.002, "step": 68350 }, { "epoch": 0.5590219569039538, "grad_norm": 0.06949413567781448, "learning_rate": 9.050095844369274e-06, "loss": 0.0026, "step": 68360 }, { "epoch": 0.559103733082553, "grad_norm": 0.03934181109070778, "learning_rate": 9.049677327015744e-06, "loss": 0.0037, "step": 68370 }, { "epoch": 0.5591855092611522, "grad_norm": 0.05138183385133743, "learning_rate": 9.049258727166997e-06, "loss": 0.0039, "step": 68380 }, { "epoch": 0.5592672854397513, "grad_norm": 0.05247944965958595, "learning_rate": 9.04884004483156e-06, "loss": 0.0017, "step": 68390 }, { "epoch": 0.5593490616183506, "grad_norm": 0.1396840661764145, "learning_rate": 9.048421280017958e-06, "loss": 0.0038, "step": 68400 }, { "epoch": 0.5594308377969498, "grad_norm": 0.29351067543029785, "learning_rate": 9.048002432734724e-06, "loss": 0.0038, "step": 68410 }, { "epoch": 0.559512613975549, "grad_norm": 0.1419224590063095, "learning_rate": 9.047583502990392e-06, "loss": 0.0018, "step": 68420 }, { "epoch": 0.5595943901541481, "grad_norm": 0.05401778221130371, "learning_rate": 9.047164490793492e-06, "loss": 0.0015, "step": 68430 }, { "epoch": 0.5596761663327473, "grad_norm": 0.0022005236241966486, "learning_rate": 9.046745396152562e-06, "loss": 0.0008, "step": 68440 }, { "epoch": 0.5597579425113465, "grad_norm": 0.10769530385732651, "learning_rate": 9.046326219076139e-06, "loss": 0.0027, "step": 68450 }, { "epoch": 0.5598397186899456, "grad_norm": 0.11067475378513336, "learning_rate": 9.04590695957276e-06, "loss": 0.0019, "step": 68460 }, { "epoch": 0.5599214948685448, "grad_norm": 0.05280432477593422, "learning_rate": 9.04548761765097e-06, "loss": 0.0065, "step": 68470 }, { "epoch": 0.560003271047144, "grad_norm": 0.014535730704665184, "learning_rate": 9.045068193319309e-06, "loss": 0.0025, "step": 68480 }, { "epoch": 0.5600850472257431, "grad_norm": 0.04050540179014206, "learning_rate": 9.04464868658632e-06, "loss": 0.0016, "step": 68490 }, { "epoch": 0.5601668234043423, "grad_norm": 0.05751130357384682, "learning_rate": 9.044229097460551e-06, "loss": 0.0033, "step": 68500 }, { "epoch": 0.5602485995829415, "grad_norm": 0.06724405288696289, "learning_rate": 9.043809425950546e-06, "loss": 0.0033, "step": 68510 }, { "epoch": 0.5603303757615407, "grad_norm": 0.054006949067115784, "learning_rate": 9.043389672064857e-06, "loss": 0.0016, "step": 68520 }, { "epoch": 0.5604121519401398, "grad_norm": 0.0065891314297914505, "learning_rate": 9.042969835812034e-06, "loss": 0.0014, "step": 68530 }, { "epoch": 0.560493928118739, "grad_norm": 0.0416700653731823, "learning_rate": 9.042549917200629e-06, "loss": 0.0015, "step": 68540 }, { "epoch": 0.5605757042973382, "grad_norm": 0.053727537393569946, "learning_rate": 9.042129916239195e-06, "loss": 0.0019, "step": 68550 }, { "epoch": 0.5606574804759373, "grad_norm": 0.24011655151844025, "learning_rate": 9.04170983293629e-06, "loss": 0.0029, "step": 68560 }, { "epoch": 0.5607392566545365, "grad_norm": 0.18659009039402008, "learning_rate": 9.04128966730047e-06, "loss": 0.0024, "step": 68570 }, { "epoch": 0.5608210328331357, "grad_norm": 0.1290050595998764, "learning_rate": 9.040869419340295e-06, "loss": 0.0019, "step": 68580 }, { "epoch": 0.5609028090117348, "grad_norm": 0.0035162889398634434, "learning_rate": 9.040449089064327e-06, "loss": 0.0052, "step": 68590 }, { "epoch": 0.560984585190334, "grad_norm": 0.07051240652799606, "learning_rate": 9.040028676481125e-06, "loss": 0.0021, "step": 68600 }, { "epoch": 0.5610663613689332, "grad_norm": 0.006507484242320061, "learning_rate": 9.039608181599254e-06, "loss": 0.0035, "step": 68610 }, { "epoch": 0.5611481375475325, "grad_norm": 0.03209049627184868, "learning_rate": 9.039187604427283e-06, "loss": 0.0016, "step": 68620 }, { "epoch": 0.5612299137261316, "grad_norm": 0.08760426193475723, "learning_rate": 9.038766944973775e-06, "loss": 0.0021, "step": 68630 }, { "epoch": 0.5613116899047308, "grad_norm": 0.024742143228650093, "learning_rate": 9.038346203247304e-06, "loss": 0.0015, "step": 68640 }, { "epoch": 0.56139346608333, "grad_norm": 0.06607774645090103, "learning_rate": 9.037925379256436e-06, "loss": 0.0026, "step": 68650 }, { "epoch": 0.5614752422619291, "grad_norm": 0.1359013170003891, "learning_rate": 9.037504473009746e-06, "loss": 0.0015, "step": 68660 }, { "epoch": 0.5615570184405283, "grad_norm": 0.00948121678084135, "learning_rate": 9.03708348451581e-06, "loss": 0.0018, "step": 68670 }, { "epoch": 0.5616387946191275, "grad_norm": 0.09339594841003418, "learning_rate": 9.0366624137832e-06, "loss": 0.0029, "step": 68680 }, { "epoch": 0.5617205707977266, "grad_norm": 0.09967242926359177, "learning_rate": 9.036241260820496e-06, "loss": 0.0021, "step": 68690 }, { "epoch": 0.5618023469763258, "grad_norm": 0.027481630444526672, "learning_rate": 9.035820025636278e-06, "loss": 0.0018, "step": 68700 }, { "epoch": 0.561884123154925, "grad_norm": 0.17541882395744324, "learning_rate": 9.035398708239124e-06, "loss": 0.0034, "step": 68710 }, { "epoch": 0.5619658993335241, "grad_norm": 0.19288192689418793, "learning_rate": 9.034977308637618e-06, "loss": 0.0013, "step": 68720 }, { "epoch": 0.5620476755121233, "grad_norm": 0.055129263550043106, "learning_rate": 9.034555826840346e-06, "loss": 0.0039, "step": 68730 }, { "epoch": 0.5621294516907225, "grad_norm": 0.07567870616912842, "learning_rate": 9.03413426285589e-06, "loss": 0.0024, "step": 68740 }, { "epoch": 0.5622112278693217, "grad_norm": 0.1471622884273529, "learning_rate": 9.033712616692843e-06, "loss": 0.0015, "step": 68750 }, { "epoch": 0.5622930040479208, "grad_norm": 0.05469805374741554, "learning_rate": 9.03329088835979e-06, "loss": 0.0032, "step": 68760 }, { "epoch": 0.56237478022652, "grad_norm": 0.00699542136862874, "learning_rate": 9.032869077865323e-06, "loss": 0.0029, "step": 68770 }, { "epoch": 0.5624565564051192, "grad_norm": 0.11228583008050919, "learning_rate": 9.032447185218034e-06, "loss": 0.0026, "step": 68780 }, { "epoch": 0.5625383325837183, "grad_norm": 0.1804811656475067, "learning_rate": 9.032025210426519e-06, "loss": 0.0023, "step": 68790 }, { "epoch": 0.5626201087623175, "grad_norm": 0.1597243845462799, "learning_rate": 9.031603153499372e-06, "loss": 0.001, "step": 68800 }, { "epoch": 0.5627018849409167, "grad_norm": 0.04821109026670456, "learning_rate": 9.031181014445192e-06, "loss": 0.0012, "step": 68810 }, { "epoch": 0.5627836611195158, "grad_norm": 0.07711924612522125, "learning_rate": 9.030758793272578e-06, "loss": 0.0031, "step": 68820 }, { "epoch": 0.5628654372981151, "grad_norm": 0.14808791875839233, "learning_rate": 9.030336489990133e-06, "loss": 0.002, "step": 68830 }, { "epoch": 0.5629472134767143, "grad_norm": 0.060039322823286057, "learning_rate": 9.029914104606456e-06, "loss": 0.0019, "step": 68840 }, { "epoch": 0.5630289896553134, "grad_norm": 0.04461781680583954, "learning_rate": 9.029491637130152e-06, "loss": 0.0026, "step": 68850 }, { "epoch": 0.5631107658339126, "grad_norm": 0.034169282764196396, "learning_rate": 9.02906908756983e-06, "loss": 0.0018, "step": 68860 }, { "epoch": 0.5631925420125118, "grad_norm": 0.04493393748998642, "learning_rate": 9.028646455934096e-06, "loss": 0.0014, "step": 68870 }, { "epoch": 0.563274318191111, "grad_norm": 0.030563337728381157, "learning_rate": 9.028223742231557e-06, "loss": 0.0015, "step": 68880 }, { "epoch": 0.5633560943697101, "grad_norm": 0.04739619791507721, "learning_rate": 9.027800946470828e-06, "loss": 0.0032, "step": 68890 }, { "epoch": 0.5634378705483093, "grad_norm": 0.15572157502174377, "learning_rate": 9.027378068660517e-06, "loss": 0.0024, "step": 68900 }, { "epoch": 0.5635196467269085, "grad_norm": 0.09059670567512512, "learning_rate": 9.026955108809243e-06, "loss": 0.0019, "step": 68910 }, { "epoch": 0.5636014229055076, "grad_norm": 0.07781968265771866, "learning_rate": 9.026532066925621e-06, "loss": 0.0021, "step": 68920 }, { "epoch": 0.5636831990841068, "grad_norm": 0.1578875333070755, "learning_rate": 9.026108943018267e-06, "loss": 0.0035, "step": 68930 }, { "epoch": 0.563764975262706, "grad_norm": 0.0458618700504303, "learning_rate": 9.0256857370958e-06, "loss": 0.0031, "step": 68940 }, { "epoch": 0.5638467514413051, "grad_norm": 0.04152918606996536, "learning_rate": 9.025262449166845e-06, "loss": 0.0018, "step": 68950 }, { "epoch": 0.5639285276199043, "grad_norm": 0.05696104094386101, "learning_rate": 9.024839079240019e-06, "loss": 0.0017, "step": 68960 }, { "epoch": 0.5640103037985035, "grad_norm": 0.014561742544174194, "learning_rate": 9.024415627323952e-06, "loss": 0.0013, "step": 68970 }, { "epoch": 0.5640920799771026, "grad_norm": 0.05766185000538826, "learning_rate": 9.023992093427267e-06, "loss": 0.0022, "step": 68980 }, { "epoch": 0.5641738561557018, "grad_norm": 0.012966993264853954, "learning_rate": 9.02356847755859e-06, "loss": 0.0012, "step": 68990 }, { "epoch": 0.564255632334301, "grad_norm": 0.1008981317281723, "learning_rate": 9.023144779726554e-06, "loss": 0.0024, "step": 69000 }, { "epoch": 0.5643374085129002, "grad_norm": 0.040290988981723785, "learning_rate": 9.02272099993979e-06, "loss": 0.0039, "step": 69010 }, { "epoch": 0.5644191846914993, "grad_norm": 0.03767968341708183, "learning_rate": 9.022297138206927e-06, "loss": 0.0014, "step": 69020 }, { "epoch": 0.5645009608700985, "grad_norm": 0.0934017226099968, "learning_rate": 9.021873194536604e-06, "loss": 0.002, "step": 69030 }, { "epoch": 0.5645827370486978, "grad_norm": 0.05773124098777771, "learning_rate": 9.021449168937454e-06, "loss": 0.0029, "step": 69040 }, { "epoch": 0.5646645132272969, "grad_norm": 0.06949114799499512, "learning_rate": 9.021025061418116e-06, "loss": 0.0032, "step": 69050 }, { "epoch": 0.5647462894058961, "grad_norm": 0.21413742005825043, "learning_rate": 9.020600871987227e-06, "loss": 0.0033, "step": 69060 }, { "epoch": 0.5648280655844953, "grad_norm": 0.019266510382294655, "learning_rate": 9.020176600653431e-06, "loss": 0.002, "step": 69070 }, { "epoch": 0.5649098417630944, "grad_norm": 0.026101090013980865, "learning_rate": 9.019752247425371e-06, "loss": 0.0034, "step": 69080 }, { "epoch": 0.5649916179416936, "grad_norm": 0.15803031623363495, "learning_rate": 9.019327812311692e-06, "loss": 0.0026, "step": 69090 }, { "epoch": 0.5650733941202928, "grad_norm": 0.021837478503584862, "learning_rate": 9.018903295321035e-06, "loss": 0.0015, "step": 69100 }, { "epoch": 0.565155170298892, "grad_norm": 0.1338924765586853, "learning_rate": 9.018478696462051e-06, "loss": 0.002, "step": 69110 }, { "epoch": 0.5652369464774911, "grad_norm": 0.04093529284000397, "learning_rate": 9.018054015743392e-06, "loss": 0.0011, "step": 69120 }, { "epoch": 0.5653187226560903, "grad_norm": 0.030761079862713814, "learning_rate": 9.017629253173707e-06, "loss": 0.0037, "step": 69130 }, { "epoch": 0.5654004988346895, "grad_norm": 0.05733683705329895, "learning_rate": 9.017204408761646e-06, "loss": 0.0019, "step": 69140 }, { "epoch": 0.5654822750132886, "grad_norm": 0.2379976063966751, "learning_rate": 9.016779482515866e-06, "loss": 0.0017, "step": 69150 }, { "epoch": 0.5655640511918878, "grad_norm": 0.1565912365913391, "learning_rate": 9.016354474445026e-06, "loss": 0.0026, "step": 69160 }, { "epoch": 0.565645827370487, "grad_norm": 0.06853556632995605, "learning_rate": 9.015929384557778e-06, "loss": 0.0015, "step": 69170 }, { "epoch": 0.5657276035490861, "grad_norm": 0.020351294428110123, "learning_rate": 9.015504212862784e-06, "loss": 0.002, "step": 69180 }, { "epoch": 0.5658093797276853, "grad_norm": 0.13169242441654205, "learning_rate": 9.015078959368706e-06, "loss": 0.0018, "step": 69190 }, { "epoch": 0.5658911559062845, "grad_norm": 0.28587329387664795, "learning_rate": 9.014653624084206e-06, "loss": 0.0014, "step": 69200 }, { "epoch": 0.5659729320848836, "grad_norm": 0.29539674520492554, "learning_rate": 9.014228207017948e-06, "loss": 0.0012, "step": 69210 }, { "epoch": 0.5660547082634828, "grad_norm": 0.08338034898042679, "learning_rate": 9.013802708178598e-06, "loss": 0.0025, "step": 69220 }, { "epoch": 0.566136484442082, "grad_norm": 0.12204311043024063, "learning_rate": 9.013377127574827e-06, "loss": 0.0021, "step": 69230 }, { "epoch": 0.5662182606206811, "grad_norm": 0.09983447194099426, "learning_rate": 9.012951465215298e-06, "loss": 0.0033, "step": 69240 }, { "epoch": 0.5663000367992803, "grad_norm": 0.08265138417482376, "learning_rate": 9.012525721108688e-06, "loss": 0.0027, "step": 69250 }, { "epoch": 0.5663818129778796, "grad_norm": 0.05272148922085762, "learning_rate": 9.012099895263666e-06, "loss": 0.0028, "step": 69260 }, { "epoch": 0.5664635891564788, "grad_norm": 0.040392350405454636, "learning_rate": 9.011673987688909e-06, "loss": 0.0016, "step": 69270 }, { "epoch": 0.5665453653350779, "grad_norm": 0.08176713436841965, "learning_rate": 9.011247998393093e-06, "loss": 0.002, "step": 69280 }, { "epoch": 0.5666271415136771, "grad_norm": 0.12104939669370651, "learning_rate": 9.010821927384894e-06, "loss": 0.0037, "step": 69290 }, { "epoch": 0.5667089176922763, "grad_norm": 0.09709173440933228, "learning_rate": 9.010395774672992e-06, "loss": 0.0032, "step": 69300 }, { "epoch": 0.5667906938708754, "grad_norm": 0.027333134785294533, "learning_rate": 9.009969540266068e-06, "loss": 0.0015, "step": 69310 }, { "epoch": 0.5668724700494746, "grad_norm": 0.024129807949066162, "learning_rate": 9.009543224172804e-06, "loss": 0.0016, "step": 69320 }, { "epoch": 0.5669542462280738, "grad_norm": 0.0787876546382904, "learning_rate": 9.009116826401886e-06, "loss": 0.0019, "step": 69330 }, { "epoch": 0.567036022406673, "grad_norm": 0.0767413079738617, "learning_rate": 9.008690346962002e-06, "loss": 0.0022, "step": 69340 }, { "epoch": 0.5671177985852721, "grad_norm": 0.10515929013490677, "learning_rate": 9.008263785861836e-06, "loss": 0.0038, "step": 69350 }, { "epoch": 0.5671995747638713, "grad_norm": 0.03770775347948074, "learning_rate": 9.007837143110077e-06, "loss": 0.0013, "step": 69360 }, { "epoch": 0.5672813509424705, "grad_norm": 0.0363188274204731, "learning_rate": 9.007410418715417e-06, "loss": 0.0016, "step": 69370 }, { "epoch": 0.5673631271210696, "grad_norm": 0.03204430267214775, "learning_rate": 9.006983612686551e-06, "loss": 0.0013, "step": 69380 }, { "epoch": 0.5674449032996688, "grad_norm": 0.13135883212089539, "learning_rate": 9.006556725032172e-06, "loss": 0.0018, "step": 69390 }, { "epoch": 0.567526679478268, "grad_norm": 0.12913356721401215, "learning_rate": 9.006129755760976e-06, "loss": 0.0022, "step": 69400 }, { "epoch": 0.5676084556568671, "grad_norm": 0.029015440493822098, "learning_rate": 9.005702704881658e-06, "loss": 0.0031, "step": 69410 }, { "epoch": 0.5676902318354663, "grad_norm": 0.02435477450489998, "learning_rate": 9.005275572402922e-06, "loss": 0.001, "step": 69420 }, { "epoch": 0.5677720080140655, "grad_norm": 0.09741021692752838, "learning_rate": 9.004848358333467e-06, "loss": 0.002, "step": 69430 }, { "epoch": 0.5678537841926646, "grad_norm": 0.10186812281608582, "learning_rate": 9.004421062681995e-06, "loss": 0.0021, "step": 69440 }, { "epoch": 0.5679355603712638, "grad_norm": 0.17190392315387726, "learning_rate": 9.003993685457213e-06, "loss": 0.0023, "step": 69450 }, { "epoch": 0.568017336549863, "grad_norm": 0.0970454216003418, "learning_rate": 9.003566226667823e-06, "loss": 0.002, "step": 69460 }, { "epoch": 0.5680991127284623, "grad_norm": 0.18235459923744202, "learning_rate": 9.003138686322534e-06, "loss": 0.0034, "step": 69470 }, { "epoch": 0.5681808889070614, "grad_norm": 0.05355193465948105, "learning_rate": 9.002711064430059e-06, "loss": 0.0018, "step": 69480 }, { "epoch": 0.5682626650856606, "grad_norm": 0.04083506390452385, "learning_rate": 9.002283360999103e-06, "loss": 0.0017, "step": 69490 }, { "epoch": 0.5683444412642598, "grad_norm": 0.02588433213531971, "learning_rate": 9.001855576038382e-06, "loss": 0.0031, "step": 69500 }, { "epoch": 0.5684262174428589, "grad_norm": 0.09600378572940826, "learning_rate": 9.001427709556612e-06, "loss": 0.0025, "step": 69510 }, { "epoch": 0.5685079936214581, "grad_norm": 0.023562990128993988, "learning_rate": 9.000999761562506e-06, "loss": 0.0017, "step": 69520 }, { "epoch": 0.5685897698000573, "grad_norm": 0.03381561487913132, "learning_rate": 9.000571732064783e-06, "loss": 0.0016, "step": 69530 }, { "epoch": 0.5686715459786564, "grad_norm": 0.07780418545007706, "learning_rate": 9.000143621072161e-06, "loss": 0.0036, "step": 69540 }, { "epoch": 0.5687533221572556, "grad_norm": 0.03972689062356949, "learning_rate": 8.999715428593363e-06, "loss": 0.003, "step": 69550 }, { "epoch": 0.5688350983358548, "grad_norm": 0.09763572365045547, "learning_rate": 8.999287154637111e-06, "loss": 0.0012, "step": 69560 }, { "epoch": 0.5689168745144539, "grad_norm": 0.06477309763431549, "learning_rate": 8.998858799212126e-06, "loss": 0.0018, "step": 69570 }, { "epoch": 0.5689986506930531, "grad_norm": 0.18715457618236542, "learning_rate": 8.99843036232714e-06, "loss": 0.0022, "step": 69580 }, { "epoch": 0.5690804268716523, "grad_norm": 0.10181540995836258, "learning_rate": 8.998001843990876e-06, "loss": 0.0026, "step": 69590 }, { "epoch": 0.5691622030502514, "grad_norm": 0.05679955706000328, "learning_rate": 8.997573244212065e-06, "loss": 0.0039, "step": 69600 }, { "epoch": 0.5692439792288506, "grad_norm": 0.007878221571445465, "learning_rate": 8.997144562999438e-06, "loss": 0.0027, "step": 69610 }, { "epoch": 0.5693257554074498, "grad_norm": 0.10749701410531998, "learning_rate": 8.996715800361729e-06, "loss": 0.0013, "step": 69620 }, { "epoch": 0.569407531586049, "grad_norm": 0.13134555518627167, "learning_rate": 8.996286956307667e-06, "loss": 0.0031, "step": 69630 }, { "epoch": 0.5694893077646481, "grad_norm": 0.027221644297242165, "learning_rate": 8.995858030845992e-06, "loss": 0.002, "step": 69640 }, { "epoch": 0.5695710839432473, "grad_norm": 0.008912765420973301, "learning_rate": 8.995429023985442e-06, "loss": 0.0024, "step": 69650 }, { "epoch": 0.5696528601218465, "grad_norm": 0.05161435902118683, "learning_rate": 8.994999935734754e-06, "loss": 0.0028, "step": 69660 }, { "epoch": 0.5697346363004456, "grad_norm": 0.07680298388004303, "learning_rate": 8.994570766102671e-06, "loss": 0.0025, "step": 69670 }, { "epoch": 0.5698164124790449, "grad_norm": 0.1172260046005249, "learning_rate": 8.994141515097935e-06, "loss": 0.0019, "step": 69680 }, { "epoch": 0.5698981886576441, "grad_norm": 0.0468902625143528, "learning_rate": 8.993712182729289e-06, "loss": 0.0028, "step": 69690 }, { "epoch": 0.5699799648362432, "grad_norm": 0.132623091340065, "learning_rate": 8.99328276900548e-06, "loss": 0.0043, "step": 69700 }, { "epoch": 0.5700617410148424, "grad_norm": 0.060159701853990555, "learning_rate": 8.992853273935254e-06, "loss": 0.0011, "step": 69710 }, { "epoch": 0.5701435171934416, "grad_norm": 0.13373897969722748, "learning_rate": 8.992423697527361e-06, "loss": 0.0022, "step": 69720 }, { "epoch": 0.5702252933720408, "grad_norm": 0.07043401896953583, "learning_rate": 8.991994039790555e-06, "loss": 0.0021, "step": 69730 }, { "epoch": 0.5703070695506399, "grad_norm": 0.1647057682275772, "learning_rate": 8.991564300733583e-06, "loss": 0.0017, "step": 69740 }, { "epoch": 0.5703888457292391, "grad_norm": 0.026211706921458244, "learning_rate": 8.991134480365202e-06, "loss": 0.0036, "step": 69750 }, { "epoch": 0.5704706219078383, "grad_norm": 0.06959687918424606, "learning_rate": 8.990704578694168e-06, "loss": 0.0015, "step": 69760 }, { "epoch": 0.5705523980864374, "grad_norm": 0.17177779972553253, "learning_rate": 8.990274595729237e-06, "loss": 0.0039, "step": 69770 }, { "epoch": 0.5706341742650366, "grad_norm": 0.05744607746601105, "learning_rate": 8.98984453147917e-06, "loss": 0.0022, "step": 69780 }, { "epoch": 0.5707159504436358, "grad_norm": 0.03224882856011391, "learning_rate": 8.989414385952726e-06, "loss": 0.0023, "step": 69790 }, { "epoch": 0.5707977266222349, "grad_norm": 0.13609640300273895, "learning_rate": 8.98898415915867e-06, "loss": 0.0027, "step": 69800 }, { "epoch": 0.5708795028008341, "grad_norm": 0.04589856043457985, "learning_rate": 8.988553851105762e-06, "loss": 0.0025, "step": 69810 }, { "epoch": 0.5709612789794333, "grad_norm": 0.05841698870062828, "learning_rate": 8.988123461802772e-06, "loss": 0.0016, "step": 69820 }, { "epoch": 0.5710430551580324, "grad_norm": 0.04247550666332245, "learning_rate": 8.987692991258464e-06, "loss": 0.0013, "step": 69830 }, { "epoch": 0.5711248313366316, "grad_norm": 0.04289577156305313, "learning_rate": 8.987262439481607e-06, "loss": 0.0014, "step": 69840 }, { "epoch": 0.5712066075152308, "grad_norm": 0.007671307772397995, "learning_rate": 8.986831806480975e-06, "loss": 0.0011, "step": 69850 }, { "epoch": 0.57128838369383, "grad_norm": 0.08579093962907791, "learning_rate": 8.98640109226534e-06, "loss": 0.0042, "step": 69860 }, { "epoch": 0.5713701598724291, "grad_norm": 0.10221902281045914, "learning_rate": 8.985970296843472e-06, "loss": 0.0036, "step": 69870 }, { "epoch": 0.5714519360510283, "grad_norm": 0.02488141879439354, "learning_rate": 8.98553942022415e-06, "loss": 0.0021, "step": 69880 }, { "epoch": 0.5715337122296275, "grad_norm": 0.03768089786171913, "learning_rate": 8.98510846241615e-06, "loss": 0.0013, "step": 69890 }, { "epoch": 0.5716154884082267, "grad_norm": 0.04120191931724548, "learning_rate": 8.984677423428251e-06, "loss": 0.0018, "step": 69900 }, { "epoch": 0.5716972645868259, "grad_norm": 0.146031454205513, "learning_rate": 8.984246303269236e-06, "loss": 0.0022, "step": 69910 }, { "epoch": 0.5717790407654251, "grad_norm": 0.051591698080301285, "learning_rate": 8.983815101947884e-06, "loss": 0.0014, "step": 69920 }, { "epoch": 0.5718608169440242, "grad_norm": 0.058512791991233826, "learning_rate": 8.98338381947298e-06, "loss": 0.0013, "step": 69930 }, { "epoch": 0.5719425931226234, "grad_norm": 0.014062891714274883, "learning_rate": 8.982952455853309e-06, "loss": 0.002, "step": 69940 }, { "epoch": 0.5720243693012226, "grad_norm": 0.030751284211874008, "learning_rate": 8.98252101109766e-06, "loss": 0.0029, "step": 69950 }, { "epoch": 0.5721061454798217, "grad_norm": 0.042793527245521545, "learning_rate": 8.98208948521482e-06, "loss": 0.0021, "step": 69960 }, { "epoch": 0.5721879216584209, "grad_norm": 0.010920954868197441, "learning_rate": 8.981657878213582e-06, "loss": 0.0032, "step": 69970 }, { "epoch": 0.5722696978370201, "grad_norm": 0.11770373582839966, "learning_rate": 8.981226190102734e-06, "loss": 0.0013, "step": 69980 }, { "epoch": 0.5723514740156193, "grad_norm": 0.06923732906579971, "learning_rate": 8.980794420891074e-06, "loss": 0.0033, "step": 69990 }, { "epoch": 0.5724332501942184, "grad_norm": 0.07070089131593704, "learning_rate": 8.980362570587396e-06, "loss": 0.0018, "step": 70000 }, { "epoch": 0.5725150263728176, "grad_norm": 0.02287241630256176, "learning_rate": 8.979930639200495e-06, "loss": 0.0025, "step": 70010 }, { "epoch": 0.5725968025514168, "grad_norm": 0.12344665080308914, "learning_rate": 8.979498626739175e-06, "loss": 0.003, "step": 70020 }, { "epoch": 0.5726785787300159, "grad_norm": 0.13178950548171997, "learning_rate": 8.979066533212231e-06, "loss": 0.0026, "step": 70030 }, { "epoch": 0.5727603549086151, "grad_norm": 0.12275534868240356, "learning_rate": 8.978634358628468e-06, "loss": 0.0021, "step": 70040 }, { "epoch": 0.5728421310872143, "grad_norm": 0.008263965137302876, "learning_rate": 8.978202102996691e-06, "loss": 0.0044, "step": 70050 }, { "epoch": 0.5729239072658134, "grad_norm": 0.05932630971074104, "learning_rate": 8.9777697663257e-06, "loss": 0.001, "step": 70060 }, { "epoch": 0.5730056834444126, "grad_norm": 0.010094553232192993, "learning_rate": 8.977337348624308e-06, "loss": 0.0024, "step": 70070 }, { "epoch": 0.5730874596230118, "grad_norm": 0.13964776694774628, "learning_rate": 8.97690484990132e-06, "loss": 0.0026, "step": 70080 }, { "epoch": 0.5731692358016109, "grad_norm": 0.033113498240709305, "learning_rate": 8.97647227016555e-06, "loss": 0.0043, "step": 70090 }, { "epoch": 0.5732510119802101, "grad_norm": 0.04489568620920181, "learning_rate": 8.976039609425806e-06, "loss": 0.0017, "step": 70100 }, { "epoch": 0.5733327881588094, "grad_norm": 0.016953954473137856, "learning_rate": 8.975606867690902e-06, "loss": 0.0027, "step": 70110 }, { "epoch": 0.5734145643374086, "grad_norm": 0.12045982480049133, "learning_rate": 8.975174044969657e-06, "loss": 0.0021, "step": 70120 }, { "epoch": 0.5734963405160077, "grad_norm": 0.1605413854122162, "learning_rate": 8.974741141270884e-06, "loss": 0.0016, "step": 70130 }, { "epoch": 0.5735781166946069, "grad_norm": 0.136209636926651, "learning_rate": 8.974308156603404e-06, "loss": 0.0025, "step": 70140 }, { "epoch": 0.5736598928732061, "grad_norm": 0.06520625203847885, "learning_rate": 8.973875090976035e-06, "loss": 0.0017, "step": 70150 }, { "epoch": 0.5737416690518052, "grad_norm": 0.11785096675157547, "learning_rate": 8.9734419443976e-06, "loss": 0.0011, "step": 70160 }, { "epoch": 0.5738234452304044, "grad_norm": 0.21482756733894348, "learning_rate": 8.973008716876925e-06, "loss": 0.0012, "step": 70170 }, { "epoch": 0.5739052214090036, "grad_norm": 0.05465692654252052, "learning_rate": 8.972575408422832e-06, "loss": 0.0029, "step": 70180 }, { "epoch": 0.5739869975876027, "grad_norm": 0.05842800810933113, "learning_rate": 8.972142019044148e-06, "loss": 0.0012, "step": 70190 }, { "epoch": 0.5740687737662019, "grad_norm": 0.019669843837618828, "learning_rate": 8.971708548749705e-06, "loss": 0.0027, "step": 70200 }, { "epoch": 0.5741505499448011, "grad_norm": 0.12754125893115997, "learning_rate": 8.971274997548327e-06, "loss": 0.0017, "step": 70210 }, { "epoch": 0.5742323261234002, "grad_norm": 0.07168141007423401, "learning_rate": 8.970841365448851e-06, "loss": 0.0026, "step": 70220 }, { "epoch": 0.5743141023019994, "grad_norm": 0.09286046028137207, "learning_rate": 8.97040765246011e-06, "loss": 0.0017, "step": 70230 }, { "epoch": 0.5743958784805986, "grad_norm": 0.061972726136446, "learning_rate": 8.969973858590935e-06, "loss": 0.0023, "step": 70240 }, { "epoch": 0.5744776546591978, "grad_norm": 0.006387300789356232, "learning_rate": 8.969539983850167e-06, "loss": 0.0012, "step": 70250 }, { "epoch": 0.5745594308377969, "grad_norm": 0.036832671612501144, "learning_rate": 8.969106028246643e-06, "loss": 0.0029, "step": 70260 }, { "epoch": 0.5746412070163961, "grad_norm": 0.07417809218168259, "learning_rate": 8.968671991789203e-06, "loss": 0.0017, "step": 70270 }, { "epoch": 0.5747229831949953, "grad_norm": 0.1937602311372757, "learning_rate": 8.968237874486689e-06, "loss": 0.0035, "step": 70280 }, { "epoch": 0.5748047593735944, "grad_norm": 0.21132542192935944, "learning_rate": 8.967803676347943e-06, "loss": 0.0014, "step": 70290 }, { "epoch": 0.5748865355521936, "grad_norm": 0.10069318115711212, "learning_rate": 8.967369397381811e-06, "loss": 0.0016, "step": 70300 }, { "epoch": 0.5749683117307928, "grad_norm": 0.1171262264251709, "learning_rate": 8.96693503759714e-06, "loss": 0.0036, "step": 70310 }, { "epoch": 0.575050087909392, "grad_norm": 0.0295004490762949, "learning_rate": 8.966500597002778e-06, "loss": 0.0025, "step": 70320 }, { "epoch": 0.5751318640879912, "grad_norm": 0.14730361104011536, "learning_rate": 8.966066075607573e-06, "loss": 0.0015, "step": 70330 }, { "epoch": 0.5752136402665904, "grad_norm": 0.034678906202316284, "learning_rate": 8.965631473420381e-06, "loss": 0.0024, "step": 70340 }, { "epoch": 0.5752954164451896, "grad_norm": 0.1582571268081665, "learning_rate": 8.96519679045005e-06, "loss": 0.0026, "step": 70350 }, { "epoch": 0.5753771926237887, "grad_norm": 0.1410115361213684, "learning_rate": 8.96476202670544e-06, "loss": 0.0019, "step": 70360 }, { "epoch": 0.5754589688023879, "grad_norm": 0.12084396183490753, "learning_rate": 8.964327182195403e-06, "loss": 0.0025, "step": 70370 }, { "epoch": 0.5755407449809871, "grad_norm": 0.05038441717624664, "learning_rate": 8.963892256928798e-06, "loss": 0.0044, "step": 70380 }, { "epoch": 0.5756225211595862, "grad_norm": 0.07893023639917374, "learning_rate": 8.963457250914487e-06, "loss": 0.0038, "step": 70390 }, { "epoch": 0.5757042973381854, "grad_norm": 0.061435598880052567, "learning_rate": 8.96302216416133e-06, "loss": 0.002, "step": 70400 }, { "epoch": 0.5757860735167846, "grad_norm": 0.04759208858013153, "learning_rate": 8.962586996678192e-06, "loss": 0.0038, "step": 70410 }, { "epoch": 0.5758678496953837, "grad_norm": 0.28199848532676697, "learning_rate": 8.962151748473933e-06, "loss": 0.0038, "step": 70420 }, { "epoch": 0.5759496258739829, "grad_norm": 0.062220096588134766, "learning_rate": 8.961716419557423e-06, "loss": 0.0018, "step": 70430 }, { "epoch": 0.5760314020525821, "grad_norm": 0.007958073168992996, "learning_rate": 8.961281009937531e-06, "loss": 0.002, "step": 70440 }, { "epoch": 0.5761131782311812, "grad_norm": 0.0521959513425827, "learning_rate": 8.960845519623123e-06, "loss": 0.0015, "step": 70450 }, { "epoch": 0.5761949544097804, "grad_norm": 0.04620743170380592, "learning_rate": 8.960409948623071e-06, "loss": 0.0019, "step": 70460 }, { "epoch": 0.5762767305883796, "grad_norm": 0.03334720432758331, "learning_rate": 8.959974296946252e-06, "loss": 0.0009, "step": 70470 }, { "epoch": 0.5763585067669788, "grad_norm": 0.003572949441149831, "learning_rate": 8.959538564601536e-06, "loss": 0.0039, "step": 70480 }, { "epoch": 0.5764402829455779, "grad_norm": 0.02562985010445118, "learning_rate": 8.959102751597802e-06, "loss": 0.002, "step": 70490 }, { "epoch": 0.5765220591241771, "grad_norm": 0.08521571010351181, "learning_rate": 8.958666857943927e-06, "loss": 0.0013, "step": 70500 }, { "epoch": 0.5766038353027763, "grad_norm": 0.008251944556832314, "learning_rate": 8.95823088364879e-06, "loss": 0.0037, "step": 70510 }, { "epoch": 0.5766856114813754, "grad_norm": 0.12973402440547943, "learning_rate": 8.957794828721272e-06, "loss": 0.0023, "step": 70520 }, { "epoch": 0.5767673876599746, "grad_norm": 0.03471996262669563, "learning_rate": 8.957358693170258e-06, "loss": 0.0016, "step": 70530 }, { "epoch": 0.5768491638385739, "grad_norm": 0.0392235703766346, "learning_rate": 8.956922477004628e-06, "loss": 0.0019, "step": 70540 }, { "epoch": 0.576930940017173, "grad_norm": 0.10467886924743652, "learning_rate": 8.956486180233273e-06, "loss": 0.0024, "step": 70550 }, { "epoch": 0.5770127161957722, "grad_norm": 0.0938451737165451, "learning_rate": 8.956049802865076e-06, "loss": 0.0018, "step": 70560 }, { "epoch": 0.5770944923743714, "grad_norm": 0.0229343269020319, "learning_rate": 8.955613344908931e-06, "loss": 0.0023, "step": 70570 }, { "epoch": 0.5771762685529706, "grad_norm": 0.07678976655006409, "learning_rate": 8.955176806373727e-06, "loss": 0.0019, "step": 70580 }, { "epoch": 0.5772580447315697, "grad_norm": 0.06359340250492096, "learning_rate": 8.954740187268356e-06, "loss": 0.002, "step": 70590 }, { "epoch": 0.5773398209101689, "grad_norm": 0.13560599088668823, "learning_rate": 8.954303487601712e-06, "loss": 0.0023, "step": 70600 }, { "epoch": 0.5774215970887681, "grad_norm": 0.03163228556513786, "learning_rate": 8.953866707382694e-06, "loss": 0.0022, "step": 70610 }, { "epoch": 0.5775033732673672, "grad_norm": 0.08540502190589905, "learning_rate": 8.953429846620196e-06, "loss": 0.0013, "step": 70620 }, { "epoch": 0.5775851494459664, "grad_norm": 0.15330292284488678, "learning_rate": 8.952992905323117e-06, "loss": 0.0022, "step": 70630 }, { "epoch": 0.5776669256245656, "grad_norm": 0.09825856238603592, "learning_rate": 8.952555883500362e-06, "loss": 0.0021, "step": 70640 }, { "epoch": 0.5777487018031647, "grad_norm": 0.07302305847406387, "learning_rate": 8.95211878116083e-06, "loss": 0.0031, "step": 70650 }, { "epoch": 0.5778304779817639, "grad_norm": 0.06090882048010826, "learning_rate": 8.951681598313424e-06, "loss": 0.0017, "step": 70660 }, { "epoch": 0.5779122541603631, "grad_norm": 0.3931559920310974, "learning_rate": 8.951244334967053e-06, "loss": 0.006, "step": 70670 }, { "epoch": 0.5779940303389622, "grad_norm": 0.08947020769119263, "learning_rate": 8.950806991130625e-06, "loss": 0.0026, "step": 70680 }, { "epoch": 0.5780758065175614, "grad_norm": 0.15918400883674622, "learning_rate": 8.950369566813044e-06, "loss": 0.0018, "step": 70690 }, { "epoch": 0.5781575826961606, "grad_norm": 0.11571196466684341, "learning_rate": 8.949932062023225e-06, "loss": 0.0015, "step": 70700 }, { "epoch": 0.5782393588747597, "grad_norm": 0.09951039403676987, "learning_rate": 8.94949447677008e-06, "loss": 0.001, "step": 70710 }, { "epoch": 0.5783211350533589, "grad_norm": 0.05674179270863533, "learning_rate": 8.949056811062523e-06, "loss": 0.0024, "step": 70720 }, { "epoch": 0.5784029112319581, "grad_norm": 0.1548386961221695, "learning_rate": 8.948619064909468e-06, "loss": 0.0014, "step": 70730 }, { "epoch": 0.5784846874105573, "grad_norm": 0.02284594438970089, "learning_rate": 8.948181238319833e-06, "loss": 0.0013, "step": 70740 }, { "epoch": 0.5785664635891565, "grad_norm": 0.021019713953137398, "learning_rate": 8.947743331302536e-06, "loss": 0.0019, "step": 70750 }, { "epoch": 0.5786482397677557, "grad_norm": 0.07660728693008423, "learning_rate": 8.9473053438665e-06, "loss": 0.0024, "step": 70760 }, { "epoch": 0.5787300159463549, "grad_norm": 0.02293802984058857, "learning_rate": 8.946867276020645e-06, "loss": 0.0036, "step": 70770 }, { "epoch": 0.578811792124954, "grad_norm": 0.1395949274301529, "learning_rate": 8.946429127773894e-06, "loss": 0.0032, "step": 70780 }, { "epoch": 0.5788935683035532, "grad_norm": 0.06607721000909805, "learning_rate": 8.945990899135175e-06, "loss": 0.0014, "step": 70790 }, { "epoch": 0.5789753444821524, "grad_norm": 0.06881459802389145, "learning_rate": 8.945552590113414e-06, "loss": 0.0027, "step": 70800 }, { "epoch": 0.5790571206607515, "grad_norm": 0.04519622400403023, "learning_rate": 8.94511420071754e-06, "loss": 0.0017, "step": 70810 }, { "epoch": 0.5791388968393507, "grad_norm": 0.10582324117422104, "learning_rate": 8.944675730956483e-06, "loss": 0.002, "step": 70820 }, { "epoch": 0.5792206730179499, "grad_norm": 0.06880845129489899, "learning_rate": 8.944237180839175e-06, "loss": 0.0015, "step": 70830 }, { "epoch": 0.579302449196549, "grad_norm": 0.07966773211956024, "learning_rate": 8.943798550374549e-06, "loss": 0.0011, "step": 70840 }, { "epoch": 0.5793842253751482, "grad_norm": 0.15182557702064514, "learning_rate": 8.94335983957154e-06, "loss": 0.0027, "step": 70850 }, { "epoch": 0.5794660015537474, "grad_norm": 0.04700866714119911, "learning_rate": 8.942921048439088e-06, "loss": 0.002, "step": 70860 }, { "epoch": 0.5795477777323466, "grad_norm": 0.06073882058262825, "learning_rate": 8.942482176986127e-06, "loss": 0.0017, "step": 70870 }, { "epoch": 0.5796295539109457, "grad_norm": 0.03450591117143631, "learning_rate": 8.942043225221601e-06, "loss": 0.0016, "step": 70880 }, { "epoch": 0.5797113300895449, "grad_norm": 0.07907191663980484, "learning_rate": 8.94160419315445e-06, "loss": 0.0018, "step": 70890 }, { "epoch": 0.5797931062681441, "grad_norm": 0.0048636929132044315, "learning_rate": 8.941165080793617e-06, "loss": 0.0013, "step": 70900 }, { "epoch": 0.5798748824467432, "grad_norm": 0.12755608558654785, "learning_rate": 8.94072588814805e-06, "loss": 0.0012, "step": 70910 }, { "epoch": 0.5799566586253424, "grad_norm": 0.07317820936441422, "learning_rate": 8.94028661522669e-06, "loss": 0.0013, "step": 70920 }, { "epoch": 0.5800384348039416, "grad_norm": 0.3145087659358978, "learning_rate": 8.939847262038491e-06, "loss": 0.0016, "step": 70930 }, { "epoch": 0.5801202109825407, "grad_norm": 0.030585767701268196, "learning_rate": 8.939407828592398e-06, "loss": 0.0019, "step": 70940 }, { "epoch": 0.5802019871611399, "grad_norm": 0.06902995705604553, "learning_rate": 8.938968314897369e-06, "loss": 0.0019, "step": 70950 }, { "epoch": 0.5802837633397392, "grad_norm": 0.050099633634090424, "learning_rate": 8.938528720962353e-06, "loss": 0.002, "step": 70960 }, { "epoch": 0.5803655395183384, "grad_norm": 0.09259482473134995, "learning_rate": 8.938089046796305e-06, "loss": 0.0033, "step": 70970 }, { "epoch": 0.5804473156969375, "grad_norm": 0.04285290464758873, "learning_rate": 8.937649292408182e-06, "loss": 0.0019, "step": 70980 }, { "epoch": 0.5805290918755367, "grad_norm": 0.026191944256424904, "learning_rate": 8.937209457806942e-06, "loss": 0.0011, "step": 70990 }, { "epoch": 0.5806108680541359, "grad_norm": 0.07699846476316452, "learning_rate": 8.936769543001544e-06, "loss": 0.0024, "step": 71000 }, { "epoch": 0.580692644232735, "grad_norm": 0.03360515460371971, "learning_rate": 8.936329548000952e-06, "loss": 0.0013, "step": 71010 }, { "epoch": 0.5807744204113342, "grad_norm": 0.09943456947803497, "learning_rate": 8.935889472814128e-06, "loss": 0.0023, "step": 71020 }, { "epoch": 0.5808561965899334, "grad_norm": 0.053944181650877, "learning_rate": 8.935449317450035e-06, "loss": 0.0022, "step": 71030 }, { "epoch": 0.5809379727685325, "grad_norm": 0.08789244294166565, "learning_rate": 8.93500908191764e-06, "loss": 0.002, "step": 71040 }, { "epoch": 0.5810197489471317, "grad_norm": 0.01916828565299511, "learning_rate": 8.934568766225913e-06, "loss": 0.0017, "step": 71050 }, { "epoch": 0.5811015251257309, "grad_norm": 0.09742289036512375, "learning_rate": 8.934128370383823e-06, "loss": 0.002, "step": 71060 }, { "epoch": 0.58118330130433, "grad_norm": 0.026409167796373367, "learning_rate": 8.933687894400337e-06, "loss": 0.0037, "step": 71070 }, { "epoch": 0.5812650774829292, "grad_norm": 0.07061980664730072, "learning_rate": 8.933247338284434e-06, "loss": 0.002, "step": 71080 }, { "epoch": 0.5813468536615284, "grad_norm": 0.07930304855108261, "learning_rate": 8.932806702045084e-06, "loss": 0.0032, "step": 71090 }, { "epoch": 0.5814286298401276, "grad_norm": 0.1130337044596672, "learning_rate": 8.932365985691265e-06, "loss": 0.0018, "step": 71100 }, { "epoch": 0.5815104060187267, "grad_norm": 0.027290169149637222, "learning_rate": 8.931925189231955e-06, "loss": 0.0021, "step": 71110 }, { "epoch": 0.5815921821973259, "grad_norm": 0.09444383531808853, "learning_rate": 8.931484312676133e-06, "loss": 0.0019, "step": 71120 }, { "epoch": 0.5816739583759251, "grad_norm": 0.008323113434016705, "learning_rate": 8.931043356032778e-06, "loss": 0.0022, "step": 71130 }, { "epoch": 0.5817557345545242, "grad_norm": 0.033803150057792664, "learning_rate": 8.930602319310876e-06, "loss": 0.0016, "step": 71140 }, { "epoch": 0.5818375107331234, "grad_norm": 0.005161338485777378, "learning_rate": 8.93016120251941e-06, "loss": 0.001, "step": 71150 }, { "epoch": 0.5819192869117226, "grad_norm": 0.0066021461971104145, "learning_rate": 8.929720005667365e-06, "loss": 0.0014, "step": 71160 }, { "epoch": 0.5820010630903217, "grad_norm": 0.04397182539105415, "learning_rate": 8.92927872876373e-06, "loss": 0.0026, "step": 71170 }, { "epoch": 0.582082839268921, "grad_norm": 0.05651935562491417, "learning_rate": 8.928837371817494e-06, "loss": 0.0019, "step": 71180 }, { "epoch": 0.5821646154475202, "grad_norm": 0.029663141816854477, "learning_rate": 8.928395934837645e-06, "loss": 0.0015, "step": 71190 }, { "epoch": 0.5822463916261194, "grad_norm": 0.04645390808582306, "learning_rate": 8.92795441783318e-06, "loss": 0.0014, "step": 71200 }, { "epoch": 0.5823281678047185, "grad_norm": 0.08266573399305344, "learning_rate": 8.92751282081309e-06, "loss": 0.0026, "step": 71210 }, { "epoch": 0.5824099439833177, "grad_norm": 0.03825368359684944, "learning_rate": 8.92707114378637e-06, "loss": 0.0041, "step": 71220 }, { "epoch": 0.5824917201619169, "grad_norm": 0.02939654514193535, "learning_rate": 8.92662938676202e-06, "loss": 0.0013, "step": 71230 }, { "epoch": 0.582573496340516, "grad_norm": 0.039788465946912766, "learning_rate": 8.926187549749037e-06, "loss": 0.0024, "step": 71240 }, { "epoch": 0.5826552725191152, "grad_norm": 0.08534428477287292, "learning_rate": 8.925745632756424e-06, "loss": 0.0025, "step": 71250 }, { "epoch": 0.5827370486977144, "grad_norm": 0.1182628870010376, "learning_rate": 8.925303635793178e-06, "loss": 0.0034, "step": 71260 }, { "epoch": 0.5828188248763135, "grad_norm": 0.16060850024223328, "learning_rate": 8.92486155886831e-06, "loss": 0.0021, "step": 71270 }, { "epoch": 0.5829006010549127, "grad_norm": 0.12875553965568542, "learning_rate": 8.924419401990821e-06, "loss": 0.0034, "step": 71280 }, { "epoch": 0.5829823772335119, "grad_norm": 0.08296719938516617, "learning_rate": 8.923977165169717e-06, "loss": 0.0026, "step": 71290 }, { "epoch": 0.583064153412111, "grad_norm": 0.036449454724788666, "learning_rate": 8.923534848414009e-06, "loss": 0.0024, "step": 71300 }, { "epoch": 0.5831459295907102, "grad_norm": 0.04816701263189316, "learning_rate": 8.923092451732707e-06, "loss": 0.0024, "step": 71310 }, { "epoch": 0.5832277057693094, "grad_norm": 0.26127931475639343, "learning_rate": 8.922649975134824e-06, "loss": 0.0022, "step": 71320 }, { "epoch": 0.5833094819479085, "grad_norm": 0.1618429571390152, "learning_rate": 8.922207418629373e-06, "loss": 0.0027, "step": 71330 }, { "epoch": 0.5833912581265077, "grad_norm": 0.05598291754722595, "learning_rate": 8.921764782225367e-06, "loss": 0.0011, "step": 71340 }, { "epoch": 0.5834730343051069, "grad_norm": 0.09550356864929199, "learning_rate": 8.921322065931826e-06, "loss": 0.0021, "step": 71350 }, { "epoch": 0.583554810483706, "grad_norm": 0.2522112727165222, "learning_rate": 8.920879269757766e-06, "loss": 0.0049, "step": 71360 }, { "epoch": 0.5836365866623052, "grad_norm": 0.0348014160990715, "learning_rate": 8.920436393712209e-06, "loss": 0.0025, "step": 71370 }, { "epoch": 0.5837183628409044, "grad_norm": 0.09151057153940201, "learning_rate": 8.919993437804175e-06, "loss": 0.0015, "step": 71380 }, { "epoch": 0.5838001390195037, "grad_norm": 0.017390593886375427, "learning_rate": 8.91955040204269e-06, "loss": 0.0015, "step": 71390 }, { "epoch": 0.5838819151981028, "grad_norm": 0.04053262248635292, "learning_rate": 8.919107286436776e-06, "loss": 0.002, "step": 71400 }, { "epoch": 0.583963691376702, "grad_norm": 0.061820901930332184, "learning_rate": 8.91866409099546e-06, "loss": 0.0018, "step": 71410 }, { "epoch": 0.5840454675553012, "grad_norm": 0.13887402415275574, "learning_rate": 8.918220815727773e-06, "loss": 0.0015, "step": 71420 }, { "epoch": 0.5841272437339003, "grad_norm": 0.10677115619182587, "learning_rate": 8.917777460642743e-06, "loss": 0.002, "step": 71430 }, { "epoch": 0.5842090199124995, "grad_norm": 0.07657112181186676, "learning_rate": 8.9173340257494e-06, "loss": 0.0018, "step": 71440 }, { "epoch": 0.5842907960910987, "grad_norm": 0.08531742542982101, "learning_rate": 8.916890511056782e-06, "loss": 0.0014, "step": 71450 }, { "epoch": 0.5843725722696979, "grad_norm": 0.04129185155034065, "learning_rate": 8.916446916573918e-06, "loss": 0.0036, "step": 71460 }, { "epoch": 0.584454348448297, "grad_norm": 0.1478956639766693, "learning_rate": 8.916003242309847e-06, "loss": 0.0025, "step": 71470 }, { "epoch": 0.5845361246268962, "grad_norm": 0.062491364777088165, "learning_rate": 8.915559488273607e-06, "loss": 0.0025, "step": 71480 }, { "epoch": 0.5846179008054954, "grad_norm": 0.04645826667547226, "learning_rate": 8.915115654474238e-06, "loss": 0.0022, "step": 71490 }, { "epoch": 0.5846996769840945, "grad_norm": 0.12455902248620987, "learning_rate": 8.91467174092078e-06, "loss": 0.0031, "step": 71500 }, { "epoch": 0.5847814531626937, "grad_norm": 0.037273962050676346, "learning_rate": 8.914227747622278e-06, "loss": 0.0029, "step": 71510 }, { "epoch": 0.5848632293412929, "grad_norm": 0.0028529935516417027, "learning_rate": 8.913783674587772e-06, "loss": 0.0014, "step": 71520 }, { "epoch": 0.584945005519892, "grad_norm": 0.10423009097576141, "learning_rate": 8.913339521826314e-06, "loss": 0.0021, "step": 71530 }, { "epoch": 0.5850267816984912, "grad_norm": 0.10926064103841782, "learning_rate": 8.912895289346948e-06, "loss": 0.0022, "step": 71540 }, { "epoch": 0.5851085578770904, "grad_norm": 0.02642405778169632, "learning_rate": 8.912450977158726e-06, "loss": 0.0016, "step": 71550 }, { "epoch": 0.5851903340556895, "grad_norm": 0.09125598520040512, "learning_rate": 8.912006585270695e-06, "loss": 0.0021, "step": 71560 }, { "epoch": 0.5852721102342887, "grad_norm": 0.08254195749759674, "learning_rate": 8.911562113691912e-06, "loss": 0.0031, "step": 71570 }, { "epoch": 0.5853538864128879, "grad_norm": 0.2831258475780487, "learning_rate": 8.911117562431429e-06, "loss": 0.0028, "step": 71580 }, { "epoch": 0.585435662591487, "grad_norm": 0.04221668466925621, "learning_rate": 8.910672931498301e-06, "loss": 0.0018, "step": 71590 }, { "epoch": 0.5855174387700863, "grad_norm": 0.14639323949813843, "learning_rate": 8.910228220901586e-06, "loss": 0.0028, "step": 71600 }, { "epoch": 0.5855992149486855, "grad_norm": 0.04525918513536453, "learning_rate": 8.909783430650345e-06, "loss": 0.0022, "step": 71610 }, { "epoch": 0.5856809911272847, "grad_norm": 0.12854042649269104, "learning_rate": 8.909338560753638e-06, "loss": 0.0022, "step": 71620 }, { "epoch": 0.5857627673058838, "grad_norm": 0.04422270134091377, "learning_rate": 8.908893611220526e-06, "loss": 0.0018, "step": 71630 }, { "epoch": 0.585844543484483, "grad_norm": 0.16282711923122406, "learning_rate": 8.908448582060074e-06, "loss": 0.0022, "step": 71640 }, { "epoch": 0.5859263196630822, "grad_norm": 0.017114169895648956, "learning_rate": 8.908003473281348e-06, "loss": 0.0014, "step": 71650 }, { "epoch": 0.5860080958416813, "grad_norm": 0.04735824093222618, "learning_rate": 8.907558284893414e-06, "loss": 0.0015, "step": 71660 }, { "epoch": 0.5860898720202805, "grad_norm": 0.04959234222769737, "learning_rate": 8.907113016905341e-06, "loss": 0.0028, "step": 71670 }, { "epoch": 0.5861716481988797, "grad_norm": 0.1608412265777588, "learning_rate": 8.906667669326203e-06, "loss": 0.0013, "step": 71680 }, { "epoch": 0.5862534243774788, "grad_norm": 0.11092221736907959, "learning_rate": 8.906222242165066e-06, "loss": 0.0026, "step": 71690 }, { "epoch": 0.586335200556078, "grad_norm": 0.06475050002336502, "learning_rate": 8.905776735431008e-06, "loss": 0.0018, "step": 71700 }, { "epoch": 0.5864169767346772, "grad_norm": 0.013195834122598171, "learning_rate": 8.905331149133102e-06, "loss": 0.0026, "step": 71710 }, { "epoch": 0.5864987529132764, "grad_norm": 0.04417797178030014, "learning_rate": 8.904885483280427e-06, "loss": 0.0018, "step": 71720 }, { "epoch": 0.5865805290918755, "grad_norm": 0.06738919764757156, "learning_rate": 8.90443973788206e-06, "loss": 0.0024, "step": 71730 }, { "epoch": 0.5866623052704747, "grad_norm": 0.19052141904830933, "learning_rate": 8.903993912947082e-06, "loss": 0.0023, "step": 71740 }, { "epoch": 0.5867440814490739, "grad_norm": 0.02778596803545952, "learning_rate": 8.903548008484577e-06, "loss": 0.0076, "step": 71750 }, { "epoch": 0.586825857627673, "grad_norm": 0.03570648655295372, "learning_rate": 8.903102024503623e-06, "loss": 0.0009, "step": 71760 }, { "epoch": 0.5869076338062722, "grad_norm": 0.13586385548114777, "learning_rate": 8.90265596101331e-06, "loss": 0.0017, "step": 71770 }, { "epoch": 0.5869894099848714, "grad_norm": 0.023716948926448822, "learning_rate": 8.902209818022722e-06, "loss": 0.004, "step": 71780 }, { "epoch": 0.5870711861634705, "grad_norm": 0.08294984698295593, "learning_rate": 8.901763595540948e-06, "loss": 0.0016, "step": 71790 }, { "epoch": 0.5871529623420697, "grad_norm": 0.05596161633729935, "learning_rate": 8.901317293577078e-06, "loss": 0.002, "step": 71800 }, { "epoch": 0.5872347385206689, "grad_norm": 0.0387088917195797, "learning_rate": 8.900870912140204e-06, "loss": 0.0018, "step": 71810 }, { "epoch": 0.5873165146992682, "grad_norm": 0.11006512492895126, "learning_rate": 8.900424451239417e-06, "loss": 0.0009, "step": 71820 }, { "epoch": 0.5873982908778673, "grad_norm": 0.0324486680328846, "learning_rate": 8.899977910883817e-06, "loss": 0.0017, "step": 71830 }, { "epoch": 0.5874800670564665, "grad_norm": 0.08360298722982407, "learning_rate": 8.899531291082494e-06, "loss": 0.0025, "step": 71840 }, { "epoch": 0.5875618432350657, "grad_norm": 0.08664529770612717, "learning_rate": 8.89908459184455e-06, "loss": 0.0027, "step": 71850 }, { "epoch": 0.5876436194136648, "grad_norm": 0.10326976329088211, "learning_rate": 8.898637813179083e-06, "loss": 0.0016, "step": 71860 }, { "epoch": 0.587725395592264, "grad_norm": 0.14073769748210907, "learning_rate": 8.898190955095196e-06, "loss": 0.002, "step": 71870 }, { "epoch": 0.5878071717708632, "grad_norm": 0.23755310475826263, "learning_rate": 8.89774401760199e-06, "loss": 0.0024, "step": 71880 }, { "epoch": 0.5878889479494623, "grad_norm": 0.17394918203353882, "learning_rate": 8.897297000708569e-06, "loss": 0.0027, "step": 71890 }, { "epoch": 0.5879707241280615, "grad_norm": 0.09211543947458267, "learning_rate": 8.89684990442404e-06, "loss": 0.0016, "step": 71900 }, { "epoch": 0.5880525003066607, "grad_norm": 0.08601967245340347, "learning_rate": 8.896402728757514e-06, "loss": 0.0026, "step": 71910 }, { "epoch": 0.5881342764852598, "grad_norm": 0.03137661889195442, "learning_rate": 8.895955473718095e-06, "loss": 0.0017, "step": 71920 }, { "epoch": 0.588216052663859, "grad_norm": 0.057735126465559006, "learning_rate": 8.895508139314897e-06, "loss": 0.0017, "step": 71930 }, { "epoch": 0.5882978288424582, "grad_norm": 0.15248151123523712, "learning_rate": 8.895060725557031e-06, "loss": 0.0021, "step": 71940 }, { "epoch": 0.5883796050210574, "grad_norm": 0.09847601503133774, "learning_rate": 8.894613232453612e-06, "loss": 0.0037, "step": 71950 }, { "epoch": 0.5884613811996565, "grad_norm": 0.07393448799848557, "learning_rate": 8.894165660013756e-06, "loss": 0.002, "step": 71960 }, { "epoch": 0.5885431573782557, "grad_norm": 0.036684516817331314, "learning_rate": 8.89371800824658e-06, "loss": 0.0012, "step": 71970 }, { "epoch": 0.5886249335568549, "grad_norm": 0.11851279437541962, "learning_rate": 8.893270277161203e-06, "loss": 0.0018, "step": 71980 }, { "epoch": 0.588706709735454, "grad_norm": 0.34680962562561035, "learning_rate": 8.892822466766746e-06, "loss": 0.003, "step": 71990 }, { "epoch": 0.5887884859140532, "grad_norm": 0.08279848098754883, "learning_rate": 8.892374577072331e-06, "loss": 0.0017, "step": 72000 }, { "epoch": 0.5888702620926524, "grad_norm": 0.08732764422893524, "learning_rate": 8.891926608087084e-06, "loss": 0.0017, "step": 72010 }, { "epoch": 0.5889520382712515, "grad_norm": 0.004592591896653175, "learning_rate": 8.891478559820125e-06, "loss": 0.0019, "step": 72020 }, { "epoch": 0.5890338144498508, "grad_norm": 0.1548679620027542, "learning_rate": 8.891030432280587e-06, "loss": 0.0022, "step": 72030 }, { "epoch": 0.58911559062845, "grad_norm": 0.05130044370889664, "learning_rate": 8.890582225477597e-06, "loss": 0.0017, "step": 72040 }, { "epoch": 0.5891973668070491, "grad_norm": 0.01984832063317299, "learning_rate": 8.890133939420283e-06, "loss": 0.0014, "step": 72050 }, { "epoch": 0.5892791429856483, "grad_norm": 0.033394020050764084, "learning_rate": 8.88968557411778e-06, "loss": 0.0014, "step": 72060 }, { "epoch": 0.5893609191642475, "grad_norm": 0.039229609072208405, "learning_rate": 8.889237129579218e-06, "loss": 0.0022, "step": 72070 }, { "epoch": 0.5894426953428467, "grad_norm": 0.13735409080982208, "learning_rate": 8.888788605813735e-06, "loss": 0.0022, "step": 72080 }, { "epoch": 0.5895244715214458, "grad_norm": 0.02136373519897461, "learning_rate": 8.888340002830468e-06, "loss": 0.0017, "step": 72090 }, { "epoch": 0.589606247700045, "grad_norm": 0.19128000736236572, "learning_rate": 8.887891320638553e-06, "loss": 0.0023, "step": 72100 }, { "epoch": 0.5896880238786442, "grad_norm": 0.05601808801293373, "learning_rate": 8.887442559247134e-06, "loss": 0.0021, "step": 72110 }, { "epoch": 0.5897698000572433, "grad_norm": 0.07759450376033783, "learning_rate": 8.886993718665349e-06, "loss": 0.0056, "step": 72120 }, { "epoch": 0.5898515762358425, "grad_norm": 0.1102345883846283, "learning_rate": 8.88654479890234e-06, "loss": 0.0027, "step": 72130 }, { "epoch": 0.5899333524144417, "grad_norm": 0.054914440959692, "learning_rate": 8.886095799967257e-06, "loss": 0.0016, "step": 72140 }, { "epoch": 0.5900151285930408, "grad_norm": 0.021011406555771828, "learning_rate": 8.885646721869246e-06, "loss": 0.0218, "step": 72150 }, { "epoch": 0.59009690477164, "grad_norm": 0.0859932228922844, "learning_rate": 8.885197564617449e-06, "loss": 0.0024, "step": 72160 }, { "epoch": 0.5901786809502392, "grad_norm": 0.07851255685091019, "learning_rate": 8.88474832822102e-06, "loss": 0.0016, "step": 72170 }, { "epoch": 0.5902604571288383, "grad_norm": 0.028721999377012253, "learning_rate": 8.884299012689112e-06, "loss": 0.0024, "step": 72180 }, { "epoch": 0.5903422333074375, "grad_norm": 0.06965108215808868, "learning_rate": 8.883849618030873e-06, "loss": 0.0022, "step": 72190 }, { "epoch": 0.5904240094860367, "grad_norm": 0.028424710035324097, "learning_rate": 8.883400144255463e-06, "loss": 0.0027, "step": 72200 }, { "epoch": 0.5905057856646359, "grad_norm": 0.12163758277893066, "learning_rate": 8.882950591372033e-06, "loss": 0.0015, "step": 72210 }, { "epoch": 0.590587561843235, "grad_norm": 0.06364858150482178, "learning_rate": 8.882500959389747e-06, "loss": 0.002, "step": 72220 }, { "epoch": 0.5906693380218342, "grad_norm": 0.15414480865001678, "learning_rate": 8.882051248317756e-06, "loss": 0.002, "step": 72230 }, { "epoch": 0.5907511142004335, "grad_norm": 0.019450880587100983, "learning_rate": 8.881601458165228e-06, "loss": 0.0017, "step": 72240 }, { "epoch": 0.5908328903790326, "grad_norm": 0.05012385919690132, "learning_rate": 8.881151588941324e-06, "loss": 0.0018, "step": 72250 }, { "epoch": 0.5909146665576318, "grad_norm": 0.05139760300517082, "learning_rate": 8.880701640655205e-06, "loss": 0.0015, "step": 72260 }, { "epoch": 0.590996442736231, "grad_norm": 0.011005903594195843, "learning_rate": 8.88025161331604e-06, "loss": 0.001, "step": 72270 }, { "epoch": 0.5910782189148301, "grad_norm": 0.06452035158872604, "learning_rate": 8.879801506932997e-06, "loss": 0.0033, "step": 72280 }, { "epoch": 0.5911599950934293, "grad_norm": 0.12461313605308533, "learning_rate": 8.87935132151524e-06, "loss": 0.0017, "step": 72290 }, { "epoch": 0.5912417712720285, "grad_norm": 0.15342977643013, "learning_rate": 8.878901057071948e-06, "loss": 0.0023, "step": 72300 }, { "epoch": 0.5913235474506277, "grad_norm": 0.03879566118121147, "learning_rate": 8.878450713612286e-06, "loss": 0.0013, "step": 72310 }, { "epoch": 0.5914053236292268, "grad_norm": 0.13492745161056519, "learning_rate": 8.878000291145433e-06, "loss": 0.0022, "step": 72320 }, { "epoch": 0.591487099807826, "grad_norm": 0.10386378318071365, "learning_rate": 8.877549789680558e-06, "loss": 0.0022, "step": 72330 }, { "epoch": 0.5915688759864252, "grad_norm": 0.09063474833965302, "learning_rate": 8.877099209226844e-06, "loss": 0.0015, "step": 72340 }, { "epoch": 0.5916506521650243, "grad_norm": 0.12861758470535278, "learning_rate": 8.876648549793467e-06, "loss": 0.0019, "step": 72350 }, { "epoch": 0.5917324283436235, "grad_norm": 0.06305965036153793, "learning_rate": 8.87619781138961e-06, "loss": 0.0018, "step": 72360 }, { "epoch": 0.5918142045222227, "grad_norm": 0.09536999464035034, "learning_rate": 8.875746994024451e-06, "loss": 0.0014, "step": 72370 }, { "epoch": 0.5918959807008218, "grad_norm": 0.09007132053375244, "learning_rate": 8.875296097707174e-06, "loss": 0.003, "step": 72380 }, { "epoch": 0.591977756879421, "grad_norm": 0.16237996518611908, "learning_rate": 8.874845122446968e-06, "loss": 0.0016, "step": 72390 }, { "epoch": 0.5920595330580202, "grad_norm": 0.09914602339267731, "learning_rate": 8.874394068253017e-06, "loss": 0.0048, "step": 72400 }, { "epoch": 0.5921413092366193, "grad_norm": 0.06345036625862122, "learning_rate": 8.873942935134507e-06, "loss": 0.001, "step": 72410 }, { "epoch": 0.5922230854152185, "grad_norm": 0.17114011943340302, "learning_rate": 8.873491723100632e-06, "loss": 0.003, "step": 72420 }, { "epoch": 0.5923048615938177, "grad_norm": 0.019899245351552963, "learning_rate": 8.873040432160583e-06, "loss": 0.0038, "step": 72430 }, { "epoch": 0.5923866377724168, "grad_norm": 0.013992205262184143, "learning_rate": 8.872589062323551e-06, "loss": 0.0011, "step": 72440 }, { "epoch": 0.592468413951016, "grad_norm": 0.016960879787802696, "learning_rate": 8.872137613598733e-06, "loss": 0.0021, "step": 72450 }, { "epoch": 0.5925501901296153, "grad_norm": 0.13908301293849945, "learning_rate": 8.871686085995323e-06, "loss": 0.0028, "step": 72460 }, { "epoch": 0.5926319663082145, "grad_norm": 0.026197561994194984, "learning_rate": 8.871234479522523e-06, "loss": 0.0036, "step": 72470 }, { "epoch": 0.5927137424868136, "grad_norm": 0.02454158291220665, "learning_rate": 8.870782794189527e-06, "loss": 0.0014, "step": 72480 }, { "epoch": 0.5927955186654128, "grad_norm": 0.0673883929848671, "learning_rate": 8.870331030005541e-06, "loss": 0.0026, "step": 72490 }, { "epoch": 0.592877294844012, "grad_norm": 0.17130403220653534, "learning_rate": 8.869879186979766e-06, "loss": 0.0033, "step": 72500 }, { "epoch": 0.5929590710226111, "grad_norm": 0.009642819873988628, "learning_rate": 8.869427265121405e-06, "loss": 0.0023, "step": 72510 }, { "epoch": 0.5930408472012103, "grad_norm": 0.036553334444761276, "learning_rate": 8.868975264439667e-06, "loss": 0.0016, "step": 72520 }, { "epoch": 0.5931226233798095, "grad_norm": 0.07015488296747208, "learning_rate": 8.868523184943757e-06, "loss": 0.0019, "step": 72530 }, { "epoch": 0.5932043995584086, "grad_norm": 0.03393134847283363, "learning_rate": 8.868071026642886e-06, "loss": 0.0013, "step": 72540 }, { "epoch": 0.5932861757370078, "grad_norm": 0.055273402482271194, "learning_rate": 8.867618789546262e-06, "loss": 0.0021, "step": 72550 }, { "epoch": 0.593367951915607, "grad_norm": 0.227711021900177, "learning_rate": 8.867166473663102e-06, "loss": 0.0039, "step": 72560 }, { "epoch": 0.5934497280942062, "grad_norm": 0.018669573590159416, "learning_rate": 8.866714079002617e-06, "loss": 0.0014, "step": 72570 }, { "epoch": 0.5935315042728053, "grad_norm": 0.01655632071197033, "learning_rate": 8.866261605574022e-06, "loss": 0.0048, "step": 72580 }, { "epoch": 0.5936132804514045, "grad_norm": 0.0733291506767273, "learning_rate": 8.865809053386535e-06, "loss": 0.0024, "step": 72590 }, { "epoch": 0.5936950566300037, "grad_norm": 0.06612221151590347, "learning_rate": 8.865356422449378e-06, "loss": 0.0026, "step": 72600 }, { "epoch": 0.5937768328086028, "grad_norm": 0.06408068537712097, "learning_rate": 8.864903712771766e-06, "loss": 0.0019, "step": 72610 }, { "epoch": 0.593858608987202, "grad_norm": 0.05724561586976051, "learning_rate": 8.864450924362925e-06, "loss": 0.0043, "step": 72620 }, { "epoch": 0.5939403851658012, "grad_norm": 0.05352099984884262, "learning_rate": 8.863998057232078e-06, "loss": 0.0014, "step": 72630 }, { "epoch": 0.5940221613444003, "grad_norm": 0.021658705547451973, "learning_rate": 8.863545111388448e-06, "loss": 0.0016, "step": 72640 }, { "epoch": 0.5941039375229995, "grad_norm": 0.15553870797157288, "learning_rate": 8.863092086841265e-06, "loss": 0.0016, "step": 72650 }, { "epoch": 0.5941857137015987, "grad_norm": 0.07787687331438065, "learning_rate": 8.862638983599755e-06, "loss": 0.0024, "step": 72660 }, { "epoch": 0.594267489880198, "grad_norm": 0.011621863581240177, "learning_rate": 8.86218580167315e-06, "loss": 0.0024, "step": 72670 }, { "epoch": 0.5943492660587971, "grad_norm": 0.08984489738941193, "learning_rate": 8.861732541070682e-06, "loss": 0.0024, "step": 72680 }, { "epoch": 0.5944310422373963, "grad_norm": 0.14513498544692993, "learning_rate": 8.861279201801583e-06, "loss": 0.0029, "step": 72690 }, { "epoch": 0.5945128184159955, "grad_norm": 0.05291527509689331, "learning_rate": 8.860825783875086e-06, "loss": 0.002, "step": 72700 }, { "epoch": 0.5945945945945946, "grad_norm": 0.03630400076508522, "learning_rate": 8.860372287300432e-06, "loss": 0.0027, "step": 72710 }, { "epoch": 0.5946763707731938, "grad_norm": 0.047474898397922516, "learning_rate": 8.859918712086855e-06, "loss": 0.0027, "step": 72720 }, { "epoch": 0.594758146951793, "grad_norm": 0.055001262575387955, "learning_rate": 8.859465058243596e-06, "loss": 0.0021, "step": 72730 }, { "epoch": 0.5948399231303921, "grad_norm": 0.08757667988538742, "learning_rate": 8.859011325779898e-06, "loss": 0.0021, "step": 72740 }, { "epoch": 0.5949216993089913, "grad_norm": 0.10261888802051544, "learning_rate": 8.858557514705002e-06, "loss": 0.0021, "step": 72750 }, { "epoch": 0.5950034754875905, "grad_norm": 0.05556214600801468, "learning_rate": 8.858103625028154e-06, "loss": 0.0025, "step": 72760 }, { "epoch": 0.5950852516661896, "grad_norm": 0.06348184496164322, "learning_rate": 8.857649656758599e-06, "loss": 0.0022, "step": 72770 }, { "epoch": 0.5951670278447888, "grad_norm": 0.06060196831822395, "learning_rate": 8.857195609905583e-06, "loss": 0.0026, "step": 72780 }, { "epoch": 0.595248804023388, "grad_norm": 0.017705274745821953, "learning_rate": 8.856741484478358e-06, "loss": 0.0013, "step": 72790 }, { "epoch": 0.5953305802019871, "grad_norm": 0.08589843660593033, "learning_rate": 8.856287280486173e-06, "loss": 0.0043, "step": 72800 }, { "epoch": 0.5954123563805863, "grad_norm": 0.1224714145064354, "learning_rate": 8.855832997938282e-06, "loss": 0.0025, "step": 72810 }, { "epoch": 0.5954941325591855, "grad_norm": 0.0623680017888546, "learning_rate": 8.85537863684394e-06, "loss": 0.003, "step": 72820 }, { "epoch": 0.5955759087377847, "grad_norm": 0.03920314460992813, "learning_rate": 8.8549241972124e-06, "loss": 0.002, "step": 72830 }, { "epoch": 0.5956576849163838, "grad_norm": 0.0076135871931910515, "learning_rate": 8.854469679052922e-06, "loss": 0.0022, "step": 72840 }, { "epoch": 0.595739461094983, "grad_norm": 0.053918976336717606, "learning_rate": 8.854015082374764e-06, "loss": 0.0017, "step": 72850 }, { "epoch": 0.5958212372735822, "grad_norm": 0.18094876408576965, "learning_rate": 8.853560407187184e-06, "loss": 0.0043, "step": 72860 }, { "epoch": 0.5959030134521813, "grad_norm": 0.03191529959440231, "learning_rate": 8.853105653499448e-06, "loss": 0.0047, "step": 72870 }, { "epoch": 0.5959847896307805, "grad_norm": 0.011698461137712002, "learning_rate": 8.852650821320816e-06, "loss": 0.0018, "step": 72880 }, { "epoch": 0.5960665658093798, "grad_norm": 0.07860764116048813, "learning_rate": 8.852195910660557e-06, "loss": 0.0011, "step": 72890 }, { "epoch": 0.596148341987979, "grad_norm": 0.10350678861141205, "learning_rate": 8.851740921527934e-06, "loss": 0.0028, "step": 72900 }, { "epoch": 0.5962301181665781, "grad_norm": 0.004227575846016407, "learning_rate": 8.851285853932219e-06, "loss": 0.0017, "step": 72910 }, { "epoch": 0.5963118943451773, "grad_norm": 0.09516584873199463, "learning_rate": 8.85083070788268e-06, "loss": 0.0022, "step": 72920 }, { "epoch": 0.5963936705237765, "grad_norm": 0.07620242238044739, "learning_rate": 8.850375483388592e-06, "loss": 0.001, "step": 72930 }, { "epoch": 0.5964754467023756, "grad_norm": 0.0529586561024189, "learning_rate": 8.849920180459222e-06, "loss": 0.0016, "step": 72940 }, { "epoch": 0.5965572228809748, "grad_norm": 0.10349470376968384, "learning_rate": 8.849464799103851e-06, "loss": 0.0031, "step": 72950 }, { "epoch": 0.596638999059574, "grad_norm": 0.052728768438100815, "learning_rate": 8.849009339331752e-06, "loss": 0.0021, "step": 72960 }, { "epoch": 0.5967207752381731, "grad_norm": 0.023212237283587456, "learning_rate": 8.848553801152205e-06, "loss": 0.0011, "step": 72970 }, { "epoch": 0.5968025514167723, "grad_norm": 0.043598610907793045, "learning_rate": 8.848098184574487e-06, "loss": 0.0012, "step": 72980 }, { "epoch": 0.5968843275953715, "grad_norm": 0.04801846295595169, "learning_rate": 8.847642489607881e-06, "loss": 0.0011, "step": 72990 }, { "epoch": 0.5969661037739706, "grad_norm": 0.05412798747420311, "learning_rate": 8.847186716261672e-06, "loss": 0.0019, "step": 73000 }, { "epoch": 0.5970478799525698, "grad_norm": 0.05467844754457474, "learning_rate": 8.846730864545142e-06, "loss": 0.0023, "step": 73010 }, { "epoch": 0.597129656131169, "grad_norm": 0.038473036140203476, "learning_rate": 8.846274934467577e-06, "loss": 0.0021, "step": 73020 }, { "epoch": 0.5972114323097681, "grad_norm": 0.03432523086667061, "learning_rate": 8.845818926038265e-06, "loss": 0.0012, "step": 73030 }, { "epoch": 0.5972932084883673, "grad_norm": 0.03162389248609543, "learning_rate": 8.845362839266497e-06, "loss": 0.0022, "step": 73040 }, { "epoch": 0.5973749846669665, "grad_norm": 0.05577242374420166, "learning_rate": 8.84490667416156e-06, "loss": 0.0018, "step": 73050 }, { "epoch": 0.5974567608455656, "grad_norm": 0.048175886273384094, "learning_rate": 8.84445043073275e-06, "loss": 0.0019, "step": 73060 }, { "epoch": 0.5975385370241648, "grad_norm": 0.07446431368589401, "learning_rate": 8.84399410898936e-06, "loss": 0.0023, "step": 73070 }, { "epoch": 0.597620313202764, "grad_norm": 0.16105049848556519, "learning_rate": 8.843537708940684e-06, "loss": 0.0021, "step": 73080 }, { "epoch": 0.5977020893813632, "grad_norm": 0.0059441858902573586, "learning_rate": 8.843081230596022e-06, "loss": 0.0034, "step": 73090 }, { "epoch": 0.5977838655599624, "grad_norm": 0.03919699043035507, "learning_rate": 8.84262467396467e-06, "loss": 0.0009, "step": 73100 }, { "epoch": 0.5978656417385616, "grad_norm": 0.10478091984987259, "learning_rate": 8.842168039055931e-06, "loss": 0.0019, "step": 73110 }, { "epoch": 0.5979474179171608, "grad_norm": 0.08083273470401764, "learning_rate": 8.841711325879108e-06, "loss": 0.0022, "step": 73120 }, { "epoch": 0.5980291940957599, "grad_norm": 0.047955334186553955, "learning_rate": 8.8412545344435e-06, "loss": 0.0012, "step": 73130 }, { "epoch": 0.5981109702743591, "grad_norm": 0.08128184080123901, "learning_rate": 8.840797664758416e-06, "loss": 0.0034, "step": 73140 }, { "epoch": 0.5981927464529583, "grad_norm": 0.07257790863513947, "learning_rate": 8.84034071683316e-06, "loss": 0.0017, "step": 73150 }, { "epoch": 0.5982745226315574, "grad_norm": 0.08632177114486694, "learning_rate": 8.839883690677043e-06, "loss": 0.0026, "step": 73160 }, { "epoch": 0.5983562988101566, "grad_norm": 0.06722696870565414, "learning_rate": 8.839426586299374e-06, "loss": 0.0017, "step": 73170 }, { "epoch": 0.5984380749887558, "grad_norm": 0.004158978350460529, "learning_rate": 8.838969403709465e-06, "loss": 0.0015, "step": 73180 }, { "epoch": 0.598519851167355, "grad_norm": 0.038996197283267975, "learning_rate": 8.838512142916628e-06, "loss": 0.0012, "step": 73190 }, { "epoch": 0.5986016273459541, "grad_norm": 0.0036007247399538755, "learning_rate": 8.83805480393018e-06, "loss": 0.0012, "step": 73200 }, { "epoch": 0.5986834035245533, "grad_norm": 0.0047093345783650875, "learning_rate": 8.837597386759435e-06, "loss": 0.0025, "step": 73210 }, { "epoch": 0.5987651797031525, "grad_norm": 0.049989003688097, "learning_rate": 8.83713989141371e-06, "loss": 0.0019, "step": 73220 }, { "epoch": 0.5988469558817516, "grad_norm": 0.034447312355041504, "learning_rate": 8.83668231790233e-06, "loss": 0.0014, "step": 73230 }, { "epoch": 0.5989287320603508, "grad_norm": 0.06609445065259933, "learning_rate": 8.836224666234611e-06, "loss": 0.0017, "step": 73240 }, { "epoch": 0.59901050823895, "grad_norm": 0.014981704764068127, "learning_rate": 8.835766936419876e-06, "loss": 0.0019, "step": 73250 }, { "epoch": 0.5990922844175491, "grad_norm": 0.1433173567056656, "learning_rate": 8.835309128467454e-06, "loss": 0.0025, "step": 73260 }, { "epoch": 0.5991740605961483, "grad_norm": 0.1096976175904274, "learning_rate": 8.834851242386664e-06, "loss": 0.0015, "step": 73270 }, { "epoch": 0.5992558367747475, "grad_norm": 0.16328231990337372, "learning_rate": 8.834393278186839e-06, "loss": 0.0014, "step": 73280 }, { "epoch": 0.5993376129533466, "grad_norm": 0.07653024792671204, "learning_rate": 8.833935235877304e-06, "loss": 0.0013, "step": 73290 }, { "epoch": 0.5994193891319458, "grad_norm": 0.13483217358589172, "learning_rate": 8.833477115467396e-06, "loss": 0.0019, "step": 73300 }, { "epoch": 0.5995011653105451, "grad_norm": 0.10274215042591095, "learning_rate": 8.833018916966438e-06, "loss": 0.002, "step": 73310 }, { "epoch": 0.5995829414891443, "grad_norm": 0.19793999195098877, "learning_rate": 8.832560640383772e-06, "loss": 0.0017, "step": 73320 }, { "epoch": 0.5996647176677434, "grad_norm": 0.01798834651708603, "learning_rate": 8.83210228572873e-06, "loss": 0.0024, "step": 73330 }, { "epoch": 0.5997464938463426, "grad_norm": 0.04681325703859329, "learning_rate": 8.83164385301065e-06, "loss": 0.0027, "step": 73340 }, { "epoch": 0.5998282700249418, "grad_norm": 0.1006145179271698, "learning_rate": 8.83118534223887e-06, "loss": 0.002, "step": 73350 }, { "epoch": 0.5999100462035409, "grad_norm": 0.05473542585968971, "learning_rate": 8.83072675342273e-06, "loss": 0.0018, "step": 73360 }, { "epoch": 0.5999918223821401, "grad_norm": 0.012354833073914051, "learning_rate": 8.830268086571573e-06, "loss": 0.0021, "step": 73370 }, { "epoch": 0.6000735985607393, "grad_norm": 0.03788018599152565, "learning_rate": 8.829809341694742e-06, "loss": 0.0032, "step": 73380 }, { "epoch": 0.6001553747393384, "grad_norm": 0.051058199256658554, "learning_rate": 8.82935051880158e-06, "loss": 0.0044, "step": 73390 }, { "epoch": 0.6002371509179376, "grad_norm": 0.028816021978855133, "learning_rate": 8.828891617901436e-06, "loss": 0.0056, "step": 73400 }, { "epoch": 0.6003189270965368, "grad_norm": 0.036244262009859085, "learning_rate": 8.828432639003659e-06, "loss": 0.0016, "step": 73410 }, { "epoch": 0.600400703275136, "grad_norm": 0.20404094457626343, "learning_rate": 8.827973582117595e-06, "loss": 0.0026, "step": 73420 }, { "epoch": 0.6004824794537351, "grad_norm": 0.057381194084882736, "learning_rate": 8.827514447252598e-06, "loss": 0.0031, "step": 73430 }, { "epoch": 0.6005642556323343, "grad_norm": 0.0093376524746418, "learning_rate": 8.827055234418022e-06, "loss": 0.0027, "step": 73440 }, { "epoch": 0.6006460318109335, "grad_norm": 0.038888465613126755, "learning_rate": 8.826595943623219e-06, "loss": 0.0017, "step": 73450 }, { "epoch": 0.6007278079895326, "grad_norm": 0.012226032093167305, "learning_rate": 8.826136574877546e-06, "loss": 0.0013, "step": 73460 }, { "epoch": 0.6008095841681318, "grad_norm": 0.07068027555942535, "learning_rate": 8.825677128190362e-06, "loss": 0.0038, "step": 73470 }, { "epoch": 0.600891360346731, "grad_norm": 0.10652656108140945, "learning_rate": 8.825217603571024e-06, "loss": 0.0021, "step": 73480 }, { "epoch": 0.6009731365253301, "grad_norm": 0.09537801146507263, "learning_rate": 8.824758001028895e-06, "loss": 0.0014, "step": 73490 }, { "epoch": 0.6010549127039293, "grad_norm": 0.4857293963432312, "learning_rate": 8.824298320573336e-06, "loss": 0.0015, "step": 73500 }, { "epoch": 0.6011366888825285, "grad_norm": 0.1647672951221466, "learning_rate": 8.823838562213712e-06, "loss": 0.0035, "step": 73510 }, { "epoch": 0.6012184650611276, "grad_norm": 0.007902686484158039, "learning_rate": 8.823378725959387e-06, "loss": 0.0024, "step": 73520 }, { "epoch": 0.6013002412397269, "grad_norm": 0.15509147942066193, "learning_rate": 8.822918811819732e-06, "loss": 0.0014, "step": 73530 }, { "epoch": 0.6013820174183261, "grad_norm": 0.05218661576509476, "learning_rate": 8.822458819804112e-06, "loss": 0.0028, "step": 73540 }, { "epoch": 0.6014637935969253, "grad_norm": 0.12559522688388824, "learning_rate": 8.821998749921898e-06, "loss": 0.0029, "step": 73550 }, { "epoch": 0.6015455697755244, "grad_norm": 0.08860786259174347, "learning_rate": 8.821538602182464e-06, "loss": 0.0023, "step": 73560 }, { "epoch": 0.6016273459541236, "grad_norm": 0.01728895865380764, "learning_rate": 8.821078376595183e-06, "loss": 0.0016, "step": 73570 }, { "epoch": 0.6017091221327228, "grad_norm": 0.07032535970211029, "learning_rate": 8.820618073169427e-06, "loss": 0.0029, "step": 73580 }, { "epoch": 0.6017908983113219, "grad_norm": 0.0325942300260067, "learning_rate": 8.820157691914577e-06, "loss": 0.0021, "step": 73590 }, { "epoch": 0.6018726744899211, "grad_norm": 0.04585428163409233, "learning_rate": 8.819697232840009e-06, "loss": 0.0013, "step": 73600 }, { "epoch": 0.6019544506685203, "grad_norm": 0.08846163004636765, "learning_rate": 8.819236695955104e-06, "loss": 0.0013, "step": 73610 }, { "epoch": 0.6020362268471194, "grad_norm": 0.008891656994819641, "learning_rate": 8.818776081269244e-06, "loss": 0.0024, "step": 73620 }, { "epoch": 0.6021180030257186, "grad_norm": 0.09417679905891418, "learning_rate": 8.818315388791812e-06, "loss": 0.0021, "step": 73630 }, { "epoch": 0.6021997792043178, "grad_norm": 0.015587669797241688, "learning_rate": 8.81785461853219e-06, "loss": 0.0018, "step": 73640 }, { "epoch": 0.6022815553829169, "grad_norm": 0.00983260478824377, "learning_rate": 8.817393770499765e-06, "loss": 0.0016, "step": 73650 }, { "epoch": 0.6023633315615161, "grad_norm": 0.03616955876350403, "learning_rate": 8.816932844703926e-06, "loss": 0.0016, "step": 73660 }, { "epoch": 0.6024451077401153, "grad_norm": 0.05395341292023659, "learning_rate": 8.816471841154064e-06, "loss": 0.005, "step": 73670 }, { "epoch": 0.6025268839187145, "grad_norm": 0.18943317234516144, "learning_rate": 8.81601075985957e-06, "loss": 0.0033, "step": 73680 }, { "epoch": 0.6026086600973136, "grad_norm": 0.02263188548386097, "learning_rate": 8.815549600829832e-06, "loss": 0.0025, "step": 73690 }, { "epoch": 0.6026904362759128, "grad_norm": 0.08228340744972229, "learning_rate": 8.81508836407425e-06, "loss": 0.0016, "step": 73700 }, { "epoch": 0.602772212454512, "grad_norm": 0.01762707531452179, "learning_rate": 8.814627049602213e-06, "loss": 0.0021, "step": 73710 }, { "epoch": 0.6028539886331111, "grad_norm": 0.03716045245528221, "learning_rate": 8.814165657423126e-06, "loss": 0.002, "step": 73720 }, { "epoch": 0.6029357648117103, "grad_norm": 0.6168786287307739, "learning_rate": 8.813704187546382e-06, "loss": 0.0024, "step": 73730 }, { "epoch": 0.6030175409903096, "grad_norm": 0.07966011017560959, "learning_rate": 8.813242639981385e-06, "loss": 0.004, "step": 73740 }, { "epoch": 0.6030993171689087, "grad_norm": 0.0955360010266304, "learning_rate": 8.812781014737534e-06, "loss": 0.0018, "step": 73750 }, { "epoch": 0.6031810933475079, "grad_norm": 0.08779488503932953, "learning_rate": 8.812319311824236e-06, "loss": 0.002, "step": 73760 }, { "epoch": 0.6032628695261071, "grad_norm": 0.029480615630745888, "learning_rate": 8.811857531250896e-06, "loss": 0.0035, "step": 73770 }, { "epoch": 0.6033446457047063, "grad_norm": 0.0451054722070694, "learning_rate": 8.81139567302692e-06, "loss": 0.0018, "step": 73780 }, { "epoch": 0.6034264218833054, "grad_norm": 0.017433151602745056, "learning_rate": 8.810933737161713e-06, "loss": 0.002, "step": 73790 }, { "epoch": 0.6035081980619046, "grad_norm": 0.10893980413675308, "learning_rate": 8.81047172366469e-06, "loss": 0.0015, "step": 73800 }, { "epoch": 0.6035899742405038, "grad_norm": 0.001773212687112391, "learning_rate": 8.81000963254526e-06, "loss": 0.0013, "step": 73810 }, { "epoch": 0.6036717504191029, "grad_norm": 0.028840413317084312, "learning_rate": 8.809547463812836e-06, "loss": 0.0016, "step": 73820 }, { "epoch": 0.6037535265977021, "grad_norm": 0.06998056918382645, "learning_rate": 8.809085217476836e-06, "loss": 0.003, "step": 73830 }, { "epoch": 0.6038353027763013, "grad_norm": 0.07955551892518997, "learning_rate": 8.808622893546671e-06, "loss": 0.0027, "step": 73840 }, { "epoch": 0.6039170789549004, "grad_norm": 0.08376646786928177, "learning_rate": 8.808160492031765e-06, "loss": 0.0018, "step": 73850 }, { "epoch": 0.6039988551334996, "grad_norm": 0.07501192390918732, "learning_rate": 8.807698012941532e-06, "loss": 0.0022, "step": 73860 }, { "epoch": 0.6040806313120988, "grad_norm": 0.13326244056224823, "learning_rate": 8.807235456285397e-06, "loss": 0.0018, "step": 73870 }, { "epoch": 0.6041624074906979, "grad_norm": 0.030712787061929703, "learning_rate": 8.80677282207278e-06, "loss": 0.0022, "step": 73880 }, { "epoch": 0.6042441836692971, "grad_norm": 0.04523042216897011, "learning_rate": 8.806310110313107e-06, "loss": 0.0015, "step": 73890 }, { "epoch": 0.6043259598478963, "grad_norm": 0.03178895637392998, "learning_rate": 8.805847321015804e-06, "loss": 0.0022, "step": 73900 }, { "epoch": 0.6044077360264954, "grad_norm": 0.07455611228942871, "learning_rate": 8.805384454190296e-06, "loss": 0.002, "step": 73910 }, { "epoch": 0.6044895122050946, "grad_norm": 0.03639276325702667, "learning_rate": 8.804921509846014e-06, "loss": 0.0016, "step": 73920 }, { "epoch": 0.6045712883836938, "grad_norm": 0.03540019318461418, "learning_rate": 8.804458487992386e-06, "loss": 0.0023, "step": 73930 }, { "epoch": 0.604653064562293, "grad_norm": 0.4007820188999176, "learning_rate": 8.803995388638849e-06, "loss": 0.0033, "step": 73940 }, { "epoch": 0.6047348407408922, "grad_norm": 0.03134804591536522, "learning_rate": 8.803532211794833e-06, "loss": 0.003, "step": 73950 }, { "epoch": 0.6048166169194914, "grad_norm": 0.09986664354801178, "learning_rate": 8.803068957469773e-06, "loss": 0.0016, "step": 73960 }, { "epoch": 0.6048983930980906, "grad_norm": 0.04438428208231926, "learning_rate": 8.802605625673109e-06, "loss": 0.003, "step": 73970 }, { "epoch": 0.6049801692766897, "grad_norm": 0.11012955009937286, "learning_rate": 8.802142216414279e-06, "loss": 0.0013, "step": 73980 }, { "epoch": 0.6050619454552889, "grad_norm": 0.051192380487918854, "learning_rate": 8.801678729702716e-06, "loss": 0.0014, "step": 73990 }, { "epoch": 0.6051437216338881, "grad_norm": 0.042749516665935516, "learning_rate": 8.801215165547872e-06, "loss": 0.0019, "step": 74000 }, { "epoch": 0.6052254978124872, "grad_norm": 0.01937190815806389, "learning_rate": 8.800751523959184e-06, "loss": 0.0024, "step": 74010 }, { "epoch": 0.6053072739910864, "grad_norm": 0.036483701318502426, "learning_rate": 8.800287804946096e-06, "loss": 0.0014, "step": 74020 }, { "epoch": 0.6053890501696856, "grad_norm": 0.06503702700138092, "learning_rate": 8.799824008518059e-06, "loss": 0.0013, "step": 74030 }, { "epoch": 0.6054708263482848, "grad_norm": 0.014074428007006645, "learning_rate": 8.799360134684517e-06, "loss": 0.0029, "step": 74040 }, { "epoch": 0.6055526025268839, "grad_norm": 0.03288230299949646, "learning_rate": 8.798896183454921e-06, "loss": 0.0014, "step": 74050 }, { "epoch": 0.6056343787054831, "grad_norm": 0.09220060706138611, "learning_rate": 8.798432154838722e-06, "loss": 0.0027, "step": 74060 }, { "epoch": 0.6057161548840823, "grad_norm": 0.04726899042725563, "learning_rate": 8.797968048845373e-06, "loss": 0.0012, "step": 74070 }, { "epoch": 0.6057979310626814, "grad_norm": 0.01895926706492901, "learning_rate": 8.797503865484327e-06, "loss": 0.0018, "step": 74080 }, { "epoch": 0.6058797072412806, "grad_norm": 0.11327879875898361, "learning_rate": 8.79703960476504e-06, "loss": 0.0026, "step": 74090 }, { "epoch": 0.6059614834198798, "grad_norm": 0.09047142416238785, "learning_rate": 8.796575266696971e-06, "loss": 0.0034, "step": 74100 }, { "epoch": 0.6060432595984789, "grad_norm": 0.22944511473178864, "learning_rate": 8.796110851289577e-06, "loss": 0.0033, "step": 74110 }, { "epoch": 0.6061250357770781, "grad_norm": 0.06064813211560249, "learning_rate": 8.79564635855232e-06, "loss": 0.0029, "step": 74120 }, { "epoch": 0.6062068119556773, "grad_norm": 0.016726914793252945, "learning_rate": 8.795181788494661e-06, "loss": 0.0019, "step": 74130 }, { "epoch": 0.6062885881342764, "grad_norm": 0.6051810383796692, "learning_rate": 8.794717141126064e-06, "loss": 0.0037, "step": 74140 }, { "epoch": 0.6063703643128756, "grad_norm": 0.04798702523112297, "learning_rate": 8.794252416455995e-06, "loss": 0.0021, "step": 74150 }, { "epoch": 0.6064521404914748, "grad_norm": 0.10719854384660721, "learning_rate": 8.793787614493919e-06, "loss": 0.0017, "step": 74160 }, { "epoch": 0.6065339166700741, "grad_norm": 0.059859875589609146, "learning_rate": 8.793322735249306e-06, "loss": 0.0023, "step": 74170 }, { "epoch": 0.6066156928486732, "grad_norm": 0.28690531849861145, "learning_rate": 8.792857778731626e-06, "loss": 0.0014, "step": 74180 }, { "epoch": 0.6066974690272724, "grad_norm": 0.032684940844774246, "learning_rate": 8.79239274495035e-06, "loss": 0.0025, "step": 74190 }, { "epoch": 0.6067792452058716, "grad_norm": 0.08258825540542603, "learning_rate": 8.79192763391495e-06, "loss": 0.002, "step": 74200 }, { "epoch": 0.6068610213844707, "grad_norm": 0.04439646005630493, "learning_rate": 8.791462445634904e-06, "loss": 0.0023, "step": 74210 }, { "epoch": 0.6069427975630699, "grad_norm": 0.13413628935813904, "learning_rate": 8.790997180119685e-06, "loss": 0.0022, "step": 74220 }, { "epoch": 0.6070245737416691, "grad_norm": 0.038567185401916504, "learning_rate": 8.79053183737877e-06, "loss": 0.0024, "step": 74230 }, { "epoch": 0.6071063499202682, "grad_norm": 0.06195969879627228, "learning_rate": 8.790066417421644e-06, "loss": 0.0016, "step": 74240 }, { "epoch": 0.6071881260988674, "grad_norm": 0.015220209024846554, "learning_rate": 8.789600920257781e-06, "loss": 0.0036, "step": 74250 }, { "epoch": 0.6072699022774666, "grad_norm": 0.0753876343369484, "learning_rate": 8.789135345896669e-06, "loss": 0.0018, "step": 74260 }, { "epoch": 0.6073516784560657, "grad_norm": 0.12874050438404083, "learning_rate": 8.78866969434779e-06, "loss": 0.0018, "step": 74270 }, { "epoch": 0.6074334546346649, "grad_norm": 0.0353340208530426, "learning_rate": 8.78820396562063e-06, "loss": 0.0018, "step": 74280 }, { "epoch": 0.6075152308132641, "grad_norm": 0.12684111297130585, "learning_rate": 8.787738159724675e-06, "loss": 0.0022, "step": 74290 }, { "epoch": 0.6075970069918633, "grad_norm": 0.09446834027767181, "learning_rate": 8.787272276669414e-06, "loss": 0.0025, "step": 74300 }, { "epoch": 0.6076787831704624, "grad_norm": 0.060450632125139236, "learning_rate": 8.78680631646434e-06, "loss": 0.002, "step": 74310 }, { "epoch": 0.6077605593490616, "grad_norm": 0.08459126204252243, "learning_rate": 8.786340279118944e-06, "loss": 0.002, "step": 74320 }, { "epoch": 0.6078423355276608, "grad_norm": 0.019559865817427635, "learning_rate": 8.785874164642716e-06, "loss": 0.0008, "step": 74330 }, { "epoch": 0.6079241117062599, "grad_norm": 0.09618533402681351, "learning_rate": 8.785407973045156e-06, "loss": 0.0013, "step": 74340 }, { "epoch": 0.6080058878848591, "grad_norm": 0.07796560972929001, "learning_rate": 8.784941704335758e-06, "loss": 0.0027, "step": 74350 }, { "epoch": 0.6080876640634583, "grad_norm": 0.058355286717414856, "learning_rate": 8.78447535852402e-06, "loss": 0.0025, "step": 74360 }, { "epoch": 0.6081694402420574, "grad_norm": 0.08628678321838379, "learning_rate": 8.784008935619445e-06, "loss": 0.0018, "step": 74370 }, { "epoch": 0.6082512164206567, "grad_norm": 0.14340811967849731, "learning_rate": 8.783542435631531e-06, "loss": 0.0025, "step": 74380 }, { "epoch": 0.6083329925992559, "grad_norm": 0.04515235126018524, "learning_rate": 8.783075858569782e-06, "loss": 0.0019, "step": 74390 }, { "epoch": 0.608414768777855, "grad_norm": 0.14315658807754517, "learning_rate": 8.782609204443703e-06, "loss": 0.0016, "step": 74400 }, { "epoch": 0.6084965449564542, "grad_norm": 0.037698909640312195, "learning_rate": 8.7821424732628e-06, "loss": 0.002, "step": 74410 }, { "epoch": 0.6085783211350534, "grad_norm": 0.17256705462932587, "learning_rate": 8.78167566503658e-06, "loss": 0.0024, "step": 74420 }, { "epoch": 0.6086600973136526, "grad_norm": 0.05333298444747925, "learning_rate": 8.781208779774552e-06, "loss": 0.0017, "step": 74430 }, { "epoch": 0.6087418734922517, "grad_norm": 0.018872790038585663, "learning_rate": 8.780741817486228e-06, "loss": 0.0014, "step": 74440 }, { "epoch": 0.6088236496708509, "grad_norm": 0.045443449169397354, "learning_rate": 8.780274778181122e-06, "loss": 0.0019, "step": 74450 }, { "epoch": 0.6089054258494501, "grad_norm": 0.024885138496756554, "learning_rate": 8.779807661868746e-06, "loss": 0.0024, "step": 74460 }, { "epoch": 0.6089872020280492, "grad_norm": 0.11635679751634598, "learning_rate": 8.779340468558613e-06, "loss": 0.0022, "step": 74470 }, { "epoch": 0.6090689782066484, "grad_norm": 0.01937730424106121, "learning_rate": 8.778873198260244e-06, "loss": 0.0013, "step": 74480 }, { "epoch": 0.6091507543852476, "grad_norm": 0.04670146107673645, "learning_rate": 8.778405850983156e-06, "loss": 0.0028, "step": 74490 }, { "epoch": 0.6092325305638467, "grad_norm": 0.0057545253075659275, "learning_rate": 8.777938426736871e-06, "loss": 0.0021, "step": 74500 }, { "epoch": 0.6093143067424459, "grad_norm": 0.04055486246943474, "learning_rate": 8.777470925530907e-06, "loss": 0.003, "step": 74510 }, { "epoch": 0.6093960829210451, "grad_norm": 0.4160338342189789, "learning_rate": 8.777003347374792e-06, "loss": 0.0028, "step": 74520 }, { "epoch": 0.6094778590996442, "grad_norm": 0.04274529218673706, "learning_rate": 8.776535692278048e-06, "loss": 0.0009, "step": 74530 }, { "epoch": 0.6095596352782434, "grad_norm": 0.09717987477779388, "learning_rate": 8.7760679602502e-06, "loss": 0.0013, "step": 74540 }, { "epoch": 0.6096414114568426, "grad_norm": 0.033527955412864685, "learning_rate": 8.775600151300782e-06, "loss": 0.0021, "step": 74550 }, { "epoch": 0.6097231876354418, "grad_norm": 0.07802926003932953, "learning_rate": 8.775132265439318e-06, "loss": 0.0029, "step": 74560 }, { "epoch": 0.6098049638140409, "grad_norm": 0.051972292363643646, "learning_rate": 8.77466430267534e-06, "loss": 0.001, "step": 74570 }, { "epoch": 0.6098867399926401, "grad_norm": 0.04848092421889305, "learning_rate": 8.774196263018384e-06, "loss": 0.0032, "step": 74580 }, { "epoch": 0.6099685161712394, "grad_norm": 0.011892377398908138, "learning_rate": 8.773728146477982e-06, "loss": 0.0015, "step": 74590 }, { "epoch": 0.6100502923498385, "grad_norm": 0.05487799644470215, "learning_rate": 8.77325995306367e-06, "loss": 0.0016, "step": 74600 }, { "epoch": 0.6101320685284377, "grad_norm": 0.018976610153913498, "learning_rate": 8.772791682784985e-06, "loss": 0.003, "step": 74610 }, { "epoch": 0.6102138447070369, "grad_norm": 0.09616118669509888, "learning_rate": 8.772323335651468e-06, "loss": 0.0029, "step": 74620 }, { "epoch": 0.610295620885636, "grad_norm": 0.2225904017686844, "learning_rate": 8.771854911672656e-06, "loss": 0.0014, "step": 74630 }, { "epoch": 0.6103773970642352, "grad_norm": 0.057562824338674545, "learning_rate": 8.771386410858095e-06, "loss": 0.002, "step": 74640 }, { "epoch": 0.6104591732428344, "grad_norm": 0.08200231194496155, "learning_rate": 8.770917833217329e-06, "loss": 0.0035, "step": 74650 }, { "epoch": 0.6105409494214336, "grad_norm": 0.09853002429008484, "learning_rate": 8.7704491787599e-06, "loss": 0.0032, "step": 74660 }, { "epoch": 0.6106227256000327, "grad_norm": 0.18473616242408752, "learning_rate": 8.769980447495356e-06, "loss": 0.0026, "step": 74670 }, { "epoch": 0.6107045017786319, "grad_norm": 0.1409389227628708, "learning_rate": 8.769511639433247e-06, "loss": 0.0021, "step": 74680 }, { "epoch": 0.6107862779572311, "grad_norm": 0.02108701877295971, "learning_rate": 8.769042754583121e-06, "loss": 0.0019, "step": 74690 }, { "epoch": 0.6108680541358302, "grad_norm": 0.0451146624982357, "learning_rate": 8.76857379295453e-06, "loss": 0.0073, "step": 74700 }, { "epoch": 0.6109498303144294, "grad_norm": 0.15219973027706146, "learning_rate": 8.76810475455703e-06, "loss": 0.0025, "step": 74710 }, { "epoch": 0.6110316064930286, "grad_norm": 0.037775762379169464, "learning_rate": 8.76763563940017e-06, "loss": 0.0025, "step": 74720 }, { "epoch": 0.6111133826716277, "grad_norm": 0.04469795897603035, "learning_rate": 8.767166447493512e-06, "loss": 0.0029, "step": 74730 }, { "epoch": 0.6111951588502269, "grad_norm": 0.09311708807945251, "learning_rate": 8.766697178846611e-06, "loss": 0.0017, "step": 74740 }, { "epoch": 0.6112769350288261, "grad_norm": 0.022519739344716072, "learning_rate": 8.766227833469026e-06, "loss": 0.002, "step": 74750 }, { "epoch": 0.6113587112074252, "grad_norm": 0.21342730522155762, "learning_rate": 8.76575841137032e-06, "loss": 0.0024, "step": 74760 }, { "epoch": 0.6114404873860244, "grad_norm": 0.10700143128633499, "learning_rate": 8.76528891256005e-06, "loss": 0.0017, "step": 74770 }, { "epoch": 0.6115222635646236, "grad_norm": 0.10237443447113037, "learning_rate": 8.76481933704779e-06, "loss": 0.0013, "step": 74780 }, { "epoch": 0.6116040397432227, "grad_norm": 0.1255197674036026, "learning_rate": 8.764349684843096e-06, "loss": 0.0015, "step": 74790 }, { "epoch": 0.6116858159218219, "grad_norm": 0.016605252400040627, "learning_rate": 8.763879955955542e-06, "loss": 0.0024, "step": 74800 }, { "epoch": 0.6117675921004212, "grad_norm": 0.10592179745435715, "learning_rate": 8.763410150394692e-06, "loss": 0.0033, "step": 74810 }, { "epoch": 0.6118493682790204, "grad_norm": 0.11382066458463669, "learning_rate": 8.762940268170118e-06, "loss": 0.0019, "step": 74820 }, { "epoch": 0.6119311444576195, "grad_norm": 0.05041682720184326, "learning_rate": 8.762470309291391e-06, "loss": 0.0018, "step": 74830 }, { "epoch": 0.6120129206362187, "grad_norm": 0.1256207376718521, "learning_rate": 8.762000273768088e-06, "loss": 0.0013, "step": 74840 }, { "epoch": 0.6120946968148179, "grad_norm": 0.05866647884249687, "learning_rate": 8.761530161609781e-06, "loss": 0.0018, "step": 74850 }, { "epoch": 0.612176472993417, "grad_norm": 0.12432091683149338, "learning_rate": 8.761059972826045e-06, "loss": 0.0019, "step": 74860 }, { "epoch": 0.6122582491720162, "grad_norm": 0.058626528829336166, "learning_rate": 8.760589707426463e-06, "loss": 0.0024, "step": 74870 }, { "epoch": 0.6123400253506154, "grad_norm": 0.08934081345796585, "learning_rate": 8.76011936542061e-06, "loss": 0.0017, "step": 74880 }, { "epoch": 0.6124218015292145, "grad_norm": 0.12465239316225052, "learning_rate": 8.75964894681807e-06, "loss": 0.0026, "step": 74890 }, { "epoch": 0.6125035777078137, "grad_norm": 0.04818132519721985, "learning_rate": 8.759178451628425e-06, "loss": 0.0024, "step": 74900 }, { "epoch": 0.6125853538864129, "grad_norm": 0.08849989622831345, "learning_rate": 8.75870787986126e-06, "loss": 0.0016, "step": 74910 }, { "epoch": 0.6126671300650121, "grad_norm": 0.12784220278263092, "learning_rate": 8.75823723152616e-06, "loss": 0.0023, "step": 74920 }, { "epoch": 0.6127489062436112, "grad_norm": 0.04706248268485069, "learning_rate": 8.75776650663271e-06, "loss": 0.0013, "step": 74930 }, { "epoch": 0.6128306824222104, "grad_norm": 0.1424582451581955, "learning_rate": 8.757295705190505e-06, "loss": 0.0037, "step": 74940 }, { "epoch": 0.6129124586008096, "grad_norm": 0.08023513853549957, "learning_rate": 8.756824827209132e-06, "loss": 0.0017, "step": 74950 }, { "epoch": 0.6129942347794087, "grad_norm": 0.11313124001026154, "learning_rate": 8.756353872698182e-06, "loss": 0.0028, "step": 74960 }, { "epoch": 0.6130760109580079, "grad_norm": 0.05013517662882805, "learning_rate": 8.755882841667252e-06, "loss": 0.0013, "step": 74970 }, { "epoch": 0.6131577871366071, "grad_norm": 0.06230544298887253, "learning_rate": 8.755411734125933e-06, "loss": 0.0033, "step": 74980 }, { "epoch": 0.6132395633152062, "grad_norm": 0.03655058518052101, "learning_rate": 8.754940550083827e-06, "loss": 0.0017, "step": 74990 }, { "epoch": 0.6133213394938054, "grad_norm": 0.05676355957984924, "learning_rate": 8.75446928955053e-06, "loss": 0.0017, "step": 75000 }, { "epoch": 0.6134031156724046, "grad_norm": 0.04366437718272209, "learning_rate": 8.75399795253564e-06, "loss": 0.0016, "step": 75010 }, { "epoch": 0.6134848918510039, "grad_norm": 0.10789195448160172, "learning_rate": 8.75352653904876e-06, "loss": 0.0026, "step": 75020 }, { "epoch": 0.613566668029603, "grad_norm": 0.04025629907846451, "learning_rate": 8.753055049099494e-06, "loss": 0.001, "step": 75030 }, { "epoch": 0.6136484442082022, "grad_norm": 0.053052373230457306, "learning_rate": 8.752583482697448e-06, "loss": 0.0017, "step": 75040 }, { "epoch": 0.6137302203868014, "grad_norm": 0.17891135811805725, "learning_rate": 8.752111839852224e-06, "loss": 0.0033, "step": 75050 }, { "epoch": 0.6138119965654005, "grad_norm": 0.0738123282790184, "learning_rate": 8.751640120573433e-06, "loss": 0.0018, "step": 75060 }, { "epoch": 0.6138937727439997, "grad_norm": 0.06414433568716049, "learning_rate": 8.751168324870682e-06, "loss": 0.0026, "step": 75070 }, { "epoch": 0.6139755489225989, "grad_norm": 0.011222416535019875, "learning_rate": 8.750696452753584e-06, "loss": 0.0013, "step": 75080 }, { "epoch": 0.614057325101198, "grad_norm": 0.21104921400547028, "learning_rate": 8.750224504231751e-06, "loss": 0.0018, "step": 75090 }, { "epoch": 0.6141391012797972, "grad_norm": 0.007820617407560349, "learning_rate": 8.749752479314797e-06, "loss": 0.003, "step": 75100 }, { "epoch": 0.6142208774583964, "grad_norm": 0.09453903883695602, "learning_rate": 8.749280378012335e-06, "loss": 0.0015, "step": 75110 }, { "epoch": 0.6143026536369955, "grad_norm": 0.041144657880067825, "learning_rate": 8.748808200333985e-06, "loss": 0.0028, "step": 75120 }, { "epoch": 0.6143844298155947, "grad_norm": 0.026583323255181313, "learning_rate": 8.748335946289365e-06, "loss": 0.0015, "step": 75130 }, { "epoch": 0.6144662059941939, "grad_norm": 0.09676150977611542, "learning_rate": 8.747863615888097e-06, "loss": 0.0039, "step": 75140 }, { "epoch": 0.614547982172793, "grad_norm": 0.061074890196323395, "learning_rate": 8.747391209139798e-06, "loss": 0.0034, "step": 75150 }, { "epoch": 0.6146297583513922, "grad_norm": 0.01629435271024704, "learning_rate": 8.746918726054095e-06, "loss": 0.0035, "step": 75160 }, { "epoch": 0.6147115345299914, "grad_norm": 0.01956268399953842, "learning_rate": 8.74644616664061e-06, "loss": 0.0027, "step": 75170 }, { "epoch": 0.6147933107085906, "grad_norm": 0.05137702822685242, "learning_rate": 8.745973530908974e-06, "loss": 0.0024, "step": 75180 }, { "epoch": 0.6148750868871897, "grad_norm": 0.09906715154647827, "learning_rate": 8.74550081886881e-06, "loss": 0.0045, "step": 75190 }, { "epoch": 0.6149568630657889, "grad_norm": 0.02451326884329319, "learning_rate": 8.745028030529753e-06, "loss": 0.0019, "step": 75200 }, { "epoch": 0.6150386392443881, "grad_norm": 0.08861206471920013, "learning_rate": 8.744555165901428e-06, "loss": 0.0035, "step": 75210 }, { "epoch": 0.6151204154229872, "grad_norm": 0.07521452009677887, "learning_rate": 8.744082224993473e-06, "loss": 0.0021, "step": 75220 }, { "epoch": 0.6152021916015865, "grad_norm": 0.03018125332891941, "learning_rate": 8.743609207815517e-06, "loss": 0.0042, "step": 75230 }, { "epoch": 0.6152839677801857, "grad_norm": 0.16170921921730042, "learning_rate": 8.7431361143772e-06, "loss": 0.0045, "step": 75240 }, { "epoch": 0.6153657439587848, "grad_norm": 0.09684595465660095, "learning_rate": 8.742662944688158e-06, "loss": 0.0024, "step": 75250 }, { "epoch": 0.615447520137384, "grad_norm": 0.056509554386138916, "learning_rate": 8.742189698758028e-06, "loss": 0.0019, "step": 75260 }, { "epoch": 0.6155292963159832, "grad_norm": 0.12840671837329865, "learning_rate": 8.741716376596454e-06, "loss": 0.0016, "step": 75270 }, { "epoch": 0.6156110724945824, "grad_norm": 0.4158878028392792, "learning_rate": 8.741242978213075e-06, "loss": 0.0034, "step": 75280 }, { "epoch": 0.6156928486731815, "grad_norm": 0.02425520494580269, "learning_rate": 8.740769503617535e-06, "loss": 0.0019, "step": 75290 }, { "epoch": 0.6157746248517807, "grad_norm": 0.04063045233488083, "learning_rate": 8.740295952819481e-06, "loss": 0.0018, "step": 75300 }, { "epoch": 0.6158564010303799, "grad_norm": 0.04687061533331871, "learning_rate": 8.739822325828558e-06, "loss": 0.0012, "step": 75310 }, { "epoch": 0.615938177208979, "grad_norm": 0.07429538667201996, "learning_rate": 8.739348622654413e-06, "loss": 0.0018, "step": 75320 }, { "epoch": 0.6160199533875782, "grad_norm": 0.019190924242138863, "learning_rate": 8.738874843306698e-06, "loss": 0.0012, "step": 75330 }, { "epoch": 0.6161017295661774, "grad_norm": 0.07337324321269989, "learning_rate": 8.738400987795064e-06, "loss": 0.0016, "step": 75340 }, { "epoch": 0.6161835057447765, "grad_norm": 0.01618591509759426, "learning_rate": 8.73792705612916e-06, "loss": 0.0009, "step": 75350 }, { "epoch": 0.6162652819233757, "grad_norm": 0.06394974887371063, "learning_rate": 8.737453048318648e-06, "loss": 0.0036, "step": 75360 }, { "epoch": 0.6163470581019749, "grad_norm": 0.004579589702188969, "learning_rate": 8.736978964373178e-06, "loss": 0.0019, "step": 75370 }, { "epoch": 0.616428834280574, "grad_norm": 0.0697464644908905, "learning_rate": 8.736504804302406e-06, "loss": 0.0018, "step": 75380 }, { "epoch": 0.6165106104591732, "grad_norm": 0.1730012744665146, "learning_rate": 8.736030568115997e-06, "loss": 0.0024, "step": 75390 }, { "epoch": 0.6165923866377724, "grad_norm": 0.12384051829576492, "learning_rate": 8.735556255823607e-06, "loss": 0.003, "step": 75400 }, { "epoch": 0.6166741628163716, "grad_norm": 0.10750815272331238, "learning_rate": 8.7350818674349e-06, "loss": 0.0015, "step": 75410 }, { "epoch": 0.6167559389949707, "grad_norm": 0.12399350851774216, "learning_rate": 8.734607402959542e-06, "loss": 0.0021, "step": 75420 }, { "epoch": 0.6168377151735699, "grad_norm": 0.09044699370861053, "learning_rate": 8.734132862407192e-06, "loss": 0.0029, "step": 75430 }, { "epoch": 0.6169194913521691, "grad_norm": 0.018514923751354218, "learning_rate": 8.733658245787522e-06, "loss": 0.0023, "step": 75440 }, { "epoch": 0.6170012675307683, "grad_norm": 0.11659050732851028, "learning_rate": 8.733183553110197e-06, "loss": 0.002, "step": 75450 }, { "epoch": 0.6170830437093675, "grad_norm": 0.02309485711157322, "learning_rate": 8.732708784384892e-06, "loss": 0.002, "step": 75460 }, { "epoch": 0.6171648198879667, "grad_norm": 0.09406552463769913, "learning_rate": 8.732233939621271e-06, "loss": 0.0037, "step": 75470 }, { "epoch": 0.6172465960665658, "grad_norm": 0.05051153153181076, "learning_rate": 8.731759018829012e-06, "loss": 0.002, "step": 75480 }, { "epoch": 0.617328372245165, "grad_norm": 0.14314065873622894, "learning_rate": 8.73128402201779e-06, "loss": 0.0021, "step": 75490 }, { "epoch": 0.6174101484237642, "grad_norm": 0.051717862486839294, "learning_rate": 8.730808949197279e-06, "loss": 0.0011, "step": 75500 }, { "epoch": 0.6174919246023634, "grad_norm": 0.033395420759916306, "learning_rate": 8.730333800377155e-06, "loss": 0.0014, "step": 75510 }, { "epoch": 0.6175737007809625, "grad_norm": 0.040485527366399765, "learning_rate": 8.729858575567103e-06, "loss": 0.0013, "step": 75520 }, { "epoch": 0.6176554769595617, "grad_norm": 0.1037692129611969, "learning_rate": 8.729383274776797e-06, "loss": 0.0017, "step": 75530 }, { "epoch": 0.6177372531381609, "grad_norm": 0.28636983036994934, "learning_rate": 8.728907898015924e-06, "loss": 0.0021, "step": 75540 }, { "epoch": 0.61781902931676, "grad_norm": 0.04812251403927803, "learning_rate": 8.728432445294165e-06, "loss": 0.0021, "step": 75550 }, { "epoch": 0.6179008054953592, "grad_norm": 0.12741488218307495, "learning_rate": 8.727956916621208e-06, "loss": 0.0015, "step": 75560 }, { "epoch": 0.6179825816739584, "grad_norm": 0.011012034490704536, "learning_rate": 8.727481312006736e-06, "loss": 0.0027, "step": 75570 }, { "epoch": 0.6180643578525575, "grad_norm": 0.11686282604932785, "learning_rate": 8.727005631460443e-06, "loss": 0.0021, "step": 75580 }, { "epoch": 0.6181461340311567, "grad_norm": 0.07499835640192032, "learning_rate": 8.726529874992014e-06, "loss": 0.0023, "step": 75590 }, { "epoch": 0.6182279102097559, "grad_norm": 0.037319086492061615, "learning_rate": 8.726054042611141e-06, "loss": 0.0027, "step": 75600 }, { "epoch": 0.618309686388355, "grad_norm": 0.17371578514575958, "learning_rate": 8.725578134327521e-06, "loss": 0.0018, "step": 75610 }, { "epoch": 0.6183914625669542, "grad_norm": 0.010230245999991894, "learning_rate": 8.725102150150845e-06, "loss": 0.0012, "step": 75620 }, { "epoch": 0.6184732387455534, "grad_norm": 0.0680946335196495, "learning_rate": 8.72462609009081e-06, "loss": 0.0046, "step": 75630 }, { "epoch": 0.6185550149241525, "grad_norm": 0.07926985621452332, "learning_rate": 8.724149954157115e-06, "loss": 0.0031, "step": 75640 }, { "epoch": 0.6186367911027517, "grad_norm": 0.0441337414085865, "learning_rate": 8.723673742359457e-06, "loss": 0.0021, "step": 75650 }, { "epoch": 0.618718567281351, "grad_norm": 0.0441722497344017, "learning_rate": 8.723197454707539e-06, "loss": 0.0026, "step": 75660 }, { "epoch": 0.6188003434599502, "grad_norm": 0.037862904369831085, "learning_rate": 8.722721091211062e-06, "loss": 0.0019, "step": 75670 }, { "epoch": 0.6188821196385493, "grad_norm": 0.0115685248747468, "learning_rate": 8.72224465187973e-06, "loss": 0.0028, "step": 75680 }, { "epoch": 0.6189638958171485, "grad_norm": 0.1382729411125183, "learning_rate": 8.72176813672325e-06, "loss": 0.001, "step": 75690 }, { "epoch": 0.6190456719957477, "grad_norm": 0.0863214060664177, "learning_rate": 8.721291545751327e-06, "loss": 0.0014, "step": 75700 }, { "epoch": 0.6191274481743468, "grad_norm": 0.24092303216457367, "learning_rate": 8.720814878973672e-06, "loss": 0.0022, "step": 75710 }, { "epoch": 0.619209224352946, "grad_norm": 0.07365529984235764, "learning_rate": 8.720338136399991e-06, "loss": 0.0059, "step": 75720 }, { "epoch": 0.6192910005315452, "grad_norm": 0.212127223610878, "learning_rate": 8.71986131804e-06, "loss": 0.0028, "step": 75730 }, { "epoch": 0.6193727767101443, "grad_norm": 0.12062996625900269, "learning_rate": 8.719384423903408e-06, "loss": 0.0011, "step": 75740 }, { "epoch": 0.6194545528887435, "grad_norm": 0.04381493106484413, "learning_rate": 8.718907453999934e-06, "loss": 0.0027, "step": 75750 }, { "epoch": 0.6195363290673427, "grad_norm": 0.03620116412639618, "learning_rate": 8.718430408339291e-06, "loss": 0.0023, "step": 75760 }, { "epoch": 0.6196181052459419, "grad_norm": 0.054828498512506485, "learning_rate": 8.7179532869312e-06, "loss": 0.0019, "step": 75770 }, { "epoch": 0.619699881424541, "grad_norm": 0.04086058586835861, "learning_rate": 8.717476089785377e-06, "loss": 0.0016, "step": 75780 }, { "epoch": 0.6197816576031402, "grad_norm": 0.23356394469738007, "learning_rate": 8.716998816911545e-06, "loss": 0.0033, "step": 75790 }, { "epoch": 0.6198634337817394, "grad_norm": 0.020036010071635246, "learning_rate": 8.716521468319425e-06, "loss": 0.0012, "step": 75800 }, { "epoch": 0.6199452099603385, "grad_norm": 0.13635624945163727, "learning_rate": 8.716044044018743e-06, "loss": 0.0011, "step": 75810 }, { "epoch": 0.6200269861389377, "grad_norm": 0.038420338183641434, "learning_rate": 8.715566544019223e-06, "loss": 0.0015, "step": 75820 }, { "epoch": 0.6201087623175369, "grad_norm": 0.022550620138645172, "learning_rate": 8.715088968330592e-06, "loss": 0.0019, "step": 75830 }, { "epoch": 0.620190538496136, "grad_norm": 0.04091285541653633, "learning_rate": 8.714611316962578e-06, "loss": 0.0014, "step": 75840 }, { "epoch": 0.6202723146747352, "grad_norm": 0.037907496094703674, "learning_rate": 8.714133589924914e-06, "loss": 0.0024, "step": 75850 }, { "epoch": 0.6203540908533344, "grad_norm": 0.014451129361987114, "learning_rate": 8.713655787227328e-06, "loss": 0.0014, "step": 75860 }, { "epoch": 0.6204358670319337, "grad_norm": 0.0013169179437682033, "learning_rate": 8.713177908879556e-06, "loss": 0.0009, "step": 75870 }, { "epoch": 0.6205176432105328, "grad_norm": 0.05837560072541237, "learning_rate": 8.71269995489133e-06, "loss": 0.0012, "step": 75880 }, { "epoch": 0.620599419389132, "grad_norm": 0.04915411397814751, "learning_rate": 8.71222192527239e-06, "loss": 0.002, "step": 75890 }, { "epoch": 0.6206811955677312, "grad_norm": 0.016077851876616478, "learning_rate": 8.71174382003247e-06, "loss": 0.0011, "step": 75900 }, { "epoch": 0.6207629717463303, "grad_norm": 0.07857862114906311, "learning_rate": 8.711265639181315e-06, "loss": 0.0038, "step": 75910 }, { "epoch": 0.6208447479249295, "grad_norm": 0.031199848279356956, "learning_rate": 8.71078738272866e-06, "loss": 0.0026, "step": 75920 }, { "epoch": 0.6209265241035287, "grad_norm": 0.0958641767501831, "learning_rate": 8.71030905068425e-06, "loss": 0.0017, "step": 75930 }, { "epoch": 0.6210083002821278, "grad_norm": 0.028156250715255737, "learning_rate": 8.709830643057828e-06, "loss": 0.0017, "step": 75940 }, { "epoch": 0.621090076460727, "grad_norm": 0.07102120667695999, "learning_rate": 8.70935215985914e-06, "loss": 0.0023, "step": 75950 }, { "epoch": 0.6211718526393262, "grad_norm": 0.06170337274670601, "learning_rate": 8.708873601097934e-06, "loss": 0.0014, "step": 75960 }, { "epoch": 0.6212536288179253, "grad_norm": 0.04306001961231232, "learning_rate": 8.708394966783958e-06, "loss": 0.0011, "step": 75970 }, { "epoch": 0.6213354049965245, "grad_norm": 0.1284477263689041, "learning_rate": 8.707916256926962e-06, "loss": 0.0023, "step": 75980 }, { "epoch": 0.6214171811751237, "grad_norm": 0.008035031147301197, "learning_rate": 8.7074374715367e-06, "loss": 0.0019, "step": 75990 }, { "epoch": 0.6214989573537228, "grad_norm": 0.184568852186203, "learning_rate": 8.706958610622921e-06, "loss": 0.0025, "step": 76000 }, { "epoch": 0.621580733532322, "grad_norm": 0.15396219491958618, "learning_rate": 8.706479674195383e-06, "loss": 0.0016, "step": 76010 }, { "epoch": 0.6216625097109212, "grad_norm": 0.07208896428346634, "learning_rate": 8.70600066226384e-06, "loss": 0.0015, "step": 76020 }, { "epoch": 0.6217442858895204, "grad_norm": 0.09153160452842712, "learning_rate": 8.705521574838051e-06, "loss": 0.002, "step": 76030 }, { "epoch": 0.6218260620681195, "grad_norm": 0.14103539288043976, "learning_rate": 8.705042411927778e-06, "loss": 0.0023, "step": 76040 }, { "epoch": 0.6219078382467187, "grad_norm": 0.09166134148836136, "learning_rate": 8.704563173542777e-06, "loss": 0.0021, "step": 76050 }, { "epoch": 0.6219896144253179, "grad_norm": 0.030649464577436447, "learning_rate": 8.704083859692814e-06, "loss": 0.0028, "step": 76060 }, { "epoch": 0.622071390603917, "grad_norm": 0.09666144847869873, "learning_rate": 8.703604470387652e-06, "loss": 0.0019, "step": 76070 }, { "epoch": 0.6221531667825162, "grad_norm": 0.04546627402305603, "learning_rate": 8.703125005637055e-06, "loss": 0.0019, "step": 76080 }, { "epoch": 0.6222349429611155, "grad_norm": 0.022403083741664886, "learning_rate": 8.702645465450793e-06, "loss": 0.004, "step": 76090 }, { "epoch": 0.6223167191397146, "grad_norm": 0.03339585289359093, "learning_rate": 8.702165849838632e-06, "loss": 0.0012, "step": 76100 }, { "epoch": 0.6223984953183138, "grad_norm": 0.0709686130285263, "learning_rate": 8.701686158810345e-06, "loss": 0.0018, "step": 76110 }, { "epoch": 0.622480271496913, "grad_norm": 0.0838359147310257, "learning_rate": 8.7012063923757e-06, "loss": 0.0026, "step": 76120 }, { "epoch": 0.6225620476755122, "grad_norm": 0.022109750658273697, "learning_rate": 8.700726550544476e-06, "loss": 0.0015, "step": 76130 }, { "epoch": 0.6226438238541113, "grad_norm": 0.01633148267865181, "learning_rate": 8.70024663332644e-06, "loss": 0.0015, "step": 76140 }, { "epoch": 0.6227256000327105, "grad_norm": 0.11747951060533524, "learning_rate": 8.699766640731376e-06, "loss": 0.002, "step": 76150 }, { "epoch": 0.6228073762113097, "grad_norm": 0.10187514871358871, "learning_rate": 8.699286572769054e-06, "loss": 0.004, "step": 76160 }, { "epoch": 0.6228891523899088, "grad_norm": 0.026775270700454712, "learning_rate": 8.69880642944926e-06, "loss": 0.002, "step": 76170 }, { "epoch": 0.622970928568508, "grad_norm": 0.06542456150054932, "learning_rate": 8.698326210781773e-06, "loss": 0.0023, "step": 76180 }, { "epoch": 0.6230527047471072, "grad_norm": 0.04889731854200363, "learning_rate": 8.697845916776374e-06, "loss": 0.0023, "step": 76190 }, { "epoch": 0.6231344809257063, "grad_norm": 0.07753875851631165, "learning_rate": 8.697365547442849e-06, "loss": 0.0017, "step": 76200 }, { "epoch": 0.6232162571043055, "grad_norm": 0.11421801149845123, "learning_rate": 8.696885102790982e-06, "loss": 0.0029, "step": 76210 }, { "epoch": 0.6232980332829047, "grad_norm": 0.11234567314386368, "learning_rate": 8.69640458283056e-06, "loss": 0.0015, "step": 76220 }, { "epoch": 0.6233798094615038, "grad_norm": 0.052293483167886734, "learning_rate": 8.69592398757137e-06, "loss": 0.0014, "step": 76230 }, { "epoch": 0.623461585640103, "grad_norm": 0.052613914012908936, "learning_rate": 8.695443317023208e-06, "loss": 0.0016, "step": 76240 }, { "epoch": 0.6235433618187022, "grad_norm": 0.043177928775548935, "learning_rate": 8.69496257119586e-06, "loss": 0.002, "step": 76250 }, { "epoch": 0.6236251379973013, "grad_norm": 0.047823600471019745, "learning_rate": 8.694481750099121e-06, "loss": 0.0011, "step": 76260 }, { "epoch": 0.6237069141759005, "grad_norm": 0.045536208897829056, "learning_rate": 8.694000853742785e-06, "loss": 0.0022, "step": 76270 }, { "epoch": 0.6237886903544997, "grad_norm": 0.12703289091587067, "learning_rate": 8.69351988213665e-06, "loss": 0.0016, "step": 76280 }, { "epoch": 0.6238704665330989, "grad_norm": 0.02423250675201416, "learning_rate": 8.693038835290513e-06, "loss": 0.0038, "step": 76290 }, { "epoch": 0.6239522427116981, "grad_norm": 0.0324985608458519, "learning_rate": 8.692557713214174e-06, "loss": 0.0024, "step": 76300 }, { "epoch": 0.6240340188902973, "grad_norm": 0.1299850344657898, "learning_rate": 8.692076515917431e-06, "loss": 0.0033, "step": 76310 }, { "epoch": 0.6241157950688965, "grad_norm": 0.01277908869087696, "learning_rate": 8.691595243410089e-06, "loss": 0.0018, "step": 76320 }, { "epoch": 0.6241975712474956, "grad_norm": 0.05306504666805267, "learning_rate": 8.691113895701952e-06, "loss": 0.0016, "step": 76330 }, { "epoch": 0.6242793474260948, "grad_norm": 0.043614063411951065, "learning_rate": 8.690632472802822e-06, "loss": 0.0025, "step": 76340 }, { "epoch": 0.624361123604694, "grad_norm": 0.09240680932998657, "learning_rate": 8.69015097472251e-06, "loss": 0.0051, "step": 76350 }, { "epoch": 0.6244428997832931, "grad_norm": 0.17947275936603546, "learning_rate": 8.689669401470824e-06, "loss": 0.0015, "step": 76360 }, { "epoch": 0.6245246759618923, "grad_norm": 0.23863178491592407, "learning_rate": 8.689187753057573e-06, "loss": 0.0024, "step": 76370 }, { "epoch": 0.6246064521404915, "grad_norm": 0.1445489376783371, "learning_rate": 8.68870602949257e-06, "loss": 0.0029, "step": 76380 }, { "epoch": 0.6246882283190907, "grad_norm": 0.19327250123023987, "learning_rate": 8.688224230785624e-06, "loss": 0.0033, "step": 76390 }, { "epoch": 0.6247700044976898, "grad_norm": 0.13410083949565887, "learning_rate": 8.687742356946553e-06, "loss": 0.0021, "step": 76400 }, { "epoch": 0.624851780676289, "grad_norm": 0.09930005669593811, "learning_rate": 8.687260407985174e-06, "loss": 0.0023, "step": 76410 }, { "epoch": 0.6249335568548882, "grad_norm": 0.08640670776367188, "learning_rate": 8.686778383911303e-06, "loss": 0.0017, "step": 76420 }, { "epoch": 0.6250153330334873, "grad_norm": 0.10088738054037094, "learning_rate": 8.68629628473476e-06, "loss": 0.0029, "step": 76430 }, { "epoch": 0.6250971092120865, "grad_norm": 0.06698764115571976, "learning_rate": 8.685814110465365e-06, "loss": 0.0018, "step": 76440 }, { "epoch": 0.6251788853906857, "grad_norm": 0.11266139894723892, "learning_rate": 8.68533186111294e-06, "loss": 0.0027, "step": 76450 }, { "epoch": 0.6252606615692848, "grad_norm": 0.03878450021147728, "learning_rate": 8.68484953668731e-06, "loss": 0.0019, "step": 76460 }, { "epoch": 0.625342437747884, "grad_norm": 0.013035379350185394, "learning_rate": 8.684367137198298e-06, "loss": 0.0033, "step": 76470 }, { "epoch": 0.6254242139264832, "grad_norm": 0.09374349564313889, "learning_rate": 8.683884662655734e-06, "loss": 0.0047, "step": 76480 }, { "epoch": 0.6255059901050823, "grad_norm": 0.17005674540996552, "learning_rate": 8.683402113069445e-06, "loss": 0.0048, "step": 76490 }, { "epoch": 0.6255877662836815, "grad_norm": 0.03426500782370567, "learning_rate": 8.682919488449262e-06, "loss": 0.0031, "step": 76500 }, { "epoch": 0.6256695424622808, "grad_norm": 0.08060883730649948, "learning_rate": 8.682436788805013e-06, "loss": 0.0027, "step": 76510 }, { "epoch": 0.62575131864088, "grad_norm": 0.02407669834792614, "learning_rate": 8.681954014146534e-06, "loss": 0.0015, "step": 76520 }, { "epoch": 0.6258330948194791, "grad_norm": 0.08438870310783386, "learning_rate": 8.681471164483662e-06, "loss": 0.0038, "step": 76530 }, { "epoch": 0.6259148709980783, "grad_norm": 0.10185957700014114, "learning_rate": 8.680988239826226e-06, "loss": 0.0023, "step": 76540 }, { "epoch": 0.6259966471766775, "grad_norm": 0.07749517261981964, "learning_rate": 8.68050524018407e-06, "loss": 0.0016, "step": 76550 }, { "epoch": 0.6260784233552766, "grad_norm": 0.04640583321452141, "learning_rate": 8.68002216556703e-06, "loss": 0.003, "step": 76560 }, { "epoch": 0.6261601995338758, "grad_norm": 0.20692481100559235, "learning_rate": 8.679539015984945e-06, "loss": 0.0008, "step": 76570 }, { "epoch": 0.626241975712475, "grad_norm": 0.13658910989761353, "learning_rate": 8.679055791447662e-06, "loss": 0.0017, "step": 76580 }, { "epoch": 0.6263237518910741, "grad_norm": 0.12431100010871887, "learning_rate": 8.67857249196502e-06, "loss": 0.0035, "step": 76590 }, { "epoch": 0.6264055280696733, "grad_norm": 0.041773248463869095, "learning_rate": 8.67808911754687e-06, "loss": 0.001, "step": 76600 }, { "epoch": 0.6264873042482725, "grad_norm": 0.03241506963968277, "learning_rate": 8.67760566820305e-06, "loss": 0.0008, "step": 76610 }, { "epoch": 0.6265690804268716, "grad_norm": 0.10810983180999756, "learning_rate": 8.677122143943418e-06, "loss": 0.0016, "step": 76620 }, { "epoch": 0.6266508566054708, "grad_norm": 0.31309983134269714, "learning_rate": 8.676638544777815e-06, "loss": 0.0018, "step": 76630 }, { "epoch": 0.62673263278407, "grad_norm": 0.05724097415804863, "learning_rate": 8.676154870716098e-06, "loss": 0.0025, "step": 76640 }, { "epoch": 0.6268144089626692, "grad_norm": 0.092239610850811, "learning_rate": 8.67567112176812e-06, "loss": 0.0012, "step": 76650 }, { "epoch": 0.6268961851412683, "grad_norm": 0.07557740062475204, "learning_rate": 8.67518729794373e-06, "loss": 0.002, "step": 76660 }, { "epoch": 0.6269779613198675, "grad_norm": 0.050039198249578476, "learning_rate": 8.67470339925279e-06, "loss": 0.001, "step": 76670 }, { "epoch": 0.6270597374984667, "grad_norm": 0.014609339646995068, "learning_rate": 8.674219425705152e-06, "loss": 0.0016, "step": 76680 }, { "epoch": 0.6271415136770658, "grad_norm": 0.06717605143785477, "learning_rate": 8.673735377310681e-06, "loss": 0.0017, "step": 76690 }, { "epoch": 0.627223289855665, "grad_norm": 0.00993372406810522, "learning_rate": 8.673251254079233e-06, "loss": 0.0012, "step": 76700 }, { "epoch": 0.6273050660342642, "grad_norm": 0.013049974106252193, "learning_rate": 8.67276705602067e-06, "loss": 0.0014, "step": 76710 }, { "epoch": 0.6273868422128633, "grad_norm": 0.008447838947176933, "learning_rate": 8.67228278314486e-06, "loss": 0.001, "step": 76720 }, { "epoch": 0.6274686183914626, "grad_norm": 0.13609479367733002, "learning_rate": 8.671798435461661e-06, "loss": 0.0025, "step": 76730 }, { "epoch": 0.6275503945700618, "grad_norm": 0.13776321709156036, "learning_rate": 8.671314012980945e-06, "loss": 0.0021, "step": 76740 }, { "epoch": 0.627632170748661, "grad_norm": 0.12269655615091324, "learning_rate": 8.670829515712577e-06, "loss": 0.0021, "step": 76750 }, { "epoch": 0.6277139469272601, "grad_norm": 0.1425500214099884, "learning_rate": 8.67034494366643e-06, "loss": 0.002, "step": 76760 }, { "epoch": 0.6277957231058593, "grad_norm": 0.0905190035700798, "learning_rate": 8.669860296852372e-06, "loss": 0.0024, "step": 76770 }, { "epoch": 0.6278774992844585, "grad_norm": 0.026069913059473038, "learning_rate": 8.669375575280277e-06, "loss": 0.0041, "step": 76780 }, { "epoch": 0.6279592754630576, "grad_norm": 0.048597726970911026, "learning_rate": 8.668890778960018e-06, "loss": 0.0019, "step": 76790 }, { "epoch": 0.6280410516416568, "grad_norm": 0.07287666946649551, "learning_rate": 8.668405907901472e-06, "loss": 0.0019, "step": 76800 }, { "epoch": 0.628122827820256, "grad_norm": 0.15039385855197906, "learning_rate": 8.667920962114518e-06, "loss": 0.0016, "step": 76810 }, { "epoch": 0.6282046039988551, "grad_norm": 0.11841066926717758, "learning_rate": 8.667435941609031e-06, "loss": 0.0012, "step": 76820 }, { "epoch": 0.6282863801774543, "grad_norm": 0.06234089657664299, "learning_rate": 8.666950846394891e-06, "loss": 0.0024, "step": 76830 }, { "epoch": 0.6283681563560535, "grad_norm": 0.09055006504058838, "learning_rate": 8.666465676481982e-06, "loss": 0.0024, "step": 76840 }, { "epoch": 0.6284499325346526, "grad_norm": 0.03374538943171501, "learning_rate": 8.66598043188019e-06, "loss": 0.0016, "step": 76850 }, { "epoch": 0.6285317087132518, "grad_norm": 0.06583345681428909, "learning_rate": 8.665495112599395e-06, "loss": 0.0017, "step": 76860 }, { "epoch": 0.628613484891851, "grad_norm": 0.030175136402249336, "learning_rate": 8.665009718649484e-06, "loss": 0.0008, "step": 76870 }, { "epoch": 0.6286952610704502, "grad_norm": 0.042218271642923355, "learning_rate": 8.664524250040347e-06, "loss": 0.001, "step": 76880 }, { "epoch": 0.6287770372490493, "grad_norm": 0.030755233019590378, "learning_rate": 8.664038706781872e-06, "loss": 0.0021, "step": 76890 }, { "epoch": 0.6288588134276485, "grad_norm": 0.08543654531240463, "learning_rate": 8.663553088883952e-06, "loss": 0.0015, "step": 76900 }, { "epoch": 0.6289405896062477, "grad_norm": 0.1109832301735878, "learning_rate": 8.663067396356476e-06, "loss": 0.0054, "step": 76910 }, { "epoch": 0.6290223657848468, "grad_norm": 0.06013265624642372, "learning_rate": 8.662581629209338e-06, "loss": 0.0025, "step": 76920 }, { "epoch": 0.629104141963446, "grad_norm": 0.017055287957191467, "learning_rate": 8.662095787452437e-06, "loss": 0.0027, "step": 76930 }, { "epoch": 0.6291859181420453, "grad_norm": 0.11832823604345322, "learning_rate": 8.661609871095669e-06, "loss": 0.0023, "step": 76940 }, { "epoch": 0.6292676943206444, "grad_norm": 0.12769559025764465, "learning_rate": 8.661123880148932e-06, "loss": 0.0027, "step": 76950 }, { "epoch": 0.6293494704992436, "grad_norm": 0.01536741852760315, "learning_rate": 8.660637814622124e-06, "loss": 0.002, "step": 76960 }, { "epoch": 0.6294312466778428, "grad_norm": 0.09650703519582748, "learning_rate": 8.660151674525148e-06, "loss": 0.0017, "step": 76970 }, { "epoch": 0.629513022856442, "grad_norm": 0.03280613198876381, "learning_rate": 8.659665459867908e-06, "loss": 0.0016, "step": 76980 }, { "epoch": 0.6295947990350411, "grad_norm": 0.19043277204036713, "learning_rate": 8.659179170660308e-06, "loss": 0.0063, "step": 76990 }, { "epoch": 0.6296765752136403, "grad_norm": 0.09376227110624313, "learning_rate": 8.658692806912253e-06, "loss": 0.0017, "step": 77000 }, { "epoch": 0.6297583513922395, "grad_norm": 0.02688641846179962, "learning_rate": 8.658206368633652e-06, "loss": 0.0018, "step": 77010 }, { "epoch": 0.6298401275708386, "grad_norm": 0.023543644696474075, "learning_rate": 8.657719855834415e-06, "loss": 0.002, "step": 77020 }, { "epoch": 0.6299219037494378, "grad_norm": 0.033387113362550735, "learning_rate": 8.65723326852445e-06, "loss": 0.0024, "step": 77030 }, { "epoch": 0.630003679928037, "grad_norm": 0.11118852347135544, "learning_rate": 8.656746606713671e-06, "loss": 0.0013, "step": 77040 }, { "epoch": 0.6300854561066361, "grad_norm": 0.04189235344529152, "learning_rate": 8.656259870411992e-06, "loss": 0.0028, "step": 77050 }, { "epoch": 0.6301672322852353, "grad_norm": 0.03883783146739006, "learning_rate": 8.655773059629326e-06, "loss": 0.0033, "step": 77060 }, { "epoch": 0.6302490084638345, "grad_norm": 0.044650617986917496, "learning_rate": 8.655286174375591e-06, "loss": 0.0031, "step": 77070 }, { "epoch": 0.6303307846424336, "grad_norm": 0.06722764670848846, "learning_rate": 8.654799214660708e-06, "loss": 0.0019, "step": 77080 }, { "epoch": 0.6304125608210328, "grad_norm": 0.047284290194511414, "learning_rate": 8.65431218049459e-06, "loss": 0.0023, "step": 77090 }, { "epoch": 0.630494336999632, "grad_norm": 0.022358274087309837, "learning_rate": 8.653825071887166e-06, "loss": 0.0017, "step": 77100 }, { "epoch": 0.6305761131782311, "grad_norm": 0.1453767716884613, "learning_rate": 8.653337888848355e-06, "loss": 0.0028, "step": 77110 }, { "epoch": 0.6306578893568303, "grad_norm": 0.05103856697678566, "learning_rate": 8.65285063138808e-06, "loss": 0.0014, "step": 77120 }, { "epoch": 0.6307396655354295, "grad_norm": 0.07968293875455856, "learning_rate": 8.65236329951627e-06, "loss": 0.002, "step": 77130 }, { "epoch": 0.6308214417140287, "grad_norm": 0.08450756967067719, "learning_rate": 8.651875893242851e-06, "loss": 0.0025, "step": 77140 }, { "epoch": 0.6309032178926279, "grad_norm": 0.02983318455517292, "learning_rate": 8.65138841257775e-06, "loss": 0.0018, "step": 77150 }, { "epoch": 0.6309849940712271, "grad_norm": 0.07570359110832214, "learning_rate": 8.650900857530901e-06, "loss": 0.0027, "step": 77160 }, { "epoch": 0.6310667702498263, "grad_norm": 0.04759184643626213, "learning_rate": 8.650413228112234e-06, "loss": 0.0023, "step": 77170 }, { "epoch": 0.6311485464284254, "grad_norm": 0.0801171138882637, "learning_rate": 8.64992552433168e-06, "loss": 0.0026, "step": 77180 }, { "epoch": 0.6312303226070246, "grad_norm": 0.031907156109809875, "learning_rate": 8.649437746199176e-06, "loss": 0.0017, "step": 77190 }, { "epoch": 0.6313120987856238, "grad_norm": 0.06618013978004456, "learning_rate": 8.648949893724661e-06, "loss": 0.0019, "step": 77200 }, { "epoch": 0.631393874964223, "grad_norm": 0.05428200215101242, "learning_rate": 8.64846196691807e-06, "loss": 0.0023, "step": 77210 }, { "epoch": 0.6314756511428221, "grad_norm": 0.1536887139081955, "learning_rate": 8.647973965789342e-06, "loss": 0.0016, "step": 77220 }, { "epoch": 0.6315574273214213, "grad_norm": 0.046045247465372086, "learning_rate": 8.647485890348422e-06, "loss": 0.0026, "step": 77230 }, { "epoch": 0.6316392035000205, "grad_norm": 0.17628221213817596, "learning_rate": 8.646997740605247e-06, "loss": 0.0021, "step": 77240 }, { "epoch": 0.6317209796786196, "grad_norm": 0.05540185421705246, "learning_rate": 8.646509516569764e-06, "loss": 0.0016, "step": 77250 }, { "epoch": 0.6318027558572188, "grad_norm": 0.16131599247455597, "learning_rate": 8.646021218251919e-06, "loss": 0.0035, "step": 77260 }, { "epoch": 0.631884532035818, "grad_norm": 0.0829206183552742, "learning_rate": 8.645532845661656e-06, "loss": 0.0019, "step": 77270 }, { "epoch": 0.6319663082144171, "grad_norm": 0.043185748159885406, "learning_rate": 8.645044398808929e-06, "loss": 0.0014, "step": 77280 }, { "epoch": 0.6320480843930163, "grad_norm": 0.20367436110973358, "learning_rate": 8.644555877703682e-06, "loss": 0.0032, "step": 77290 }, { "epoch": 0.6321298605716155, "grad_norm": 0.07410438358783722, "learning_rate": 8.64406728235587e-06, "loss": 0.0028, "step": 77300 }, { "epoch": 0.6322116367502146, "grad_norm": 0.04913916811347008, "learning_rate": 8.643578612775446e-06, "loss": 0.0018, "step": 77310 }, { "epoch": 0.6322934129288138, "grad_norm": 0.027030471712350845, "learning_rate": 8.643089868972365e-06, "loss": 0.0015, "step": 77320 }, { "epoch": 0.632375189107413, "grad_norm": 0.0636158138513565, "learning_rate": 8.642601050956579e-06, "loss": 0.002, "step": 77330 }, { "epoch": 0.6324569652860121, "grad_norm": 0.008952638134360313, "learning_rate": 8.642112158738051e-06, "loss": 0.0008, "step": 77340 }, { "epoch": 0.6325387414646113, "grad_norm": 0.045393746346235275, "learning_rate": 8.641623192326739e-06, "loss": 0.0038, "step": 77350 }, { "epoch": 0.6326205176432105, "grad_norm": 0.11102961003780365, "learning_rate": 8.6411341517326e-06, "loss": 0.0018, "step": 77360 }, { "epoch": 0.6327022938218098, "grad_norm": 0.13790959119796753, "learning_rate": 8.640645036965598e-06, "loss": 0.0019, "step": 77370 }, { "epoch": 0.6327840700004089, "grad_norm": 0.12324143946170807, "learning_rate": 8.640155848035701e-06, "loss": 0.0029, "step": 77380 }, { "epoch": 0.6328658461790081, "grad_norm": 0.16653768718242645, "learning_rate": 8.639666584952868e-06, "loss": 0.0019, "step": 77390 }, { "epoch": 0.6329476223576073, "grad_norm": 0.09574887901544571, "learning_rate": 8.639177247727068e-06, "loss": 0.0036, "step": 77400 }, { "epoch": 0.6330293985362064, "grad_norm": 0.0574396476149559, "learning_rate": 8.63868783636827e-06, "loss": 0.0042, "step": 77410 }, { "epoch": 0.6331111747148056, "grad_norm": 0.2580151855945587, "learning_rate": 8.638198350886444e-06, "loss": 0.002, "step": 77420 }, { "epoch": 0.6331929508934048, "grad_norm": 0.09968255460262299, "learning_rate": 8.63770879129156e-06, "loss": 0.0029, "step": 77430 }, { "epoch": 0.6332747270720039, "grad_norm": 0.03000129759311676, "learning_rate": 8.63721915759359e-06, "loss": 0.0015, "step": 77440 }, { "epoch": 0.6333565032506031, "grad_norm": 0.0794256404042244, "learning_rate": 8.636729449802509e-06, "loss": 0.0021, "step": 77450 }, { "epoch": 0.6334382794292023, "grad_norm": 0.059106551110744476, "learning_rate": 8.636239667928293e-06, "loss": 0.0018, "step": 77460 }, { "epoch": 0.6335200556078014, "grad_norm": 0.023274505510926247, "learning_rate": 8.63574981198092e-06, "loss": 0.0021, "step": 77470 }, { "epoch": 0.6336018317864006, "grad_norm": 0.049601368606090546, "learning_rate": 8.635259881970367e-06, "loss": 0.0043, "step": 77480 }, { "epoch": 0.6336836079649998, "grad_norm": 0.1412268579006195, "learning_rate": 8.634769877906618e-06, "loss": 0.0028, "step": 77490 }, { "epoch": 0.633765384143599, "grad_norm": 0.030173860490322113, "learning_rate": 8.634279799799651e-06, "loss": 0.0024, "step": 77500 }, { "epoch": 0.6338471603221981, "grad_norm": 0.11662785708904266, "learning_rate": 8.63378964765945e-06, "loss": 0.0021, "step": 77510 }, { "epoch": 0.6339289365007973, "grad_norm": 0.11561310291290283, "learning_rate": 8.633299421496e-06, "loss": 0.0025, "step": 77520 }, { "epoch": 0.6340107126793965, "grad_norm": 0.0940699428319931, "learning_rate": 8.632809121319288e-06, "loss": 0.0023, "step": 77530 }, { "epoch": 0.6340924888579956, "grad_norm": 0.08322020620107651, "learning_rate": 8.6323187471393e-06, "loss": 0.0013, "step": 77540 }, { "epoch": 0.6341742650365948, "grad_norm": 0.022227255627512932, "learning_rate": 8.631828298966028e-06, "loss": 0.0022, "step": 77550 }, { "epoch": 0.634256041215194, "grad_norm": 0.26682689785957336, "learning_rate": 8.631337776809462e-06, "loss": 0.0024, "step": 77560 }, { "epoch": 0.6343378173937931, "grad_norm": 0.26959607005119324, "learning_rate": 8.63084718067959e-06, "loss": 0.0011, "step": 77570 }, { "epoch": 0.6344195935723924, "grad_norm": 0.14918391406536102, "learning_rate": 8.630356510586413e-06, "loss": 0.0021, "step": 77580 }, { "epoch": 0.6345013697509916, "grad_norm": 0.12641514837741852, "learning_rate": 8.629865766539924e-06, "loss": 0.0031, "step": 77590 }, { "epoch": 0.6345831459295908, "grad_norm": 0.04030647501349449, "learning_rate": 8.629374948550115e-06, "loss": 0.0023, "step": 77600 }, { "epoch": 0.6346649221081899, "grad_norm": 0.04177006334066391, "learning_rate": 8.628884056626991e-06, "loss": 0.0014, "step": 77610 }, { "epoch": 0.6347466982867891, "grad_norm": 0.11061526834964752, "learning_rate": 8.628393090780549e-06, "loss": 0.0019, "step": 77620 }, { "epoch": 0.6348284744653883, "grad_norm": 0.025781795382499695, "learning_rate": 8.62790205102079e-06, "loss": 0.0026, "step": 77630 }, { "epoch": 0.6349102506439874, "grad_norm": 0.05010295659303665, "learning_rate": 8.627410937357717e-06, "loss": 0.0017, "step": 77640 }, { "epoch": 0.6349920268225866, "grad_norm": 0.04565935954451561, "learning_rate": 8.626919749801335e-06, "loss": 0.002, "step": 77650 }, { "epoch": 0.6350738030011858, "grad_norm": 0.30270400643348694, "learning_rate": 8.62642848836165e-06, "loss": 0.0028, "step": 77660 }, { "epoch": 0.6351555791797849, "grad_norm": 0.04013776406645775, "learning_rate": 8.625937153048667e-06, "loss": 0.0028, "step": 77670 }, { "epoch": 0.6352373553583841, "grad_norm": 0.056835830211639404, "learning_rate": 8.6254457438724e-06, "loss": 0.0024, "step": 77680 }, { "epoch": 0.6353191315369833, "grad_norm": 0.03708630055189133, "learning_rate": 8.624954260842855e-06, "loss": 0.0014, "step": 77690 }, { "epoch": 0.6354009077155824, "grad_norm": 0.15236493945121765, "learning_rate": 8.624462703970044e-06, "loss": 0.0017, "step": 77700 }, { "epoch": 0.6354826838941816, "grad_norm": 0.6187537312507629, "learning_rate": 8.62397107326398e-06, "loss": 0.0048, "step": 77710 }, { "epoch": 0.6355644600727808, "grad_norm": 0.10885360091924667, "learning_rate": 8.623479368734684e-06, "loss": 0.0021, "step": 77720 }, { "epoch": 0.63564623625138, "grad_norm": 0.04082987830042839, "learning_rate": 8.622987590392166e-06, "loss": 0.0017, "step": 77730 }, { "epoch": 0.6357280124299791, "grad_norm": 0.07015357166528702, "learning_rate": 8.622495738246446e-06, "loss": 0.0009, "step": 77740 }, { "epoch": 0.6358097886085783, "grad_norm": 0.1156451478600502, "learning_rate": 8.622003812307542e-06, "loss": 0.0018, "step": 77750 }, { "epoch": 0.6358915647871775, "grad_norm": 0.06796853244304657, "learning_rate": 8.621511812585478e-06, "loss": 0.0029, "step": 77760 }, { "epoch": 0.6359733409657766, "grad_norm": 0.11889255791902542, "learning_rate": 8.621019739090273e-06, "loss": 0.0021, "step": 77770 }, { "epoch": 0.6360551171443758, "grad_norm": 0.11588206887245178, "learning_rate": 8.620527591831955e-06, "loss": 0.0048, "step": 77780 }, { "epoch": 0.6361368933229751, "grad_norm": 0.05846896767616272, "learning_rate": 8.620035370820546e-06, "loss": 0.0016, "step": 77790 }, { "epoch": 0.6362186695015742, "grad_norm": 0.08010528236627579, "learning_rate": 8.619543076066073e-06, "loss": 0.0022, "step": 77800 }, { "epoch": 0.6363004456801734, "grad_norm": 0.02602401003241539, "learning_rate": 8.619050707578566e-06, "loss": 0.0019, "step": 77810 }, { "epoch": 0.6363822218587726, "grad_norm": 0.059441495686769485, "learning_rate": 8.618558265368054e-06, "loss": 0.0011, "step": 77820 }, { "epoch": 0.6364639980373717, "grad_norm": 0.051671337336301804, "learning_rate": 8.61806574944457e-06, "loss": 0.0017, "step": 77830 }, { "epoch": 0.6365457742159709, "grad_norm": 0.11843553930521011, "learning_rate": 8.617573159818144e-06, "loss": 0.0028, "step": 77840 }, { "epoch": 0.6366275503945701, "grad_norm": 0.021349608898162842, "learning_rate": 8.617080496498812e-06, "loss": 0.002, "step": 77850 }, { "epoch": 0.6367093265731693, "grad_norm": 0.044586796313524246, "learning_rate": 8.616587759496613e-06, "loss": 0.0021, "step": 77860 }, { "epoch": 0.6367911027517684, "grad_norm": 0.10634975135326385, "learning_rate": 8.61609494882158e-06, "loss": 0.0021, "step": 77870 }, { "epoch": 0.6368728789303676, "grad_norm": 0.056956611573696136, "learning_rate": 8.615602064483754e-06, "loss": 0.002, "step": 77880 }, { "epoch": 0.6369546551089668, "grad_norm": 0.08189241588115692, "learning_rate": 8.615109106493173e-06, "loss": 0.0028, "step": 77890 }, { "epoch": 0.6370364312875659, "grad_norm": 0.06493555009365082, "learning_rate": 8.614616074859883e-06, "loss": 0.0022, "step": 77900 }, { "epoch": 0.6371182074661651, "grad_norm": 0.0321945995092392, "learning_rate": 8.614122969593925e-06, "loss": 0.0039, "step": 77910 }, { "epoch": 0.6371999836447643, "grad_norm": 0.096184141933918, "learning_rate": 8.613629790705345e-06, "loss": 0.002, "step": 77920 }, { "epoch": 0.6372817598233634, "grad_norm": 0.11216504126787186, "learning_rate": 8.613136538204188e-06, "loss": 0.0018, "step": 77930 }, { "epoch": 0.6373635360019626, "grad_norm": 0.1911860704421997, "learning_rate": 8.612643212100505e-06, "loss": 0.0021, "step": 77940 }, { "epoch": 0.6374453121805618, "grad_norm": 0.13854876160621643, "learning_rate": 8.61214981240434e-06, "loss": 0.0013, "step": 77950 }, { "epoch": 0.6375270883591609, "grad_norm": 0.02827172912657261, "learning_rate": 8.61165633912575e-06, "loss": 0.0014, "step": 77960 }, { "epoch": 0.6376088645377601, "grad_norm": 0.0919121578335762, "learning_rate": 8.611162792274782e-06, "loss": 0.0021, "step": 77970 }, { "epoch": 0.6376906407163593, "grad_norm": 0.05082235485315323, "learning_rate": 8.610669171861495e-06, "loss": 0.0009, "step": 77980 }, { "epoch": 0.6377724168949584, "grad_norm": 0.07578260451555252, "learning_rate": 8.610175477895939e-06, "loss": 0.0011, "step": 77990 }, { "epoch": 0.6378541930735576, "grad_norm": 0.03847402706742287, "learning_rate": 8.609681710388177e-06, "loss": 0.0014, "step": 78000 }, { "epoch": 0.6379359692521569, "grad_norm": 0.021138189360499382, "learning_rate": 8.609187869348264e-06, "loss": 0.0018, "step": 78010 }, { "epoch": 0.6380177454307561, "grad_norm": 0.044129882007837296, "learning_rate": 8.608693954786258e-06, "loss": 0.0023, "step": 78020 }, { "epoch": 0.6380995216093552, "grad_norm": 0.053182899951934814, "learning_rate": 8.608199966712225e-06, "loss": 0.0017, "step": 78030 }, { "epoch": 0.6381812977879544, "grad_norm": 0.0596548356115818, "learning_rate": 8.607705905136225e-06, "loss": 0.0014, "step": 78040 }, { "epoch": 0.6382630739665536, "grad_norm": 0.17337670922279358, "learning_rate": 8.607211770068325e-06, "loss": 0.0033, "step": 78050 }, { "epoch": 0.6383448501451527, "grad_norm": 0.0768359825015068, "learning_rate": 8.606717561518586e-06, "loss": 0.0027, "step": 78060 }, { "epoch": 0.6384266263237519, "grad_norm": 0.1500478982925415, "learning_rate": 8.606223279497081e-06, "loss": 0.0014, "step": 78070 }, { "epoch": 0.6385084025023511, "grad_norm": 0.06800282746553421, "learning_rate": 8.605728924013876e-06, "loss": 0.0025, "step": 78080 }, { "epoch": 0.6385901786809502, "grad_norm": 0.04252001270651817, "learning_rate": 8.605234495079043e-06, "loss": 0.0022, "step": 78090 }, { "epoch": 0.6386719548595494, "grad_norm": 0.06184264272451401, "learning_rate": 8.60473999270265e-06, "loss": 0.0015, "step": 78100 }, { "epoch": 0.6387537310381486, "grad_norm": 0.01465871836990118, "learning_rate": 8.604245416894776e-06, "loss": 0.0019, "step": 78110 }, { "epoch": 0.6388355072167478, "grad_norm": 0.09049270302057266, "learning_rate": 8.603750767665493e-06, "loss": 0.0033, "step": 78120 }, { "epoch": 0.6389172833953469, "grad_norm": 0.0629807561635971, "learning_rate": 8.603256045024877e-06, "loss": 0.0022, "step": 78130 }, { "epoch": 0.6389990595739461, "grad_norm": 0.06040281057357788, "learning_rate": 8.602761248983006e-06, "loss": 0.0013, "step": 78140 }, { "epoch": 0.6390808357525453, "grad_norm": 0.056123942136764526, "learning_rate": 8.602266379549959e-06, "loss": 0.0019, "step": 78150 }, { "epoch": 0.6391626119311444, "grad_norm": 0.04061022028326988, "learning_rate": 8.601771436735819e-06, "loss": 0.0015, "step": 78160 }, { "epoch": 0.6392443881097436, "grad_norm": 0.13215209543704987, "learning_rate": 8.601276420550669e-06, "loss": 0.0017, "step": 78170 }, { "epoch": 0.6393261642883428, "grad_norm": 0.06159132346510887, "learning_rate": 8.60078133100459e-06, "loss": 0.002, "step": 78180 }, { "epoch": 0.6394079404669419, "grad_norm": 0.08016921579837799, "learning_rate": 8.600286168107666e-06, "loss": 0.0011, "step": 78190 }, { "epoch": 0.6394897166455411, "grad_norm": 0.028992027044296265, "learning_rate": 8.599790931869988e-06, "loss": 0.0015, "step": 78200 }, { "epoch": 0.6395714928241403, "grad_norm": 0.009905279614031315, "learning_rate": 8.599295622301642e-06, "loss": 0.0025, "step": 78210 }, { "epoch": 0.6396532690027396, "grad_norm": 0.174460306763649, "learning_rate": 8.598800239412719e-06, "loss": 0.0015, "step": 78220 }, { "epoch": 0.6397350451813387, "grad_norm": 0.0378713458776474, "learning_rate": 8.59830478321331e-06, "loss": 0.0018, "step": 78230 }, { "epoch": 0.6398168213599379, "grad_norm": 0.03827613964676857, "learning_rate": 8.597809253713507e-06, "loss": 0.0038, "step": 78240 }, { "epoch": 0.6398985975385371, "grad_norm": 0.04681425914168358, "learning_rate": 8.597313650923407e-06, "loss": 0.0023, "step": 78250 }, { "epoch": 0.6399803737171362, "grad_norm": 0.18649424612522125, "learning_rate": 8.596817974853103e-06, "loss": 0.0031, "step": 78260 }, { "epoch": 0.6400621498957354, "grad_norm": 0.007900145836174488, "learning_rate": 8.596322225512693e-06, "loss": 0.0015, "step": 78270 }, { "epoch": 0.6401439260743346, "grad_norm": 0.04019613564014435, "learning_rate": 8.595826402912275e-06, "loss": 0.0018, "step": 78280 }, { "epoch": 0.6402257022529337, "grad_norm": 0.028488634154200554, "learning_rate": 8.595330507061951e-06, "loss": 0.0029, "step": 78290 }, { "epoch": 0.6403074784315329, "grad_norm": 0.08166097849607468, "learning_rate": 8.594834537971822e-06, "loss": 0.0022, "step": 78300 }, { "epoch": 0.6403892546101321, "grad_norm": 0.1511719524860382, "learning_rate": 8.594338495651992e-06, "loss": 0.0014, "step": 78310 }, { "epoch": 0.6404710307887312, "grad_norm": 0.05700240656733513, "learning_rate": 8.593842380112565e-06, "loss": 0.002, "step": 78320 }, { "epoch": 0.6405528069673304, "grad_norm": 0.022346539422869682, "learning_rate": 8.593346191363647e-06, "loss": 0.0068, "step": 78330 }, { "epoch": 0.6406345831459296, "grad_norm": 0.19134646654129028, "learning_rate": 8.592849929415347e-06, "loss": 0.0024, "step": 78340 }, { "epoch": 0.6407163593245288, "grad_norm": 0.03007933497428894, "learning_rate": 8.592353594277774e-06, "loss": 0.0015, "step": 78350 }, { "epoch": 0.6407981355031279, "grad_norm": 0.00602646404877305, "learning_rate": 8.591857185961038e-06, "loss": 0.0017, "step": 78360 }, { "epoch": 0.6408799116817271, "grad_norm": 0.10266927629709244, "learning_rate": 8.591360704475248e-06, "loss": 0.0023, "step": 78370 }, { "epoch": 0.6409616878603263, "grad_norm": 0.09414692223072052, "learning_rate": 8.590864149830526e-06, "loss": 0.0031, "step": 78380 }, { "epoch": 0.6410434640389254, "grad_norm": 0.08709587901830673, "learning_rate": 8.59036752203698e-06, "loss": 0.0034, "step": 78390 }, { "epoch": 0.6411252402175246, "grad_norm": 0.0377236045897007, "learning_rate": 8.58987082110473e-06, "loss": 0.0013, "step": 78400 }, { "epoch": 0.6412070163961238, "grad_norm": 0.0608786903321743, "learning_rate": 8.589374047043893e-06, "loss": 0.0013, "step": 78410 }, { "epoch": 0.6412887925747229, "grad_norm": 0.0781317949295044, "learning_rate": 8.588877199864589e-06, "loss": 0.0018, "step": 78420 }, { "epoch": 0.6413705687533222, "grad_norm": 0.10141652077436447, "learning_rate": 8.58838027957694e-06, "loss": 0.0038, "step": 78430 }, { "epoch": 0.6414523449319214, "grad_norm": 0.08757872879505157, "learning_rate": 8.587883286191067e-06, "loss": 0.0019, "step": 78440 }, { "epoch": 0.6415341211105205, "grad_norm": 0.09188603609800339, "learning_rate": 8.587386219717096e-06, "loss": 0.004, "step": 78450 }, { "epoch": 0.6416158972891197, "grad_norm": 0.07691819220781326, "learning_rate": 8.586889080165152e-06, "loss": 0.002, "step": 78460 }, { "epoch": 0.6416976734677189, "grad_norm": 0.026711629703640938, "learning_rate": 8.586391867545361e-06, "loss": 0.0015, "step": 78470 }, { "epoch": 0.6417794496463181, "grad_norm": 0.13147109746932983, "learning_rate": 8.585894581867852e-06, "loss": 0.0017, "step": 78480 }, { "epoch": 0.6418612258249172, "grad_norm": 0.0565468966960907, "learning_rate": 8.585397223142757e-06, "loss": 0.0022, "step": 78490 }, { "epoch": 0.6419430020035164, "grad_norm": 0.17161433398723602, "learning_rate": 8.584899791380206e-06, "loss": 0.0022, "step": 78500 }, { "epoch": 0.6420247781821156, "grad_norm": 0.021664801985025406, "learning_rate": 8.584402286590332e-06, "loss": 0.0028, "step": 78510 }, { "epoch": 0.6421065543607147, "grad_norm": 0.08999638259410858, "learning_rate": 8.583904708783269e-06, "loss": 0.0019, "step": 78520 }, { "epoch": 0.6421883305393139, "grad_norm": 0.0570405013859272, "learning_rate": 8.583407057969155e-06, "loss": 0.0017, "step": 78530 }, { "epoch": 0.6422701067179131, "grad_norm": 0.037061627954244614, "learning_rate": 8.582909334158126e-06, "loss": 0.0028, "step": 78540 }, { "epoch": 0.6423518828965122, "grad_norm": 0.05994950234889984, "learning_rate": 8.582411537360322e-06, "loss": 0.0021, "step": 78550 }, { "epoch": 0.6424336590751114, "grad_norm": 0.1071896180510521, "learning_rate": 8.581913667585882e-06, "loss": 0.0021, "step": 78560 }, { "epoch": 0.6425154352537106, "grad_norm": 0.02535059116780758, "learning_rate": 8.58141572484495e-06, "loss": 0.0019, "step": 78570 }, { "epoch": 0.6425972114323097, "grad_norm": 0.14779795706272125, "learning_rate": 8.580917709147668e-06, "loss": 0.0021, "step": 78580 }, { "epoch": 0.6426789876109089, "grad_norm": 0.06167592108249664, "learning_rate": 8.580419620504182e-06, "loss": 0.0009, "step": 78590 }, { "epoch": 0.6427607637895081, "grad_norm": 0.07061352580785751, "learning_rate": 8.579921458924638e-06, "loss": 0.0012, "step": 78600 }, { "epoch": 0.6428425399681073, "grad_norm": 0.08099239319562912, "learning_rate": 8.579423224419183e-06, "loss": 0.0013, "step": 78610 }, { "epoch": 0.6429243161467064, "grad_norm": 0.28956177830696106, "learning_rate": 8.578924916997968e-06, "loss": 0.0017, "step": 78620 }, { "epoch": 0.6430060923253056, "grad_norm": 0.0341041162610054, "learning_rate": 8.578426536671144e-06, "loss": 0.0023, "step": 78630 }, { "epoch": 0.6430878685039048, "grad_norm": 0.05252024531364441, "learning_rate": 8.577928083448864e-06, "loss": 0.002, "step": 78640 }, { "epoch": 0.643169644682504, "grad_norm": 0.04988829791545868, "learning_rate": 8.577429557341279e-06, "loss": 0.0015, "step": 78650 }, { "epoch": 0.6432514208611032, "grad_norm": 0.029403239488601685, "learning_rate": 8.576930958358545e-06, "loss": 0.0014, "step": 78660 }, { "epoch": 0.6433331970397024, "grad_norm": 0.043083351105451584, "learning_rate": 8.576432286510821e-06, "loss": 0.0015, "step": 78670 }, { "epoch": 0.6434149732183015, "grad_norm": 0.07594752311706543, "learning_rate": 8.575933541808264e-06, "loss": 0.0019, "step": 78680 }, { "epoch": 0.6434967493969007, "grad_norm": 0.00976446084678173, "learning_rate": 8.575434724261033e-06, "loss": 0.0045, "step": 78690 }, { "epoch": 0.6435785255754999, "grad_norm": 0.10861700028181076, "learning_rate": 8.574935833879294e-06, "loss": 0.0021, "step": 78700 }, { "epoch": 0.643660301754099, "grad_norm": 0.054374389350414276, "learning_rate": 8.574436870673202e-06, "loss": 0.0012, "step": 78710 }, { "epoch": 0.6437420779326982, "grad_norm": 0.04277864471077919, "learning_rate": 8.573937834652928e-06, "loss": 0.0012, "step": 78720 }, { "epoch": 0.6438238541112974, "grad_norm": 0.16948579251766205, "learning_rate": 8.573438725828635e-06, "loss": 0.0013, "step": 78730 }, { "epoch": 0.6439056302898966, "grad_norm": 0.07520725578069687, "learning_rate": 8.57293954421049e-06, "loss": 0.0021, "step": 78740 }, { "epoch": 0.6439874064684957, "grad_norm": 0.051654450595378876, "learning_rate": 8.572440289808663e-06, "loss": 0.0018, "step": 78750 }, { "epoch": 0.6440691826470949, "grad_norm": 0.03742396831512451, "learning_rate": 8.571940962633323e-06, "loss": 0.0022, "step": 78760 }, { "epoch": 0.6441509588256941, "grad_norm": 0.07871810346841812, "learning_rate": 8.571441562694642e-06, "loss": 0.0009, "step": 78770 }, { "epoch": 0.6442327350042932, "grad_norm": 0.060392431914806366, "learning_rate": 8.570942090002793e-06, "loss": 0.0018, "step": 78780 }, { "epoch": 0.6443145111828924, "grad_norm": 0.08705158531665802, "learning_rate": 8.570442544567951e-06, "loss": 0.0023, "step": 78790 }, { "epoch": 0.6443962873614916, "grad_norm": 0.05272040143609047, "learning_rate": 8.569942926400292e-06, "loss": 0.0018, "step": 78800 }, { "epoch": 0.6444780635400907, "grad_norm": 0.07245075702667236, "learning_rate": 8.569443235509995e-06, "loss": 0.0018, "step": 78810 }, { "epoch": 0.6445598397186899, "grad_norm": 0.043656229972839355, "learning_rate": 8.568943471907238e-06, "loss": 0.0029, "step": 78820 }, { "epoch": 0.6446416158972891, "grad_norm": 0.05526743456721306, "learning_rate": 8.568443635602202e-06, "loss": 0.002, "step": 78830 }, { "epoch": 0.6447233920758882, "grad_norm": 0.02993275225162506, "learning_rate": 8.567943726605066e-06, "loss": 0.0009, "step": 78840 }, { "epoch": 0.6448051682544874, "grad_norm": 0.11034604907035828, "learning_rate": 8.567443744926017e-06, "loss": 0.0023, "step": 78850 }, { "epoch": 0.6448869444330867, "grad_norm": 0.007599068805575371, "learning_rate": 8.56694369057524e-06, "loss": 0.0013, "step": 78860 }, { "epoch": 0.6449687206116859, "grad_norm": 0.11611545830965042, "learning_rate": 8.56644356356292e-06, "loss": 0.0021, "step": 78870 }, { "epoch": 0.645050496790285, "grad_norm": 0.01292154286056757, "learning_rate": 8.565943363899247e-06, "loss": 0.0023, "step": 78880 }, { "epoch": 0.6451322729688842, "grad_norm": 0.03801301494240761, "learning_rate": 8.565443091594408e-06, "loss": 0.0023, "step": 78890 }, { "epoch": 0.6452140491474834, "grad_norm": 0.08956050127744675, "learning_rate": 8.564942746658596e-06, "loss": 0.0025, "step": 78900 }, { "epoch": 0.6452958253260825, "grad_norm": 0.0697537511587143, "learning_rate": 8.564442329102e-06, "loss": 0.0009, "step": 78910 }, { "epoch": 0.6453776015046817, "grad_norm": 0.09027116000652313, "learning_rate": 8.56394183893482e-06, "loss": 0.0011, "step": 78920 }, { "epoch": 0.6454593776832809, "grad_norm": 0.061302050948143005, "learning_rate": 8.563441276167244e-06, "loss": 0.0018, "step": 78930 }, { "epoch": 0.64554115386188, "grad_norm": 0.07496769726276398, "learning_rate": 8.562940640809474e-06, "loss": 0.0026, "step": 78940 }, { "epoch": 0.6456229300404792, "grad_norm": 0.030272210016846657, "learning_rate": 8.56243993287171e-06, "loss": 0.0019, "step": 78950 }, { "epoch": 0.6457047062190784, "grad_norm": 0.21242749691009521, "learning_rate": 8.561939152364144e-06, "loss": 0.0032, "step": 78960 }, { "epoch": 0.6457864823976776, "grad_norm": 0.06502064317464828, "learning_rate": 8.561438299296986e-06, "loss": 0.0023, "step": 78970 }, { "epoch": 0.6458682585762767, "grad_norm": 0.06411971896886826, "learning_rate": 8.560937373680434e-06, "loss": 0.0021, "step": 78980 }, { "epoch": 0.6459500347548759, "grad_norm": 0.09314987063407898, "learning_rate": 8.560436375524692e-06, "loss": 0.0022, "step": 78990 }, { "epoch": 0.6460318109334751, "grad_norm": 0.09623738378286362, "learning_rate": 8.559935304839968e-06, "loss": 0.0016, "step": 79000 }, { "epoch": 0.6461135871120742, "grad_norm": 0.05514578893780708, "learning_rate": 8.559434161636468e-06, "loss": 0.0013, "step": 79010 }, { "epoch": 0.6461953632906734, "grad_norm": 0.056102048605680466, "learning_rate": 8.5589329459244e-06, "loss": 0.0023, "step": 79020 }, { "epoch": 0.6462771394692726, "grad_norm": 0.09727437049150467, "learning_rate": 8.558431657713978e-06, "loss": 0.0029, "step": 79030 }, { "epoch": 0.6463589156478717, "grad_norm": 0.06799698621034622, "learning_rate": 8.557930297015408e-06, "loss": 0.0019, "step": 79040 }, { "epoch": 0.6464406918264709, "grad_norm": 0.09334108978509903, "learning_rate": 8.557428863838906e-06, "loss": 0.0018, "step": 79050 }, { "epoch": 0.6465224680050701, "grad_norm": 0.020313680171966553, "learning_rate": 8.556927358194689e-06, "loss": 0.0049, "step": 79060 }, { "epoch": 0.6466042441836694, "grad_norm": 0.04220118373632431, "learning_rate": 8.556425780092967e-06, "loss": 0.0017, "step": 79070 }, { "epoch": 0.6466860203622685, "grad_norm": 0.026617005467414856, "learning_rate": 8.555924129543961e-06, "loss": 0.0017, "step": 79080 }, { "epoch": 0.6467677965408677, "grad_norm": 0.09275830537080765, "learning_rate": 8.555422406557893e-06, "loss": 0.0013, "step": 79090 }, { "epoch": 0.6468495727194669, "grad_norm": 0.06701748073101044, "learning_rate": 8.55492061114498e-06, "loss": 0.0017, "step": 79100 }, { "epoch": 0.646931348898066, "grad_norm": 0.024793291464447975, "learning_rate": 8.554418743315443e-06, "loss": 0.0011, "step": 79110 }, { "epoch": 0.6470131250766652, "grad_norm": 0.11349537968635559, "learning_rate": 8.553916803079508e-06, "loss": 0.0018, "step": 79120 }, { "epoch": 0.6470949012552644, "grad_norm": 0.08864240348339081, "learning_rate": 8.5534147904474e-06, "loss": 0.0038, "step": 79130 }, { "epoch": 0.6471766774338635, "grad_norm": 0.051956623792648315, "learning_rate": 8.552912705429342e-06, "loss": 0.0013, "step": 79140 }, { "epoch": 0.6472584536124627, "grad_norm": 0.12285918742418289, "learning_rate": 8.552410548035565e-06, "loss": 0.0023, "step": 79150 }, { "epoch": 0.6473402297910619, "grad_norm": 0.2760917544364929, "learning_rate": 8.551908318276298e-06, "loss": 0.002, "step": 79160 }, { "epoch": 0.647422005969661, "grad_norm": 0.19655339419841766, "learning_rate": 8.551406016161772e-06, "loss": 0.0027, "step": 79170 }, { "epoch": 0.6475037821482602, "grad_norm": 0.0637342557311058, "learning_rate": 8.550903641702218e-06, "loss": 0.0029, "step": 79180 }, { "epoch": 0.6475855583268594, "grad_norm": 0.03710860013961792, "learning_rate": 8.55040119490787e-06, "loss": 0.0022, "step": 79190 }, { "epoch": 0.6476673345054585, "grad_norm": 0.1684299111366272, "learning_rate": 8.549898675788964e-06, "loss": 0.0028, "step": 79200 }, { "epoch": 0.6477491106840577, "grad_norm": 0.17189493775367737, "learning_rate": 8.549396084355737e-06, "loss": 0.0025, "step": 79210 }, { "epoch": 0.6478308868626569, "grad_norm": 0.06338426470756531, "learning_rate": 8.548893420618426e-06, "loss": 0.0018, "step": 79220 }, { "epoch": 0.647912663041256, "grad_norm": 0.037029724568128586, "learning_rate": 8.548390684587272e-06, "loss": 0.0011, "step": 79230 }, { "epoch": 0.6479944392198552, "grad_norm": 0.060224827378988266, "learning_rate": 8.547887876272515e-06, "loss": 0.0011, "step": 79240 }, { "epoch": 0.6480762153984544, "grad_norm": 0.01894034631550312, "learning_rate": 8.547384995684397e-06, "loss": 0.0018, "step": 79250 }, { "epoch": 0.6481579915770536, "grad_norm": 0.006336531601846218, "learning_rate": 8.546882042833165e-06, "loss": 0.0021, "step": 79260 }, { "epoch": 0.6482397677556527, "grad_norm": 0.09958727657794952, "learning_rate": 8.546379017729063e-06, "loss": 0.0012, "step": 79270 }, { "epoch": 0.6483215439342519, "grad_norm": 0.047941070050001144, "learning_rate": 8.545875920382337e-06, "loss": 0.0009, "step": 79280 }, { "epoch": 0.6484033201128512, "grad_norm": 0.04258527606725693, "learning_rate": 8.545372750803236e-06, "loss": 0.0019, "step": 79290 }, { "epoch": 0.6484850962914503, "grad_norm": 0.13063152134418488, "learning_rate": 8.544869509002009e-06, "loss": 0.0019, "step": 79300 }, { "epoch": 0.6485668724700495, "grad_norm": 0.04614265263080597, "learning_rate": 8.544366194988912e-06, "loss": 0.0017, "step": 79310 }, { "epoch": 0.6486486486486487, "grad_norm": 0.07909367233514786, "learning_rate": 8.543862808774193e-06, "loss": 0.0018, "step": 79320 }, { "epoch": 0.6487304248272479, "grad_norm": 0.019356757402420044, "learning_rate": 8.543359350368107e-06, "loss": 0.002, "step": 79330 }, { "epoch": 0.648812201005847, "grad_norm": 0.015218731947243214, "learning_rate": 8.542855819780912e-06, "loss": 0.0027, "step": 79340 }, { "epoch": 0.6488939771844462, "grad_norm": 0.028534170240163803, "learning_rate": 8.542352217022863e-06, "loss": 0.0024, "step": 79350 }, { "epoch": 0.6489757533630454, "grad_norm": 0.08838421106338501, "learning_rate": 8.54184854210422e-06, "loss": 0.0018, "step": 79360 }, { "epoch": 0.6490575295416445, "grad_norm": 0.057840801775455475, "learning_rate": 8.541344795035244e-06, "loss": 0.0023, "step": 79370 }, { "epoch": 0.6491393057202437, "grad_norm": 0.11122019588947296, "learning_rate": 8.540840975826196e-06, "loss": 0.0012, "step": 79380 }, { "epoch": 0.6492210818988429, "grad_norm": 0.04398214817047119, "learning_rate": 8.54033708448734e-06, "loss": 0.0013, "step": 79390 }, { "epoch": 0.649302858077442, "grad_norm": 0.07810699194669724, "learning_rate": 8.539833121028939e-06, "loss": 0.0019, "step": 79400 }, { "epoch": 0.6493846342560412, "grad_norm": 0.0305193942040205, "learning_rate": 8.539329085461261e-06, "loss": 0.0014, "step": 79410 }, { "epoch": 0.6494664104346404, "grad_norm": 0.1290150284767151, "learning_rate": 8.538824977794571e-06, "loss": 0.002, "step": 79420 }, { "epoch": 0.6495481866132395, "grad_norm": 0.11852607876062393, "learning_rate": 8.538320798039141e-06, "loss": 0.0022, "step": 79430 }, { "epoch": 0.6496299627918387, "grad_norm": 0.05026311054825783, "learning_rate": 8.537816546205239e-06, "loss": 0.0022, "step": 79440 }, { "epoch": 0.6497117389704379, "grad_norm": 0.15163850784301758, "learning_rate": 8.537312222303138e-06, "loss": 0.0023, "step": 79450 }, { "epoch": 0.649793515149037, "grad_norm": 0.056134868413209915, "learning_rate": 8.536807826343114e-06, "loss": 0.0015, "step": 79460 }, { "epoch": 0.6498752913276362, "grad_norm": 0.05138497054576874, "learning_rate": 8.536303358335438e-06, "loss": 0.0024, "step": 79470 }, { "epoch": 0.6499570675062354, "grad_norm": 0.07545805722475052, "learning_rate": 8.535798818290389e-06, "loss": 0.0031, "step": 79480 }, { "epoch": 0.6500388436848346, "grad_norm": 0.034751273691654205, "learning_rate": 8.535294206218241e-06, "loss": 0.0016, "step": 79490 }, { "epoch": 0.6501206198634338, "grad_norm": 0.03699487820267677, "learning_rate": 8.53478952212928e-06, "loss": 0.003, "step": 79500 }, { "epoch": 0.650202396042033, "grad_norm": 0.04163135215640068, "learning_rate": 8.534284766033782e-06, "loss": 0.002, "step": 79510 }, { "epoch": 0.6502841722206322, "grad_norm": 0.03183155506849289, "learning_rate": 8.533779937942031e-06, "loss": 0.0018, "step": 79520 }, { "epoch": 0.6503659483992313, "grad_norm": 0.04226463660597801, "learning_rate": 8.533275037864309e-06, "loss": 0.0013, "step": 79530 }, { "epoch": 0.6504477245778305, "grad_norm": 0.007170555647462606, "learning_rate": 8.532770065810904e-06, "loss": 0.0014, "step": 79540 }, { "epoch": 0.6505295007564297, "grad_norm": 0.08437436819076538, "learning_rate": 8.532265021792102e-06, "loss": 0.0019, "step": 79550 }, { "epoch": 0.6506112769350288, "grad_norm": 0.04032830893993378, "learning_rate": 8.531759905818187e-06, "loss": 0.0016, "step": 79560 }, { "epoch": 0.650693053113628, "grad_norm": 0.043909989297389984, "learning_rate": 8.531254717899452e-06, "loss": 0.0009, "step": 79570 }, { "epoch": 0.6507748292922272, "grad_norm": 0.41170358657836914, "learning_rate": 8.530749458046192e-06, "loss": 0.0008, "step": 79580 }, { "epoch": 0.6508566054708264, "grad_norm": 0.07485253363847733, "learning_rate": 8.530244126268693e-06, "loss": 0.0016, "step": 79590 }, { "epoch": 0.6509383816494255, "grad_norm": 0.02926638349890709, "learning_rate": 8.529738722577252e-06, "loss": 0.0019, "step": 79600 }, { "epoch": 0.6510201578280247, "grad_norm": 0.15309934318065643, "learning_rate": 8.529233246982164e-06, "loss": 0.0014, "step": 79610 }, { "epoch": 0.6511019340066239, "grad_norm": 0.06383680552244186, "learning_rate": 8.528727699493727e-06, "loss": 0.0021, "step": 79620 }, { "epoch": 0.651183710185223, "grad_norm": 0.04196113348007202, "learning_rate": 8.528222080122237e-06, "loss": 0.0016, "step": 79630 }, { "epoch": 0.6512654863638222, "grad_norm": 0.043276380747556686, "learning_rate": 8.527716388877996e-06, "loss": 0.0015, "step": 79640 }, { "epoch": 0.6513472625424214, "grad_norm": 0.09102075546979904, "learning_rate": 8.527210625771305e-06, "loss": 0.0015, "step": 79650 }, { "epoch": 0.6514290387210205, "grad_norm": 0.07746507972478867, "learning_rate": 8.526704790812467e-06, "loss": 0.0025, "step": 79660 }, { "epoch": 0.6515108148996197, "grad_norm": 0.15654487907886505, "learning_rate": 8.526198884011784e-06, "loss": 0.0036, "step": 79670 }, { "epoch": 0.6515925910782189, "grad_norm": 0.17772196233272552, "learning_rate": 8.525692905379565e-06, "loss": 0.0023, "step": 79680 }, { "epoch": 0.651674367256818, "grad_norm": 0.059795599430799484, "learning_rate": 8.525186854926115e-06, "loss": 0.0025, "step": 79690 }, { "epoch": 0.6517561434354172, "grad_norm": 0.62898850440979, "learning_rate": 8.524680732661744e-06, "loss": 0.0014, "step": 79700 }, { "epoch": 0.6518379196140165, "grad_norm": 0.09546003490686417, "learning_rate": 8.524174538596761e-06, "loss": 0.0019, "step": 79710 }, { "epoch": 0.6519196957926157, "grad_norm": 0.011372959241271019, "learning_rate": 8.523668272741477e-06, "loss": 0.0027, "step": 79720 }, { "epoch": 0.6520014719712148, "grad_norm": 0.8056721091270447, "learning_rate": 8.523161935106208e-06, "loss": 0.0015, "step": 79730 }, { "epoch": 0.652083248149814, "grad_norm": 0.030744513496756554, "learning_rate": 8.522655525701267e-06, "loss": 0.002, "step": 79740 }, { "epoch": 0.6521650243284132, "grad_norm": 0.1160031110048294, "learning_rate": 8.522149044536967e-06, "loss": 0.0015, "step": 79750 }, { "epoch": 0.6522468005070123, "grad_norm": 0.03555198758840561, "learning_rate": 8.52164249162363e-06, "loss": 0.0016, "step": 79760 }, { "epoch": 0.6523285766856115, "grad_norm": 0.1657080501317978, "learning_rate": 8.521135866971572e-06, "loss": 0.0022, "step": 79770 }, { "epoch": 0.6524103528642107, "grad_norm": 0.041213508695364, "learning_rate": 8.520629170591115e-06, "loss": 0.0018, "step": 79780 }, { "epoch": 0.6524921290428098, "grad_norm": 0.044564977288246155, "learning_rate": 8.520122402492579e-06, "loss": 0.002, "step": 79790 }, { "epoch": 0.652573905221409, "grad_norm": 0.034859251230955124, "learning_rate": 8.519615562686289e-06, "loss": 0.0014, "step": 79800 }, { "epoch": 0.6526556814000082, "grad_norm": 0.06439337134361267, "learning_rate": 8.519108651182568e-06, "loss": 0.0025, "step": 79810 }, { "epoch": 0.6527374575786073, "grad_norm": 0.10560561716556549, "learning_rate": 8.518601667991745e-06, "loss": 0.0017, "step": 79820 }, { "epoch": 0.6528192337572065, "grad_norm": 0.07985740900039673, "learning_rate": 8.518094613124146e-06, "loss": 0.0012, "step": 79830 }, { "epoch": 0.6529010099358057, "grad_norm": 0.09143849462270737, "learning_rate": 8.517587486590098e-06, "loss": 0.002, "step": 79840 }, { "epoch": 0.6529827861144049, "grad_norm": 0.45462271571159363, "learning_rate": 8.517080288399937e-06, "loss": 0.0019, "step": 79850 }, { "epoch": 0.653064562293004, "grad_norm": 0.26246315240859985, "learning_rate": 8.51657301856399e-06, "loss": 0.0031, "step": 79860 }, { "epoch": 0.6531463384716032, "grad_norm": 0.036196354776620865, "learning_rate": 8.51606567709259e-06, "loss": 0.0017, "step": 79870 }, { "epoch": 0.6532281146502024, "grad_norm": 0.00144849659409374, "learning_rate": 8.515558263996077e-06, "loss": 0.0019, "step": 79880 }, { "epoch": 0.6533098908288015, "grad_norm": 0.05781039968132973, "learning_rate": 8.515050779284783e-06, "loss": 0.0022, "step": 79890 }, { "epoch": 0.6533916670074007, "grad_norm": 0.052989158779382706, "learning_rate": 8.514543222969049e-06, "loss": 0.0026, "step": 79900 }, { "epoch": 0.6534734431859999, "grad_norm": 0.07390139251947403, "learning_rate": 8.514035595059212e-06, "loss": 0.0037, "step": 79910 }, { "epoch": 0.653555219364599, "grad_norm": 0.027415720745921135, "learning_rate": 8.513527895565614e-06, "loss": 0.0029, "step": 79920 }, { "epoch": 0.6536369955431983, "grad_norm": 0.08317063003778458, "learning_rate": 8.513020124498596e-06, "loss": 0.0021, "step": 79930 }, { "epoch": 0.6537187717217975, "grad_norm": 0.18277151882648468, "learning_rate": 8.512512281868502e-06, "loss": 0.0028, "step": 79940 }, { "epoch": 0.6538005479003967, "grad_norm": 0.1398133784532547, "learning_rate": 8.51200436768568e-06, "loss": 0.002, "step": 79950 }, { "epoch": 0.6538823240789958, "grad_norm": 0.03716379031538963, "learning_rate": 8.511496381960472e-06, "loss": 0.002, "step": 79960 }, { "epoch": 0.653964100257595, "grad_norm": 0.21610501408576965, "learning_rate": 8.510988324703229e-06, "loss": 0.003, "step": 79970 }, { "epoch": 0.6540458764361942, "grad_norm": 0.018137194216251373, "learning_rate": 8.5104801959243e-06, "loss": 0.0019, "step": 79980 }, { "epoch": 0.6541276526147933, "grad_norm": 0.02918345294892788, "learning_rate": 8.509971995634036e-06, "loss": 0.0027, "step": 79990 }, { "epoch": 0.6542094287933925, "grad_norm": 0.10597653687000275, "learning_rate": 8.50946372384279e-06, "loss": 0.0019, "step": 80000 }, { "epoch": 0.6542912049719917, "grad_norm": 0.2255156934261322, "learning_rate": 8.508955380560917e-06, "loss": 0.0022, "step": 80010 }, { "epoch": 0.6543729811505908, "grad_norm": 0.18279889225959778, "learning_rate": 8.508446965798766e-06, "loss": 0.0026, "step": 80020 }, { "epoch": 0.65445475732919, "grad_norm": 0.02700858935713768, "learning_rate": 8.507938479566701e-06, "loss": 0.0018, "step": 80030 }, { "epoch": 0.6545365335077892, "grad_norm": 0.05476941540837288, "learning_rate": 8.50742992187508e-06, "loss": 0.003, "step": 80040 }, { "epoch": 0.6546183096863883, "grad_norm": 0.09525693953037262, "learning_rate": 8.506921292734259e-06, "loss": 0.0021, "step": 80050 }, { "epoch": 0.6547000858649875, "grad_norm": 0.14538097381591797, "learning_rate": 8.506412592154597e-06, "loss": 0.0027, "step": 80060 }, { "epoch": 0.6547818620435867, "grad_norm": 0.11967476457357407, "learning_rate": 8.505903820146466e-06, "loss": 0.0028, "step": 80070 }, { "epoch": 0.6548636382221859, "grad_norm": 0.08813796192407608, "learning_rate": 8.50539497672022e-06, "loss": 0.0016, "step": 80080 }, { "epoch": 0.654945414400785, "grad_norm": 0.08151517808437347, "learning_rate": 8.504886061886232e-06, "loss": 0.0023, "step": 80090 }, { "epoch": 0.6550271905793842, "grad_norm": 0.026870619505643845, "learning_rate": 8.504377075654863e-06, "loss": 0.0011, "step": 80100 }, { "epoch": 0.6551089667579834, "grad_norm": 0.034786470234394073, "learning_rate": 8.503868018036487e-06, "loss": 0.0012, "step": 80110 }, { "epoch": 0.6551907429365825, "grad_norm": 0.02935328707098961, "learning_rate": 8.50335888904147e-06, "loss": 0.0023, "step": 80120 }, { "epoch": 0.6552725191151817, "grad_norm": 0.03658623993396759, "learning_rate": 8.502849688680187e-06, "loss": 0.0019, "step": 80130 }, { "epoch": 0.655354295293781, "grad_norm": 0.0244169719517231, "learning_rate": 8.502340416963006e-06, "loss": 0.0032, "step": 80140 }, { "epoch": 0.6554360714723801, "grad_norm": 0.020276742056012154, "learning_rate": 8.501831073900306e-06, "loss": 0.0011, "step": 80150 }, { "epoch": 0.6555178476509793, "grad_norm": 0.11390333622694016, "learning_rate": 8.50132165950246e-06, "loss": 0.0019, "step": 80160 }, { "epoch": 0.6555996238295785, "grad_norm": 0.0993146076798439, "learning_rate": 8.500812173779844e-06, "loss": 0.0013, "step": 80170 }, { "epoch": 0.6556814000081777, "grad_norm": 0.03464556857943535, "learning_rate": 8.50030261674284e-06, "loss": 0.0021, "step": 80180 }, { "epoch": 0.6557631761867768, "grad_norm": 0.019709551706910133, "learning_rate": 8.499792988401827e-06, "loss": 0.0033, "step": 80190 }, { "epoch": 0.655844952365376, "grad_norm": 0.14544863998889923, "learning_rate": 8.499283288767184e-06, "loss": 0.0017, "step": 80200 }, { "epoch": 0.6559267285439752, "grad_norm": 0.2099270075559616, "learning_rate": 8.498773517849298e-06, "loss": 0.0022, "step": 80210 }, { "epoch": 0.6560085047225743, "grad_norm": 0.04251116141676903, "learning_rate": 8.498263675658551e-06, "loss": 0.0026, "step": 80220 }, { "epoch": 0.6560902809011735, "grad_norm": 0.08720063418149948, "learning_rate": 8.49775376220533e-06, "loss": 0.002, "step": 80230 }, { "epoch": 0.6561720570797727, "grad_norm": 0.07995638251304626, "learning_rate": 8.49724377750002e-06, "loss": 0.002, "step": 80240 }, { "epoch": 0.6562538332583718, "grad_norm": 0.06001163646578789, "learning_rate": 8.496733721553012e-06, "loss": 0.0028, "step": 80250 }, { "epoch": 0.656335609436971, "grad_norm": 0.0419606938958168, "learning_rate": 8.496223594374698e-06, "loss": 0.0012, "step": 80260 }, { "epoch": 0.6564173856155702, "grad_norm": 0.06975610554218292, "learning_rate": 8.495713395975465e-06, "loss": 0.0036, "step": 80270 }, { "epoch": 0.6564991617941693, "grad_norm": 0.025435473769903183, "learning_rate": 8.495203126365709e-06, "loss": 0.0008, "step": 80280 }, { "epoch": 0.6565809379727685, "grad_norm": 0.07954700291156769, "learning_rate": 8.494692785555824e-06, "loss": 0.0025, "step": 80290 }, { "epoch": 0.6566627141513677, "grad_norm": 0.015499178320169449, "learning_rate": 8.494182373556209e-06, "loss": 0.0015, "step": 80300 }, { "epoch": 0.6567444903299668, "grad_norm": 0.11807666718959808, "learning_rate": 8.493671890377255e-06, "loss": 0.0032, "step": 80310 }, { "epoch": 0.656826266508566, "grad_norm": 0.06625397503376007, "learning_rate": 8.493161336029368e-06, "loss": 0.0027, "step": 80320 }, { "epoch": 0.6569080426871652, "grad_norm": 0.11415378004312515, "learning_rate": 8.492650710522943e-06, "loss": 0.0042, "step": 80330 }, { "epoch": 0.6569898188657644, "grad_norm": 0.0948428064584732, "learning_rate": 8.492140013868385e-06, "loss": 0.0017, "step": 80340 }, { "epoch": 0.6570715950443636, "grad_norm": 0.11544478684663773, "learning_rate": 8.491629246076096e-06, "loss": 0.0048, "step": 80350 }, { "epoch": 0.6571533712229628, "grad_norm": 0.21846553683280945, "learning_rate": 8.491118407156483e-06, "loss": 0.002, "step": 80360 }, { "epoch": 0.657235147401562, "grad_norm": 0.044428110122680664, "learning_rate": 8.490607497119948e-06, "loss": 0.0023, "step": 80370 }, { "epoch": 0.6573169235801611, "grad_norm": 0.09940103441476822, "learning_rate": 8.4900965159769e-06, "loss": 0.0039, "step": 80380 }, { "epoch": 0.6573986997587603, "grad_norm": 0.24434566497802734, "learning_rate": 8.489585463737753e-06, "loss": 0.0032, "step": 80390 }, { "epoch": 0.6574804759373595, "grad_norm": 0.012291161343455315, "learning_rate": 8.489074340412911e-06, "loss": 0.0017, "step": 80400 }, { "epoch": 0.6575622521159586, "grad_norm": 0.092256098985672, "learning_rate": 8.488563146012789e-06, "loss": 0.0022, "step": 80410 }, { "epoch": 0.6576440282945578, "grad_norm": 0.0739920511841774, "learning_rate": 8.4880518805478e-06, "loss": 0.0023, "step": 80420 }, { "epoch": 0.657725804473157, "grad_norm": 0.03233455866575241, "learning_rate": 8.487540544028358e-06, "loss": 0.0015, "step": 80430 }, { "epoch": 0.6578075806517562, "grad_norm": 0.12101846188306808, "learning_rate": 8.487029136464881e-06, "loss": 0.002, "step": 80440 }, { "epoch": 0.6578893568303553, "grad_norm": 0.09807676076889038, "learning_rate": 8.486517657867787e-06, "loss": 0.0022, "step": 80450 }, { "epoch": 0.6579711330089545, "grad_norm": 0.0746883675456047, "learning_rate": 8.486006108247495e-06, "loss": 0.0018, "step": 80460 }, { "epoch": 0.6580529091875537, "grad_norm": 0.046293728053569794, "learning_rate": 8.485494487614424e-06, "loss": 0.0027, "step": 80470 }, { "epoch": 0.6581346853661528, "grad_norm": 0.1348997801542282, "learning_rate": 8.484982795978997e-06, "loss": 0.0032, "step": 80480 }, { "epoch": 0.658216461544752, "grad_norm": 0.14484520256519318, "learning_rate": 8.484471033351638e-06, "loss": 0.0027, "step": 80490 }, { "epoch": 0.6582982377233512, "grad_norm": 0.14055022597312927, "learning_rate": 8.483959199742771e-06, "loss": 0.002, "step": 80500 }, { "epoch": 0.6583800139019503, "grad_norm": 0.021730994805693626, "learning_rate": 8.483447295162823e-06, "loss": 0.004, "step": 80510 }, { "epoch": 0.6584617900805495, "grad_norm": 0.13970538973808289, "learning_rate": 8.482935319622223e-06, "loss": 0.0012, "step": 80520 }, { "epoch": 0.6585435662591487, "grad_norm": 0.051791202276945114, "learning_rate": 8.4824232731314e-06, "loss": 0.001, "step": 80530 }, { "epoch": 0.6586253424377478, "grad_norm": 0.0769951269030571, "learning_rate": 8.48191115570078e-06, "loss": 0.0023, "step": 80540 }, { "epoch": 0.658707118616347, "grad_norm": 0.09914702922105789, "learning_rate": 8.481398967340805e-06, "loss": 0.0025, "step": 80550 }, { "epoch": 0.6587888947949462, "grad_norm": 0.016465123742818832, "learning_rate": 8.480886708061902e-06, "loss": 0.0023, "step": 80560 }, { "epoch": 0.6588706709735455, "grad_norm": 0.05345834791660309, "learning_rate": 8.480374377874505e-06, "loss": 0.002, "step": 80570 }, { "epoch": 0.6589524471521446, "grad_norm": 0.036490414291620255, "learning_rate": 8.479861976789054e-06, "loss": 0.001, "step": 80580 }, { "epoch": 0.6590342233307438, "grad_norm": 0.016851186752319336, "learning_rate": 8.479349504815988e-06, "loss": 0.0014, "step": 80590 }, { "epoch": 0.659115999509343, "grad_norm": 0.04037335142493248, "learning_rate": 8.478836961965741e-06, "loss": 0.0013, "step": 80600 }, { "epoch": 0.6591977756879421, "grad_norm": 0.03463957831263542, "learning_rate": 8.47832434824876e-06, "loss": 0.0019, "step": 80610 }, { "epoch": 0.6592795518665413, "grad_norm": 0.18237513303756714, "learning_rate": 8.477811663675483e-06, "loss": 0.0027, "step": 80620 }, { "epoch": 0.6593613280451405, "grad_norm": 0.04881620407104492, "learning_rate": 8.477298908256357e-06, "loss": 0.0016, "step": 80630 }, { "epoch": 0.6594431042237396, "grad_norm": 0.07398522645235062, "learning_rate": 8.476786082001827e-06, "loss": 0.0014, "step": 80640 }, { "epoch": 0.6595248804023388, "grad_norm": 0.17204152047634125, "learning_rate": 8.476273184922336e-06, "loss": 0.0019, "step": 80650 }, { "epoch": 0.659606656580938, "grad_norm": 0.021775053814053535, "learning_rate": 8.475760217028335e-06, "loss": 0.0008, "step": 80660 }, { "epoch": 0.6596884327595371, "grad_norm": 0.044357482343912125, "learning_rate": 8.475247178330273e-06, "loss": 0.0011, "step": 80670 }, { "epoch": 0.6597702089381363, "grad_norm": 0.0436583086848259, "learning_rate": 8.474734068838601e-06, "loss": 0.0015, "step": 80680 }, { "epoch": 0.6598519851167355, "grad_norm": 0.04073767736554146, "learning_rate": 8.474220888563773e-06, "loss": 0.001, "step": 80690 }, { "epoch": 0.6599337612953347, "grad_norm": 0.08121314644813538, "learning_rate": 8.47370763751624e-06, "loss": 0.0016, "step": 80700 }, { "epoch": 0.6600155374739338, "grad_norm": 0.04856160283088684, "learning_rate": 8.47319431570646e-06, "loss": 0.0012, "step": 80710 }, { "epoch": 0.660097313652533, "grad_norm": 0.037100836634635925, "learning_rate": 8.472680923144886e-06, "loss": 0.0023, "step": 80720 }, { "epoch": 0.6601790898311322, "grad_norm": 0.025325197726488113, "learning_rate": 8.472167459841983e-06, "loss": 0.0009, "step": 80730 }, { "epoch": 0.6602608660097313, "grad_norm": 0.03519592806696892, "learning_rate": 8.471653925808203e-06, "loss": 0.0013, "step": 80740 }, { "epoch": 0.6603426421883305, "grad_norm": 0.10248788446187973, "learning_rate": 8.471140321054012e-06, "loss": 0.0016, "step": 80750 }, { "epoch": 0.6604244183669297, "grad_norm": 0.07481393218040466, "learning_rate": 8.470626645589872e-06, "loss": 0.0017, "step": 80760 }, { "epoch": 0.6605061945455288, "grad_norm": 0.03642197325825691, "learning_rate": 8.470112899426246e-06, "loss": 0.0016, "step": 80770 }, { "epoch": 0.6605879707241281, "grad_norm": 0.36769962310791016, "learning_rate": 8.469599082573598e-06, "loss": 0.0044, "step": 80780 }, { "epoch": 0.6606697469027273, "grad_norm": 0.030559198930859566, "learning_rate": 8.469085195042398e-06, "loss": 0.0016, "step": 80790 }, { "epoch": 0.6607515230813265, "grad_norm": 0.07644118368625641, "learning_rate": 8.468571236843112e-06, "loss": 0.0026, "step": 80800 }, { "epoch": 0.6608332992599256, "grad_norm": 0.07032135874032974, "learning_rate": 8.468057207986211e-06, "loss": 0.002, "step": 80810 }, { "epoch": 0.6609150754385248, "grad_norm": 0.046033989638090134, "learning_rate": 8.467543108482165e-06, "loss": 0.012, "step": 80820 }, { "epoch": 0.660996851617124, "grad_norm": 0.05962425842881203, "learning_rate": 8.467028938341448e-06, "loss": 0.0021, "step": 80830 }, { "epoch": 0.6610786277957231, "grad_norm": 0.0539470911026001, "learning_rate": 8.466514697574532e-06, "loss": 0.0009, "step": 80840 }, { "epoch": 0.6611604039743223, "grad_norm": 0.026199480518698692, "learning_rate": 8.466000386191896e-06, "loss": 0.0016, "step": 80850 }, { "epoch": 0.6612421801529215, "grad_norm": 0.023790040984749794, "learning_rate": 8.465486004204013e-06, "loss": 0.0023, "step": 80860 }, { "epoch": 0.6613239563315206, "grad_norm": 0.12970972061157227, "learning_rate": 8.464971551621364e-06, "loss": 0.0027, "step": 80870 }, { "epoch": 0.6614057325101198, "grad_norm": 0.04314045608043671, "learning_rate": 8.464457028454428e-06, "loss": 0.002, "step": 80880 }, { "epoch": 0.661487508688719, "grad_norm": 0.061359088867902756, "learning_rate": 8.463942434713687e-06, "loss": 0.0022, "step": 80890 }, { "epoch": 0.6615692848673181, "grad_norm": 0.03320452570915222, "learning_rate": 8.463427770409622e-06, "loss": 0.0025, "step": 80900 }, { "epoch": 0.6616510610459173, "grad_norm": 0.03725409507751465, "learning_rate": 8.462913035552718e-06, "loss": 0.0012, "step": 80910 }, { "epoch": 0.6617328372245165, "grad_norm": 0.047175560146570206, "learning_rate": 8.462398230153461e-06, "loss": 0.0018, "step": 80920 }, { "epoch": 0.6618146134031156, "grad_norm": 0.045577436685562134, "learning_rate": 8.461883354222337e-06, "loss": 0.0022, "step": 80930 }, { "epoch": 0.6618963895817148, "grad_norm": 0.04977865889668465, "learning_rate": 8.461368407769837e-06, "loss": 0.0018, "step": 80940 }, { "epoch": 0.661978165760314, "grad_norm": 0.06848374754190445, "learning_rate": 8.460853390806447e-06, "loss": 0.0022, "step": 80950 }, { "epoch": 0.6620599419389132, "grad_norm": 0.16687040030956268, "learning_rate": 8.460338303342661e-06, "loss": 0.0016, "step": 80960 }, { "epoch": 0.6621417181175123, "grad_norm": 0.17263439297676086, "learning_rate": 8.459823145388973e-06, "loss": 0.0021, "step": 80970 }, { "epoch": 0.6622234942961115, "grad_norm": 0.029919838532805443, "learning_rate": 8.459307916955872e-06, "loss": 0.0028, "step": 80980 }, { "epoch": 0.6623052704747108, "grad_norm": 0.015457085333764553, "learning_rate": 8.458792618053859e-06, "loss": 0.0015, "step": 80990 }, { "epoch": 0.6623870466533099, "grad_norm": 0.20006106793880463, "learning_rate": 8.458277248693427e-06, "loss": 0.0018, "step": 81000 }, { "epoch": 0.6624688228319091, "grad_norm": 0.2486630082130432, "learning_rate": 8.45776180888508e-06, "loss": 0.0026, "step": 81010 }, { "epoch": 0.6625505990105083, "grad_norm": 0.012940788641571999, "learning_rate": 8.45724629863931e-06, "loss": 0.002, "step": 81020 }, { "epoch": 0.6626323751891074, "grad_norm": 0.12335088104009628, "learning_rate": 8.456730717966626e-06, "loss": 0.0022, "step": 81030 }, { "epoch": 0.6627141513677066, "grad_norm": 0.044176481664180756, "learning_rate": 8.456215066877525e-06, "loss": 0.0026, "step": 81040 }, { "epoch": 0.6627959275463058, "grad_norm": 0.1472204029560089, "learning_rate": 8.455699345382516e-06, "loss": 0.0036, "step": 81050 }, { "epoch": 0.662877703724905, "grad_norm": 0.049761369824409485, "learning_rate": 8.4551835534921e-06, "loss": 0.0012, "step": 81060 }, { "epoch": 0.6629594799035041, "grad_norm": 0.0725027397274971, "learning_rate": 8.454667691216788e-06, "loss": 0.0024, "step": 81070 }, { "epoch": 0.6630412560821033, "grad_norm": 0.10057602822780609, "learning_rate": 8.454151758567087e-06, "loss": 0.002, "step": 81080 }, { "epoch": 0.6631230322607025, "grad_norm": 0.05508827045559883, "learning_rate": 8.453635755553506e-06, "loss": 0.0018, "step": 81090 }, { "epoch": 0.6632048084393016, "grad_norm": 0.07097624987363815, "learning_rate": 8.453119682186559e-06, "loss": 0.0015, "step": 81100 }, { "epoch": 0.6632865846179008, "grad_norm": 0.03563152253627777, "learning_rate": 8.452603538476755e-06, "loss": 0.0013, "step": 81110 }, { "epoch": 0.6633683607965, "grad_norm": 0.11541667580604553, "learning_rate": 8.452087324434613e-06, "loss": 0.002, "step": 81120 }, { "epoch": 0.6634501369750991, "grad_norm": 0.04923567175865173, "learning_rate": 8.451571040070645e-06, "loss": 0.0017, "step": 81130 }, { "epoch": 0.6635319131536983, "grad_norm": 0.05687883123755455, "learning_rate": 8.45105468539537e-06, "loss": 0.0028, "step": 81140 }, { "epoch": 0.6636136893322975, "grad_norm": 0.06942732632160187, "learning_rate": 8.450538260419306e-06, "loss": 0.0013, "step": 81150 }, { "epoch": 0.6636954655108966, "grad_norm": 0.0610496811568737, "learning_rate": 8.450021765152972e-06, "loss": 0.0025, "step": 81160 }, { "epoch": 0.6637772416894958, "grad_norm": 0.11101267486810684, "learning_rate": 8.449505199606892e-06, "loss": 0.0025, "step": 81170 }, { "epoch": 0.663859017868095, "grad_norm": 0.02741037681698799, "learning_rate": 8.448988563791585e-06, "loss": 0.002, "step": 81180 }, { "epoch": 0.6639407940466941, "grad_norm": 0.06537333875894547, "learning_rate": 8.448471857717578e-06, "loss": 0.0042, "step": 81190 }, { "epoch": 0.6640225702252933, "grad_norm": 0.09449082612991333, "learning_rate": 8.447955081395395e-06, "loss": 0.0017, "step": 81200 }, { "epoch": 0.6641043464038926, "grad_norm": 0.037645746022462845, "learning_rate": 8.447438234835567e-06, "loss": 0.001, "step": 81210 }, { "epoch": 0.6641861225824918, "grad_norm": 0.031282175332307816, "learning_rate": 8.446921318048618e-06, "loss": 0.0014, "step": 81220 }, { "epoch": 0.6642678987610909, "grad_norm": 0.03134709224104881, "learning_rate": 8.446404331045082e-06, "loss": 0.0011, "step": 81230 }, { "epoch": 0.6643496749396901, "grad_norm": 0.06522492319345474, "learning_rate": 8.445887273835487e-06, "loss": 0.0011, "step": 81240 }, { "epoch": 0.6644314511182893, "grad_norm": 0.30909737944602966, "learning_rate": 8.445370146430367e-06, "loss": 0.0025, "step": 81250 }, { "epoch": 0.6645132272968884, "grad_norm": 0.04214978218078613, "learning_rate": 8.444852948840256e-06, "loss": 0.0015, "step": 81260 }, { "epoch": 0.6645950034754876, "grad_norm": 0.08441374450922012, "learning_rate": 8.444335681075693e-06, "loss": 0.002, "step": 81270 }, { "epoch": 0.6646767796540868, "grad_norm": 0.03572649136185646, "learning_rate": 8.44381834314721e-06, "loss": 0.0019, "step": 81280 }, { "epoch": 0.664758555832686, "grad_norm": 0.004865261260420084, "learning_rate": 8.443300935065348e-06, "loss": 0.0014, "step": 81290 }, { "epoch": 0.6648403320112851, "grad_norm": 0.07759090512990952, "learning_rate": 8.442783456840649e-06, "loss": 0.0015, "step": 81300 }, { "epoch": 0.6649221081898843, "grad_norm": 0.06999633461236954, "learning_rate": 8.44226590848365e-06, "loss": 0.0011, "step": 81310 }, { "epoch": 0.6650038843684835, "grad_norm": 0.01731288433074951, "learning_rate": 8.4417482900049e-06, "loss": 0.0017, "step": 81320 }, { "epoch": 0.6650856605470826, "grad_norm": 0.07857228815555573, "learning_rate": 8.441230601414938e-06, "loss": 0.0015, "step": 81330 }, { "epoch": 0.6651674367256818, "grad_norm": 0.10472352802753448, "learning_rate": 8.44071284272431e-06, "loss": 0.0015, "step": 81340 }, { "epoch": 0.665249212904281, "grad_norm": 0.037968363612890244, "learning_rate": 8.440195013943566e-06, "loss": 0.0015, "step": 81350 }, { "epoch": 0.6653309890828801, "grad_norm": 0.004371495917439461, "learning_rate": 8.439677115083253e-06, "loss": 0.0022, "step": 81360 }, { "epoch": 0.6654127652614793, "grad_norm": 0.15905535221099854, "learning_rate": 8.43915914615392e-06, "loss": 0.0036, "step": 81370 }, { "epoch": 0.6654945414400785, "grad_norm": 0.06953112035989761, "learning_rate": 8.438641107166124e-06, "loss": 0.0014, "step": 81380 }, { "epoch": 0.6655763176186776, "grad_norm": 0.10200865566730499, "learning_rate": 8.43812299813041e-06, "loss": 0.0035, "step": 81390 }, { "epoch": 0.6656580937972768, "grad_norm": 0.08523699641227722, "learning_rate": 8.437604819057336e-06, "loss": 0.002, "step": 81400 }, { "epoch": 0.665739869975876, "grad_norm": 0.061731062829494476, "learning_rate": 8.437086569957458e-06, "loss": 0.0039, "step": 81410 }, { "epoch": 0.6658216461544753, "grad_norm": 0.04080338031053543, "learning_rate": 8.436568250841332e-06, "loss": 0.0024, "step": 81420 }, { "epoch": 0.6659034223330744, "grad_norm": 0.029465271160006523, "learning_rate": 8.436049861719519e-06, "loss": 0.001, "step": 81430 }, { "epoch": 0.6659851985116736, "grad_norm": 0.05019529536366463, "learning_rate": 8.435531402602577e-06, "loss": 0.0009, "step": 81440 }, { "epoch": 0.6660669746902728, "grad_norm": 0.0487397238612175, "learning_rate": 8.435012873501068e-06, "loss": 0.0019, "step": 81450 }, { "epoch": 0.6661487508688719, "grad_norm": 0.13755792379379272, "learning_rate": 8.434494274425553e-06, "loss": 0.002, "step": 81460 }, { "epoch": 0.6662305270474711, "grad_norm": 0.021719222888350487, "learning_rate": 8.433975605386598e-06, "loss": 0.0018, "step": 81470 }, { "epoch": 0.6663123032260703, "grad_norm": 0.07798696309328079, "learning_rate": 8.43345686639477e-06, "loss": 0.0022, "step": 81480 }, { "epoch": 0.6663940794046694, "grad_norm": 0.02070929855108261, "learning_rate": 8.432938057460634e-06, "loss": 0.0015, "step": 81490 }, { "epoch": 0.6664758555832686, "grad_norm": 0.032197874039411545, "learning_rate": 8.43241917859476e-06, "loss": 0.0016, "step": 81500 }, { "epoch": 0.6665576317618678, "grad_norm": 0.05910487473011017, "learning_rate": 8.431900229807717e-06, "loss": 0.0014, "step": 81510 }, { "epoch": 0.6666394079404669, "grad_norm": 0.010736593045294285, "learning_rate": 8.431381211110077e-06, "loss": 0.0014, "step": 81520 }, { "epoch": 0.6667211841190661, "grad_norm": 0.08308403193950653, "learning_rate": 8.430862122512411e-06, "loss": 0.0012, "step": 81530 }, { "epoch": 0.6668029602976653, "grad_norm": 0.05543200299143791, "learning_rate": 8.430342964025298e-06, "loss": 0.0025, "step": 81540 }, { "epoch": 0.6668847364762645, "grad_norm": 0.11023391783237457, "learning_rate": 8.429823735659307e-06, "loss": 0.0026, "step": 81550 }, { "epoch": 0.6669665126548636, "grad_norm": 0.04311629384756088, "learning_rate": 8.42930443742502e-06, "loss": 0.0025, "step": 81560 }, { "epoch": 0.6670482888334628, "grad_norm": 0.05774684622883797, "learning_rate": 8.428785069333015e-06, "loss": 0.0022, "step": 81570 }, { "epoch": 0.667130065012062, "grad_norm": 0.015807513147592545, "learning_rate": 8.428265631393871e-06, "loss": 0.0026, "step": 81580 }, { "epoch": 0.6672118411906611, "grad_norm": 0.026994485408067703, "learning_rate": 8.427746123618168e-06, "loss": 0.0019, "step": 81590 }, { "epoch": 0.6672936173692603, "grad_norm": 0.10232943296432495, "learning_rate": 8.427226546016492e-06, "loss": 0.0014, "step": 81600 }, { "epoch": 0.6673753935478595, "grad_norm": 0.049668919295072556, "learning_rate": 8.426706898599424e-06, "loss": 0.0017, "step": 81610 }, { "epoch": 0.6674571697264586, "grad_norm": 0.1222880482673645, "learning_rate": 8.426187181377552e-06, "loss": 0.0019, "step": 81620 }, { "epoch": 0.6675389459050578, "grad_norm": 0.011159301735460758, "learning_rate": 8.42566739436146e-06, "loss": 0.0012, "step": 81630 }, { "epoch": 0.6676207220836571, "grad_norm": 0.021023714914917946, "learning_rate": 8.425147537561742e-06, "loss": 0.0016, "step": 81640 }, { "epoch": 0.6677024982622562, "grad_norm": 0.1046067550778389, "learning_rate": 8.424627610988983e-06, "loss": 0.0031, "step": 81650 }, { "epoch": 0.6677842744408554, "grad_norm": 0.08274616301059723, "learning_rate": 8.424107614653777e-06, "loss": 0.0016, "step": 81660 }, { "epoch": 0.6678660506194546, "grad_norm": 0.06025868281722069, "learning_rate": 8.423587548566714e-06, "loss": 0.0025, "step": 81670 }, { "epoch": 0.6679478267980538, "grad_norm": 0.1563042402267456, "learning_rate": 8.42306741273839e-06, "loss": 0.0019, "step": 81680 }, { "epoch": 0.6680296029766529, "grad_norm": 0.0539800301194191, "learning_rate": 8.4225472071794e-06, "loss": 0.0015, "step": 81690 }, { "epoch": 0.6681113791552521, "grad_norm": 0.1565849483013153, "learning_rate": 8.422026931900342e-06, "loss": 0.003, "step": 81700 }, { "epoch": 0.6681931553338513, "grad_norm": 0.023492369800806046, "learning_rate": 8.421506586911815e-06, "loss": 0.0013, "step": 81710 }, { "epoch": 0.6682749315124504, "grad_norm": 0.078373484313488, "learning_rate": 8.420986172224417e-06, "loss": 0.0033, "step": 81720 }, { "epoch": 0.6683567076910496, "grad_norm": 0.04847676306962967, "learning_rate": 8.420465687848749e-06, "loss": 0.0025, "step": 81730 }, { "epoch": 0.6684384838696488, "grad_norm": 0.011319665238261223, "learning_rate": 8.419945133795415e-06, "loss": 0.0018, "step": 81740 }, { "epoch": 0.6685202600482479, "grad_norm": 0.09445195645093918, "learning_rate": 8.41942451007502e-06, "loss": 0.0028, "step": 81750 }, { "epoch": 0.6686020362268471, "grad_norm": 0.04649718850851059, "learning_rate": 8.418903816698168e-06, "loss": 0.0014, "step": 81760 }, { "epoch": 0.6686838124054463, "grad_norm": 0.09533508121967316, "learning_rate": 8.418383053675464e-06, "loss": 0.0025, "step": 81770 }, { "epoch": 0.6687655885840454, "grad_norm": 0.10059326887130737, "learning_rate": 8.41786222101752e-06, "loss": 0.0043, "step": 81780 }, { "epoch": 0.6688473647626446, "grad_norm": 0.18367093801498413, "learning_rate": 8.417341318734944e-06, "loss": 0.0022, "step": 81790 }, { "epoch": 0.6689291409412438, "grad_norm": 0.030908336862921715, "learning_rate": 8.416820346838349e-06, "loss": 0.0014, "step": 81800 }, { "epoch": 0.669010917119843, "grad_norm": 0.02372274175286293, "learning_rate": 8.416299305338345e-06, "loss": 0.001, "step": 81810 }, { "epoch": 0.6690926932984421, "grad_norm": 0.021996987983584404, "learning_rate": 8.415778194245547e-06, "loss": 0.0021, "step": 81820 }, { "epoch": 0.6691744694770413, "grad_norm": 0.11088796705007553, "learning_rate": 8.415257013570572e-06, "loss": 0.002, "step": 81830 }, { "epoch": 0.6692562456556405, "grad_norm": 0.06991460919380188, "learning_rate": 8.414735763324035e-06, "loss": 0.0021, "step": 81840 }, { "epoch": 0.6693380218342397, "grad_norm": 0.05438147485256195, "learning_rate": 8.414214443516554e-06, "loss": 0.0025, "step": 81850 }, { "epoch": 0.6694197980128389, "grad_norm": 0.045101940631866455, "learning_rate": 8.413693054158751e-06, "loss": 0.0014, "step": 81860 }, { "epoch": 0.6695015741914381, "grad_norm": 0.030940737575292587, "learning_rate": 8.413171595261245e-06, "loss": 0.0025, "step": 81870 }, { "epoch": 0.6695833503700372, "grad_norm": 0.03995905816555023, "learning_rate": 8.412650066834658e-06, "loss": 0.002, "step": 81880 }, { "epoch": 0.6696651265486364, "grad_norm": 0.12706385552883148, "learning_rate": 8.412128468889618e-06, "loss": 0.003, "step": 81890 }, { "epoch": 0.6697469027272356, "grad_norm": 0.009915364906191826, "learning_rate": 8.411606801436746e-06, "loss": 0.0014, "step": 81900 }, { "epoch": 0.6698286789058348, "grad_norm": 0.07761058956384659, "learning_rate": 8.411085064486669e-06, "loss": 0.0014, "step": 81910 }, { "epoch": 0.6699104550844339, "grad_norm": 0.0642167329788208, "learning_rate": 8.410563258050018e-06, "loss": 0.0021, "step": 81920 }, { "epoch": 0.6699922312630331, "grad_norm": 0.07725320011377335, "learning_rate": 8.410041382137421e-06, "loss": 0.0014, "step": 81930 }, { "epoch": 0.6700740074416323, "grad_norm": 0.027981193736195564, "learning_rate": 8.40951943675951e-06, "loss": 0.0017, "step": 81940 }, { "epoch": 0.6701557836202314, "grad_norm": 0.06796437501907349, "learning_rate": 8.408997421926915e-06, "loss": 0.0015, "step": 81950 }, { "epoch": 0.6702375597988306, "grad_norm": 0.024262648075819016, "learning_rate": 8.408475337650273e-06, "loss": 0.0016, "step": 81960 }, { "epoch": 0.6703193359774298, "grad_norm": 0.12105505168437958, "learning_rate": 8.407953183940217e-06, "loss": 0.0019, "step": 81970 }, { "epoch": 0.6704011121560289, "grad_norm": 0.10933812707662582, "learning_rate": 8.407430960807386e-06, "loss": 0.0012, "step": 81980 }, { "epoch": 0.6704828883346281, "grad_norm": 0.05461512878537178, "learning_rate": 8.406908668262415e-06, "loss": 0.0017, "step": 81990 }, { "epoch": 0.6705646645132273, "grad_norm": 0.12239603698253632, "learning_rate": 8.406386306315946e-06, "loss": 0.002, "step": 82000 }, { "epoch": 0.6706464406918264, "grad_norm": 0.08496777713298798, "learning_rate": 8.405863874978618e-06, "loss": 0.002, "step": 82010 }, { "epoch": 0.6707282168704256, "grad_norm": 0.031700585037469864, "learning_rate": 8.405341374261075e-06, "loss": 0.0018, "step": 82020 }, { "epoch": 0.6708099930490248, "grad_norm": 0.10059220343828201, "learning_rate": 8.404818804173962e-06, "loss": 0.002, "step": 82030 }, { "epoch": 0.670891769227624, "grad_norm": 0.010700157843530178, "learning_rate": 8.40429616472792e-06, "loss": 0.0018, "step": 82040 }, { "epoch": 0.6709735454062231, "grad_norm": 0.06288770586252213, "learning_rate": 8.403773455933599e-06, "loss": 0.0021, "step": 82050 }, { "epoch": 0.6710553215848224, "grad_norm": 0.06717878580093384, "learning_rate": 8.403250677801646e-06, "loss": 0.0011, "step": 82060 }, { "epoch": 0.6711370977634216, "grad_norm": 0.13410237431526184, "learning_rate": 8.402727830342709e-06, "loss": 0.0059, "step": 82070 }, { "epoch": 0.6712188739420207, "grad_norm": 0.09173928201198578, "learning_rate": 8.402204913567443e-06, "loss": 0.0026, "step": 82080 }, { "epoch": 0.6713006501206199, "grad_norm": 0.15854255855083466, "learning_rate": 8.401681927486494e-06, "loss": 0.0017, "step": 82090 }, { "epoch": 0.6713824262992191, "grad_norm": 0.08294758945703506, "learning_rate": 8.401158872110522e-06, "loss": 0.0028, "step": 82100 }, { "epoch": 0.6714642024778182, "grad_norm": 0.06641467660665512, "learning_rate": 8.400635747450177e-06, "loss": 0.0038, "step": 82110 }, { "epoch": 0.6715459786564174, "grad_norm": 0.07082925736904144, "learning_rate": 8.40011255351612e-06, "loss": 0.002, "step": 82120 }, { "epoch": 0.6716277548350166, "grad_norm": 0.028369911015033722, "learning_rate": 8.399589290319003e-06, "loss": 0.0014, "step": 82130 }, { "epoch": 0.6717095310136157, "grad_norm": 0.0234663225710392, "learning_rate": 8.399065957869492e-06, "loss": 0.0027, "step": 82140 }, { "epoch": 0.6717913071922149, "grad_norm": 0.2440723180770874, "learning_rate": 8.398542556178244e-06, "loss": 0.0014, "step": 82150 }, { "epoch": 0.6718730833708141, "grad_norm": 0.0671137273311615, "learning_rate": 8.398019085255921e-06, "loss": 0.0035, "step": 82160 }, { "epoch": 0.6719548595494133, "grad_norm": 0.026205968111753464, "learning_rate": 8.397495545113188e-06, "loss": 0.0029, "step": 82170 }, { "epoch": 0.6720366357280124, "grad_norm": 0.03566163778305054, "learning_rate": 8.396971935760709e-06, "loss": 0.0016, "step": 82180 }, { "epoch": 0.6721184119066116, "grad_norm": 0.07540292292833328, "learning_rate": 8.396448257209148e-06, "loss": 0.0019, "step": 82190 }, { "epoch": 0.6722001880852108, "grad_norm": 0.06512143462896347, "learning_rate": 8.395924509469179e-06, "loss": 0.001, "step": 82200 }, { "epoch": 0.6722819642638099, "grad_norm": 0.11630380153656006, "learning_rate": 8.395400692551466e-06, "loss": 0.0023, "step": 82210 }, { "epoch": 0.6723637404424091, "grad_norm": 0.11438312381505966, "learning_rate": 8.39487680646668e-06, "loss": 0.0035, "step": 82220 }, { "epoch": 0.6724455166210083, "grad_norm": 0.04208756610751152, "learning_rate": 8.394352851225495e-06, "loss": 0.0016, "step": 82230 }, { "epoch": 0.6725272927996074, "grad_norm": 0.11781133711338043, "learning_rate": 8.393828826838583e-06, "loss": 0.0025, "step": 82240 }, { "epoch": 0.6726090689782066, "grad_norm": 0.05363164097070694, "learning_rate": 8.393304733316618e-06, "loss": 0.0015, "step": 82250 }, { "epoch": 0.6726908451568058, "grad_norm": 0.3551176190376282, "learning_rate": 8.39278057067028e-06, "loss": 0.0026, "step": 82260 }, { "epoch": 0.6727726213354049, "grad_norm": 0.13050545752048492, "learning_rate": 8.392256338910241e-06, "loss": 0.0022, "step": 82270 }, { "epoch": 0.6728543975140042, "grad_norm": 0.05107418820261955, "learning_rate": 8.391732038047184e-06, "loss": 0.0033, "step": 82280 }, { "epoch": 0.6729361736926034, "grad_norm": 0.032624080777168274, "learning_rate": 8.39120766809179e-06, "loss": 0.002, "step": 82290 }, { "epoch": 0.6730179498712026, "grad_norm": 0.04630220681428909, "learning_rate": 8.390683229054739e-06, "loss": 0.0018, "step": 82300 }, { "epoch": 0.6730997260498017, "grad_norm": 0.0072388555854558945, "learning_rate": 8.390158720946713e-06, "loss": 0.0013, "step": 82310 }, { "epoch": 0.6731815022284009, "grad_norm": 0.03536687418818474, "learning_rate": 8.3896341437784e-06, "loss": 0.0015, "step": 82320 }, { "epoch": 0.6732632784070001, "grad_norm": 0.029503053054213524, "learning_rate": 8.38910949756048e-06, "loss": 0.0018, "step": 82330 }, { "epoch": 0.6733450545855992, "grad_norm": 0.058323491364717484, "learning_rate": 8.38858478230365e-06, "loss": 0.0017, "step": 82340 }, { "epoch": 0.6734268307641984, "grad_norm": 0.0821152776479721, "learning_rate": 8.388059998018593e-06, "loss": 0.0015, "step": 82350 }, { "epoch": 0.6735086069427976, "grad_norm": 0.06003107875585556, "learning_rate": 8.387535144715997e-06, "loss": 0.0027, "step": 82360 }, { "epoch": 0.6735903831213967, "grad_norm": 0.01663093827664852, "learning_rate": 8.387010222406558e-06, "loss": 0.002, "step": 82370 }, { "epoch": 0.6736721592999959, "grad_norm": 0.027095910161733627, "learning_rate": 8.386485231100967e-06, "loss": 0.0021, "step": 82380 }, { "epoch": 0.6737539354785951, "grad_norm": 0.09681306034326553, "learning_rate": 8.38596017080992e-06, "loss": 0.0015, "step": 82390 }, { "epoch": 0.6738357116571942, "grad_norm": 0.02820240706205368, "learning_rate": 8.385435041544112e-06, "loss": 0.0027, "step": 82400 }, { "epoch": 0.6739174878357934, "grad_norm": 0.007386290933936834, "learning_rate": 8.38490984331424e-06, "loss": 0.0018, "step": 82410 }, { "epoch": 0.6739992640143926, "grad_norm": 0.030209731310606003, "learning_rate": 8.384384576131004e-06, "loss": 0.002, "step": 82420 }, { "epoch": 0.6740810401929918, "grad_norm": 0.0030639274045825005, "learning_rate": 8.383859240005102e-06, "loss": 0.0018, "step": 82430 }, { "epoch": 0.6741628163715909, "grad_norm": 0.07679200172424316, "learning_rate": 8.383333834947237e-06, "loss": 0.0018, "step": 82440 }, { "epoch": 0.6742445925501901, "grad_norm": 0.05951680615544319, "learning_rate": 8.382808360968114e-06, "loss": 0.0029, "step": 82450 }, { "epoch": 0.6743263687287893, "grad_norm": 0.01828654296696186, "learning_rate": 8.38228281807843e-06, "loss": 0.0019, "step": 82460 }, { "epoch": 0.6744081449073884, "grad_norm": 0.013727366924285889, "learning_rate": 8.3817572062889e-06, "loss": 0.0014, "step": 82470 }, { "epoch": 0.6744899210859876, "grad_norm": 0.11465077847242355, "learning_rate": 8.381231525610226e-06, "loss": 0.0053, "step": 82480 }, { "epoch": 0.6745716972645869, "grad_norm": 0.03491484373807907, "learning_rate": 8.380705776053117e-06, "loss": 0.0016, "step": 82490 }, { "epoch": 0.674653473443186, "grad_norm": 0.016101161018013954, "learning_rate": 8.380179957628283e-06, "loss": 0.0017, "step": 82500 }, { "epoch": 0.6747352496217852, "grad_norm": 0.030293304473161697, "learning_rate": 8.379654070346437e-06, "loss": 0.0022, "step": 82510 }, { "epoch": 0.6748170258003844, "grad_norm": 0.01498590037226677, "learning_rate": 8.379128114218289e-06, "loss": 0.0017, "step": 82520 }, { "epoch": 0.6748988019789836, "grad_norm": 0.035404253751039505, "learning_rate": 8.378602089254555e-06, "loss": 0.0031, "step": 82530 }, { "epoch": 0.6749805781575827, "grad_norm": 0.037907589226961136, "learning_rate": 8.378075995465951e-06, "loss": 0.0017, "step": 82540 }, { "epoch": 0.6750623543361819, "grad_norm": 0.026719363406300545, "learning_rate": 8.377549832863192e-06, "loss": 0.0011, "step": 82550 }, { "epoch": 0.6751441305147811, "grad_norm": 0.10233236849308014, "learning_rate": 8.377023601456999e-06, "loss": 0.0017, "step": 82560 }, { "epoch": 0.6752259066933802, "grad_norm": 0.06892630457878113, "learning_rate": 8.376497301258088e-06, "loss": 0.0019, "step": 82570 }, { "epoch": 0.6753076828719794, "grad_norm": 0.012844495475292206, "learning_rate": 8.375970932277185e-06, "loss": 0.0015, "step": 82580 }, { "epoch": 0.6753894590505786, "grad_norm": 0.1619330793619156, "learning_rate": 8.375444494525007e-06, "loss": 0.0028, "step": 82590 }, { "epoch": 0.6754712352291777, "grad_norm": 0.0696437656879425, "learning_rate": 8.374917988012282e-06, "loss": 0.0026, "step": 82600 }, { "epoch": 0.6755530114077769, "grad_norm": 0.08513247221708298, "learning_rate": 8.374391412749736e-06, "loss": 0.0011, "step": 82610 }, { "epoch": 0.6756347875863761, "grad_norm": 0.04723808541893959, "learning_rate": 8.373864768748095e-06, "loss": 0.0008, "step": 82620 }, { "epoch": 0.6757165637649752, "grad_norm": 0.11592567712068558, "learning_rate": 8.373338056018084e-06, "loss": 0.0018, "step": 82630 }, { "epoch": 0.6757983399435744, "grad_norm": 0.07892458140850067, "learning_rate": 8.372811274570433e-06, "loss": 0.0013, "step": 82640 }, { "epoch": 0.6758801161221736, "grad_norm": 0.019950469955801964, "learning_rate": 8.372284424415878e-06, "loss": 0.0025, "step": 82650 }, { "epoch": 0.6759618923007727, "grad_norm": 0.12321590632200241, "learning_rate": 8.371757505565148e-06, "loss": 0.0019, "step": 82660 }, { "epoch": 0.6760436684793719, "grad_norm": 0.03917553275823593, "learning_rate": 8.371230518028975e-06, "loss": 0.0024, "step": 82670 }, { "epoch": 0.6761254446579711, "grad_norm": 0.01370461005717516, "learning_rate": 8.370703461818097e-06, "loss": 0.0023, "step": 82680 }, { "epoch": 0.6762072208365703, "grad_norm": 0.038383595645427704, "learning_rate": 8.370176336943248e-06, "loss": 0.0035, "step": 82690 }, { "epoch": 0.6762889970151695, "grad_norm": 0.07705036550760269, "learning_rate": 8.369649143415171e-06, "loss": 0.0031, "step": 82700 }, { "epoch": 0.6763707731937687, "grad_norm": 0.05257175862789154, "learning_rate": 8.3691218812446e-06, "loss": 0.0025, "step": 82710 }, { "epoch": 0.6764525493723679, "grad_norm": 0.025843527168035507, "learning_rate": 8.368594550442277e-06, "loss": 0.0022, "step": 82720 }, { "epoch": 0.676534325550967, "grad_norm": 0.17553943395614624, "learning_rate": 8.368067151018946e-06, "loss": 0.0021, "step": 82730 }, { "epoch": 0.6766161017295662, "grad_norm": 0.03858603537082672, "learning_rate": 8.36753968298535e-06, "loss": 0.0025, "step": 82740 }, { "epoch": 0.6766978779081654, "grad_norm": 0.07914392650127411, "learning_rate": 8.367012146352232e-06, "loss": 0.0013, "step": 82750 }, { "epoch": 0.6767796540867645, "grad_norm": 0.010540611110627651, "learning_rate": 8.366484541130339e-06, "loss": 0.0018, "step": 82760 }, { "epoch": 0.6768614302653637, "grad_norm": 0.03502853959798813, "learning_rate": 8.365956867330422e-06, "loss": 0.0017, "step": 82770 }, { "epoch": 0.6769432064439629, "grad_norm": 0.022710351273417473, "learning_rate": 8.365429124963227e-06, "loss": 0.0019, "step": 82780 }, { "epoch": 0.677024982622562, "grad_norm": 0.04876190796494484, "learning_rate": 8.364901314039504e-06, "loss": 0.0016, "step": 82790 }, { "epoch": 0.6771067588011612, "grad_norm": 0.040247127413749695, "learning_rate": 8.364373434570006e-06, "loss": 0.0018, "step": 82800 }, { "epoch": 0.6771885349797604, "grad_norm": 0.023243004456162453, "learning_rate": 8.363845486565489e-06, "loss": 0.0025, "step": 82810 }, { "epoch": 0.6772703111583596, "grad_norm": 0.044772516936063766, "learning_rate": 8.363317470036702e-06, "loss": 0.002, "step": 82820 }, { "epoch": 0.6773520873369587, "grad_norm": 0.07126357406377792, "learning_rate": 8.362789384994405e-06, "loss": 0.0016, "step": 82830 }, { "epoch": 0.6774338635155579, "grad_norm": 0.11876699328422546, "learning_rate": 8.362261231449355e-06, "loss": 0.0014, "step": 82840 }, { "epoch": 0.6775156396941571, "grad_norm": 0.04197932779788971, "learning_rate": 8.361733009412312e-06, "loss": 0.0023, "step": 82850 }, { "epoch": 0.6775974158727562, "grad_norm": 0.08799224346876144, "learning_rate": 8.361204718894033e-06, "loss": 0.0019, "step": 82860 }, { "epoch": 0.6776791920513554, "grad_norm": 0.053871527314186096, "learning_rate": 8.360676359905284e-06, "loss": 0.0013, "step": 82870 }, { "epoch": 0.6777609682299546, "grad_norm": 0.08293426781892776, "learning_rate": 8.360147932456825e-06, "loss": 0.0017, "step": 82880 }, { "epoch": 0.6778427444085537, "grad_norm": 0.0690779983997345, "learning_rate": 8.359619436559421e-06, "loss": 0.0032, "step": 82890 }, { "epoch": 0.6779245205871529, "grad_norm": 0.08542119711637497, "learning_rate": 8.359090872223838e-06, "loss": 0.002, "step": 82900 }, { "epoch": 0.6780062967657521, "grad_norm": 0.17445749044418335, "learning_rate": 8.358562239460845e-06, "loss": 0.0035, "step": 82910 }, { "epoch": 0.6780880729443514, "grad_norm": 0.06377067416906357, "learning_rate": 8.358033538281207e-06, "loss": 0.0028, "step": 82920 }, { "epoch": 0.6781698491229505, "grad_norm": 0.11680813878774643, "learning_rate": 8.357504768695698e-06, "loss": 0.0013, "step": 82930 }, { "epoch": 0.6782516253015497, "grad_norm": 0.05949631705880165, "learning_rate": 8.356975930715086e-06, "loss": 0.0013, "step": 82940 }, { "epoch": 0.6783334014801489, "grad_norm": 0.03166034072637558, "learning_rate": 8.356447024350148e-06, "loss": 0.0021, "step": 82950 }, { "epoch": 0.678415177658748, "grad_norm": 0.04854701831936836, "learning_rate": 8.355918049611655e-06, "loss": 0.0015, "step": 82960 }, { "epoch": 0.6784969538373472, "grad_norm": 0.06125679239630699, "learning_rate": 8.355389006510383e-06, "loss": 0.0021, "step": 82970 }, { "epoch": 0.6785787300159464, "grad_norm": 0.06742334365844727, "learning_rate": 8.35485989505711e-06, "loss": 0.0029, "step": 82980 }, { "epoch": 0.6786605061945455, "grad_norm": 0.024186674505472183, "learning_rate": 8.354330715262613e-06, "loss": 0.0021, "step": 82990 }, { "epoch": 0.6787422823731447, "grad_norm": 0.4000420570373535, "learning_rate": 8.353801467137672e-06, "loss": 0.004, "step": 83000 }, { "epoch": 0.6788240585517439, "grad_norm": 0.059630222618579865, "learning_rate": 8.353272150693072e-06, "loss": 0.0014, "step": 83010 }, { "epoch": 0.678905834730343, "grad_norm": 0.014122980646789074, "learning_rate": 8.35274276593959e-06, "loss": 0.0013, "step": 83020 }, { "epoch": 0.6789876109089422, "grad_norm": 0.05965745076537132, "learning_rate": 8.352213312888013e-06, "loss": 0.0014, "step": 83030 }, { "epoch": 0.6790693870875414, "grad_norm": 0.12320700287818909, "learning_rate": 8.351683791549128e-06, "loss": 0.0038, "step": 83040 }, { "epoch": 0.6791511632661406, "grad_norm": 0.009780067950487137, "learning_rate": 8.351154201933718e-06, "loss": 0.0029, "step": 83050 }, { "epoch": 0.6792329394447397, "grad_norm": 0.010803606361150742, "learning_rate": 8.350624544052572e-06, "loss": 0.0013, "step": 83060 }, { "epoch": 0.6793147156233389, "grad_norm": 0.04754341393709183, "learning_rate": 8.350094817916483e-06, "loss": 0.0031, "step": 83070 }, { "epoch": 0.6793964918019381, "grad_norm": 0.11229188740253448, "learning_rate": 8.349565023536238e-06, "loss": 0.0026, "step": 83080 }, { "epoch": 0.6794782679805372, "grad_norm": 0.2193821519613266, "learning_rate": 8.349035160922631e-06, "loss": 0.0012, "step": 83090 }, { "epoch": 0.6795600441591364, "grad_norm": 0.11123177409172058, "learning_rate": 8.348505230086456e-06, "loss": 0.0028, "step": 83100 }, { "epoch": 0.6796418203377356, "grad_norm": 0.03216288238763809, "learning_rate": 8.347975231038508e-06, "loss": 0.0015, "step": 83110 }, { "epoch": 0.6797235965163347, "grad_norm": 0.26146796345710754, "learning_rate": 8.34744516378958e-06, "loss": 0.0022, "step": 83120 }, { "epoch": 0.679805372694934, "grad_norm": 0.09870323538780212, "learning_rate": 8.346915028350478e-06, "loss": 0.0036, "step": 83130 }, { "epoch": 0.6798871488735332, "grad_norm": 0.03757786378264427, "learning_rate": 8.346384824731993e-06, "loss": 0.0007, "step": 83140 }, { "epoch": 0.6799689250521324, "grad_norm": 0.039711784571409225, "learning_rate": 8.345854552944929e-06, "loss": 0.0013, "step": 83150 }, { "epoch": 0.6800507012307315, "grad_norm": 0.16578036546707153, "learning_rate": 8.34532421300009e-06, "loss": 0.0009, "step": 83160 }, { "epoch": 0.6801324774093307, "grad_norm": 0.06687769293785095, "learning_rate": 8.344793804908277e-06, "loss": 0.0015, "step": 83170 }, { "epoch": 0.6802142535879299, "grad_norm": 0.054843686521053314, "learning_rate": 8.344263328680296e-06, "loss": 0.0016, "step": 83180 }, { "epoch": 0.680296029766529, "grad_norm": 0.16542164981365204, "learning_rate": 8.34373278432695e-06, "loss": 0.0014, "step": 83190 }, { "epoch": 0.6803778059451282, "grad_norm": 0.15007050335407257, "learning_rate": 8.343202171859052e-06, "loss": 0.003, "step": 83200 }, { "epoch": 0.6804595821237274, "grad_norm": 0.05204227939248085, "learning_rate": 8.342671491287408e-06, "loss": 0.0016, "step": 83210 }, { "epoch": 0.6805413583023265, "grad_norm": 0.04803071171045303, "learning_rate": 8.342140742622827e-06, "loss": 0.003, "step": 83220 }, { "epoch": 0.6806231344809257, "grad_norm": 0.04596010223031044, "learning_rate": 8.341609925876124e-06, "loss": 0.0021, "step": 83230 }, { "epoch": 0.6807049106595249, "grad_norm": 0.03343509882688522, "learning_rate": 8.34107904105811e-06, "loss": 0.0011, "step": 83240 }, { "epoch": 0.680786686838124, "grad_norm": 0.06358607858419418, "learning_rate": 8.340548088179598e-06, "loss": 0.0017, "step": 83250 }, { "epoch": 0.6808684630167232, "grad_norm": 0.06695929914712906, "learning_rate": 8.340017067251409e-06, "loss": 0.0017, "step": 83260 }, { "epoch": 0.6809502391953224, "grad_norm": 0.010301219299435616, "learning_rate": 8.339485978284358e-06, "loss": 0.0016, "step": 83270 }, { "epoch": 0.6810320153739216, "grad_norm": 0.18196581304073334, "learning_rate": 8.33895482128926e-06, "loss": 0.0013, "step": 83280 }, { "epoch": 0.6811137915525207, "grad_norm": 0.08194022625684738, "learning_rate": 8.338423596276942e-06, "loss": 0.0018, "step": 83290 }, { "epoch": 0.6811955677311199, "grad_norm": 0.0873967781662941, "learning_rate": 8.337892303258219e-06, "loss": 0.0071, "step": 83300 }, { "epoch": 0.6812773439097191, "grad_norm": 0.08120706677436829, "learning_rate": 8.337360942243918e-06, "loss": 0.0018, "step": 83310 }, { "epoch": 0.6813591200883182, "grad_norm": 0.07161686569452286, "learning_rate": 8.33682951324486e-06, "loss": 0.0036, "step": 83320 }, { "epoch": 0.6814408962669174, "grad_norm": 0.11081825196743011, "learning_rate": 8.336298016271874e-06, "loss": 0.0017, "step": 83330 }, { "epoch": 0.6815226724455167, "grad_norm": 0.10347960889339447, "learning_rate": 8.335766451335784e-06, "loss": 0.0023, "step": 83340 }, { "epoch": 0.6816044486241158, "grad_norm": 0.07952956855297089, "learning_rate": 8.335234818447422e-06, "loss": 0.0013, "step": 83350 }, { "epoch": 0.681686224802715, "grad_norm": 0.07445894181728363, "learning_rate": 8.334703117617616e-06, "loss": 0.0024, "step": 83360 }, { "epoch": 0.6817680009813142, "grad_norm": 0.1295633316040039, "learning_rate": 8.334171348857197e-06, "loss": 0.0015, "step": 83370 }, { "epoch": 0.6818497771599134, "grad_norm": 0.14686504006385803, "learning_rate": 8.333639512176994e-06, "loss": 0.002, "step": 83380 }, { "epoch": 0.6819315533385125, "grad_norm": 0.023848125711083412, "learning_rate": 8.333107607587847e-06, "loss": 0.0033, "step": 83390 }, { "epoch": 0.6820133295171117, "grad_norm": 0.04696343466639519, "learning_rate": 8.33257563510059e-06, "loss": 0.0021, "step": 83400 }, { "epoch": 0.6820951056957109, "grad_norm": 0.023676738142967224, "learning_rate": 8.332043594726056e-06, "loss": 0.0015, "step": 83410 }, { "epoch": 0.68217688187431, "grad_norm": 0.026392240077257156, "learning_rate": 8.331511486475086e-06, "loss": 0.0024, "step": 83420 }, { "epoch": 0.6822586580529092, "grad_norm": 0.08989937603473663, "learning_rate": 8.33097931035852e-06, "loss": 0.0031, "step": 83430 }, { "epoch": 0.6823404342315084, "grad_norm": 0.04527869075536728, "learning_rate": 8.330447066387198e-06, "loss": 0.0015, "step": 83440 }, { "epoch": 0.6824222104101075, "grad_norm": 0.012072326615452766, "learning_rate": 8.329914754571962e-06, "loss": 0.0018, "step": 83450 }, { "epoch": 0.6825039865887067, "grad_norm": 0.03270205110311508, "learning_rate": 8.329382374923655e-06, "loss": 0.002, "step": 83460 }, { "epoch": 0.6825857627673059, "grad_norm": 0.005333533510565758, "learning_rate": 8.328849927453123e-06, "loss": 0.0022, "step": 83470 }, { "epoch": 0.682667538945905, "grad_norm": 0.022960152477025986, "learning_rate": 8.328317412171212e-06, "loss": 0.0012, "step": 83480 }, { "epoch": 0.6827493151245042, "grad_norm": 0.01955338753759861, "learning_rate": 8.327784829088769e-06, "loss": 0.0018, "step": 83490 }, { "epoch": 0.6828310913031034, "grad_norm": 0.0517750047147274, "learning_rate": 8.327252178216646e-06, "loss": 0.0016, "step": 83500 }, { "epoch": 0.6829128674817025, "grad_norm": 0.08849445730447769, "learning_rate": 8.32671945956569e-06, "loss": 0.0034, "step": 83510 }, { "epoch": 0.6829946436603017, "grad_norm": 0.013602091930806637, "learning_rate": 8.326186673146756e-06, "loss": 0.0009, "step": 83520 }, { "epoch": 0.6830764198389009, "grad_norm": 0.02320868708193302, "learning_rate": 8.325653818970695e-06, "loss": 0.0015, "step": 83530 }, { "epoch": 0.6831581960175, "grad_norm": 0.04825863987207413, "learning_rate": 8.325120897048363e-06, "loss": 0.002, "step": 83540 }, { "epoch": 0.6832399721960992, "grad_norm": 0.05450271815061569, "learning_rate": 8.324587907390615e-06, "loss": 0.002, "step": 83550 }, { "epoch": 0.6833217483746985, "grad_norm": 0.0651092380285263, "learning_rate": 8.32405485000831e-06, "loss": 0.0027, "step": 83560 }, { "epoch": 0.6834035245532977, "grad_norm": 0.201212078332901, "learning_rate": 8.323521724912304e-06, "loss": 0.0036, "step": 83570 }, { "epoch": 0.6834853007318968, "grad_norm": 0.008568016812205315, "learning_rate": 8.32298853211346e-06, "loss": 0.0016, "step": 83580 }, { "epoch": 0.683567076910496, "grad_norm": 0.024478711187839508, "learning_rate": 8.32245527162264e-06, "loss": 0.002, "step": 83590 }, { "epoch": 0.6836488530890952, "grad_norm": 0.0642474815249443, "learning_rate": 8.321921943450702e-06, "loss": 0.0029, "step": 83600 }, { "epoch": 0.6837306292676943, "grad_norm": 0.07119913399219513, "learning_rate": 8.321388547608517e-06, "loss": 0.0023, "step": 83610 }, { "epoch": 0.6838124054462935, "grad_norm": 0.049966536462306976, "learning_rate": 8.320855084106945e-06, "loss": 0.0026, "step": 83620 }, { "epoch": 0.6838941816248927, "grad_norm": 0.13483308255672455, "learning_rate": 8.320321552956858e-06, "loss": 0.0016, "step": 83630 }, { "epoch": 0.6839759578034919, "grad_norm": 0.09553377330303192, "learning_rate": 8.31978795416912e-06, "loss": 0.0031, "step": 83640 }, { "epoch": 0.684057733982091, "grad_norm": 0.03132014721632004, "learning_rate": 8.319254287754606e-06, "loss": 0.0015, "step": 83650 }, { "epoch": 0.6841395101606902, "grad_norm": 0.029463017359375954, "learning_rate": 8.31872055372418e-06, "loss": 0.0015, "step": 83660 }, { "epoch": 0.6842212863392894, "grad_norm": 0.0873192772269249, "learning_rate": 8.318186752088722e-06, "loss": 0.003, "step": 83670 }, { "epoch": 0.6843030625178885, "grad_norm": 0.002514749998226762, "learning_rate": 8.3176528828591e-06, "loss": 0.0008, "step": 83680 }, { "epoch": 0.6843848386964877, "grad_norm": 0.1234932690858841, "learning_rate": 8.317118946046194e-06, "loss": 0.003, "step": 83690 }, { "epoch": 0.6844666148750869, "grad_norm": 0.08374162018299103, "learning_rate": 8.316584941660878e-06, "loss": 0.0018, "step": 83700 }, { "epoch": 0.684548391053686, "grad_norm": 0.0786687433719635, "learning_rate": 8.316050869714031e-06, "loss": 0.0024, "step": 83710 }, { "epoch": 0.6846301672322852, "grad_norm": 0.011814507655799389, "learning_rate": 8.315516730216534e-06, "loss": 0.0012, "step": 83720 }, { "epoch": 0.6847119434108844, "grad_norm": 0.09822893142700195, "learning_rate": 8.314982523179263e-06, "loss": 0.003, "step": 83730 }, { "epoch": 0.6847937195894835, "grad_norm": 0.03972027823328972, "learning_rate": 8.314448248613105e-06, "loss": 0.0019, "step": 83740 }, { "epoch": 0.6848754957680827, "grad_norm": 0.03721647709608078, "learning_rate": 8.313913906528944e-06, "loss": 0.0019, "step": 83750 }, { "epoch": 0.6849572719466819, "grad_norm": 0.04904389753937721, "learning_rate": 8.31337949693766e-06, "loss": 0.0015, "step": 83760 }, { "epoch": 0.6850390481252812, "grad_norm": 0.08056831359863281, "learning_rate": 8.312845019850143e-06, "loss": 0.0038, "step": 83770 }, { "epoch": 0.6851208243038803, "grad_norm": 0.07935803383588791, "learning_rate": 8.31231047527728e-06, "loss": 0.0012, "step": 83780 }, { "epoch": 0.6852026004824795, "grad_norm": 0.2962711453437805, "learning_rate": 8.311775863229963e-06, "loss": 0.002, "step": 83790 }, { "epoch": 0.6852843766610787, "grad_norm": 0.05938303843140602, "learning_rate": 8.311241183719078e-06, "loss": 0.0013, "step": 83800 }, { "epoch": 0.6853661528396778, "grad_norm": 0.06309068202972412, "learning_rate": 8.310706436755519e-06, "loss": 0.002, "step": 83810 }, { "epoch": 0.685447929018277, "grad_norm": 0.0413568913936615, "learning_rate": 8.310171622350176e-06, "loss": 0.0025, "step": 83820 }, { "epoch": 0.6855297051968762, "grad_norm": 0.036606643348932266, "learning_rate": 8.30963674051395e-06, "loss": 0.0034, "step": 83830 }, { "epoch": 0.6856114813754753, "grad_norm": 0.0182504802942276, "learning_rate": 8.309101791257731e-06, "loss": 0.0018, "step": 83840 }, { "epoch": 0.6856932575540745, "grad_norm": 0.05352498218417168, "learning_rate": 8.30856677459242e-06, "loss": 0.0027, "step": 83850 }, { "epoch": 0.6857750337326737, "grad_norm": 0.039059918373823166, "learning_rate": 8.308031690528915e-06, "loss": 0.0026, "step": 83860 }, { "epoch": 0.6858568099112728, "grad_norm": 0.047416917979717255, "learning_rate": 8.307496539078114e-06, "loss": 0.0025, "step": 83870 }, { "epoch": 0.685938586089872, "grad_norm": 0.006480574607849121, "learning_rate": 8.306961320250921e-06, "loss": 0.0017, "step": 83880 }, { "epoch": 0.6860203622684712, "grad_norm": 0.17254342138767242, "learning_rate": 8.306426034058238e-06, "loss": 0.0022, "step": 83890 }, { "epoch": 0.6861021384470704, "grad_norm": 0.12892356514930725, "learning_rate": 8.305890680510968e-06, "loss": 0.002, "step": 83900 }, { "epoch": 0.6861839146256695, "grad_norm": 0.2606489360332489, "learning_rate": 8.305355259620019e-06, "loss": 0.0013, "step": 83910 }, { "epoch": 0.6862656908042687, "grad_norm": 0.049824248999357224, "learning_rate": 8.304819771396296e-06, "loss": 0.0027, "step": 83920 }, { "epoch": 0.6863474669828679, "grad_norm": 0.009907788597047329, "learning_rate": 8.304284215850708e-06, "loss": 0.0025, "step": 83930 }, { "epoch": 0.686429243161467, "grad_norm": 0.035822123289108276, "learning_rate": 8.303748592994162e-06, "loss": 0.0012, "step": 83940 }, { "epoch": 0.6865110193400662, "grad_norm": 0.03402762487530708, "learning_rate": 8.303212902837577e-06, "loss": 0.0011, "step": 83950 }, { "epoch": 0.6865927955186654, "grad_norm": 0.11772904545068741, "learning_rate": 8.302677145391857e-06, "loss": 0.002, "step": 83960 }, { "epoch": 0.6866745716972645, "grad_norm": 0.03602505475282669, "learning_rate": 8.302141320667919e-06, "loss": 0.0017, "step": 83970 }, { "epoch": 0.6867563478758638, "grad_norm": 0.15930770337581635, "learning_rate": 8.301605428676679e-06, "loss": 0.003, "step": 83980 }, { "epoch": 0.686838124054463, "grad_norm": 0.35992106795310974, "learning_rate": 8.301069469429052e-06, "loss": 0.0017, "step": 83990 }, { "epoch": 0.6869199002330622, "grad_norm": 0.13025622069835663, "learning_rate": 8.300533442935957e-06, "loss": 0.0024, "step": 84000 }, { "epoch": 0.6870016764116613, "grad_norm": 0.17328234016895294, "learning_rate": 8.299997349208315e-06, "loss": 0.0011, "step": 84010 }, { "epoch": 0.6870834525902605, "grad_norm": 0.08465912193059921, "learning_rate": 8.29946118825704e-06, "loss": 0.0013, "step": 84020 }, { "epoch": 0.6871652287688597, "grad_norm": 0.07701101899147034, "learning_rate": 8.298924960093062e-06, "loss": 0.0015, "step": 84030 }, { "epoch": 0.6872470049474588, "grad_norm": 0.2759930491447449, "learning_rate": 8.298388664727302e-06, "loss": 0.0024, "step": 84040 }, { "epoch": 0.687328781126058, "grad_norm": 0.05458607152104378, "learning_rate": 8.297852302170682e-06, "loss": 0.0009, "step": 84050 }, { "epoch": 0.6874105573046572, "grad_norm": 0.012149592861533165, "learning_rate": 8.297315872434129e-06, "loss": 0.002, "step": 84060 }, { "epoch": 0.6874923334832563, "grad_norm": 0.01854853890836239, "learning_rate": 8.296779375528575e-06, "loss": 0.0022, "step": 84070 }, { "epoch": 0.6875741096618555, "grad_norm": 0.16018104553222656, "learning_rate": 8.296242811464941e-06, "loss": 0.0016, "step": 84080 }, { "epoch": 0.6876558858404547, "grad_norm": 0.08328226953744888, "learning_rate": 8.295706180254163e-06, "loss": 0.0015, "step": 84090 }, { "epoch": 0.6877376620190538, "grad_norm": 0.02505539357662201, "learning_rate": 8.295169481907173e-06, "loss": 0.0017, "step": 84100 }, { "epoch": 0.687819438197653, "grad_norm": 0.21703436970710754, "learning_rate": 8.294632716434901e-06, "loss": 0.0019, "step": 84110 }, { "epoch": 0.6879012143762522, "grad_norm": 0.14758442342281342, "learning_rate": 8.294095883848283e-06, "loss": 0.0019, "step": 84120 }, { "epoch": 0.6879829905548513, "grad_norm": 0.022052286192774773, "learning_rate": 8.293558984158254e-06, "loss": 0.0016, "step": 84130 }, { "epoch": 0.6880647667334505, "grad_norm": 0.2836303114891052, "learning_rate": 8.29302201737575e-06, "loss": 0.0023, "step": 84140 }, { "epoch": 0.6881465429120497, "grad_norm": 0.05882382020354271, "learning_rate": 8.292484983511712e-06, "loss": 0.002, "step": 84150 }, { "epoch": 0.6882283190906489, "grad_norm": 0.08078793436288834, "learning_rate": 8.29194788257708e-06, "loss": 0.0014, "step": 84160 }, { "epoch": 0.688310095269248, "grad_norm": 0.027231158688664436, "learning_rate": 8.291410714582792e-06, "loss": 0.002, "step": 84170 }, { "epoch": 0.6883918714478472, "grad_norm": 0.05473938211798668, "learning_rate": 8.290873479539793e-06, "loss": 0.0016, "step": 84180 }, { "epoch": 0.6884736476264464, "grad_norm": 0.12449310719966888, "learning_rate": 8.290336177459026e-06, "loss": 0.0021, "step": 84190 }, { "epoch": 0.6885554238050456, "grad_norm": 0.10216062515974045, "learning_rate": 8.289798808351437e-06, "loss": 0.0021, "step": 84200 }, { "epoch": 0.6886371999836448, "grad_norm": 0.0854305773973465, "learning_rate": 8.289261372227972e-06, "loss": 0.002, "step": 84210 }, { "epoch": 0.688718976162244, "grad_norm": 0.061805713921785355, "learning_rate": 8.288723869099579e-06, "loss": 0.002, "step": 84220 }, { "epoch": 0.6888007523408431, "grad_norm": 0.13041836023330688, "learning_rate": 8.288186298977207e-06, "loss": 0.0015, "step": 84230 }, { "epoch": 0.6888825285194423, "grad_norm": 0.03594318404793739, "learning_rate": 8.287648661871809e-06, "loss": 0.002, "step": 84240 }, { "epoch": 0.6889643046980415, "grad_norm": 0.015372133813798428, "learning_rate": 8.287110957794333e-06, "loss": 0.0034, "step": 84250 }, { "epoch": 0.6890460808766407, "grad_norm": 0.05970669537782669, "learning_rate": 8.286573186755738e-06, "loss": 0.002, "step": 84260 }, { "epoch": 0.6891278570552398, "grad_norm": 0.04362183064222336, "learning_rate": 8.286035348766974e-06, "loss": 0.0015, "step": 84270 }, { "epoch": 0.689209633233839, "grad_norm": 0.017306597903370857, "learning_rate": 8.285497443838999e-06, "loss": 0.0014, "step": 84280 }, { "epoch": 0.6892914094124382, "grad_norm": 0.019954489544034004, "learning_rate": 8.28495947198277e-06, "loss": 0.0014, "step": 84290 }, { "epoch": 0.6893731855910373, "grad_norm": 0.016283279284834862, "learning_rate": 8.284421433209247e-06, "loss": 0.0017, "step": 84300 }, { "epoch": 0.6894549617696365, "grad_norm": 0.14906740188598633, "learning_rate": 8.28388332752939e-06, "loss": 0.0028, "step": 84310 }, { "epoch": 0.6895367379482357, "grad_norm": 0.049615278840065, "learning_rate": 8.283345154954158e-06, "loss": 0.0017, "step": 84320 }, { "epoch": 0.6896185141268348, "grad_norm": 0.06701261550188065, "learning_rate": 8.282806915494518e-06, "loss": 0.0018, "step": 84330 }, { "epoch": 0.689700290305434, "grad_norm": 0.02204699069261551, "learning_rate": 8.282268609161435e-06, "loss": 0.0021, "step": 84340 }, { "epoch": 0.6897820664840332, "grad_norm": 0.05975552275776863, "learning_rate": 8.281730235965869e-06, "loss": 0.0021, "step": 84350 }, { "epoch": 0.6898638426626323, "grad_norm": 0.11894779652357101, "learning_rate": 8.281191795918793e-06, "loss": 0.0035, "step": 84360 }, { "epoch": 0.6899456188412315, "grad_norm": 0.08687944710254669, "learning_rate": 8.28065328903117e-06, "loss": 0.0027, "step": 84370 }, { "epoch": 0.6900273950198307, "grad_norm": 0.044865820556879044, "learning_rate": 8.280114715313976e-06, "loss": 0.0013, "step": 84380 }, { "epoch": 0.6901091711984298, "grad_norm": 0.052780862897634506, "learning_rate": 8.279576074778177e-06, "loss": 0.0016, "step": 84390 }, { "epoch": 0.690190947377029, "grad_norm": 0.027540775015950203, "learning_rate": 8.27903736743475e-06, "loss": 0.0017, "step": 84400 }, { "epoch": 0.6902727235556283, "grad_norm": 0.09049331396818161, "learning_rate": 8.278498593294663e-06, "loss": 0.0014, "step": 84410 }, { "epoch": 0.6903544997342275, "grad_norm": 0.16685211658477783, "learning_rate": 8.277959752368898e-06, "loss": 0.0022, "step": 84420 }, { "epoch": 0.6904362759128266, "grad_norm": 0.0067753866314888, "learning_rate": 8.277420844668427e-06, "loss": 0.0012, "step": 84430 }, { "epoch": 0.6905180520914258, "grad_norm": 0.04638754203915596, "learning_rate": 8.27688187020423e-06, "loss": 0.0027, "step": 84440 }, { "epoch": 0.690599828270025, "grad_norm": 0.008686665445566177, "learning_rate": 8.276342828987285e-06, "loss": 0.001, "step": 84450 }, { "epoch": 0.6906816044486241, "grad_norm": 0.08411579579114914, "learning_rate": 8.275803721028573e-06, "loss": 0.0016, "step": 84460 }, { "epoch": 0.6907633806272233, "grad_norm": 0.1462859958410263, "learning_rate": 8.275264546339077e-06, "loss": 0.0024, "step": 84470 }, { "epoch": 0.6908451568058225, "grad_norm": 0.0878659337759018, "learning_rate": 8.27472530492978e-06, "loss": 0.0023, "step": 84480 }, { "epoch": 0.6909269329844216, "grad_norm": 0.10840241611003876, "learning_rate": 8.274185996811668e-06, "loss": 0.0012, "step": 84490 }, { "epoch": 0.6910087091630208, "grad_norm": 0.02612176723778248, "learning_rate": 8.273646621995723e-06, "loss": 0.0018, "step": 84500 }, { "epoch": 0.69109048534162, "grad_norm": 0.005975290667265654, "learning_rate": 8.273107180492939e-06, "loss": 0.0015, "step": 84510 }, { "epoch": 0.6911722615202192, "grad_norm": 0.07264729589223862, "learning_rate": 8.272567672314298e-06, "loss": 0.0018, "step": 84520 }, { "epoch": 0.6912540376988183, "grad_norm": 0.10772446542978287, "learning_rate": 8.272028097470795e-06, "loss": 0.002, "step": 84530 }, { "epoch": 0.6913358138774175, "grad_norm": 0.22141586244106293, "learning_rate": 8.27148845597342e-06, "loss": 0.0026, "step": 84540 }, { "epoch": 0.6914175900560167, "grad_norm": 0.059979476034641266, "learning_rate": 8.270948747833165e-06, "loss": 0.0032, "step": 84550 }, { "epoch": 0.6914993662346158, "grad_norm": 0.04658185690641403, "learning_rate": 8.270408973061026e-06, "loss": 0.0018, "step": 84560 }, { "epoch": 0.691581142413215, "grad_norm": 0.09919324517250061, "learning_rate": 8.269869131667997e-06, "loss": 0.0021, "step": 84570 }, { "epoch": 0.6916629185918142, "grad_norm": 0.09300599247217178, "learning_rate": 8.269329223665077e-06, "loss": 0.0015, "step": 84580 }, { "epoch": 0.6917446947704133, "grad_norm": 0.06386913359165192, "learning_rate": 8.268789249063263e-06, "loss": 0.0019, "step": 84590 }, { "epoch": 0.6918264709490125, "grad_norm": 0.033250488340854645, "learning_rate": 8.268249207873556e-06, "loss": 0.0011, "step": 84600 }, { "epoch": 0.6919082471276117, "grad_norm": 0.024783845990896225, "learning_rate": 8.267709100106953e-06, "loss": 0.0013, "step": 84610 }, { "epoch": 0.691990023306211, "grad_norm": 0.07300044596195221, "learning_rate": 8.267168925774462e-06, "loss": 0.0028, "step": 84620 }, { "epoch": 0.6920717994848101, "grad_norm": 0.019437016919255257, "learning_rate": 8.266628684887083e-06, "loss": 0.0025, "step": 84630 }, { "epoch": 0.6921535756634093, "grad_norm": 0.029728449881076813, "learning_rate": 8.266088377455824e-06, "loss": 0.001, "step": 84640 }, { "epoch": 0.6922353518420085, "grad_norm": 0.09559352695941925, "learning_rate": 8.265548003491689e-06, "loss": 0.0023, "step": 84650 }, { "epoch": 0.6923171280206076, "grad_norm": 0.021409982815384865, "learning_rate": 8.265007563005687e-06, "loss": 0.0017, "step": 84660 }, { "epoch": 0.6923989041992068, "grad_norm": 0.05290886387228966, "learning_rate": 8.264467056008827e-06, "loss": 0.002, "step": 84670 }, { "epoch": 0.692480680377806, "grad_norm": 0.16856442391872406, "learning_rate": 8.263926482512119e-06, "loss": 0.0013, "step": 84680 }, { "epoch": 0.6925624565564051, "grad_norm": 0.14679256081581116, "learning_rate": 8.263385842526576e-06, "loss": 0.0017, "step": 84690 }, { "epoch": 0.6926442327350043, "grad_norm": 0.00213725958019495, "learning_rate": 8.262845136063212e-06, "loss": 0.0013, "step": 84700 }, { "epoch": 0.6927260089136035, "grad_norm": 0.09262955188751221, "learning_rate": 8.262304363133038e-06, "loss": 0.0031, "step": 84710 }, { "epoch": 0.6928077850922026, "grad_norm": 0.03263615444302559, "learning_rate": 8.261763523747073e-06, "loss": 0.0013, "step": 84720 }, { "epoch": 0.6928895612708018, "grad_norm": 0.026856375858187675, "learning_rate": 8.261222617916335e-06, "loss": 0.0015, "step": 84730 }, { "epoch": 0.692971337449401, "grad_norm": 0.06674834340810776, "learning_rate": 8.260681645651842e-06, "loss": 0.0027, "step": 84740 }, { "epoch": 0.6930531136280002, "grad_norm": 0.0033283710945397615, "learning_rate": 8.260140606964611e-06, "loss": 0.0018, "step": 84750 }, { "epoch": 0.6931348898065993, "grad_norm": 0.07834643125534058, "learning_rate": 8.259599501865668e-06, "loss": 0.002, "step": 84760 }, { "epoch": 0.6932166659851985, "grad_norm": 0.07518716901540756, "learning_rate": 8.259058330366031e-06, "loss": 0.0026, "step": 84770 }, { "epoch": 0.6932984421637977, "grad_norm": 0.1894095093011856, "learning_rate": 8.25851709247673e-06, "loss": 0.0014, "step": 84780 }, { "epoch": 0.6933802183423968, "grad_norm": 0.039497263729572296, "learning_rate": 8.257975788208785e-06, "loss": 0.0023, "step": 84790 }, { "epoch": 0.693461994520996, "grad_norm": 0.0644204318523407, "learning_rate": 8.257434417573224e-06, "loss": 0.002, "step": 84800 }, { "epoch": 0.6935437706995952, "grad_norm": 0.04423349350690842, "learning_rate": 8.25689298058108e-06, "loss": 0.0022, "step": 84810 }, { "epoch": 0.6936255468781943, "grad_norm": 0.08140881359577179, "learning_rate": 8.256351477243376e-06, "loss": 0.0017, "step": 84820 }, { "epoch": 0.6937073230567935, "grad_norm": 0.035453975200653076, "learning_rate": 8.255809907571144e-06, "loss": 0.0018, "step": 84830 }, { "epoch": 0.6937890992353928, "grad_norm": 0.05756368115544319, "learning_rate": 8.25526827157542e-06, "loss": 0.0018, "step": 84840 }, { "epoch": 0.693870875413992, "grad_norm": 0.028275402262806892, "learning_rate": 8.254726569267236e-06, "loss": 0.0022, "step": 84850 }, { "epoch": 0.6939526515925911, "grad_norm": 0.06354746967554092, "learning_rate": 8.254184800657625e-06, "loss": 0.0023, "step": 84860 }, { "epoch": 0.6940344277711903, "grad_norm": 0.008501729927957058, "learning_rate": 8.253642965757625e-06, "loss": 0.0019, "step": 84870 }, { "epoch": 0.6941162039497895, "grad_norm": 0.08414735645055771, "learning_rate": 8.253101064578271e-06, "loss": 0.0015, "step": 84880 }, { "epoch": 0.6941979801283886, "grad_norm": 0.09092311561107635, "learning_rate": 8.252559097130605e-06, "loss": 0.0014, "step": 84890 }, { "epoch": 0.6942797563069878, "grad_norm": 0.02205786481499672, "learning_rate": 8.252017063425668e-06, "loss": 0.0024, "step": 84900 }, { "epoch": 0.694361532485587, "grad_norm": 0.07120133936405182, "learning_rate": 8.2514749634745e-06, "loss": 0.0019, "step": 84910 }, { "epoch": 0.6944433086641861, "grad_norm": 0.1951296031475067, "learning_rate": 8.250932797288142e-06, "loss": 0.0024, "step": 84920 }, { "epoch": 0.6945250848427853, "grad_norm": 0.06359150260686874, "learning_rate": 8.250390564877642e-06, "loss": 0.0012, "step": 84930 }, { "epoch": 0.6946068610213845, "grad_norm": 0.028211116790771484, "learning_rate": 8.249848266254044e-06, "loss": 0.0017, "step": 84940 }, { "epoch": 0.6946886371999836, "grad_norm": 0.05589709430932999, "learning_rate": 8.249305901428395e-06, "loss": 0.0011, "step": 84950 }, { "epoch": 0.6947704133785828, "grad_norm": 0.005937742535024881, "learning_rate": 8.248763470411743e-06, "loss": 0.0018, "step": 84960 }, { "epoch": 0.694852189557182, "grad_norm": 0.027577467262744904, "learning_rate": 8.24822097321514e-06, "loss": 0.001, "step": 84970 }, { "epoch": 0.6949339657357811, "grad_norm": 0.05826742947101593, "learning_rate": 8.247678409849633e-06, "loss": 0.0013, "step": 84980 }, { "epoch": 0.6950157419143803, "grad_norm": 0.06724091619253159, "learning_rate": 8.247135780326279e-06, "loss": 0.0019, "step": 84990 }, { "epoch": 0.6950975180929795, "grad_norm": 0.048375204205513, "learning_rate": 8.24659308465613e-06, "loss": 0.0036, "step": 85000 }, { "epoch": 0.6951792942715787, "grad_norm": 0.055067941546440125, "learning_rate": 8.246050322850239e-06, "loss": 0.0026, "step": 85010 }, { "epoch": 0.6952610704501778, "grad_norm": 0.037025682628154755, "learning_rate": 8.245507494919666e-06, "loss": 0.0014, "step": 85020 }, { "epoch": 0.695342846628777, "grad_norm": 0.11562419682741165, "learning_rate": 8.244964600875465e-06, "loss": 0.0026, "step": 85030 }, { "epoch": 0.6954246228073762, "grad_norm": 0.003987441770732403, "learning_rate": 8.2444216407287e-06, "loss": 0.0016, "step": 85040 }, { "epoch": 0.6955063989859754, "grad_norm": 0.08008938282728195, "learning_rate": 8.243878614490428e-06, "loss": 0.0031, "step": 85050 }, { "epoch": 0.6955881751645746, "grad_norm": 0.05929658189415932, "learning_rate": 8.243335522171712e-06, "loss": 0.0032, "step": 85060 }, { "epoch": 0.6956699513431738, "grad_norm": 0.017204901203513145, "learning_rate": 8.242792363783615e-06, "loss": 0.0016, "step": 85070 }, { "epoch": 0.695751727521773, "grad_norm": 0.08249582350254059, "learning_rate": 8.242249139337202e-06, "loss": 0.0015, "step": 85080 }, { "epoch": 0.6958335037003721, "grad_norm": 0.06633341312408447, "learning_rate": 8.241705848843538e-06, "loss": 0.0017, "step": 85090 }, { "epoch": 0.6959152798789713, "grad_norm": 0.023596396669745445, "learning_rate": 8.241162492313692e-06, "loss": 0.0018, "step": 85100 }, { "epoch": 0.6959970560575705, "grad_norm": 0.024963069707155228, "learning_rate": 8.24061906975873e-06, "loss": 0.0015, "step": 85110 }, { "epoch": 0.6960788322361696, "grad_norm": 0.05810116231441498, "learning_rate": 8.240075581189723e-06, "loss": 0.0018, "step": 85120 }, { "epoch": 0.6961606084147688, "grad_norm": 0.05026117339730263, "learning_rate": 8.239532026617745e-06, "loss": 0.0023, "step": 85130 }, { "epoch": 0.696242384593368, "grad_norm": 0.09544342756271362, "learning_rate": 8.238988406053865e-06, "loss": 0.0028, "step": 85140 }, { "epoch": 0.6963241607719671, "grad_norm": 0.07420560717582703, "learning_rate": 8.238444719509158e-06, "loss": 0.0013, "step": 85150 }, { "epoch": 0.6964059369505663, "grad_norm": 0.15380635857582092, "learning_rate": 8.237900966994701e-06, "loss": 0.0038, "step": 85160 }, { "epoch": 0.6964877131291655, "grad_norm": 0.10546949505805969, "learning_rate": 8.23735714852157e-06, "loss": 0.0027, "step": 85170 }, { "epoch": 0.6965694893077646, "grad_norm": 0.08812795579433441, "learning_rate": 8.23681326410084e-06, "loss": 0.0027, "step": 85180 }, { "epoch": 0.6966512654863638, "grad_norm": 0.19941379129886627, "learning_rate": 8.236269313743595e-06, "loss": 0.0026, "step": 85190 }, { "epoch": 0.696733041664963, "grad_norm": 0.11079635471105576, "learning_rate": 8.235725297460912e-06, "loss": 0.0019, "step": 85200 }, { "epoch": 0.6968148178435621, "grad_norm": 0.06633865088224411, "learning_rate": 8.235181215263874e-06, "loss": 0.0014, "step": 85210 }, { "epoch": 0.6968965940221613, "grad_norm": 0.08490178734064102, "learning_rate": 8.234637067163567e-06, "loss": 0.0046, "step": 85220 }, { "epoch": 0.6969783702007605, "grad_norm": 0.04824936017394066, "learning_rate": 8.234092853171073e-06, "loss": 0.0023, "step": 85230 }, { "epoch": 0.6970601463793596, "grad_norm": 0.16115868091583252, "learning_rate": 8.233548573297479e-06, "loss": 0.0022, "step": 85240 }, { "epoch": 0.6971419225579588, "grad_norm": 0.014485841616988182, "learning_rate": 8.23300422755387e-06, "loss": 0.003, "step": 85250 }, { "epoch": 0.6972236987365581, "grad_norm": 0.10178235173225403, "learning_rate": 8.23245981595134e-06, "loss": 0.0031, "step": 85260 }, { "epoch": 0.6973054749151573, "grad_norm": 0.050925299525260925, "learning_rate": 8.231915338500974e-06, "loss": 0.0015, "step": 85270 }, { "epoch": 0.6973872510937564, "grad_norm": 0.12607744336128235, "learning_rate": 8.231370795213867e-06, "loss": 0.0024, "step": 85280 }, { "epoch": 0.6974690272723556, "grad_norm": 0.08572506159543991, "learning_rate": 8.230826186101108e-06, "loss": 0.0027, "step": 85290 }, { "epoch": 0.6975508034509548, "grad_norm": 0.07484828680753708, "learning_rate": 8.230281511173797e-06, "loss": 0.0014, "step": 85300 }, { "epoch": 0.6976325796295539, "grad_norm": 0.09991903603076935, "learning_rate": 8.229736770443022e-06, "loss": 0.0032, "step": 85310 }, { "epoch": 0.6977143558081531, "grad_norm": 0.06457079201936722, "learning_rate": 8.229191963919887e-06, "loss": 0.0034, "step": 85320 }, { "epoch": 0.6977961319867523, "grad_norm": 0.043926700949668884, "learning_rate": 8.228647091615485e-06, "loss": 0.0016, "step": 85330 }, { "epoch": 0.6978779081653514, "grad_norm": 0.14173130691051483, "learning_rate": 8.228102153540917e-06, "loss": 0.0027, "step": 85340 }, { "epoch": 0.6979596843439506, "grad_norm": 0.01789485104382038, "learning_rate": 8.227557149707286e-06, "loss": 0.0012, "step": 85350 }, { "epoch": 0.6980414605225498, "grad_norm": 0.07714028656482697, "learning_rate": 8.227012080125692e-06, "loss": 0.0019, "step": 85360 }, { "epoch": 0.698123236701149, "grad_norm": 0.043664101511240005, "learning_rate": 8.226466944807236e-06, "loss": 0.0021, "step": 85370 }, { "epoch": 0.6982050128797481, "grad_norm": 0.11632400751113892, "learning_rate": 8.22592174376303e-06, "loss": 0.0027, "step": 85380 }, { "epoch": 0.6982867890583473, "grad_norm": 0.07500329613685608, "learning_rate": 8.225376477004174e-06, "loss": 0.0022, "step": 85390 }, { "epoch": 0.6983685652369465, "grad_norm": 0.07452821731567383, "learning_rate": 8.224831144541777e-06, "loss": 0.0018, "step": 85400 }, { "epoch": 0.6984503414155456, "grad_norm": 0.057207971811294556, "learning_rate": 8.224285746386948e-06, "loss": 0.0021, "step": 85410 }, { "epoch": 0.6985321175941448, "grad_norm": 0.12998047471046448, "learning_rate": 8.2237402825508e-06, "loss": 0.0032, "step": 85420 }, { "epoch": 0.698613893772744, "grad_norm": 0.06455761939287186, "learning_rate": 8.223194753044438e-06, "loss": 0.0024, "step": 85430 }, { "epoch": 0.6986956699513431, "grad_norm": 0.06765688210725784, "learning_rate": 8.222649157878983e-06, "loss": 0.0021, "step": 85440 }, { "epoch": 0.6987774461299423, "grad_norm": 0.03218953683972359, "learning_rate": 8.222103497065544e-06, "loss": 0.0027, "step": 85450 }, { "epoch": 0.6988592223085415, "grad_norm": 0.041949883103370667, "learning_rate": 8.221557770615236e-06, "loss": 0.0017, "step": 85460 }, { "epoch": 0.6989409984871406, "grad_norm": 0.04489978402853012, "learning_rate": 8.221011978539178e-06, "loss": 0.0012, "step": 85470 }, { "epoch": 0.6990227746657399, "grad_norm": 0.06654976308345795, "learning_rate": 8.22046612084849e-06, "loss": 0.0019, "step": 85480 }, { "epoch": 0.6991045508443391, "grad_norm": 0.18311583995819092, "learning_rate": 8.219920197554286e-06, "loss": 0.0012, "step": 85490 }, { "epoch": 0.6991863270229383, "grad_norm": 0.19053517282009125, "learning_rate": 8.219374208667693e-06, "loss": 0.0044, "step": 85500 }, { "epoch": 0.6992681032015374, "grad_norm": 0.01657084748148918, "learning_rate": 8.218828154199829e-06, "loss": 0.0028, "step": 85510 }, { "epoch": 0.6993498793801366, "grad_norm": 0.18963250517845154, "learning_rate": 8.218282034161821e-06, "loss": 0.0029, "step": 85520 }, { "epoch": 0.6994316555587358, "grad_norm": 0.07533888518810272, "learning_rate": 8.217735848564791e-06, "loss": 0.0022, "step": 85530 }, { "epoch": 0.6995134317373349, "grad_norm": 0.0516529344022274, "learning_rate": 8.217189597419866e-06, "loss": 0.0013, "step": 85540 }, { "epoch": 0.6995952079159341, "grad_norm": 0.10580625385046005, "learning_rate": 8.216643280738172e-06, "loss": 0.0022, "step": 85550 }, { "epoch": 0.6996769840945333, "grad_norm": 0.08980385959148407, "learning_rate": 8.216096898530843e-06, "loss": 0.0067, "step": 85560 }, { "epoch": 0.6997587602731324, "grad_norm": 0.023843536153435707, "learning_rate": 8.215550450809005e-06, "loss": 0.0014, "step": 85570 }, { "epoch": 0.6998405364517316, "grad_norm": 0.2372877150774002, "learning_rate": 8.21500393758379e-06, "loss": 0.002, "step": 85580 }, { "epoch": 0.6999223126303308, "grad_norm": 0.10804662853479385, "learning_rate": 8.214457358866333e-06, "loss": 0.0016, "step": 85590 }, { "epoch": 0.70000408880893, "grad_norm": 0.08311421424150467, "learning_rate": 8.213910714667766e-06, "loss": 0.0023, "step": 85600 }, { "epoch": 0.7000858649875291, "grad_norm": 0.043532080948352814, "learning_rate": 8.213364004999224e-06, "loss": 0.0022, "step": 85610 }, { "epoch": 0.7001676411661283, "grad_norm": 0.029964564368128777, "learning_rate": 8.212817229871845e-06, "loss": 0.0021, "step": 85620 }, { "epoch": 0.7002494173447275, "grad_norm": 0.05798111855983734, "learning_rate": 8.21227038929677e-06, "loss": 0.0017, "step": 85630 }, { "epoch": 0.7003311935233266, "grad_norm": 0.058667317032814026, "learning_rate": 8.211723483285135e-06, "loss": 0.0026, "step": 85640 }, { "epoch": 0.7004129697019258, "grad_norm": 0.19678419828414917, "learning_rate": 8.211176511848081e-06, "loss": 0.0016, "step": 85650 }, { "epoch": 0.700494745880525, "grad_norm": 0.15654918551445007, "learning_rate": 8.210629474996753e-06, "loss": 0.0017, "step": 85660 }, { "epoch": 0.7005765220591241, "grad_norm": 0.04077305644750595, "learning_rate": 8.210082372742292e-06, "loss": 0.0023, "step": 85670 }, { "epoch": 0.7006582982377233, "grad_norm": 0.16771721839904785, "learning_rate": 8.209535205095842e-06, "loss": 0.0025, "step": 85680 }, { "epoch": 0.7007400744163226, "grad_norm": 0.14235810935497284, "learning_rate": 8.208987972068554e-06, "loss": 0.0023, "step": 85690 }, { "epoch": 0.7008218505949217, "grad_norm": 0.1263761669397354, "learning_rate": 8.20844067367157e-06, "loss": 0.0027, "step": 85700 }, { "epoch": 0.7009036267735209, "grad_norm": 0.035915862768888474, "learning_rate": 8.207893309916044e-06, "loss": 0.0014, "step": 85710 }, { "epoch": 0.7009854029521201, "grad_norm": 0.31901493668556213, "learning_rate": 8.207345880813123e-06, "loss": 0.0017, "step": 85720 }, { "epoch": 0.7010671791307193, "grad_norm": 0.16674256324768066, "learning_rate": 8.206798386373959e-06, "loss": 0.002, "step": 85730 }, { "epoch": 0.7011489553093184, "grad_norm": 0.04282670468091965, "learning_rate": 8.206250826609703e-06, "loss": 0.0029, "step": 85740 }, { "epoch": 0.7012307314879176, "grad_norm": 0.046644698828458786, "learning_rate": 8.205703201531515e-06, "loss": 0.0014, "step": 85750 }, { "epoch": 0.7013125076665168, "grad_norm": 0.12412740290164948, "learning_rate": 8.205155511150547e-06, "loss": 0.0015, "step": 85760 }, { "epoch": 0.7013942838451159, "grad_norm": 0.19630034267902374, "learning_rate": 8.204607755477954e-06, "loss": 0.0015, "step": 85770 }, { "epoch": 0.7014760600237151, "grad_norm": 0.04169469326734543, "learning_rate": 8.204059934524897e-06, "loss": 0.0019, "step": 85780 }, { "epoch": 0.7015578362023143, "grad_norm": 0.021106865257024765, "learning_rate": 8.203512048302535e-06, "loss": 0.0014, "step": 85790 }, { "epoch": 0.7016396123809134, "grad_norm": 0.05297863483428955, "learning_rate": 8.202964096822028e-06, "loss": 0.0028, "step": 85800 }, { "epoch": 0.7017213885595126, "grad_norm": 0.03448772430419922, "learning_rate": 8.20241608009454e-06, "loss": 0.001, "step": 85810 }, { "epoch": 0.7018031647381118, "grad_norm": 0.0621105320751667, "learning_rate": 8.201867998131233e-06, "loss": 0.0015, "step": 85820 }, { "epoch": 0.7018849409167109, "grad_norm": 0.05412633344531059, "learning_rate": 8.20131985094327e-06, "loss": 0.0014, "step": 85830 }, { "epoch": 0.7019667170953101, "grad_norm": 0.06866531819105148, "learning_rate": 8.200771638541822e-06, "loss": 0.0024, "step": 85840 }, { "epoch": 0.7020484932739093, "grad_norm": 0.07251520454883575, "learning_rate": 8.200223360938053e-06, "loss": 0.0025, "step": 85850 }, { "epoch": 0.7021302694525084, "grad_norm": 0.06947866827249527, "learning_rate": 8.199675018143133e-06, "loss": 0.0032, "step": 85860 }, { "epoch": 0.7022120456311076, "grad_norm": 0.07180671393871307, "learning_rate": 8.199126610168232e-06, "loss": 0.0018, "step": 85870 }, { "epoch": 0.7022938218097068, "grad_norm": 0.03875547647476196, "learning_rate": 8.198578137024523e-06, "loss": 0.0019, "step": 85880 }, { "epoch": 0.702375597988306, "grad_norm": 0.050553299486637115, "learning_rate": 8.198029598723177e-06, "loss": 0.0023, "step": 85890 }, { "epoch": 0.7024573741669052, "grad_norm": 0.04792364314198494, "learning_rate": 8.197480995275366e-06, "loss": 0.0017, "step": 85900 }, { "epoch": 0.7025391503455044, "grad_norm": 0.1056404635310173, "learning_rate": 8.19693232669227e-06, "loss": 0.0013, "step": 85910 }, { "epoch": 0.7026209265241036, "grad_norm": 0.0776427760720253, "learning_rate": 8.196383592985063e-06, "loss": 0.0019, "step": 85920 }, { "epoch": 0.7027027027027027, "grad_norm": 0.023830078542232513, "learning_rate": 8.195834794164925e-06, "loss": 0.0016, "step": 85930 }, { "epoch": 0.7027844788813019, "grad_norm": 0.12410138547420502, "learning_rate": 8.195285930243035e-06, "loss": 0.0067, "step": 85940 }, { "epoch": 0.7028662550599011, "grad_norm": 0.13619469106197357, "learning_rate": 8.194737001230572e-06, "loss": 0.0071, "step": 85950 }, { "epoch": 0.7029480312385002, "grad_norm": 0.06306253373622894, "learning_rate": 8.194188007138722e-06, "loss": 0.0017, "step": 85960 }, { "epoch": 0.7030298074170994, "grad_norm": 0.022950217127799988, "learning_rate": 8.193638947978664e-06, "loss": 0.0018, "step": 85970 }, { "epoch": 0.7031115835956986, "grad_norm": 0.06559314578771591, "learning_rate": 8.193089823761584e-06, "loss": 0.002, "step": 85980 }, { "epoch": 0.7031933597742978, "grad_norm": 0.039976708590984344, "learning_rate": 8.192540634498668e-06, "loss": 0.0013, "step": 85990 }, { "epoch": 0.7032751359528969, "grad_norm": 0.08678071200847626, "learning_rate": 8.191991380201107e-06, "loss": 0.0016, "step": 86000 }, { "epoch": 0.7033569121314961, "grad_norm": 0.015149975195527077, "learning_rate": 8.191442060880086e-06, "loss": 0.0013, "step": 86010 }, { "epoch": 0.7034386883100953, "grad_norm": 0.22338040173053741, "learning_rate": 8.190892676546795e-06, "loss": 0.0036, "step": 86020 }, { "epoch": 0.7035204644886944, "grad_norm": 0.09341476857662201, "learning_rate": 8.19034322721243e-06, "loss": 0.0019, "step": 86030 }, { "epoch": 0.7036022406672936, "grad_norm": 0.12652665376663208, "learning_rate": 8.189793712888177e-06, "loss": 0.0039, "step": 86040 }, { "epoch": 0.7036840168458928, "grad_norm": 0.12994718551635742, "learning_rate": 8.189244133585232e-06, "loss": 0.002, "step": 86050 }, { "epoch": 0.7037657930244919, "grad_norm": 0.1395433098077774, "learning_rate": 8.188694489314795e-06, "loss": 0.0016, "step": 86060 }, { "epoch": 0.7038475692030911, "grad_norm": 0.13551248610019684, "learning_rate": 8.188144780088058e-06, "loss": 0.0016, "step": 86070 }, { "epoch": 0.7039293453816903, "grad_norm": 0.19386444985866547, "learning_rate": 8.187595005916221e-06, "loss": 0.0029, "step": 86080 }, { "epoch": 0.7040111215602894, "grad_norm": 0.14302653074264526, "learning_rate": 8.18704516681048e-06, "loss": 0.0028, "step": 86090 }, { "epoch": 0.7040928977388886, "grad_norm": 0.004140133503824472, "learning_rate": 8.186495262782041e-06, "loss": 0.0015, "step": 86100 }, { "epoch": 0.7041746739174878, "grad_norm": 0.06422081589698792, "learning_rate": 8.185945293842103e-06, "loss": 0.0023, "step": 86110 }, { "epoch": 0.7042564500960871, "grad_norm": 0.2343723028898239, "learning_rate": 8.18539526000187e-06, "loss": 0.0021, "step": 86120 }, { "epoch": 0.7043382262746862, "grad_norm": 0.0903363823890686, "learning_rate": 8.184845161272545e-06, "loss": 0.0031, "step": 86130 }, { "epoch": 0.7044200024532854, "grad_norm": 0.027734212577342987, "learning_rate": 8.184294997665337e-06, "loss": 0.0015, "step": 86140 }, { "epoch": 0.7045017786318846, "grad_norm": 0.07156113535165787, "learning_rate": 8.18374476919145e-06, "loss": 0.002, "step": 86150 }, { "epoch": 0.7045835548104837, "grad_norm": 0.051163189113140106, "learning_rate": 8.183194475862095e-06, "loss": 0.0021, "step": 86160 }, { "epoch": 0.7046653309890829, "grad_norm": 0.08002490550279617, "learning_rate": 8.182644117688481e-06, "loss": 0.0017, "step": 86170 }, { "epoch": 0.7047471071676821, "grad_norm": 0.11919291317462921, "learning_rate": 8.18209369468182e-06, "loss": 0.0016, "step": 86180 }, { "epoch": 0.7048288833462812, "grad_norm": 0.03802517056465149, "learning_rate": 8.181543206853324e-06, "loss": 0.0012, "step": 86190 }, { "epoch": 0.7049106595248804, "grad_norm": 0.022651467472314835, "learning_rate": 8.180992654214206e-06, "loss": 0.0015, "step": 86200 }, { "epoch": 0.7049924357034796, "grad_norm": 0.008246750570833683, "learning_rate": 8.180442036775682e-06, "loss": 0.001, "step": 86210 }, { "epoch": 0.7050742118820787, "grad_norm": 0.039153411984443665, "learning_rate": 8.179891354548967e-06, "loss": 0.0016, "step": 86220 }, { "epoch": 0.7051559880606779, "grad_norm": 0.028821121901273727, "learning_rate": 8.179340607545284e-06, "loss": 0.0011, "step": 86230 }, { "epoch": 0.7052377642392771, "grad_norm": 0.01885538175702095, "learning_rate": 8.178789795775845e-06, "loss": 0.0014, "step": 86240 }, { "epoch": 0.7053195404178763, "grad_norm": 0.10398223251104355, "learning_rate": 8.178238919251876e-06, "loss": 0.0057, "step": 86250 }, { "epoch": 0.7054013165964754, "grad_norm": 0.02026079222559929, "learning_rate": 8.177687977984596e-06, "loss": 0.0019, "step": 86260 }, { "epoch": 0.7054830927750746, "grad_norm": 0.05830458924174309, "learning_rate": 8.177136971985228e-06, "loss": 0.0014, "step": 86270 }, { "epoch": 0.7055648689536738, "grad_norm": 0.10780739784240723, "learning_rate": 8.176585901264999e-06, "loss": 0.0022, "step": 86280 }, { "epoch": 0.7056466451322729, "grad_norm": 0.06568992882966995, "learning_rate": 8.176034765835133e-06, "loss": 0.0018, "step": 86290 }, { "epoch": 0.7057284213108721, "grad_norm": 0.1260519176721573, "learning_rate": 8.175483565706856e-06, "loss": 0.0023, "step": 86300 }, { "epoch": 0.7058101974894713, "grad_norm": 0.04331430792808533, "learning_rate": 8.1749323008914e-06, "loss": 0.0024, "step": 86310 }, { "epoch": 0.7058919736680704, "grad_norm": 0.011173071339726448, "learning_rate": 8.17438097139999e-06, "loss": 0.004, "step": 86320 }, { "epoch": 0.7059737498466697, "grad_norm": 0.07195745408535004, "learning_rate": 8.173829577243862e-06, "loss": 0.0024, "step": 86330 }, { "epoch": 0.7060555260252689, "grad_norm": 0.016303539276123047, "learning_rate": 8.173278118434242e-06, "loss": 0.0013, "step": 86340 }, { "epoch": 0.7061373022038681, "grad_norm": 0.042591698467731476, "learning_rate": 8.17272659498237e-06, "loss": 0.0021, "step": 86350 }, { "epoch": 0.7062190783824672, "grad_norm": 0.027974212542176247, "learning_rate": 8.172175006899476e-06, "loss": 0.0013, "step": 86360 }, { "epoch": 0.7063008545610664, "grad_norm": 0.07891490310430527, "learning_rate": 8.171623354196801e-06, "loss": 0.0014, "step": 86370 }, { "epoch": 0.7063826307396656, "grad_norm": 0.020703226327896118, "learning_rate": 8.171071636885578e-06, "loss": 0.002, "step": 86380 }, { "epoch": 0.7064644069182647, "grad_norm": 0.02300713025033474, "learning_rate": 8.170519854977052e-06, "loss": 0.0018, "step": 86390 }, { "epoch": 0.7065461830968639, "grad_norm": 0.045428913086652756, "learning_rate": 8.169968008482456e-06, "loss": 0.0016, "step": 86400 }, { "epoch": 0.7066279592754631, "grad_norm": 0.039052244275808334, "learning_rate": 8.169416097413036e-06, "loss": 0.0012, "step": 86410 }, { "epoch": 0.7067097354540622, "grad_norm": 0.011359068565070629, "learning_rate": 8.168864121780033e-06, "loss": 0.0015, "step": 86420 }, { "epoch": 0.7067915116326614, "grad_norm": 0.04028063267469406, "learning_rate": 8.168312081594695e-06, "loss": 0.0016, "step": 86430 }, { "epoch": 0.7068732878112606, "grad_norm": 0.03147600591182709, "learning_rate": 8.167759976868261e-06, "loss": 0.002, "step": 86440 }, { "epoch": 0.7069550639898597, "grad_norm": 0.01742999255657196, "learning_rate": 8.167207807611983e-06, "loss": 0.001, "step": 86450 }, { "epoch": 0.7070368401684589, "grad_norm": 0.028929980471730232, "learning_rate": 8.166655573837108e-06, "loss": 0.0017, "step": 86460 }, { "epoch": 0.7071186163470581, "grad_norm": 0.04881686717271805, "learning_rate": 8.166103275554884e-06, "loss": 0.0011, "step": 86470 }, { "epoch": 0.7072003925256573, "grad_norm": 0.04107610136270523, "learning_rate": 8.165550912776563e-06, "loss": 0.0014, "step": 86480 }, { "epoch": 0.7072821687042564, "grad_norm": 0.055385153740644455, "learning_rate": 8.164998485513399e-06, "loss": 0.0023, "step": 86490 }, { "epoch": 0.7073639448828556, "grad_norm": 0.333106130361557, "learning_rate": 8.164445993776639e-06, "loss": 0.0021, "step": 86500 }, { "epoch": 0.7074457210614548, "grad_norm": 0.05372762680053711, "learning_rate": 8.163893437577545e-06, "loss": 0.0013, "step": 86510 }, { "epoch": 0.7075274972400539, "grad_norm": 0.1342836618423462, "learning_rate": 8.163340816927368e-06, "loss": 0.0016, "step": 86520 }, { "epoch": 0.7076092734186531, "grad_norm": 0.1394127607345581, "learning_rate": 8.16278813183737e-06, "loss": 0.0022, "step": 86530 }, { "epoch": 0.7076910495972524, "grad_norm": 0.09708721190690994, "learning_rate": 8.162235382318804e-06, "loss": 0.0015, "step": 86540 }, { "epoch": 0.7077728257758515, "grad_norm": 0.040922436863183975, "learning_rate": 8.161682568382934e-06, "loss": 0.0028, "step": 86550 }, { "epoch": 0.7078546019544507, "grad_norm": 0.06818056106567383, "learning_rate": 8.161129690041021e-06, "loss": 0.0023, "step": 86560 }, { "epoch": 0.7079363781330499, "grad_norm": 0.009437755681574345, "learning_rate": 8.160576747304325e-06, "loss": 0.0014, "step": 86570 }, { "epoch": 0.708018154311649, "grad_norm": 0.0732676237821579, "learning_rate": 8.160023740184113e-06, "loss": 0.0029, "step": 86580 }, { "epoch": 0.7080999304902482, "grad_norm": 0.0688149556517601, "learning_rate": 8.15947066869165e-06, "loss": 0.0021, "step": 86590 }, { "epoch": 0.7081817066688474, "grad_norm": 0.009017677046358585, "learning_rate": 8.158917532838197e-06, "loss": 0.0016, "step": 86600 }, { "epoch": 0.7082634828474466, "grad_norm": 0.018580585718154907, "learning_rate": 8.158364332635029e-06, "loss": 0.0017, "step": 86610 }, { "epoch": 0.7083452590260457, "grad_norm": 0.0048012458719313145, "learning_rate": 8.157811068093412e-06, "loss": 0.0029, "step": 86620 }, { "epoch": 0.7084270352046449, "grad_norm": 0.016685767099261284, "learning_rate": 8.157257739224618e-06, "loss": 0.0029, "step": 86630 }, { "epoch": 0.7085088113832441, "grad_norm": 0.11228187382221222, "learning_rate": 8.156704346039915e-06, "loss": 0.0015, "step": 86640 }, { "epoch": 0.7085905875618432, "grad_norm": 0.03487490862607956, "learning_rate": 8.156150888550582e-06, "loss": 0.0013, "step": 86650 }, { "epoch": 0.7086723637404424, "grad_norm": 0.14443163573741913, "learning_rate": 8.155597366767887e-06, "loss": 0.0028, "step": 86660 }, { "epoch": 0.7087541399190416, "grad_norm": 0.0926203653216362, "learning_rate": 8.155043780703108e-06, "loss": 0.0013, "step": 86670 }, { "epoch": 0.7088359160976407, "grad_norm": 0.07134688645601273, "learning_rate": 8.154490130367524e-06, "loss": 0.003, "step": 86680 }, { "epoch": 0.7089176922762399, "grad_norm": 0.062102966010570526, "learning_rate": 8.153936415772414e-06, "loss": 0.0027, "step": 86690 }, { "epoch": 0.7089994684548391, "grad_norm": 0.03910486400127411, "learning_rate": 8.153382636929052e-06, "loss": 0.003, "step": 86700 }, { "epoch": 0.7090812446334382, "grad_norm": 0.06179502606391907, "learning_rate": 8.152828793848727e-06, "loss": 0.0018, "step": 86710 }, { "epoch": 0.7091630208120374, "grad_norm": 0.15961919724941254, "learning_rate": 8.152274886542713e-06, "loss": 0.0033, "step": 86720 }, { "epoch": 0.7092447969906366, "grad_norm": 0.19237129390239716, "learning_rate": 8.151720915022298e-06, "loss": 0.0024, "step": 86730 }, { "epoch": 0.7093265731692358, "grad_norm": 0.06337224692106247, "learning_rate": 8.151166879298766e-06, "loss": 0.0014, "step": 86740 }, { "epoch": 0.7094083493478349, "grad_norm": 0.05340433120727539, "learning_rate": 8.150612779383403e-06, "loss": 0.002, "step": 86750 }, { "epoch": 0.7094901255264342, "grad_norm": 0.053887803107500076, "learning_rate": 8.150058615287498e-06, "loss": 0.0018, "step": 86760 }, { "epoch": 0.7095719017050334, "grad_norm": 0.04069870710372925, "learning_rate": 8.149504387022338e-06, "loss": 0.002, "step": 86770 }, { "epoch": 0.7096536778836325, "grad_norm": 0.06368184089660645, "learning_rate": 8.148950094599212e-06, "loss": 0.0013, "step": 86780 }, { "epoch": 0.7097354540622317, "grad_norm": 0.040421806275844574, "learning_rate": 8.148395738029415e-06, "loss": 0.0018, "step": 86790 }, { "epoch": 0.7098172302408309, "grad_norm": 0.028689421713352203, "learning_rate": 8.147841317324237e-06, "loss": 0.0016, "step": 86800 }, { "epoch": 0.70989900641943, "grad_norm": 0.03697223216295242, "learning_rate": 8.147286832494973e-06, "loss": 0.003, "step": 86810 }, { "epoch": 0.7099807825980292, "grad_norm": 0.10651008039712906, "learning_rate": 8.146732283552916e-06, "loss": 0.0011, "step": 86820 }, { "epoch": 0.7100625587766284, "grad_norm": 0.010708577930927277, "learning_rate": 8.146177670509367e-06, "loss": 0.0019, "step": 86830 }, { "epoch": 0.7101443349552276, "grad_norm": 0.08925748616456985, "learning_rate": 8.145622993375621e-06, "loss": 0.003, "step": 86840 }, { "epoch": 0.7102261111338267, "grad_norm": 0.0974154993891716, "learning_rate": 8.145068252162977e-06, "loss": 0.0018, "step": 86850 }, { "epoch": 0.7103078873124259, "grad_norm": 0.09718739241361618, "learning_rate": 8.144513446882738e-06, "loss": 0.002, "step": 86860 }, { "epoch": 0.7103896634910251, "grad_norm": 0.029001159593462944, "learning_rate": 8.143958577546202e-06, "loss": 0.0031, "step": 86870 }, { "epoch": 0.7104714396696242, "grad_norm": 0.034483082592487335, "learning_rate": 8.143403644164676e-06, "loss": 0.0012, "step": 86880 }, { "epoch": 0.7105532158482234, "grad_norm": 0.013486423529684544, "learning_rate": 8.142848646749461e-06, "loss": 0.0014, "step": 86890 }, { "epoch": 0.7106349920268226, "grad_norm": 0.04665297269821167, "learning_rate": 8.142293585311865e-06, "loss": 0.0013, "step": 86900 }, { "epoch": 0.7107167682054217, "grad_norm": 0.06218377500772476, "learning_rate": 8.141738459863195e-06, "loss": 0.0028, "step": 86910 }, { "epoch": 0.7107985443840209, "grad_norm": 0.03215993195772171, "learning_rate": 8.141183270414758e-06, "loss": 0.0014, "step": 86920 }, { "epoch": 0.7108803205626201, "grad_norm": 0.19264569878578186, "learning_rate": 8.140628016977865e-06, "loss": 0.0029, "step": 86930 }, { "epoch": 0.7109620967412192, "grad_norm": 0.025010351091623306, "learning_rate": 8.140072699563827e-06, "loss": 0.0026, "step": 86940 }, { "epoch": 0.7110438729198184, "grad_norm": 0.05819734185934067, "learning_rate": 8.139517318183956e-06, "loss": 0.0022, "step": 86950 }, { "epoch": 0.7111256490984176, "grad_norm": 0.18652677536010742, "learning_rate": 8.138961872849564e-06, "loss": 0.0013, "step": 86960 }, { "epoch": 0.7112074252770169, "grad_norm": 0.02340899594128132, "learning_rate": 8.138406363571967e-06, "loss": 0.0018, "step": 86970 }, { "epoch": 0.711289201455616, "grad_norm": 0.0384557880461216, "learning_rate": 8.137850790362483e-06, "loss": 0.0016, "step": 86980 }, { "epoch": 0.7113709776342152, "grad_norm": 0.026789963245391846, "learning_rate": 8.137295153232428e-06, "loss": 0.0011, "step": 86990 }, { "epoch": 0.7114527538128144, "grad_norm": 0.0933091789484024, "learning_rate": 8.136739452193118e-06, "loss": 0.002, "step": 87000 }, { "epoch": 0.7115345299914135, "grad_norm": 0.04699870944023132, "learning_rate": 8.136183687255878e-06, "loss": 0.0011, "step": 87010 }, { "epoch": 0.7116163061700127, "grad_norm": 0.11300251632928848, "learning_rate": 8.135627858432025e-06, "loss": 0.0014, "step": 87020 }, { "epoch": 0.7116980823486119, "grad_norm": 0.131325826048851, "learning_rate": 8.135071965732885e-06, "loss": 0.0014, "step": 87030 }, { "epoch": 0.711779858527211, "grad_norm": 0.04358479753136635, "learning_rate": 8.134516009169782e-06, "loss": 0.0032, "step": 87040 }, { "epoch": 0.7118616347058102, "grad_norm": 0.10638271272182465, "learning_rate": 8.133959988754038e-06, "loss": 0.0029, "step": 87050 }, { "epoch": 0.7119434108844094, "grad_norm": 0.09714782983064651, "learning_rate": 8.133403904496983e-06, "loss": 0.0011, "step": 87060 }, { "epoch": 0.7120251870630085, "grad_norm": 0.055222321301698685, "learning_rate": 8.132847756409942e-06, "loss": 0.0016, "step": 87070 }, { "epoch": 0.7121069632416077, "grad_norm": 0.12866069376468658, "learning_rate": 8.132291544504248e-06, "loss": 0.0012, "step": 87080 }, { "epoch": 0.7121887394202069, "grad_norm": 0.08181649446487427, "learning_rate": 8.131735268791226e-06, "loss": 0.0022, "step": 87090 }, { "epoch": 0.712270515598806, "grad_norm": 0.006733504123985767, "learning_rate": 8.131178929282212e-06, "loss": 0.0016, "step": 87100 }, { "epoch": 0.7123522917774052, "grad_norm": 0.01219945028424263, "learning_rate": 8.13062252598854e-06, "loss": 0.0023, "step": 87110 }, { "epoch": 0.7124340679560044, "grad_norm": 0.1616346836090088, "learning_rate": 8.130066058921542e-06, "loss": 0.0027, "step": 87120 }, { "epoch": 0.7125158441346036, "grad_norm": 0.009111578576266766, "learning_rate": 8.129509528092552e-06, "loss": 0.0019, "step": 87130 }, { "epoch": 0.7125976203132027, "grad_norm": 0.0026440760120749474, "learning_rate": 8.128952933512912e-06, "loss": 0.0011, "step": 87140 }, { "epoch": 0.7126793964918019, "grad_norm": 0.029973819851875305, "learning_rate": 8.128396275193955e-06, "loss": 0.0017, "step": 87150 }, { "epoch": 0.7127611726704011, "grad_norm": 0.07592669874429703, "learning_rate": 8.127839553147023e-06, "loss": 0.003, "step": 87160 }, { "epoch": 0.7128429488490002, "grad_norm": 0.02009754627943039, "learning_rate": 8.12728276738346e-06, "loss": 0.0011, "step": 87170 }, { "epoch": 0.7129247250275995, "grad_norm": 0.07282239943742752, "learning_rate": 8.126725917914602e-06, "loss": 0.0014, "step": 87180 }, { "epoch": 0.7130065012061987, "grad_norm": 0.13128729164600372, "learning_rate": 8.126169004751796e-06, "loss": 0.0027, "step": 87190 }, { "epoch": 0.7130882773847979, "grad_norm": 0.05323265120387077, "learning_rate": 8.125612027906387e-06, "loss": 0.0011, "step": 87200 }, { "epoch": 0.713170053563397, "grad_norm": 0.06625337898731232, "learning_rate": 8.12505498738972e-06, "loss": 0.0015, "step": 87210 }, { "epoch": 0.7132518297419962, "grad_norm": 0.05419441685080528, "learning_rate": 8.124497883213143e-06, "loss": 0.0012, "step": 87220 }, { "epoch": 0.7133336059205954, "grad_norm": 0.023272408172488213, "learning_rate": 8.123940715388003e-06, "loss": 0.0016, "step": 87230 }, { "epoch": 0.7134153820991945, "grad_norm": 0.09363865852355957, "learning_rate": 8.123383483925653e-06, "loss": 0.0023, "step": 87240 }, { "epoch": 0.7134971582777937, "grad_norm": 0.0866793692111969, "learning_rate": 8.122826188837443e-06, "loss": 0.0021, "step": 87250 }, { "epoch": 0.7135789344563929, "grad_norm": 0.19117332994937897, "learning_rate": 8.122268830134722e-06, "loss": 0.0021, "step": 87260 }, { "epoch": 0.713660710634992, "grad_norm": 0.06345207989215851, "learning_rate": 8.12171140782885e-06, "loss": 0.0026, "step": 87270 }, { "epoch": 0.7137424868135912, "grad_norm": 0.004377323668450117, "learning_rate": 8.12115392193118e-06, "loss": 0.0018, "step": 87280 }, { "epoch": 0.7138242629921904, "grad_norm": 0.12894412875175476, "learning_rate": 8.120596372453064e-06, "loss": 0.0016, "step": 87290 }, { "epoch": 0.7139060391707895, "grad_norm": 0.08486968278884888, "learning_rate": 8.120038759405867e-06, "loss": 0.0019, "step": 87300 }, { "epoch": 0.7139878153493887, "grad_norm": 0.07557640224695206, "learning_rate": 8.119481082800942e-06, "loss": 0.0016, "step": 87310 }, { "epoch": 0.7140695915279879, "grad_norm": 0.049697596579790115, "learning_rate": 8.118923342649653e-06, "loss": 0.0033, "step": 87320 }, { "epoch": 0.714151367706587, "grad_norm": 0.07393159717321396, "learning_rate": 8.11836553896336e-06, "loss": 0.0013, "step": 87330 }, { "epoch": 0.7142331438851862, "grad_norm": 0.06640603393316269, "learning_rate": 8.117807671753426e-06, "loss": 0.004, "step": 87340 }, { "epoch": 0.7143149200637854, "grad_norm": 0.14287126064300537, "learning_rate": 8.117249741031214e-06, "loss": 0.0016, "step": 87350 }, { "epoch": 0.7143966962423846, "grad_norm": 0.0779249519109726, "learning_rate": 8.116691746808094e-06, "loss": 0.0018, "step": 87360 }, { "epoch": 0.7144784724209837, "grad_norm": 0.014790092594921589, "learning_rate": 8.116133689095427e-06, "loss": 0.002, "step": 87370 }, { "epoch": 0.7145602485995829, "grad_norm": 0.026651300489902496, "learning_rate": 8.115575567904585e-06, "loss": 0.0013, "step": 87380 }, { "epoch": 0.7146420247781821, "grad_norm": 0.061221953481435776, "learning_rate": 8.115017383246937e-06, "loss": 0.0015, "step": 87390 }, { "epoch": 0.7147238009567813, "grad_norm": 0.06548941135406494, "learning_rate": 8.114459135133852e-06, "loss": 0.0012, "step": 87400 }, { "epoch": 0.7148055771353805, "grad_norm": 0.09545756876468658, "learning_rate": 8.113900823576703e-06, "loss": 0.0012, "step": 87410 }, { "epoch": 0.7148873533139797, "grad_norm": 0.02119002304971218, "learning_rate": 8.113342448586862e-06, "loss": 0.002, "step": 87420 }, { "epoch": 0.7149691294925788, "grad_norm": 0.14075396955013275, "learning_rate": 8.112784010175708e-06, "loss": 0.0023, "step": 87430 }, { "epoch": 0.715050905671178, "grad_norm": 0.01950898766517639, "learning_rate": 8.11222550835461e-06, "loss": 0.0011, "step": 87440 }, { "epoch": 0.7151326818497772, "grad_norm": 0.029887894168496132, "learning_rate": 8.11166694313495e-06, "loss": 0.0022, "step": 87450 }, { "epoch": 0.7152144580283764, "grad_norm": 0.04491250589489937, "learning_rate": 8.111108314528105e-06, "loss": 0.0013, "step": 87460 }, { "epoch": 0.7152962342069755, "grad_norm": 0.05403563752770424, "learning_rate": 8.110549622545454e-06, "loss": 0.0009, "step": 87470 }, { "epoch": 0.7153780103855747, "grad_norm": 0.011998960748314857, "learning_rate": 8.10999086719838e-06, "loss": 0.001, "step": 87480 }, { "epoch": 0.7154597865641739, "grad_norm": 0.09477977454662323, "learning_rate": 8.109432048498264e-06, "loss": 0.0022, "step": 87490 }, { "epoch": 0.715541562742773, "grad_norm": 0.09701690077781677, "learning_rate": 8.10887316645649e-06, "loss": 0.0013, "step": 87500 }, { "epoch": 0.7156233389213722, "grad_norm": 0.02329784445464611, "learning_rate": 8.10831422108444e-06, "loss": 0.0029, "step": 87510 }, { "epoch": 0.7157051150999714, "grad_norm": 0.21518787741661072, "learning_rate": 8.107755212393507e-06, "loss": 0.0016, "step": 87520 }, { "epoch": 0.7157868912785705, "grad_norm": 0.0420759953558445, "learning_rate": 8.10719614039507e-06, "loss": 0.0014, "step": 87530 }, { "epoch": 0.7158686674571697, "grad_norm": 0.081437848508358, "learning_rate": 8.106637005100525e-06, "loss": 0.0026, "step": 87540 }, { "epoch": 0.7159504436357689, "grad_norm": 0.06996249407529831, "learning_rate": 8.106077806521258e-06, "loss": 0.0018, "step": 87550 }, { "epoch": 0.716032219814368, "grad_norm": 0.007716800086200237, "learning_rate": 8.105518544668661e-06, "loss": 0.0012, "step": 87560 }, { "epoch": 0.7161139959929672, "grad_norm": 0.026441330090165138, "learning_rate": 8.104959219554127e-06, "loss": 0.0026, "step": 87570 }, { "epoch": 0.7161957721715664, "grad_norm": 0.029380742460489273, "learning_rate": 8.10439983118905e-06, "loss": 0.0036, "step": 87580 }, { "epoch": 0.7162775483501655, "grad_norm": 0.09681802242994308, "learning_rate": 8.103840379584825e-06, "loss": 0.0019, "step": 87590 }, { "epoch": 0.7163593245287647, "grad_norm": 0.04358511418104172, "learning_rate": 8.103280864752849e-06, "loss": 0.002, "step": 87600 }, { "epoch": 0.716441100707364, "grad_norm": 0.01776134967803955, "learning_rate": 8.102721286704519e-06, "loss": 0.0033, "step": 87610 }, { "epoch": 0.7165228768859632, "grad_norm": 0.14360052347183228, "learning_rate": 8.102161645451234e-06, "loss": 0.0008, "step": 87620 }, { "epoch": 0.7166046530645623, "grad_norm": 0.14795035123825073, "learning_rate": 8.101601941004395e-06, "loss": 0.0026, "step": 87630 }, { "epoch": 0.7166864292431615, "grad_norm": 0.013785099610686302, "learning_rate": 8.101042173375404e-06, "loss": 0.0012, "step": 87640 }, { "epoch": 0.7167682054217607, "grad_norm": 0.028642326593399048, "learning_rate": 8.100482342575663e-06, "loss": 0.0014, "step": 87650 }, { "epoch": 0.7168499816003598, "grad_norm": 0.03421952947974205, "learning_rate": 8.099922448616576e-06, "loss": 0.0018, "step": 87660 }, { "epoch": 0.716931757778959, "grad_norm": 0.002584925852715969, "learning_rate": 8.099362491509548e-06, "loss": 0.0019, "step": 87670 }, { "epoch": 0.7170135339575582, "grad_norm": 0.2126448005437851, "learning_rate": 8.098802471265989e-06, "loss": 0.0025, "step": 87680 }, { "epoch": 0.7170953101361573, "grad_norm": 0.06464729458093643, "learning_rate": 8.098242387897303e-06, "loss": 0.0025, "step": 87690 }, { "epoch": 0.7171770863147565, "grad_norm": 0.09336856752634048, "learning_rate": 8.097682241414903e-06, "loss": 0.007, "step": 87700 }, { "epoch": 0.7172588624933557, "grad_norm": 0.05254814401268959, "learning_rate": 8.097122031830196e-06, "loss": 0.002, "step": 87710 }, { "epoch": 0.7173406386719549, "grad_norm": 0.12519386410713196, "learning_rate": 8.096561759154597e-06, "loss": 0.0015, "step": 87720 }, { "epoch": 0.717422414850554, "grad_norm": 0.0451858825981617, "learning_rate": 8.096001423399518e-06, "loss": 0.0015, "step": 87730 }, { "epoch": 0.7175041910291532, "grad_norm": 0.05839215964078903, "learning_rate": 8.095441024576372e-06, "loss": 0.0014, "step": 87740 }, { "epoch": 0.7175859672077524, "grad_norm": 0.05788397789001465, "learning_rate": 8.094880562696577e-06, "loss": 0.0016, "step": 87750 }, { "epoch": 0.7176677433863515, "grad_norm": 0.16585518419742584, "learning_rate": 8.09432003777155e-06, "loss": 0.0033, "step": 87760 }, { "epoch": 0.7177495195649507, "grad_norm": 0.10249081254005432, "learning_rate": 8.093759449812706e-06, "loss": 0.0017, "step": 87770 }, { "epoch": 0.7178312957435499, "grad_norm": 0.15528851747512817, "learning_rate": 8.09319879883147e-06, "loss": 0.0024, "step": 87780 }, { "epoch": 0.717913071922149, "grad_norm": 0.045858461409807205, "learning_rate": 8.092638084839259e-06, "loss": 0.0079, "step": 87790 }, { "epoch": 0.7179948481007482, "grad_norm": 0.02624703198671341, "learning_rate": 8.0920773078475e-06, "loss": 0.002, "step": 87800 }, { "epoch": 0.7180766242793474, "grad_norm": 0.09115438908338547, "learning_rate": 8.091516467867607e-06, "loss": 0.0016, "step": 87810 }, { "epoch": 0.7181584004579467, "grad_norm": 0.010249817743897438, "learning_rate": 8.090955564911014e-06, "loss": 0.0019, "step": 87820 }, { "epoch": 0.7182401766365458, "grad_norm": 0.14142853021621704, "learning_rate": 8.090394598989144e-06, "loss": 0.0018, "step": 87830 }, { "epoch": 0.718321952815145, "grad_norm": 0.030337130650877953, "learning_rate": 8.089833570113422e-06, "loss": 0.0014, "step": 87840 }, { "epoch": 0.7184037289937442, "grad_norm": 0.06245850399136543, "learning_rate": 8.089272478295281e-06, "loss": 0.0018, "step": 87850 }, { "epoch": 0.7184855051723433, "grad_norm": 0.02405940555036068, "learning_rate": 8.088711323546146e-06, "loss": 0.0025, "step": 87860 }, { "epoch": 0.7185672813509425, "grad_norm": 0.05706338956952095, "learning_rate": 8.088150105877453e-06, "loss": 0.0018, "step": 87870 }, { "epoch": 0.7186490575295417, "grad_norm": 0.0664103627204895, "learning_rate": 8.087588825300629e-06, "loss": 0.0012, "step": 87880 }, { "epoch": 0.7187308337081408, "grad_norm": 0.03764433041214943, "learning_rate": 8.087027481827114e-06, "loss": 0.0019, "step": 87890 }, { "epoch": 0.71881260988674, "grad_norm": 0.040163736790418625, "learning_rate": 8.08646607546834e-06, "loss": 0.0012, "step": 87900 }, { "epoch": 0.7188943860653392, "grad_norm": 0.05780358612537384, "learning_rate": 8.085904606235742e-06, "loss": 0.0022, "step": 87910 }, { "epoch": 0.7189761622439383, "grad_norm": 0.016446266323328018, "learning_rate": 8.08534307414076e-06, "loss": 0.0037, "step": 87920 }, { "epoch": 0.7190579384225375, "grad_norm": 0.17539523541927338, "learning_rate": 8.08478147919483e-06, "loss": 0.0026, "step": 87930 }, { "epoch": 0.7191397146011367, "grad_norm": 0.03612901270389557, "learning_rate": 8.084219821409394e-06, "loss": 0.0018, "step": 87940 }, { "epoch": 0.7192214907797359, "grad_norm": 0.06500939279794693, "learning_rate": 8.083658100795894e-06, "loss": 0.0014, "step": 87950 }, { "epoch": 0.719303266958335, "grad_norm": 0.036224961280822754, "learning_rate": 8.083096317365772e-06, "loss": 0.0018, "step": 87960 }, { "epoch": 0.7193850431369342, "grad_norm": 0.040921617299318314, "learning_rate": 8.082534471130471e-06, "loss": 0.002, "step": 87970 }, { "epoch": 0.7194668193155334, "grad_norm": 0.06997714936733246, "learning_rate": 8.081972562101438e-06, "loss": 0.0014, "step": 87980 }, { "epoch": 0.7195485954941325, "grad_norm": 0.07014668732881546, "learning_rate": 8.081410590290118e-06, "loss": 0.0014, "step": 87990 }, { "epoch": 0.7196303716727317, "grad_norm": 0.055484380573034286, "learning_rate": 8.080848555707961e-06, "loss": 0.0017, "step": 88000 }, { "epoch": 0.7197121478513309, "grad_norm": 0.042812496423721313, "learning_rate": 8.080286458366414e-06, "loss": 0.0018, "step": 88010 }, { "epoch": 0.71979392402993, "grad_norm": 0.09436947107315063, "learning_rate": 8.079724298276927e-06, "loss": 0.0021, "step": 88020 }, { "epoch": 0.7198757002085292, "grad_norm": 0.0636582002043724, "learning_rate": 8.079162075450956e-06, "loss": 0.002, "step": 88030 }, { "epoch": 0.7199574763871285, "grad_norm": 0.06509171426296234, "learning_rate": 8.078599789899948e-06, "loss": 0.0017, "step": 88040 }, { "epoch": 0.7200392525657276, "grad_norm": 0.015215424820780754, "learning_rate": 8.07803744163536e-06, "loss": 0.0013, "step": 88050 }, { "epoch": 0.7201210287443268, "grad_norm": 0.04994095116853714, "learning_rate": 8.077475030668647e-06, "loss": 0.0017, "step": 88060 }, { "epoch": 0.720202804922926, "grad_norm": 0.1238531842827797, "learning_rate": 8.076912557011267e-06, "loss": 0.0016, "step": 88070 }, { "epoch": 0.7202845811015252, "grad_norm": 0.023768488317728043, "learning_rate": 8.076350020674678e-06, "loss": 0.0024, "step": 88080 }, { "epoch": 0.7203663572801243, "grad_norm": 0.04998130723834038, "learning_rate": 8.075787421670336e-06, "loss": 0.0027, "step": 88090 }, { "epoch": 0.7204481334587235, "grad_norm": 0.11426007002592087, "learning_rate": 8.075224760009706e-06, "loss": 0.0015, "step": 88100 }, { "epoch": 0.7205299096373227, "grad_norm": 0.06693411618471146, "learning_rate": 8.074662035704246e-06, "loss": 0.0018, "step": 88110 }, { "epoch": 0.7206116858159218, "grad_norm": 0.04245647042989731, "learning_rate": 8.074099248765423e-06, "loss": 0.0016, "step": 88120 }, { "epoch": 0.720693461994521, "grad_norm": 0.03526762127876282, "learning_rate": 8.0735363992047e-06, "loss": 0.0019, "step": 88130 }, { "epoch": 0.7207752381731202, "grad_norm": 0.04521084576845169, "learning_rate": 8.072973487033541e-06, "loss": 0.002, "step": 88140 }, { "epoch": 0.7208570143517193, "grad_norm": 0.07626578211784363, "learning_rate": 8.072410512263415e-06, "loss": 0.0016, "step": 88150 }, { "epoch": 0.7209387905303185, "grad_norm": 0.009817398153245449, "learning_rate": 8.07184747490579e-06, "loss": 0.0013, "step": 88160 }, { "epoch": 0.7210205667089177, "grad_norm": 0.05700604245066643, "learning_rate": 8.071284374972132e-06, "loss": 0.0012, "step": 88170 }, { "epoch": 0.7211023428875168, "grad_norm": 0.08268726617097855, "learning_rate": 8.070721212473918e-06, "loss": 0.0034, "step": 88180 }, { "epoch": 0.721184119066116, "grad_norm": 0.008384868502616882, "learning_rate": 8.070157987422616e-06, "loss": 0.0027, "step": 88190 }, { "epoch": 0.7212658952447152, "grad_norm": 0.030148131772875786, "learning_rate": 8.0695946998297e-06, "loss": 0.0022, "step": 88200 }, { "epoch": 0.7213476714233144, "grad_norm": 0.07949526607990265, "learning_rate": 8.069031349706646e-06, "loss": 0.0014, "step": 88210 }, { "epoch": 0.7214294476019135, "grad_norm": 0.18809451162815094, "learning_rate": 8.068467937064928e-06, "loss": 0.002, "step": 88220 }, { "epoch": 0.7215112237805127, "grad_norm": 0.076151043176651, "learning_rate": 8.067904461916022e-06, "loss": 0.0015, "step": 88230 }, { "epoch": 0.7215929999591119, "grad_norm": 0.16295300424098969, "learning_rate": 8.067340924271411e-06, "loss": 0.0021, "step": 88240 }, { "epoch": 0.7216747761377111, "grad_norm": 0.1386207789182663, "learning_rate": 8.06677732414257e-06, "loss": 0.0017, "step": 88250 }, { "epoch": 0.7217565523163103, "grad_norm": 0.026285119354724884, "learning_rate": 8.066213661540985e-06, "loss": 0.0027, "step": 88260 }, { "epoch": 0.7218383284949095, "grad_norm": 0.051905013620853424, "learning_rate": 8.065649936478133e-06, "loss": 0.0016, "step": 88270 }, { "epoch": 0.7219201046735086, "grad_norm": 0.042946361005306244, "learning_rate": 8.0650861489655e-06, "loss": 0.0023, "step": 88280 }, { "epoch": 0.7220018808521078, "grad_norm": 0.0200372152030468, "learning_rate": 8.06452229901457e-06, "loss": 0.0018, "step": 88290 }, { "epoch": 0.722083657030707, "grad_norm": 0.045139625668525696, "learning_rate": 8.063958386636831e-06, "loss": 0.0033, "step": 88300 }, { "epoch": 0.7221654332093062, "grad_norm": 0.06834915280342102, "learning_rate": 8.06339441184377e-06, "loss": 0.0018, "step": 88310 }, { "epoch": 0.7222472093879053, "grad_norm": 0.016760211437940598, "learning_rate": 8.062830374646874e-06, "loss": 0.0016, "step": 88320 }, { "epoch": 0.7223289855665045, "grad_norm": 0.0844726413488388, "learning_rate": 8.062266275057633e-06, "loss": 0.0016, "step": 88330 }, { "epoch": 0.7224107617451037, "grad_norm": 0.020326269790530205, "learning_rate": 8.061702113087541e-06, "loss": 0.0014, "step": 88340 }, { "epoch": 0.7224925379237028, "grad_norm": 0.041717711836099625, "learning_rate": 8.061137888748086e-06, "loss": 0.0009, "step": 88350 }, { "epoch": 0.722574314102302, "grad_norm": 0.020349226891994476, "learning_rate": 8.060573602050765e-06, "loss": 0.0013, "step": 88360 }, { "epoch": 0.7226560902809012, "grad_norm": 0.2963613271713257, "learning_rate": 8.060009253007073e-06, "loss": 0.0022, "step": 88370 }, { "epoch": 0.7227378664595003, "grad_norm": 0.027298925444483757, "learning_rate": 8.059444841628502e-06, "loss": 0.0017, "step": 88380 }, { "epoch": 0.7228196426380995, "grad_norm": 0.01692483387887478, "learning_rate": 8.058880367926555e-06, "loss": 0.0018, "step": 88390 }, { "epoch": 0.7229014188166987, "grad_norm": 0.061376120895147324, "learning_rate": 8.058315831912728e-06, "loss": 0.0014, "step": 88400 }, { "epoch": 0.7229831949952978, "grad_norm": 0.028951959684491158, "learning_rate": 8.057751233598523e-06, "loss": 0.001, "step": 88410 }, { "epoch": 0.723064971173897, "grad_norm": 0.19290906190872192, "learning_rate": 8.057186572995439e-06, "loss": 0.0018, "step": 88420 }, { "epoch": 0.7231467473524962, "grad_norm": 0.028158128261566162, "learning_rate": 8.056621850114979e-06, "loss": 0.0028, "step": 88430 }, { "epoch": 0.7232285235310953, "grad_norm": 0.2567415237426758, "learning_rate": 8.056057064968648e-06, "loss": 0.0019, "step": 88440 }, { "epoch": 0.7233102997096945, "grad_norm": 0.014016373082995415, "learning_rate": 8.05549221756795e-06, "loss": 0.0023, "step": 88450 }, { "epoch": 0.7233920758882938, "grad_norm": 0.09052802622318268, "learning_rate": 8.054927307924391e-06, "loss": 0.0021, "step": 88460 }, { "epoch": 0.723473852066893, "grad_norm": 0.05604236572980881, "learning_rate": 8.054362336049481e-06, "loss": 0.0017, "step": 88470 }, { "epoch": 0.7235556282454921, "grad_norm": 0.03704807907342911, "learning_rate": 8.053797301954727e-06, "loss": 0.0034, "step": 88480 }, { "epoch": 0.7236374044240913, "grad_norm": 0.12035027146339417, "learning_rate": 8.05323220565164e-06, "loss": 0.0011, "step": 88490 }, { "epoch": 0.7237191806026905, "grad_norm": 0.007663491647690535, "learning_rate": 8.05266704715173e-06, "loss": 0.0046, "step": 88500 }, { "epoch": 0.7238009567812896, "grad_norm": 0.024349892511963844, "learning_rate": 8.052101826466513e-06, "loss": 0.003, "step": 88510 }, { "epoch": 0.7238827329598888, "grad_norm": 0.014725148677825928, "learning_rate": 8.051536543607498e-06, "loss": 0.0012, "step": 88520 }, { "epoch": 0.723964509138488, "grad_norm": 0.07066167145967484, "learning_rate": 8.050971198586207e-06, "loss": 0.0024, "step": 88530 }, { "epoch": 0.7240462853170871, "grad_norm": 0.059359751641750336, "learning_rate": 8.05040579141415e-06, "loss": 0.0013, "step": 88540 }, { "epoch": 0.7241280614956863, "grad_norm": 0.0416143499314785, "learning_rate": 8.049840322102848e-06, "loss": 0.0012, "step": 88550 }, { "epoch": 0.7242098376742855, "grad_norm": 0.0651552602648735, "learning_rate": 8.049274790663821e-06, "loss": 0.003, "step": 88560 }, { "epoch": 0.7242916138528847, "grad_norm": 0.02503455989062786, "learning_rate": 8.048709197108586e-06, "loss": 0.0021, "step": 88570 }, { "epoch": 0.7243733900314838, "grad_norm": 0.06392078101634979, "learning_rate": 8.048143541448667e-06, "loss": 0.0021, "step": 88580 }, { "epoch": 0.724455166210083, "grad_norm": 0.018688123673200607, "learning_rate": 8.047577823695586e-06, "loss": 0.0019, "step": 88590 }, { "epoch": 0.7245369423886822, "grad_norm": 0.014147850684821606, "learning_rate": 8.04701204386087e-06, "loss": 0.0008, "step": 88600 }, { "epoch": 0.7246187185672813, "grad_norm": 0.02677115797996521, "learning_rate": 8.04644620195604e-06, "loss": 0.0046, "step": 88610 }, { "epoch": 0.7247004947458805, "grad_norm": 0.039414506405591965, "learning_rate": 8.045880297992624e-06, "loss": 0.0016, "step": 88620 }, { "epoch": 0.7247822709244797, "grad_norm": 0.03482268378138542, "learning_rate": 8.045314331982152e-06, "loss": 0.0037, "step": 88630 }, { "epoch": 0.7248640471030788, "grad_norm": 0.09769119322299957, "learning_rate": 8.04474830393615e-06, "loss": 0.0019, "step": 88640 }, { "epoch": 0.724945823281678, "grad_norm": 0.06657274812459946, "learning_rate": 8.044182213866152e-06, "loss": 0.0015, "step": 88650 }, { "epoch": 0.7250275994602772, "grad_norm": 0.45005592703819275, "learning_rate": 8.043616061783685e-06, "loss": 0.0014, "step": 88660 }, { "epoch": 0.7251093756388763, "grad_norm": 0.13936395943164825, "learning_rate": 8.043049847700289e-06, "loss": 0.0027, "step": 88670 }, { "epoch": 0.7251911518174756, "grad_norm": 0.036764957010746, "learning_rate": 8.04248357162749e-06, "loss": 0.0015, "step": 88680 }, { "epoch": 0.7252729279960748, "grad_norm": 0.13168857991695404, "learning_rate": 8.041917233576831e-06, "loss": 0.0019, "step": 88690 }, { "epoch": 0.725354704174674, "grad_norm": 0.08515964448451996, "learning_rate": 8.041350833559845e-06, "loss": 0.0022, "step": 88700 }, { "epoch": 0.7254364803532731, "grad_norm": 0.043882496654987335, "learning_rate": 8.040784371588069e-06, "loss": 0.0018, "step": 88710 }, { "epoch": 0.7255182565318723, "grad_norm": 0.06319703906774521, "learning_rate": 8.040217847673043e-06, "loss": 0.0024, "step": 88720 }, { "epoch": 0.7256000327104715, "grad_norm": 0.026494288817048073, "learning_rate": 8.039651261826308e-06, "loss": 0.0022, "step": 88730 }, { "epoch": 0.7256818088890706, "grad_norm": 0.03268686309456825, "learning_rate": 8.039084614059408e-06, "loss": 0.0028, "step": 88740 }, { "epoch": 0.7257635850676698, "grad_norm": 0.02463565208017826, "learning_rate": 8.038517904383884e-06, "loss": 0.0029, "step": 88750 }, { "epoch": 0.725845361246269, "grad_norm": 0.07783658057451248, "learning_rate": 8.037951132811279e-06, "loss": 0.0018, "step": 88760 }, { "epoch": 0.7259271374248681, "grad_norm": 0.009493486024439335, "learning_rate": 8.037384299353141e-06, "loss": 0.001, "step": 88770 }, { "epoch": 0.7260089136034673, "grad_norm": 0.08053633570671082, "learning_rate": 8.036817404021015e-06, "loss": 0.0023, "step": 88780 }, { "epoch": 0.7260906897820665, "grad_norm": 0.1078597754240036, "learning_rate": 8.036250446826452e-06, "loss": 0.0026, "step": 88790 }, { "epoch": 0.7261724659606656, "grad_norm": 0.0035057796631008387, "learning_rate": 8.035683427780998e-06, "loss": 0.001, "step": 88800 }, { "epoch": 0.7262542421392648, "grad_norm": 0.03566279262304306, "learning_rate": 8.035116346896205e-06, "loss": 0.0018, "step": 88810 }, { "epoch": 0.726336018317864, "grad_norm": 0.030341051518917084, "learning_rate": 8.034549204183626e-06, "loss": 0.0022, "step": 88820 }, { "epoch": 0.7264177944964632, "grad_norm": 0.11427082866430283, "learning_rate": 8.033981999654812e-06, "loss": 0.0019, "step": 88830 }, { "epoch": 0.7264995706750623, "grad_norm": 0.12856647372245789, "learning_rate": 8.033414733321319e-06, "loss": 0.0021, "step": 88840 }, { "epoch": 0.7265813468536615, "grad_norm": 0.13250087201595306, "learning_rate": 8.032847405194701e-06, "loss": 0.0013, "step": 88850 }, { "epoch": 0.7266631230322607, "grad_norm": 0.03687996417284012, "learning_rate": 8.032280015286518e-06, "loss": 0.0015, "step": 88860 }, { "epoch": 0.7267448992108598, "grad_norm": 0.03899860754609108, "learning_rate": 8.031712563608324e-06, "loss": 0.0011, "step": 88870 }, { "epoch": 0.726826675389459, "grad_norm": 0.014456900767982006, "learning_rate": 8.031145050171684e-06, "loss": 0.001, "step": 88880 }, { "epoch": 0.7269084515680583, "grad_norm": 0.1086789146065712, "learning_rate": 8.030577474988153e-06, "loss": 0.0017, "step": 88890 }, { "epoch": 0.7269902277466574, "grad_norm": 0.07947050780057907, "learning_rate": 8.030009838069296e-06, "loss": 0.0027, "step": 88900 }, { "epoch": 0.7270720039252566, "grad_norm": 0.031278740614652634, "learning_rate": 8.029442139426678e-06, "loss": 0.0013, "step": 88910 }, { "epoch": 0.7271537801038558, "grad_norm": 0.03224126994609833, "learning_rate": 8.028874379071858e-06, "loss": 0.0018, "step": 88920 }, { "epoch": 0.727235556282455, "grad_norm": 0.0538073293864727, "learning_rate": 8.028306557016405e-06, "loss": 0.0021, "step": 88930 }, { "epoch": 0.7273173324610541, "grad_norm": 0.011684785597026348, "learning_rate": 8.027738673271887e-06, "loss": 0.0009, "step": 88940 }, { "epoch": 0.7273991086396533, "grad_norm": 0.030079348012804985, "learning_rate": 8.027170727849872e-06, "loss": 0.0019, "step": 88950 }, { "epoch": 0.7274808848182525, "grad_norm": 0.06467761844396591, "learning_rate": 8.026602720761927e-06, "loss": 0.0016, "step": 88960 }, { "epoch": 0.7275626609968516, "grad_norm": 0.15334315598011017, "learning_rate": 8.026034652019626e-06, "loss": 0.0024, "step": 88970 }, { "epoch": 0.7276444371754508, "grad_norm": 0.029900474473834038, "learning_rate": 8.025466521634537e-06, "loss": 0.0014, "step": 88980 }, { "epoch": 0.72772621335405, "grad_norm": 0.0530867800116539, "learning_rate": 8.024898329618238e-06, "loss": 0.0018, "step": 88990 }, { "epoch": 0.7278079895326491, "grad_norm": 0.01350356638431549, "learning_rate": 8.0243300759823e-06, "loss": 0.0019, "step": 89000 }, { "epoch": 0.7278897657112483, "grad_norm": 0.036402627825737, "learning_rate": 8.023761760738302e-06, "loss": 0.0014, "step": 89010 }, { "epoch": 0.7279715418898475, "grad_norm": 0.026305317878723145, "learning_rate": 8.023193383897816e-06, "loss": 0.0013, "step": 89020 }, { "epoch": 0.7280533180684466, "grad_norm": 0.08120504021644592, "learning_rate": 8.022624945472425e-06, "loss": 0.0018, "step": 89030 }, { "epoch": 0.7281350942470458, "grad_norm": 0.13519065082073212, "learning_rate": 8.022056445473708e-06, "loss": 0.0027, "step": 89040 }, { "epoch": 0.728216870425645, "grad_norm": 0.03244948387145996, "learning_rate": 8.021487883913243e-06, "loss": 0.002, "step": 89050 }, { "epoch": 0.7282986466042441, "grad_norm": 0.03811373934149742, "learning_rate": 8.020919260802615e-06, "loss": 0.0019, "step": 89060 }, { "epoch": 0.7283804227828433, "grad_norm": 0.08652861416339874, "learning_rate": 8.020350576153407e-06, "loss": 0.0027, "step": 89070 }, { "epoch": 0.7284621989614425, "grad_norm": 0.4015129506587982, "learning_rate": 8.0197818299772e-06, "loss": 0.001, "step": 89080 }, { "epoch": 0.7285439751400417, "grad_norm": 0.056941866874694824, "learning_rate": 8.019213022285586e-06, "loss": 0.0019, "step": 89090 }, { "epoch": 0.7286257513186409, "grad_norm": 0.02410270832479, "learning_rate": 8.018644153090145e-06, "loss": 0.0013, "step": 89100 }, { "epoch": 0.7287075274972401, "grad_norm": 0.08067712932825089, "learning_rate": 8.01807522240247e-06, "loss": 0.0012, "step": 89110 }, { "epoch": 0.7287893036758393, "grad_norm": 0.10167121142148972, "learning_rate": 8.017506230234152e-06, "loss": 0.0021, "step": 89120 }, { "epoch": 0.7288710798544384, "grad_norm": 0.12394573539495468, "learning_rate": 8.016937176596777e-06, "loss": 0.0031, "step": 89130 }, { "epoch": 0.7289528560330376, "grad_norm": 0.08427497744560242, "learning_rate": 8.016368061501938e-06, "loss": 0.0015, "step": 89140 }, { "epoch": 0.7290346322116368, "grad_norm": 0.06923168152570724, "learning_rate": 8.015798884961233e-06, "loss": 0.0013, "step": 89150 }, { "epoch": 0.729116408390236, "grad_norm": 0.08444753289222717, "learning_rate": 8.01522964698625e-06, "loss": 0.0021, "step": 89160 }, { "epoch": 0.7291981845688351, "grad_norm": 0.07090157270431519, "learning_rate": 8.014660347588591e-06, "loss": 0.0022, "step": 89170 }, { "epoch": 0.7292799607474343, "grad_norm": 0.12625297904014587, "learning_rate": 8.01409098677985e-06, "loss": 0.0028, "step": 89180 }, { "epoch": 0.7293617369260335, "grad_norm": 0.013626840896904469, "learning_rate": 8.013521564571625e-06, "loss": 0.0012, "step": 89190 }, { "epoch": 0.7294435131046326, "grad_norm": 0.0971660390496254, "learning_rate": 8.012952080975516e-06, "loss": 0.0014, "step": 89200 }, { "epoch": 0.7295252892832318, "grad_norm": 0.004594466648995876, "learning_rate": 8.012382536003125e-06, "loss": 0.0032, "step": 89210 }, { "epoch": 0.729607065461831, "grad_norm": 0.06291655451059341, "learning_rate": 8.011812929666052e-06, "loss": 0.0014, "step": 89220 }, { "epoch": 0.7296888416404301, "grad_norm": 0.03372340649366379, "learning_rate": 8.011243261975901e-06, "loss": 0.0009, "step": 89230 }, { "epoch": 0.7297706178190293, "grad_norm": 0.08576468378305435, "learning_rate": 8.010673532944278e-06, "loss": 0.0019, "step": 89240 }, { "epoch": 0.7298523939976285, "grad_norm": 0.15940120816230774, "learning_rate": 8.010103742582788e-06, "loss": 0.0021, "step": 89250 }, { "epoch": 0.7299341701762276, "grad_norm": 0.05537528544664383, "learning_rate": 8.009533890903037e-06, "loss": 0.0016, "step": 89260 }, { "epoch": 0.7300159463548268, "grad_norm": 0.10710707306861877, "learning_rate": 8.008963977916637e-06, "loss": 0.0023, "step": 89270 }, { "epoch": 0.730097722533426, "grad_norm": 0.10322672873735428, "learning_rate": 8.00839400363519e-06, "loss": 0.0012, "step": 89280 }, { "epoch": 0.7301794987120251, "grad_norm": 0.016884764656424522, "learning_rate": 8.007823968070318e-06, "loss": 0.002, "step": 89290 }, { "epoch": 0.7302612748906243, "grad_norm": 0.01901644468307495, "learning_rate": 8.007253871233624e-06, "loss": 0.0012, "step": 89300 }, { "epoch": 0.7303430510692235, "grad_norm": 0.041009437292814255, "learning_rate": 8.006683713136724e-06, "loss": 0.0018, "step": 89310 }, { "epoch": 0.7304248272478228, "grad_norm": 0.0759587362408638, "learning_rate": 8.006113493791233e-06, "loss": 0.0016, "step": 89320 }, { "epoch": 0.7305066034264219, "grad_norm": 0.04668238386511803, "learning_rate": 8.005543213208766e-06, "loss": 0.0023, "step": 89330 }, { "epoch": 0.7305883796050211, "grad_norm": 0.01682044379413128, "learning_rate": 8.004972871400942e-06, "loss": 0.0015, "step": 89340 }, { "epoch": 0.7306701557836203, "grad_norm": 0.07763762772083282, "learning_rate": 8.004402468379377e-06, "loss": 0.0017, "step": 89350 }, { "epoch": 0.7307519319622194, "grad_norm": 0.033437579870224, "learning_rate": 8.003832004155691e-06, "loss": 0.0027, "step": 89360 }, { "epoch": 0.7308337081408186, "grad_norm": 0.03152834251523018, "learning_rate": 8.003261478741507e-06, "loss": 0.001, "step": 89370 }, { "epoch": 0.7309154843194178, "grad_norm": 0.03863904997706413, "learning_rate": 8.002690892148446e-06, "loss": 0.001, "step": 89380 }, { "epoch": 0.7309972604980169, "grad_norm": 0.12080434709787369, "learning_rate": 8.00212024438813e-06, "loss": 0.002, "step": 89390 }, { "epoch": 0.7310790366766161, "grad_norm": 0.04645616561174393, "learning_rate": 8.001549535472183e-06, "loss": 0.0011, "step": 89400 }, { "epoch": 0.7311608128552153, "grad_norm": 0.05097213387489319, "learning_rate": 8.000978765412234e-06, "loss": 0.0012, "step": 89410 }, { "epoch": 0.7312425890338144, "grad_norm": 0.10158669203519821, "learning_rate": 8.000407934219907e-06, "loss": 0.0022, "step": 89420 }, { "epoch": 0.7313243652124136, "grad_norm": 0.06947648525238037, "learning_rate": 7.999837041906834e-06, "loss": 0.0026, "step": 89430 }, { "epoch": 0.7314061413910128, "grad_norm": 0.14367717504501343, "learning_rate": 7.999266088484638e-06, "loss": 0.0014, "step": 89440 }, { "epoch": 0.731487917569612, "grad_norm": 0.04056309536099434, "learning_rate": 7.998695073964956e-06, "loss": 0.0025, "step": 89450 }, { "epoch": 0.7315696937482111, "grad_norm": 0.027865249663591385, "learning_rate": 7.998123998359417e-06, "loss": 0.0015, "step": 89460 }, { "epoch": 0.7316514699268103, "grad_norm": 0.051638878881931305, "learning_rate": 7.997552861679658e-06, "loss": 0.0027, "step": 89470 }, { "epoch": 0.7317332461054095, "grad_norm": 0.053220052272081375, "learning_rate": 7.996981663937307e-06, "loss": 0.0028, "step": 89480 }, { "epoch": 0.7318150222840086, "grad_norm": 0.07604671269655228, "learning_rate": 7.996410405144006e-06, "loss": 0.0018, "step": 89490 }, { "epoch": 0.7318967984626078, "grad_norm": 0.03272087872028351, "learning_rate": 7.995839085311387e-06, "loss": 0.0019, "step": 89500 }, { "epoch": 0.731978574641207, "grad_norm": 0.11131007969379425, "learning_rate": 7.995267704451096e-06, "loss": 0.0018, "step": 89510 }, { "epoch": 0.7320603508198061, "grad_norm": 0.015938101336359978, "learning_rate": 7.994696262574761e-06, "loss": 0.0011, "step": 89520 }, { "epoch": 0.7321421269984054, "grad_norm": 0.022740298882126808, "learning_rate": 7.994124759694033e-06, "loss": 0.0013, "step": 89530 }, { "epoch": 0.7322239031770046, "grad_norm": 0.03664330020546913, "learning_rate": 7.993553195820547e-06, "loss": 0.001, "step": 89540 }, { "epoch": 0.7323056793556038, "grad_norm": 0.07848811149597168, "learning_rate": 7.99298157096595e-06, "loss": 0.0019, "step": 89550 }, { "epoch": 0.7323874555342029, "grad_norm": 0.01749560236930847, "learning_rate": 7.992409885141886e-06, "loss": 0.0013, "step": 89560 }, { "epoch": 0.7324692317128021, "grad_norm": 0.05177505314350128, "learning_rate": 7.99183813836e-06, "loss": 0.0019, "step": 89570 }, { "epoch": 0.7325510078914013, "grad_norm": 0.007852647453546524, "learning_rate": 7.99126633063194e-06, "loss": 0.0013, "step": 89580 }, { "epoch": 0.7326327840700004, "grad_norm": 0.021745754405856133, "learning_rate": 7.990694461969354e-06, "loss": 0.0012, "step": 89590 }, { "epoch": 0.7327145602485996, "grad_norm": 0.06515233218669891, "learning_rate": 7.990122532383889e-06, "loss": 0.0013, "step": 89600 }, { "epoch": 0.7327963364271988, "grad_norm": 0.048228103667497635, "learning_rate": 7.989550541887199e-06, "loss": 0.0031, "step": 89610 }, { "epoch": 0.7328781126057979, "grad_norm": 0.05058037117123604, "learning_rate": 7.988978490490932e-06, "loss": 0.0013, "step": 89620 }, { "epoch": 0.7329598887843971, "grad_norm": 0.08737529814243317, "learning_rate": 7.988406378206746e-06, "loss": 0.002, "step": 89630 }, { "epoch": 0.7330416649629963, "grad_norm": 0.06146084889769554, "learning_rate": 7.987834205046291e-06, "loss": 0.0014, "step": 89640 }, { "epoch": 0.7331234411415954, "grad_norm": 0.012899879366159439, "learning_rate": 7.987261971021227e-06, "loss": 0.0009, "step": 89650 }, { "epoch": 0.7332052173201946, "grad_norm": 0.1335228681564331, "learning_rate": 7.986689676143207e-06, "loss": 0.0018, "step": 89660 }, { "epoch": 0.7332869934987938, "grad_norm": 0.08883922547101974, "learning_rate": 7.986117320423892e-06, "loss": 0.0017, "step": 89670 }, { "epoch": 0.733368769677393, "grad_norm": 0.0024890978820621967, "learning_rate": 7.985544903874938e-06, "loss": 0.002, "step": 89680 }, { "epoch": 0.7334505458559921, "grad_norm": 0.12329544872045517, "learning_rate": 7.984972426508008e-06, "loss": 0.0038, "step": 89690 }, { "epoch": 0.7335323220345913, "grad_norm": 0.116469606757164, "learning_rate": 7.984399888334763e-06, "loss": 0.0019, "step": 89700 }, { "epoch": 0.7336140982131905, "grad_norm": 0.060265231877565384, "learning_rate": 7.983827289366869e-06, "loss": 0.0014, "step": 89710 }, { "epoch": 0.7336958743917896, "grad_norm": 0.009485602378845215, "learning_rate": 7.983254629615986e-06, "loss": 0.0011, "step": 89720 }, { "epoch": 0.7337776505703888, "grad_norm": 0.05163218080997467, "learning_rate": 7.982681909093781e-06, "loss": 0.0018, "step": 89730 }, { "epoch": 0.7338594267489881, "grad_norm": 0.038309305906295776, "learning_rate": 7.98210912781192e-06, "loss": 0.0022, "step": 89740 }, { "epoch": 0.7339412029275872, "grad_norm": 0.08878395706415176, "learning_rate": 7.981536285782076e-06, "loss": 0.002, "step": 89750 }, { "epoch": 0.7340229791061864, "grad_norm": 0.08731924742460251, "learning_rate": 7.98096338301591e-06, "loss": 0.0012, "step": 89760 }, { "epoch": 0.7341047552847856, "grad_norm": 0.04719976335763931, "learning_rate": 7.980390419525099e-06, "loss": 0.0018, "step": 89770 }, { "epoch": 0.7341865314633848, "grad_norm": 0.05953413248062134, "learning_rate": 7.979817395321313e-06, "loss": 0.001, "step": 89780 }, { "epoch": 0.7342683076419839, "grad_norm": 0.03803973272442818, "learning_rate": 7.979244310416223e-06, "loss": 0.0018, "step": 89790 }, { "epoch": 0.7343500838205831, "grad_norm": 0.02053944393992424, "learning_rate": 7.978671164821505e-06, "loss": 0.0012, "step": 89800 }, { "epoch": 0.7344318599991823, "grad_norm": 0.1913599967956543, "learning_rate": 7.978097958548835e-06, "loss": 0.0016, "step": 89810 }, { "epoch": 0.7345136361777814, "grad_norm": 0.20092585682868958, "learning_rate": 7.977524691609888e-06, "loss": 0.0021, "step": 89820 }, { "epoch": 0.7345954123563806, "grad_norm": 0.035524867475032806, "learning_rate": 7.976951364016344e-06, "loss": 0.0012, "step": 89830 }, { "epoch": 0.7346771885349798, "grad_norm": 0.0015046108746901155, "learning_rate": 7.976377975779879e-06, "loss": 0.0026, "step": 89840 }, { "epoch": 0.7347589647135789, "grad_norm": 0.05008360743522644, "learning_rate": 7.975804526912178e-06, "loss": 0.0029, "step": 89850 }, { "epoch": 0.7348407408921781, "grad_norm": 0.04339390993118286, "learning_rate": 7.975231017424919e-06, "loss": 0.0056, "step": 89860 }, { "epoch": 0.7349225170707773, "grad_norm": 0.14159749448299408, "learning_rate": 7.974657447329784e-06, "loss": 0.0017, "step": 89870 }, { "epoch": 0.7350042932493764, "grad_norm": 0.0635455846786499, "learning_rate": 7.97408381663846e-06, "loss": 0.0025, "step": 89880 }, { "epoch": 0.7350860694279756, "grad_norm": 0.009452183730900288, "learning_rate": 7.973510125362631e-06, "loss": 0.0019, "step": 89890 }, { "epoch": 0.7351678456065748, "grad_norm": 0.11081027239561081, "learning_rate": 7.972936373513984e-06, "loss": 0.0015, "step": 89900 }, { "epoch": 0.735249621785174, "grad_norm": 0.2669806480407715, "learning_rate": 7.972362561104207e-06, "loss": 0.003, "step": 89910 }, { "epoch": 0.7353313979637731, "grad_norm": 0.04174412041902542, "learning_rate": 7.971788688144988e-06, "loss": 0.0034, "step": 89920 }, { "epoch": 0.7354131741423723, "grad_norm": 0.029424114152789116, "learning_rate": 7.971214754648018e-06, "loss": 0.0021, "step": 89930 }, { "epoch": 0.7354949503209715, "grad_norm": 0.036671239882707596, "learning_rate": 7.970640760624991e-06, "loss": 0.0038, "step": 89940 }, { "epoch": 0.7355767264995706, "grad_norm": 0.11264243721961975, "learning_rate": 7.970066706087592e-06, "loss": 0.0021, "step": 89950 }, { "epoch": 0.7356585026781699, "grad_norm": 0.05158267170190811, "learning_rate": 7.969492591047522e-06, "loss": 0.001, "step": 89960 }, { "epoch": 0.7357402788567691, "grad_norm": 0.05795517563819885, "learning_rate": 7.968918415516476e-06, "loss": 0.0017, "step": 89970 }, { "epoch": 0.7358220550353682, "grad_norm": 0.019974587485194206, "learning_rate": 7.968344179506146e-06, "loss": 0.0015, "step": 89980 }, { "epoch": 0.7359038312139674, "grad_norm": 0.10331212729215622, "learning_rate": 7.967769883028233e-06, "loss": 0.002, "step": 89990 }, { "epoch": 0.7359856073925666, "grad_norm": 0.055949900299310684, "learning_rate": 7.967195526094437e-06, "loss": 0.0024, "step": 90000 }, { "epoch": 0.7360673835711657, "grad_norm": 0.016880828887224197, "learning_rate": 7.966621108716455e-06, "loss": 0.0009, "step": 90010 }, { "epoch": 0.7361491597497649, "grad_norm": 0.11318696290254593, "learning_rate": 7.96604663090599e-06, "loss": 0.0032, "step": 90020 }, { "epoch": 0.7362309359283641, "grad_norm": 0.31773096323013306, "learning_rate": 7.965472092674742e-06, "loss": 0.0025, "step": 90030 }, { "epoch": 0.7363127121069633, "grad_norm": 0.08267378062009811, "learning_rate": 7.96489749403442e-06, "loss": 0.0011, "step": 90040 }, { "epoch": 0.7363944882855624, "grad_norm": 0.05517568066716194, "learning_rate": 7.964322834996725e-06, "loss": 0.0015, "step": 90050 }, { "epoch": 0.7364762644641616, "grad_norm": 0.04461325705051422, "learning_rate": 7.963748115573364e-06, "loss": 0.0014, "step": 90060 }, { "epoch": 0.7365580406427608, "grad_norm": 0.03470718115568161, "learning_rate": 7.963173335776044e-06, "loss": 0.002, "step": 90070 }, { "epoch": 0.7366398168213599, "grad_norm": 0.01992739737033844, "learning_rate": 7.962598495616476e-06, "loss": 0.0013, "step": 90080 }, { "epoch": 0.7367215929999591, "grad_norm": 0.2615038752555847, "learning_rate": 7.962023595106367e-06, "loss": 0.0018, "step": 90090 }, { "epoch": 0.7368033691785583, "grad_norm": 0.0438433513045311, "learning_rate": 7.961448634257432e-06, "loss": 0.0018, "step": 90100 }, { "epoch": 0.7368851453571574, "grad_norm": 0.028204811736941338, "learning_rate": 7.96087361308138e-06, "loss": 0.0032, "step": 90110 }, { "epoch": 0.7369669215357566, "grad_norm": 0.04092470556497574, "learning_rate": 7.960298531589924e-06, "loss": 0.0024, "step": 90120 }, { "epoch": 0.7370486977143558, "grad_norm": 0.05098382383584976, "learning_rate": 7.959723389794783e-06, "loss": 0.0013, "step": 90130 }, { "epoch": 0.7371304738929549, "grad_norm": 0.007889818400144577, "learning_rate": 7.95914818770767e-06, "loss": 0.0024, "step": 90140 }, { "epoch": 0.7372122500715541, "grad_norm": 0.06766383349895477, "learning_rate": 7.958572925340305e-06, "loss": 0.0022, "step": 90150 }, { "epoch": 0.7372940262501533, "grad_norm": 0.05683530122041702, "learning_rate": 7.957997602704402e-06, "loss": 0.0013, "step": 90160 }, { "epoch": 0.7373758024287526, "grad_norm": 0.12350792437791824, "learning_rate": 7.957422219811683e-06, "loss": 0.0017, "step": 90170 }, { "epoch": 0.7374575786073517, "grad_norm": 0.281531423330307, "learning_rate": 7.956846776673872e-06, "loss": 0.0023, "step": 90180 }, { "epoch": 0.7375393547859509, "grad_norm": 0.05701496824622154, "learning_rate": 7.956271273302686e-06, "loss": 0.0011, "step": 90190 }, { "epoch": 0.7376211309645501, "grad_norm": 0.10261017829179764, "learning_rate": 7.955695709709854e-06, "loss": 0.002, "step": 90200 }, { "epoch": 0.7377029071431492, "grad_norm": 0.08005143702030182, "learning_rate": 7.955120085907097e-06, "loss": 0.003, "step": 90210 }, { "epoch": 0.7377846833217484, "grad_norm": 0.019440220668911934, "learning_rate": 7.954544401906141e-06, "loss": 0.0018, "step": 90220 }, { "epoch": 0.7378664595003476, "grad_norm": 0.13365332782268524, "learning_rate": 7.953968657718713e-06, "loss": 0.0023, "step": 90230 }, { "epoch": 0.7379482356789467, "grad_norm": 0.08637289702892303, "learning_rate": 7.953392853356545e-06, "loss": 0.0018, "step": 90240 }, { "epoch": 0.7380300118575459, "grad_norm": 0.03654138743877411, "learning_rate": 7.952816988831362e-06, "loss": 0.0035, "step": 90250 }, { "epoch": 0.7381117880361451, "grad_norm": 0.010464333929121494, "learning_rate": 7.952241064154898e-06, "loss": 0.0011, "step": 90260 }, { "epoch": 0.7381935642147442, "grad_norm": 0.03136087581515312, "learning_rate": 7.951665079338882e-06, "loss": 0.0019, "step": 90270 }, { "epoch": 0.7382753403933434, "grad_norm": 0.0277864933013916, "learning_rate": 7.951089034395053e-06, "loss": 0.001, "step": 90280 }, { "epoch": 0.7383571165719426, "grad_norm": 0.06143078953027725, "learning_rate": 7.950512929335136e-06, "loss": 0.0011, "step": 90290 }, { "epoch": 0.7384388927505418, "grad_norm": 0.03397795185446739, "learning_rate": 7.949936764170877e-06, "loss": 0.0016, "step": 90300 }, { "epoch": 0.7385206689291409, "grad_norm": 0.048925187438726425, "learning_rate": 7.949360538914006e-06, "loss": 0.0015, "step": 90310 }, { "epoch": 0.7386024451077401, "grad_norm": 0.03350280970335007, "learning_rate": 7.948784253576264e-06, "loss": 0.0023, "step": 90320 }, { "epoch": 0.7386842212863393, "grad_norm": 0.0676674172282219, "learning_rate": 7.94820790816939e-06, "loss": 0.0022, "step": 90330 }, { "epoch": 0.7387659974649384, "grad_norm": 0.13202416896820068, "learning_rate": 7.947631502705125e-06, "loss": 0.0022, "step": 90340 }, { "epoch": 0.7388477736435376, "grad_norm": 0.09731296449899673, "learning_rate": 7.947055037195208e-06, "loss": 0.0014, "step": 90350 }, { "epoch": 0.7389295498221368, "grad_norm": 0.036082107573747635, "learning_rate": 7.946478511651388e-06, "loss": 0.0012, "step": 90360 }, { "epoch": 0.7390113260007359, "grad_norm": 0.09124840795993805, "learning_rate": 7.945901926085401e-06, "loss": 0.0019, "step": 90370 }, { "epoch": 0.7390931021793352, "grad_norm": 0.00855193380266428, "learning_rate": 7.945325280509003e-06, "loss": 0.0015, "step": 90380 }, { "epoch": 0.7391748783579344, "grad_norm": 0.046701934188604355, "learning_rate": 7.94474857493393e-06, "loss": 0.0023, "step": 90390 }, { "epoch": 0.7392566545365336, "grad_norm": 0.18610142171382904, "learning_rate": 7.944171809371936e-06, "loss": 0.0025, "step": 90400 }, { "epoch": 0.7393384307151327, "grad_norm": 0.08039353787899017, "learning_rate": 7.94359498383477e-06, "loss": 0.0015, "step": 90410 }, { "epoch": 0.7394202068937319, "grad_norm": 0.0626489520072937, "learning_rate": 7.943018098334184e-06, "loss": 0.0013, "step": 90420 }, { "epoch": 0.7395019830723311, "grad_norm": 0.03096199221909046, "learning_rate": 7.942441152881923e-06, "loss": 0.0018, "step": 90430 }, { "epoch": 0.7395837592509302, "grad_norm": 0.2039286494255066, "learning_rate": 7.941864147489747e-06, "loss": 0.003, "step": 90440 }, { "epoch": 0.7396655354295294, "grad_norm": 0.08289380371570587, "learning_rate": 7.941287082169405e-06, "loss": 0.0039, "step": 90450 }, { "epoch": 0.7397473116081286, "grad_norm": 0.034812018275260925, "learning_rate": 7.940709956932655e-06, "loss": 0.0023, "step": 90460 }, { "epoch": 0.7398290877867277, "grad_norm": 0.11579956114292145, "learning_rate": 7.94013277179125e-06, "loss": 0.0016, "step": 90470 }, { "epoch": 0.7399108639653269, "grad_norm": 0.05387498810887337, "learning_rate": 7.939555526756955e-06, "loss": 0.002, "step": 90480 }, { "epoch": 0.7399926401439261, "grad_norm": 0.09793464839458466, "learning_rate": 7.938978221841522e-06, "loss": 0.001, "step": 90490 }, { "epoch": 0.7400744163225252, "grad_norm": 0.10885094106197357, "learning_rate": 7.938400857056717e-06, "loss": 0.0023, "step": 90500 }, { "epoch": 0.7401561925011244, "grad_norm": 0.055935006588697433, "learning_rate": 7.937823432414294e-06, "loss": 0.002, "step": 90510 }, { "epoch": 0.7402379686797236, "grad_norm": 0.055526409298181534, "learning_rate": 7.937245947926022e-06, "loss": 0.002, "step": 90520 }, { "epoch": 0.7403197448583227, "grad_norm": 0.037683650851249695, "learning_rate": 7.936668403603663e-06, "loss": 0.0014, "step": 90530 }, { "epoch": 0.7404015210369219, "grad_norm": 0.07620403915643692, "learning_rate": 7.93609079945898e-06, "loss": 0.0025, "step": 90540 }, { "epoch": 0.7404832972155211, "grad_norm": 0.055888254195451736, "learning_rate": 7.935513135503741e-06, "loss": 0.0014, "step": 90550 }, { "epoch": 0.7405650733941203, "grad_norm": 0.045423999428749084, "learning_rate": 7.934935411749714e-06, "loss": 0.0016, "step": 90560 }, { "epoch": 0.7406468495727194, "grad_norm": 0.17108726501464844, "learning_rate": 7.934357628208667e-06, "loss": 0.0026, "step": 90570 }, { "epoch": 0.7407286257513186, "grad_norm": 0.05834151431918144, "learning_rate": 7.933779784892368e-06, "loss": 0.0014, "step": 90580 }, { "epoch": 0.7408104019299178, "grad_norm": 0.07755561172962189, "learning_rate": 7.933201881812593e-06, "loss": 0.0018, "step": 90590 }, { "epoch": 0.740892178108517, "grad_norm": 0.034474194049835205, "learning_rate": 7.932623918981109e-06, "loss": 0.0015, "step": 90600 }, { "epoch": 0.7409739542871162, "grad_norm": 0.32565173506736755, "learning_rate": 7.932045896409692e-06, "loss": 0.0008, "step": 90610 }, { "epoch": 0.7410557304657154, "grad_norm": 0.1739126592874527, "learning_rate": 7.931467814110118e-06, "loss": 0.0018, "step": 90620 }, { "epoch": 0.7411375066443145, "grad_norm": 0.06763748824596405, "learning_rate": 7.930889672094163e-06, "loss": 0.001, "step": 90630 }, { "epoch": 0.7412192828229137, "grad_norm": 0.09691615402698517, "learning_rate": 7.930311470373602e-06, "loss": 0.0018, "step": 90640 }, { "epoch": 0.7413010590015129, "grad_norm": 0.08064761012792587, "learning_rate": 7.929733208960214e-06, "loss": 0.0018, "step": 90650 }, { "epoch": 0.741382835180112, "grad_norm": 0.032421812415122986, "learning_rate": 7.92915488786578e-06, "loss": 0.0012, "step": 90660 }, { "epoch": 0.7414646113587112, "grad_norm": 0.050598833709955215, "learning_rate": 7.92857650710208e-06, "loss": 0.0036, "step": 90670 }, { "epoch": 0.7415463875373104, "grad_norm": 0.007864958606660366, "learning_rate": 7.927998066680894e-06, "loss": 0.0014, "step": 90680 }, { "epoch": 0.7416281637159096, "grad_norm": 0.0944487676024437, "learning_rate": 7.92741956661401e-06, "loss": 0.0011, "step": 90690 }, { "epoch": 0.7417099398945087, "grad_norm": 0.043587930500507355, "learning_rate": 7.926841006913209e-06, "loss": 0.0013, "step": 90700 }, { "epoch": 0.7417917160731079, "grad_norm": 0.08247006684541702, "learning_rate": 7.926262387590276e-06, "loss": 0.002, "step": 90710 }, { "epoch": 0.7418734922517071, "grad_norm": 0.020354369655251503, "learning_rate": 7.925683708657001e-06, "loss": 0.0015, "step": 90720 }, { "epoch": 0.7419552684303062, "grad_norm": 0.0255745779722929, "learning_rate": 7.92510497012517e-06, "loss": 0.0017, "step": 90730 }, { "epoch": 0.7420370446089054, "grad_norm": 0.08875177800655365, "learning_rate": 7.924526172006575e-06, "loss": 0.0028, "step": 90740 }, { "epoch": 0.7421188207875046, "grad_norm": 0.1130884513258934, "learning_rate": 7.923947314313002e-06, "loss": 0.0014, "step": 90750 }, { "epoch": 0.7422005969661037, "grad_norm": 0.06105756014585495, "learning_rate": 7.923368397056249e-06, "loss": 0.002, "step": 90760 }, { "epoch": 0.7422823731447029, "grad_norm": 0.04398462921380997, "learning_rate": 7.922789420248102e-06, "loss": 0.0013, "step": 90770 }, { "epoch": 0.7423641493233021, "grad_norm": 0.02293490245938301, "learning_rate": 7.922210383900362e-06, "loss": 0.0012, "step": 90780 }, { "epoch": 0.7424459255019012, "grad_norm": 0.08451617509126663, "learning_rate": 7.921631288024818e-06, "loss": 0.0028, "step": 90790 }, { "epoch": 0.7425277016805004, "grad_norm": 0.0500434935092926, "learning_rate": 7.921052132633272e-06, "loss": 0.0017, "step": 90800 }, { "epoch": 0.7426094778590997, "grad_norm": 0.005756210535764694, "learning_rate": 7.920472917737518e-06, "loss": 0.0026, "step": 90810 }, { "epoch": 0.7426912540376989, "grad_norm": 0.035797104239463806, "learning_rate": 7.91989364334936e-06, "loss": 0.0017, "step": 90820 }, { "epoch": 0.742773030216298, "grad_norm": 0.08126778155565262, "learning_rate": 7.919314309480593e-06, "loss": 0.0019, "step": 90830 }, { "epoch": 0.7428548063948972, "grad_norm": 0.06836879253387451, "learning_rate": 7.91873491614302e-06, "loss": 0.0019, "step": 90840 }, { "epoch": 0.7429365825734964, "grad_norm": 0.02617013268172741, "learning_rate": 7.918155463348444e-06, "loss": 0.0019, "step": 90850 }, { "epoch": 0.7430183587520955, "grad_norm": 0.08959764987230301, "learning_rate": 7.91757595110867e-06, "loss": 0.0022, "step": 90860 }, { "epoch": 0.7431001349306947, "grad_norm": 0.05967080593109131, "learning_rate": 7.916996379435501e-06, "loss": 0.0014, "step": 90870 }, { "epoch": 0.7431819111092939, "grad_norm": 0.03540205955505371, "learning_rate": 7.916416748340747e-06, "loss": 0.0023, "step": 90880 }, { "epoch": 0.743263687287893, "grad_norm": 0.005555163137614727, "learning_rate": 7.915837057836214e-06, "loss": 0.0015, "step": 90890 }, { "epoch": 0.7433454634664922, "grad_norm": 0.10371168702840805, "learning_rate": 7.915257307933708e-06, "loss": 0.0021, "step": 90900 }, { "epoch": 0.7434272396450914, "grad_norm": 0.16427303850650787, "learning_rate": 7.914677498645041e-06, "loss": 0.002, "step": 90910 }, { "epoch": 0.7435090158236906, "grad_norm": 0.10711606591939926, "learning_rate": 7.914097629982025e-06, "loss": 0.0019, "step": 90920 }, { "epoch": 0.7435907920022897, "grad_norm": 0.01364078652113676, "learning_rate": 7.91351770195647e-06, "loss": 0.002, "step": 90930 }, { "epoch": 0.7436725681808889, "grad_norm": 0.20841534435749054, "learning_rate": 7.912937714580192e-06, "loss": 0.002, "step": 90940 }, { "epoch": 0.7437543443594881, "grad_norm": 0.026652192696928978, "learning_rate": 7.912357667865004e-06, "loss": 0.0017, "step": 90950 }, { "epoch": 0.7438361205380872, "grad_norm": 0.016582027077674866, "learning_rate": 7.911777561822727e-06, "loss": 0.0012, "step": 90960 }, { "epoch": 0.7439178967166864, "grad_norm": 0.10136587172746658, "learning_rate": 7.91119739646517e-06, "loss": 0.0013, "step": 90970 }, { "epoch": 0.7439996728952856, "grad_norm": 0.015527167357504368, "learning_rate": 7.910617171804159e-06, "loss": 0.002, "step": 90980 }, { "epoch": 0.7440814490738847, "grad_norm": 0.029073145240545273, "learning_rate": 7.910036887851506e-06, "loss": 0.0014, "step": 90990 }, { "epoch": 0.7441632252524839, "grad_norm": 0.04570825397968292, "learning_rate": 7.90945654461904e-06, "loss": 0.0017, "step": 91000 }, { "epoch": 0.7442450014310831, "grad_norm": 0.058908611536026, "learning_rate": 7.908876142118579e-06, "loss": 0.0016, "step": 91010 }, { "epoch": 0.7443267776096822, "grad_norm": 0.06111254543066025, "learning_rate": 7.908295680361944e-06, "loss": 0.0038, "step": 91020 }, { "epoch": 0.7444085537882815, "grad_norm": 0.055616363883018494, "learning_rate": 7.907715159360963e-06, "loss": 0.0018, "step": 91030 }, { "epoch": 0.7444903299668807, "grad_norm": 0.05290861055254936, "learning_rate": 7.907134579127462e-06, "loss": 0.0022, "step": 91040 }, { "epoch": 0.7445721061454799, "grad_norm": 0.14216724038124084, "learning_rate": 7.906553939673266e-06, "loss": 0.0019, "step": 91050 }, { "epoch": 0.744653882324079, "grad_norm": 0.00575229013338685, "learning_rate": 7.905973241010203e-06, "loss": 0.0017, "step": 91060 }, { "epoch": 0.7447356585026782, "grad_norm": 0.05010393634438515, "learning_rate": 7.905392483150104e-06, "loss": 0.001, "step": 91070 }, { "epoch": 0.7448174346812774, "grad_norm": 0.04691702872514725, "learning_rate": 7.904811666104798e-06, "loss": 0.0018, "step": 91080 }, { "epoch": 0.7448992108598765, "grad_norm": 0.03275877609848976, "learning_rate": 7.904230789886116e-06, "loss": 0.0016, "step": 91090 }, { "epoch": 0.7449809870384757, "grad_norm": 0.1304972618818283, "learning_rate": 7.903649854505894e-06, "loss": 0.0017, "step": 91100 }, { "epoch": 0.7450627632170749, "grad_norm": 0.04529546946287155, "learning_rate": 7.903068859975964e-06, "loss": 0.0014, "step": 91110 }, { "epoch": 0.745144539395674, "grad_norm": 0.09155435860157013, "learning_rate": 7.902487806308161e-06, "loss": 0.0032, "step": 91120 }, { "epoch": 0.7452263155742732, "grad_norm": 0.03633657842874527, "learning_rate": 7.901906693514324e-06, "loss": 0.0031, "step": 91130 }, { "epoch": 0.7453080917528724, "grad_norm": 0.13828346133232117, "learning_rate": 7.901325521606286e-06, "loss": 0.0022, "step": 91140 }, { "epoch": 0.7453898679314716, "grad_norm": 0.03883271664381027, "learning_rate": 7.90074429059589e-06, "loss": 0.0023, "step": 91150 }, { "epoch": 0.7454716441100707, "grad_norm": 0.03514476493000984, "learning_rate": 7.900163000494974e-06, "loss": 0.0025, "step": 91160 }, { "epoch": 0.7455534202886699, "grad_norm": 0.07017391920089722, "learning_rate": 7.899581651315383e-06, "loss": 0.0016, "step": 91170 }, { "epoch": 0.7456351964672691, "grad_norm": 0.0470716692507267, "learning_rate": 7.899000243068954e-06, "loss": 0.0022, "step": 91180 }, { "epoch": 0.7457169726458682, "grad_norm": 0.0320148840546608, "learning_rate": 7.898418775767535e-06, "loss": 0.0026, "step": 91190 }, { "epoch": 0.7457987488244674, "grad_norm": 0.1386781483888626, "learning_rate": 7.89783724942297e-06, "loss": 0.0026, "step": 91200 }, { "epoch": 0.7458805250030666, "grad_norm": 0.014054174534976482, "learning_rate": 7.897255664047103e-06, "loss": 0.0021, "step": 91210 }, { "epoch": 0.7459623011816657, "grad_norm": 0.14651329815387726, "learning_rate": 7.896674019651785e-06, "loss": 0.0019, "step": 91220 }, { "epoch": 0.7460440773602649, "grad_norm": 0.13643911480903625, "learning_rate": 7.89609231624886e-06, "loss": 0.0025, "step": 91230 }, { "epoch": 0.7461258535388642, "grad_norm": 0.03572877123951912, "learning_rate": 7.895510553850184e-06, "loss": 0.0012, "step": 91240 }, { "epoch": 0.7462076297174633, "grad_norm": 0.14384405314922333, "learning_rate": 7.894928732467602e-06, "loss": 0.0021, "step": 91250 }, { "epoch": 0.7462894058960625, "grad_norm": 0.050287991762161255, "learning_rate": 7.894346852112969e-06, "loss": 0.0019, "step": 91260 }, { "epoch": 0.7463711820746617, "grad_norm": 0.0792040079832077, "learning_rate": 7.893764912798139e-06, "loss": 0.002, "step": 91270 }, { "epoch": 0.7464529582532609, "grad_norm": 0.023263968527317047, "learning_rate": 7.893182914534965e-06, "loss": 0.0018, "step": 91280 }, { "epoch": 0.74653473443186, "grad_norm": 0.0659291222691536, "learning_rate": 7.892600857335302e-06, "loss": 0.0013, "step": 91290 }, { "epoch": 0.7466165106104592, "grad_norm": 0.10965951532125473, "learning_rate": 7.892018741211011e-06, "loss": 0.0021, "step": 91300 }, { "epoch": 0.7466982867890584, "grad_norm": 0.21692168712615967, "learning_rate": 7.891436566173944e-06, "loss": 0.0024, "step": 91310 }, { "epoch": 0.7467800629676575, "grad_norm": 0.005806200671941042, "learning_rate": 7.890854332235968e-06, "loss": 0.0014, "step": 91320 }, { "epoch": 0.7468618391462567, "grad_norm": 0.05743543058633804, "learning_rate": 7.890272039408936e-06, "loss": 0.0012, "step": 91330 }, { "epoch": 0.7469436153248559, "grad_norm": 0.047044821083545685, "learning_rate": 7.889689687704717e-06, "loss": 0.0018, "step": 91340 }, { "epoch": 0.747025391503455, "grad_norm": 0.06679439544677734, "learning_rate": 7.889107277135167e-06, "loss": 0.0022, "step": 91350 }, { "epoch": 0.7471071676820542, "grad_norm": 0.11436858028173447, "learning_rate": 7.888524807712153e-06, "loss": 0.0029, "step": 91360 }, { "epoch": 0.7471889438606534, "grad_norm": 0.06271273642778397, "learning_rate": 7.887942279447543e-06, "loss": 0.0016, "step": 91370 }, { "epoch": 0.7472707200392525, "grad_norm": 0.0719187781214714, "learning_rate": 7.8873596923532e-06, "loss": 0.0016, "step": 91380 }, { "epoch": 0.7473524962178517, "grad_norm": 0.07299744337797165, "learning_rate": 7.886777046440993e-06, "loss": 0.0019, "step": 91390 }, { "epoch": 0.7474342723964509, "grad_norm": 0.026194129139184952, "learning_rate": 7.88619434172279e-06, "loss": 0.0014, "step": 91400 }, { "epoch": 0.74751604857505, "grad_norm": 0.1360514909029007, "learning_rate": 7.885611578210466e-06, "loss": 0.0018, "step": 91410 }, { "epoch": 0.7475978247536492, "grad_norm": 0.021436011418700218, "learning_rate": 7.885028755915885e-06, "loss": 0.0026, "step": 91420 }, { "epoch": 0.7476796009322484, "grad_norm": 0.045056141912937164, "learning_rate": 7.884445874850924e-06, "loss": 0.0019, "step": 91430 }, { "epoch": 0.7477613771108476, "grad_norm": 0.053997986018657684, "learning_rate": 7.883862935027458e-06, "loss": 0.0008, "step": 91440 }, { "epoch": 0.7478431532894468, "grad_norm": 0.05245738849043846, "learning_rate": 7.883279936457357e-06, "loss": 0.0013, "step": 91450 }, { "epoch": 0.747924929468046, "grad_norm": 0.27837690711021423, "learning_rate": 7.8826968791525e-06, "loss": 0.0021, "step": 91460 }, { "epoch": 0.7480067056466452, "grad_norm": 0.07716935127973557, "learning_rate": 7.882113763124768e-06, "loss": 0.0026, "step": 91470 }, { "epoch": 0.7480884818252443, "grad_norm": 0.0321684330701828, "learning_rate": 7.881530588386032e-06, "loss": 0.0008, "step": 91480 }, { "epoch": 0.7481702580038435, "grad_norm": 0.10318056493997574, "learning_rate": 7.880947354948179e-06, "loss": 0.0026, "step": 91490 }, { "epoch": 0.7482520341824427, "grad_norm": 0.028977643698453903, "learning_rate": 7.880364062823083e-06, "loss": 0.0032, "step": 91500 }, { "epoch": 0.7483338103610419, "grad_norm": 0.0756630003452301, "learning_rate": 7.879780712022633e-06, "loss": 0.0016, "step": 91510 }, { "epoch": 0.748415586539641, "grad_norm": 0.04093532636761665, "learning_rate": 7.879197302558708e-06, "loss": 0.0019, "step": 91520 }, { "epoch": 0.7484973627182402, "grad_norm": 0.1746959537267685, "learning_rate": 7.878613834443193e-06, "loss": 0.0048, "step": 91530 }, { "epoch": 0.7485791388968394, "grad_norm": 0.03271159902215004, "learning_rate": 7.878030307687974e-06, "loss": 0.002, "step": 91540 }, { "epoch": 0.7486609150754385, "grad_norm": 0.027901170775294304, "learning_rate": 7.87744672230494e-06, "loss": 0.0024, "step": 91550 }, { "epoch": 0.7487426912540377, "grad_norm": 0.05857287719845772, "learning_rate": 7.876863078305976e-06, "loss": 0.0015, "step": 91560 }, { "epoch": 0.7488244674326369, "grad_norm": 0.025255296379327774, "learning_rate": 7.876279375702974e-06, "loss": 0.0017, "step": 91570 }, { "epoch": 0.748906243611236, "grad_norm": 0.061667244881391525, "learning_rate": 7.875695614507822e-06, "loss": 0.0015, "step": 91580 }, { "epoch": 0.7489880197898352, "grad_norm": 0.020668216049671173, "learning_rate": 7.875111794732414e-06, "loss": 0.0013, "step": 91590 }, { "epoch": 0.7490697959684344, "grad_norm": 0.0329996757209301, "learning_rate": 7.87452791638864e-06, "loss": 0.0018, "step": 91600 }, { "epoch": 0.7491515721470335, "grad_norm": 0.12843938171863556, "learning_rate": 7.873943979488397e-06, "loss": 0.0026, "step": 91610 }, { "epoch": 0.7492333483256327, "grad_norm": 0.034777283668518066, "learning_rate": 7.873359984043579e-06, "loss": 0.0017, "step": 91620 }, { "epoch": 0.7493151245042319, "grad_norm": 0.04328252002596855, "learning_rate": 7.872775930066081e-06, "loss": 0.0011, "step": 91630 }, { "epoch": 0.749396900682831, "grad_norm": 0.15044578909873962, "learning_rate": 7.872191817567803e-06, "loss": 0.0011, "step": 91640 }, { "epoch": 0.7494786768614302, "grad_norm": 0.17145128548145294, "learning_rate": 7.871607646560644e-06, "loss": 0.0016, "step": 91650 }, { "epoch": 0.7495604530400294, "grad_norm": 0.07003229856491089, "learning_rate": 7.871023417056503e-06, "loss": 0.0029, "step": 91660 }, { "epoch": 0.7496422292186287, "grad_norm": 0.00928869005292654, "learning_rate": 7.87043912906728e-06, "loss": 0.0019, "step": 91670 }, { "epoch": 0.7497240053972278, "grad_norm": 0.02865697629749775, "learning_rate": 7.86985478260488e-06, "loss": 0.0013, "step": 91680 }, { "epoch": 0.749805781575827, "grad_norm": 0.1157553493976593, "learning_rate": 7.869270377681204e-06, "loss": 0.0015, "step": 91690 }, { "epoch": 0.7498875577544262, "grad_norm": 0.028665542602539062, "learning_rate": 7.868685914308158e-06, "loss": 0.0018, "step": 91700 }, { "epoch": 0.7499693339330253, "grad_norm": 0.03540783002972603, "learning_rate": 7.86810139249765e-06, "loss": 0.0023, "step": 91710 }, { "epoch": 0.7500511101116245, "grad_norm": 0.04031611979007721, "learning_rate": 7.867516812261583e-06, "loss": 0.0022, "step": 91720 }, { "epoch": 0.7501328862902237, "grad_norm": 0.04958300665020943, "learning_rate": 7.866932173611873e-06, "loss": 0.0013, "step": 91730 }, { "epoch": 0.7502146624688228, "grad_norm": 0.041977182030677795, "learning_rate": 7.86634747656042e-06, "loss": 0.0019, "step": 91740 }, { "epoch": 0.750296438647422, "grad_norm": 0.0669250339269638, "learning_rate": 7.86576272111914e-06, "loss": 0.0009, "step": 91750 }, { "epoch": 0.7503782148260212, "grad_norm": 0.006326352711766958, "learning_rate": 7.86517790729994e-06, "loss": 0.0013, "step": 91760 }, { "epoch": 0.7504599910046204, "grad_norm": 0.028252584859728813, "learning_rate": 7.864593035114742e-06, "loss": 0.001, "step": 91770 }, { "epoch": 0.7505417671832195, "grad_norm": 0.030461780726909637, "learning_rate": 7.864008104575453e-06, "loss": 0.0011, "step": 91780 }, { "epoch": 0.7506235433618187, "grad_norm": 0.02853976935148239, "learning_rate": 7.863423115693994e-06, "loss": 0.0014, "step": 91790 }, { "epoch": 0.7507053195404179, "grad_norm": 0.017962558194994926, "learning_rate": 7.862838068482276e-06, "loss": 0.0008, "step": 91800 }, { "epoch": 0.750787095719017, "grad_norm": 0.33818137645721436, "learning_rate": 7.86225296295222e-06, "loss": 0.0056, "step": 91810 }, { "epoch": 0.7508688718976162, "grad_norm": 0.024738401174545288, "learning_rate": 7.861667799115744e-06, "loss": 0.0012, "step": 91820 }, { "epoch": 0.7509506480762154, "grad_norm": 0.07881858199834824, "learning_rate": 7.86108257698477e-06, "loss": 0.0015, "step": 91830 }, { "epoch": 0.7510324242548145, "grad_norm": 0.04836457595229149, "learning_rate": 7.860497296571216e-06, "loss": 0.0041, "step": 91840 }, { "epoch": 0.7511142004334137, "grad_norm": 0.1434270143508911, "learning_rate": 7.85991195788701e-06, "loss": 0.0024, "step": 91850 }, { "epoch": 0.7511959766120129, "grad_norm": 0.05349918082356453, "learning_rate": 7.859326560944072e-06, "loss": 0.0016, "step": 91860 }, { "epoch": 0.751277752790612, "grad_norm": 0.11281207948923111, "learning_rate": 7.858741105754327e-06, "loss": 0.0028, "step": 91870 }, { "epoch": 0.7513595289692113, "grad_norm": 0.05692901834845543, "learning_rate": 7.858155592329702e-06, "loss": 0.002, "step": 91880 }, { "epoch": 0.7514413051478105, "grad_norm": 0.08168456703424454, "learning_rate": 7.857570020682126e-06, "loss": 0.0013, "step": 91890 }, { "epoch": 0.7515230813264097, "grad_norm": 0.016027620062232018, "learning_rate": 7.856984390823523e-06, "loss": 0.0021, "step": 91900 }, { "epoch": 0.7516048575050088, "grad_norm": 0.022771567106246948, "learning_rate": 7.85639870276583e-06, "loss": 0.0021, "step": 91910 }, { "epoch": 0.751686633683608, "grad_norm": 0.050081174820661545, "learning_rate": 7.855812956520968e-06, "loss": 0.002, "step": 91920 }, { "epoch": 0.7517684098622072, "grad_norm": 0.03374255448579788, "learning_rate": 7.855227152100879e-06, "loss": 0.0018, "step": 91930 }, { "epoch": 0.7518501860408063, "grad_norm": 0.04352937638759613, "learning_rate": 7.85464128951749e-06, "loss": 0.0019, "step": 91940 }, { "epoch": 0.7519319622194055, "grad_norm": 0.027847006916999817, "learning_rate": 7.854055368782738e-06, "loss": 0.0015, "step": 91950 }, { "epoch": 0.7520137383980047, "grad_norm": 0.1004333421587944, "learning_rate": 7.853469389908558e-06, "loss": 0.002, "step": 91960 }, { "epoch": 0.7520955145766038, "grad_norm": 0.07187926769256592, "learning_rate": 7.852883352906888e-06, "loss": 0.0031, "step": 91970 }, { "epoch": 0.752177290755203, "grad_norm": 0.01540228258818388, "learning_rate": 7.852297257789666e-06, "loss": 0.0018, "step": 91980 }, { "epoch": 0.7522590669338022, "grad_norm": 0.01254238560795784, "learning_rate": 7.851711104568828e-06, "loss": 0.0019, "step": 91990 }, { "epoch": 0.7523408431124013, "grad_norm": 0.013806428760290146, "learning_rate": 7.851124893256317e-06, "loss": 0.001, "step": 92000 }, { "epoch": 0.7524226192910005, "grad_norm": 0.04233497753739357, "learning_rate": 7.850538623864077e-06, "loss": 0.0013, "step": 92010 }, { "epoch": 0.7525043954695997, "grad_norm": 0.09996192902326584, "learning_rate": 7.849952296404045e-06, "loss": 0.0023, "step": 92020 }, { "epoch": 0.7525861716481989, "grad_norm": 0.10571945458650589, "learning_rate": 7.84936591088817e-06, "loss": 0.0017, "step": 92030 }, { "epoch": 0.752667947826798, "grad_norm": 0.05835762992501259, "learning_rate": 7.848779467328395e-06, "loss": 0.0016, "step": 92040 }, { "epoch": 0.7527497240053972, "grad_norm": 0.002131114015355706, "learning_rate": 7.848192965736667e-06, "loss": 0.001, "step": 92050 }, { "epoch": 0.7528315001839964, "grad_norm": 0.11028497666120529, "learning_rate": 7.847606406124933e-06, "loss": 0.0022, "step": 92060 }, { "epoch": 0.7529132763625955, "grad_norm": 0.06684325635433197, "learning_rate": 7.84701978850514e-06, "loss": 0.0016, "step": 92070 }, { "epoch": 0.7529950525411947, "grad_norm": 0.0773925930261612, "learning_rate": 7.846433112889243e-06, "loss": 0.001, "step": 92080 }, { "epoch": 0.753076828719794, "grad_norm": 0.06129155680537224, "learning_rate": 7.845846379289188e-06, "loss": 0.0023, "step": 92090 }, { "epoch": 0.7531586048983931, "grad_norm": 0.007240024395287037, "learning_rate": 7.84525958771693e-06, "loss": 0.0016, "step": 92100 }, { "epoch": 0.7532403810769923, "grad_norm": 0.09287246316671371, "learning_rate": 7.84467273818442e-06, "loss": 0.0019, "step": 92110 }, { "epoch": 0.7533221572555915, "grad_norm": 0.08427771180868149, "learning_rate": 7.844085830703615e-06, "loss": 0.0019, "step": 92120 }, { "epoch": 0.7534039334341907, "grad_norm": 0.011006535030901432, "learning_rate": 7.84349886528647e-06, "loss": 0.0018, "step": 92130 }, { "epoch": 0.7534857096127898, "grad_norm": 0.05227312818169594, "learning_rate": 7.84291184194494e-06, "loss": 0.0011, "step": 92140 }, { "epoch": 0.753567485791389, "grad_norm": 0.04026491567492485, "learning_rate": 7.842324760690986e-06, "loss": 0.0013, "step": 92150 }, { "epoch": 0.7536492619699882, "grad_norm": 0.0245528444647789, "learning_rate": 7.841737621536567e-06, "loss": 0.0007, "step": 92160 }, { "epoch": 0.7537310381485873, "grad_norm": 0.07348161190748215, "learning_rate": 7.841150424493643e-06, "loss": 0.0012, "step": 92170 }, { "epoch": 0.7538128143271865, "grad_norm": 0.03214983269572258, "learning_rate": 7.840563169574173e-06, "loss": 0.0018, "step": 92180 }, { "epoch": 0.7538945905057857, "grad_norm": 0.07973787933588028, "learning_rate": 7.839975856790126e-06, "loss": 0.0025, "step": 92190 }, { "epoch": 0.7539763666843848, "grad_norm": 0.0063718995079398155, "learning_rate": 7.839388486153461e-06, "loss": 0.002, "step": 92200 }, { "epoch": 0.754058142862984, "grad_norm": 0.05067969113588333, "learning_rate": 7.838801057676143e-06, "loss": 0.001, "step": 92210 }, { "epoch": 0.7541399190415832, "grad_norm": 0.0017077445518225431, "learning_rate": 7.838213571370142e-06, "loss": 0.0071, "step": 92220 }, { "epoch": 0.7542216952201823, "grad_norm": 0.003916766494512558, "learning_rate": 7.837626027247422e-06, "loss": 0.0009, "step": 92230 }, { "epoch": 0.7543034713987815, "grad_norm": 0.050303131341934204, "learning_rate": 7.837038425319954e-06, "loss": 0.0022, "step": 92240 }, { "epoch": 0.7543852475773807, "grad_norm": 0.2623555064201355, "learning_rate": 7.836450765599708e-06, "loss": 0.0022, "step": 92250 }, { "epoch": 0.7544670237559798, "grad_norm": 0.03561016917228699, "learning_rate": 7.835863048098652e-06, "loss": 0.0007, "step": 92260 }, { "epoch": 0.754548799934579, "grad_norm": 0.08543988317251205, "learning_rate": 7.835275272828761e-06, "loss": 0.0028, "step": 92270 }, { "epoch": 0.7546305761131782, "grad_norm": 0.04423803091049194, "learning_rate": 7.83468743980201e-06, "loss": 0.0016, "step": 92280 }, { "epoch": 0.7547123522917774, "grad_norm": 0.08100210875272751, "learning_rate": 7.834099549030372e-06, "loss": 0.0016, "step": 92290 }, { "epoch": 0.7547941284703765, "grad_norm": 0.10573337227106094, "learning_rate": 7.833511600525821e-06, "loss": 0.0014, "step": 92300 }, { "epoch": 0.7548759046489758, "grad_norm": 0.06923087686300278, "learning_rate": 7.832923594300334e-06, "loss": 0.0022, "step": 92310 }, { "epoch": 0.754957680827575, "grad_norm": 0.050118353217840195, "learning_rate": 7.832335530365894e-06, "loss": 0.0016, "step": 92320 }, { "epoch": 0.7550394570061741, "grad_norm": 0.018401097506284714, "learning_rate": 7.831747408734475e-06, "loss": 0.001, "step": 92330 }, { "epoch": 0.7551212331847733, "grad_norm": 0.060690343379974365, "learning_rate": 7.831159229418059e-06, "loss": 0.0017, "step": 92340 }, { "epoch": 0.7552030093633725, "grad_norm": 0.09251745790243149, "learning_rate": 7.83057099242863e-06, "loss": 0.0016, "step": 92350 }, { "epoch": 0.7552847855419716, "grad_norm": 0.15386369824409485, "learning_rate": 7.829982697778168e-06, "loss": 0.0022, "step": 92360 }, { "epoch": 0.7553665617205708, "grad_norm": 0.1632516235113144, "learning_rate": 7.829394345478658e-06, "loss": 0.003, "step": 92370 }, { "epoch": 0.75544833789917, "grad_norm": 0.10266971588134766, "learning_rate": 7.828805935542085e-06, "loss": 0.0019, "step": 92380 }, { "epoch": 0.7555301140777692, "grad_norm": 0.07639018446207047, "learning_rate": 7.828217467980437e-06, "loss": 0.0019, "step": 92390 }, { "epoch": 0.7556118902563683, "grad_norm": 0.07275745272636414, "learning_rate": 7.8276289428057e-06, "loss": 0.0022, "step": 92400 }, { "epoch": 0.7556936664349675, "grad_norm": 0.020617350935935974, "learning_rate": 7.827040360029863e-06, "loss": 0.0007, "step": 92410 }, { "epoch": 0.7557754426135667, "grad_norm": 0.08054988831281662, "learning_rate": 7.826451719664916e-06, "loss": 0.0017, "step": 92420 }, { "epoch": 0.7558572187921658, "grad_norm": 0.03185945004224777, "learning_rate": 7.82586302172285e-06, "loss": 0.0013, "step": 92430 }, { "epoch": 0.755938994970765, "grad_norm": 0.004019117448478937, "learning_rate": 7.825274266215657e-06, "loss": 0.0012, "step": 92440 }, { "epoch": 0.7560207711493642, "grad_norm": 0.10908214002847672, "learning_rate": 7.82468545315533e-06, "loss": 0.0022, "step": 92450 }, { "epoch": 0.7561025473279633, "grad_norm": 0.016418909654021263, "learning_rate": 7.824096582553865e-06, "loss": 0.0041, "step": 92460 }, { "epoch": 0.7561843235065625, "grad_norm": 0.04336552321910858, "learning_rate": 7.823507654423257e-06, "loss": 0.0025, "step": 92470 }, { "epoch": 0.7562660996851617, "grad_norm": 0.02344541810452938, "learning_rate": 7.822918668775504e-06, "loss": 0.0012, "step": 92480 }, { "epoch": 0.7563478758637608, "grad_norm": 0.051063187420368195, "learning_rate": 7.822329625622603e-06, "loss": 0.0014, "step": 92490 }, { "epoch": 0.75642965204236, "grad_norm": 0.030664069578051567, "learning_rate": 7.821740524976551e-06, "loss": 0.0018, "step": 92500 }, { "epoch": 0.7565114282209592, "grad_norm": 0.39138129353523254, "learning_rate": 7.821151366849353e-06, "loss": 0.0013, "step": 92510 }, { "epoch": 0.7565932043995585, "grad_norm": 0.06172320619225502, "learning_rate": 7.820562151253007e-06, "loss": 0.0018, "step": 92520 }, { "epoch": 0.7566749805781576, "grad_norm": 0.2003980129957199, "learning_rate": 7.81997287819952e-06, "loss": 0.0021, "step": 92530 }, { "epoch": 0.7567567567567568, "grad_norm": 0.020766010507941246, "learning_rate": 7.819383547700889e-06, "loss": 0.0017, "step": 92540 }, { "epoch": 0.756838532935356, "grad_norm": 0.10780027508735657, "learning_rate": 7.818794159769127e-06, "loss": 0.0015, "step": 92550 }, { "epoch": 0.7569203091139551, "grad_norm": 0.09388178586959839, "learning_rate": 7.818204714416236e-06, "loss": 0.0017, "step": 92560 }, { "epoch": 0.7570020852925543, "grad_norm": 0.10578620433807373, "learning_rate": 7.817615211654223e-06, "loss": 0.0013, "step": 92570 }, { "epoch": 0.7570838614711535, "grad_norm": 0.06148768216371536, "learning_rate": 7.8170256514951e-06, "loss": 0.0029, "step": 92580 }, { "epoch": 0.7571656376497526, "grad_norm": 0.03558190166950226, "learning_rate": 7.816436033950873e-06, "loss": 0.003, "step": 92590 }, { "epoch": 0.7572474138283518, "grad_norm": 0.028220849111676216, "learning_rate": 7.815846359033554e-06, "loss": 0.0022, "step": 92600 }, { "epoch": 0.757329190006951, "grad_norm": 0.05547196418046951, "learning_rate": 7.815256626755159e-06, "loss": 0.002, "step": 92610 }, { "epoch": 0.7574109661855501, "grad_norm": 0.03547004610300064, "learning_rate": 7.814666837127695e-06, "loss": 0.0012, "step": 92620 }, { "epoch": 0.7574927423641493, "grad_norm": 0.04647408798336983, "learning_rate": 7.814076990163182e-06, "loss": 0.0032, "step": 92630 }, { "epoch": 0.7575745185427485, "grad_norm": 0.06245103105902672, "learning_rate": 7.813487085873632e-06, "loss": 0.0023, "step": 92640 }, { "epoch": 0.7576562947213477, "grad_norm": 0.10592156648635864, "learning_rate": 7.812897124271063e-06, "loss": 0.0023, "step": 92650 }, { "epoch": 0.7577380708999468, "grad_norm": 0.008151655085384846, "learning_rate": 7.812307105367492e-06, "loss": 0.0013, "step": 92660 }, { "epoch": 0.757819847078546, "grad_norm": 0.010012373328208923, "learning_rate": 7.811717029174942e-06, "loss": 0.0011, "step": 92670 }, { "epoch": 0.7579016232571452, "grad_norm": 0.01898251101374626, "learning_rate": 7.811126895705429e-06, "loss": 0.0013, "step": 92680 }, { "epoch": 0.7579833994357443, "grad_norm": 0.06980353593826294, "learning_rate": 7.810536704970976e-06, "loss": 0.003, "step": 92690 }, { "epoch": 0.7580651756143435, "grad_norm": 0.03968430683016777, "learning_rate": 7.809946456983606e-06, "loss": 0.0034, "step": 92700 }, { "epoch": 0.7581469517929427, "grad_norm": 0.0026948789600282907, "learning_rate": 7.809356151755344e-06, "loss": 0.0019, "step": 92710 }, { "epoch": 0.7582287279715418, "grad_norm": 0.1267496645450592, "learning_rate": 7.808765789298212e-06, "loss": 0.002, "step": 92720 }, { "epoch": 0.7583105041501411, "grad_norm": 0.03110380470752716, "learning_rate": 7.808175369624239e-06, "loss": 0.0014, "step": 92730 }, { "epoch": 0.7583922803287403, "grad_norm": 0.08367352932691574, "learning_rate": 7.80758489274545e-06, "loss": 0.0019, "step": 92740 }, { "epoch": 0.7584740565073395, "grad_norm": 0.06544259190559387, "learning_rate": 7.806994358673876e-06, "loss": 0.0009, "step": 92750 }, { "epoch": 0.7585558326859386, "grad_norm": 0.052521344274282455, "learning_rate": 7.806403767421544e-06, "loss": 0.0013, "step": 92760 }, { "epoch": 0.7586376088645378, "grad_norm": 0.012133190408349037, "learning_rate": 7.805813119000487e-06, "loss": 0.0013, "step": 92770 }, { "epoch": 0.758719385043137, "grad_norm": 0.042885854840278625, "learning_rate": 7.805222413422736e-06, "loss": 0.0013, "step": 92780 }, { "epoch": 0.7588011612217361, "grad_norm": 0.09465239942073822, "learning_rate": 7.804631650700323e-06, "loss": 0.0016, "step": 92790 }, { "epoch": 0.7588829374003353, "grad_norm": 0.13429628312587738, "learning_rate": 7.804040830845285e-06, "loss": 0.0016, "step": 92800 }, { "epoch": 0.7589647135789345, "grad_norm": 0.06984549760818481, "learning_rate": 7.803449953869655e-06, "loss": 0.0016, "step": 92810 }, { "epoch": 0.7590464897575336, "grad_norm": 0.028566410765051842, "learning_rate": 7.80285901978547e-06, "loss": 0.0021, "step": 92820 }, { "epoch": 0.7591282659361328, "grad_norm": 0.05864159017801285, "learning_rate": 7.802268028604769e-06, "loss": 0.0009, "step": 92830 }, { "epoch": 0.759210042114732, "grad_norm": 0.20795677602291107, "learning_rate": 7.801676980339592e-06, "loss": 0.002, "step": 92840 }, { "epoch": 0.7592918182933311, "grad_norm": 0.14639140665531158, "learning_rate": 7.801085875001976e-06, "loss": 0.0037, "step": 92850 }, { "epoch": 0.7593735944719303, "grad_norm": 0.08670470863580704, "learning_rate": 7.800494712603966e-06, "loss": 0.0011, "step": 92860 }, { "epoch": 0.7594553706505295, "grad_norm": 0.03819030150771141, "learning_rate": 7.7999034931576e-06, "loss": 0.0029, "step": 92870 }, { "epoch": 0.7595371468291287, "grad_norm": 0.03639272600412369, "learning_rate": 7.799312216674924e-06, "loss": 0.0022, "step": 92880 }, { "epoch": 0.7596189230077278, "grad_norm": 0.08026400208473206, "learning_rate": 7.798720883167983e-06, "loss": 0.0016, "step": 92890 }, { "epoch": 0.759700699186327, "grad_norm": 0.009267253801226616, "learning_rate": 7.798129492648824e-06, "loss": 0.0008, "step": 92900 }, { "epoch": 0.7597824753649262, "grad_norm": 0.05749984458088875, "learning_rate": 7.797538045129492e-06, "loss": 0.001, "step": 92910 }, { "epoch": 0.7598642515435253, "grad_norm": 0.052725743502378464, "learning_rate": 7.796946540622037e-06, "loss": 0.0013, "step": 92920 }, { "epoch": 0.7599460277221245, "grad_norm": 0.020600026473402977, "learning_rate": 7.796354979138506e-06, "loss": 0.0015, "step": 92930 }, { "epoch": 0.7600278039007237, "grad_norm": 0.12236727774143219, "learning_rate": 7.795763360690952e-06, "loss": 0.0019, "step": 92940 }, { "epoch": 0.7601095800793229, "grad_norm": 0.05624178424477577, "learning_rate": 7.795171685291425e-06, "loss": 0.0012, "step": 92950 }, { "epoch": 0.7601913562579221, "grad_norm": 0.05272532254457474, "learning_rate": 7.794579952951979e-06, "loss": 0.0011, "step": 92960 }, { "epoch": 0.7602731324365213, "grad_norm": 0.04714301973581314, "learning_rate": 7.793988163684668e-06, "loss": 0.0018, "step": 92970 }, { "epoch": 0.7603549086151205, "grad_norm": 0.06174904853105545, "learning_rate": 7.793396317501547e-06, "loss": 0.0022, "step": 92980 }, { "epoch": 0.7604366847937196, "grad_norm": 0.016555950045585632, "learning_rate": 7.792804414414671e-06, "loss": 0.0017, "step": 92990 }, { "epoch": 0.7605184609723188, "grad_norm": 0.05903705954551697, "learning_rate": 7.792212454436103e-06, "loss": 0.0014, "step": 93000 }, { "epoch": 0.760600237150918, "grad_norm": 0.0666348785161972, "learning_rate": 7.791620437577893e-06, "loss": 0.0018, "step": 93010 }, { "epoch": 0.7606820133295171, "grad_norm": 0.055301226675510406, "learning_rate": 7.791028363852108e-06, "loss": 0.0026, "step": 93020 }, { "epoch": 0.7607637895081163, "grad_norm": 0.023153208196163177, "learning_rate": 7.790436233270808e-06, "loss": 0.0009, "step": 93030 }, { "epoch": 0.7608455656867155, "grad_norm": 0.03666115179657936, "learning_rate": 7.789844045846051e-06, "loss": 0.0018, "step": 93040 }, { "epoch": 0.7609273418653146, "grad_norm": 0.0683421865105629, "learning_rate": 7.789251801589905e-06, "loss": 0.0017, "step": 93050 }, { "epoch": 0.7610091180439138, "grad_norm": 0.014956126920878887, "learning_rate": 7.78865950051443e-06, "loss": 0.0009, "step": 93060 }, { "epoch": 0.761090894222513, "grad_norm": 0.0022040109615772963, "learning_rate": 7.788067142631697e-06, "loss": 0.0024, "step": 93070 }, { "epoch": 0.7611726704011121, "grad_norm": 0.07335870712995529, "learning_rate": 7.787474727953769e-06, "loss": 0.002, "step": 93080 }, { "epoch": 0.7612544465797113, "grad_norm": 0.05991324782371521, "learning_rate": 7.786882256492716e-06, "loss": 0.0023, "step": 93090 }, { "epoch": 0.7613362227583105, "grad_norm": 0.0843057632446289, "learning_rate": 7.786289728260604e-06, "loss": 0.003, "step": 93100 }, { "epoch": 0.7614179989369096, "grad_norm": 0.014778166078031063, "learning_rate": 7.785697143269507e-06, "loss": 0.0044, "step": 93110 }, { "epoch": 0.7614997751155088, "grad_norm": 0.0458747074007988, "learning_rate": 7.785104501531495e-06, "loss": 0.0012, "step": 93120 }, { "epoch": 0.761581551294108, "grad_norm": 0.006385503802448511, "learning_rate": 7.784511803058639e-06, "loss": 0.0008, "step": 93130 }, { "epoch": 0.7616633274727072, "grad_norm": 0.06882572919130325, "learning_rate": 7.783919047863015e-06, "loss": 0.0007, "step": 93140 }, { "epoch": 0.7617451036513063, "grad_norm": 0.10318784415721893, "learning_rate": 7.783326235956698e-06, "loss": 0.0029, "step": 93150 }, { "epoch": 0.7618268798299056, "grad_norm": 0.0851341113448143, "learning_rate": 7.782733367351763e-06, "loss": 0.0023, "step": 93160 }, { "epoch": 0.7619086560085048, "grad_norm": 0.0022931036073714495, "learning_rate": 7.782140442060285e-06, "loss": 0.006, "step": 93170 }, { "epoch": 0.7619904321871039, "grad_norm": 0.14863191545009613, "learning_rate": 7.781547460094348e-06, "loss": 0.0014, "step": 93180 }, { "epoch": 0.7620722083657031, "grad_norm": 0.006224370561540127, "learning_rate": 7.780954421466025e-06, "loss": 0.0013, "step": 93190 }, { "epoch": 0.7621539845443023, "grad_norm": 0.06403898447751999, "learning_rate": 7.780361326187402e-06, "loss": 0.0015, "step": 93200 }, { "epoch": 0.7622357607229014, "grad_norm": 0.026250869035720825, "learning_rate": 7.77976817427056e-06, "loss": 0.0009, "step": 93210 }, { "epoch": 0.7623175369015006, "grad_norm": 0.05414082482457161, "learning_rate": 7.779174965727578e-06, "loss": 0.0019, "step": 93220 }, { "epoch": 0.7623993130800998, "grad_norm": 0.15375499427318573, "learning_rate": 7.778581700570544e-06, "loss": 0.0014, "step": 93230 }, { "epoch": 0.762481089258699, "grad_norm": 0.10654831677675247, "learning_rate": 7.777988378811543e-06, "loss": 0.0016, "step": 93240 }, { "epoch": 0.7625628654372981, "grad_norm": 0.08098912984132767, "learning_rate": 7.77739500046266e-06, "loss": 0.0023, "step": 93250 }, { "epoch": 0.7626446416158973, "grad_norm": 0.029303288087248802, "learning_rate": 7.776801565535983e-06, "loss": 0.0015, "step": 93260 }, { "epoch": 0.7627264177944965, "grad_norm": 0.008751170709729195, "learning_rate": 7.776208074043602e-06, "loss": 0.0014, "step": 93270 }, { "epoch": 0.7628081939730956, "grad_norm": 0.03915517032146454, "learning_rate": 7.775614525997606e-06, "loss": 0.0011, "step": 93280 }, { "epoch": 0.7628899701516948, "grad_norm": 0.04803973063826561, "learning_rate": 7.775020921410084e-06, "loss": 0.0011, "step": 93290 }, { "epoch": 0.762971746330294, "grad_norm": 0.03021169826388359, "learning_rate": 7.774427260293132e-06, "loss": 0.0016, "step": 93300 }, { "epoch": 0.7630535225088931, "grad_norm": 0.070906862616539, "learning_rate": 7.773833542658842e-06, "loss": 0.0016, "step": 93310 }, { "epoch": 0.7631352986874923, "grad_norm": 0.11542150378227234, "learning_rate": 7.773239768519306e-06, "loss": 0.0012, "step": 93320 }, { "epoch": 0.7632170748660915, "grad_norm": 0.061928585171699524, "learning_rate": 7.772645937886623e-06, "loss": 0.0019, "step": 93330 }, { "epoch": 0.7632988510446906, "grad_norm": 0.08923044800758362, "learning_rate": 7.77205205077289e-06, "loss": 0.0019, "step": 93340 }, { "epoch": 0.7633806272232898, "grad_norm": 0.07578369975090027, "learning_rate": 7.7714581071902e-06, "loss": 0.0016, "step": 93350 }, { "epoch": 0.763462403401889, "grad_norm": 0.050384946167469025, "learning_rate": 7.770864107150657e-06, "loss": 0.0015, "step": 93360 }, { "epoch": 0.7635441795804883, "grad_norm": 0.042228199541568756, "learning_rate": 7.77027005066636e-06, "loss": 0.0014, "step": 93370 }, { "epoch": 0.7636259557590874, "grad_norm": 0.18287190794944763, "learning_rate": 7.769675937749411e-06, "loss": 0.0015, "step": 93380 }, { "epoch": 0.7637077319376866, "grad_norm": 0.1371031403541565, "learning_rate": 7.769081768411913e-06, "loss": 0.0021, "step": 93390 }, { "epoch": 0.7637895081162858, "grad_norm": 0.05411924794316292, "learning_rate": 7.768487542665965e-06, "loss": 0.0012, "step": 93400 }, { "epoch": 0.7638712842948849, "grad_norm": 0.042424023151397705, "learning_rate": 7.767893260523678e-06, "loss": 0.0042, "step": 93410 }, { "epoch": 0.7639530604734841, "grad_norm": 0.10829649120569229, "learning_rate": 7.767298921997154e-06, "loss": 0.0021, "step": 93420 }, { "epoch": 0.7640348366520833, "grad_norm": 0.019405320286750793, "learning_rate": 7.766704527098504e-06, "loss": 0.0027, "step": 93430 }, { "epoch": 0.7641166128306824, "grad_norm": 0.028467372059822083, "learning_rate": 7.766110075839833e-06, "loss": 0.0011, "step": 93440 }, { "epoch": 0.7641983890092816, "grad_norm": 0.016926420852541924, "learning_rate": 7.765515568233248e-06, "loss": 0.0025, "step": 93450 }, { "epoch": 0.7642801651878808, "grad_norm": 0.029215330258011818, "learning_rate": 7.764921004290866e-06, "loss": 0.0037, "step": 93460 }, { "epoch": 0.76436194136648, "grad_norm": 0.004495079629123211, "learning_rate": 7.764326384024796e-06, "loss": 0.0031, "step": 93470 }, { "epoch": 0.7644437175450791, "grad_norm": 0.03812355175614357, "learning_rate": 7.76373170744715e-06, "loss": 0.0009, "step": 93480 }, { "epoch": 0.7645254937236783, "grad_norm": 0.08009178191423416, "learning_rate": 7.763136974570043e-06, "loss": 0.0048, "step": 93490 }, { "epoch": 0.7646072699022775, "grad_norm": 0.15242508053779602, "learning_rate": 7.762542185405589e-06, "loss": 0.0012, "step": 93500 }, { "epoch": 0.7646890460808766, "grad_norm": 0.030036285519599915, "learning_rate": 7.761947339965906e-06, "loss": 0.0013, "step": 93510 }, { "epoch": 0.7647708222594758, "grad_norm": 0.026048079133033752, "learning_rate": 7.761352438263111e-06, "loss": 0.0026, "step": 93520 }, { "epoch": 0.764852598438075, "grad_norm": 0.05103486776351929, "learning_rate": 7.76075748030932e-06, "loss": 0.0015, "step": 93530 }, { "epoch": 0.7649343746166741, "grad_norm": 0.1613064408302307, "learning_rate": 7.760162466116656e-06, "loss": 0.0019, "step": 93540 }, { "epoch": 0.7650161507952733, "grad_norm": 0.07165548950433731, "learning_rate": 7.75956739569724e-06, "loss": 0.0032, "step": 93550 }, { "epoch": 0.7650979269738725, "grad_norm": 0.09708116948604584, "learning_rate": 7.758972269063192e-06, "loss": 0.0011, "step": 93560 }, { "epoch": 0.7651797031524716, "grad_norm": 0.08642467856407166, "learning_rate": 7.758377086226637e-06, "loss": 0.0016, "step": 93570 }, { "epoch": 0.7652614793310708, "grad_norm": 0.026672348380088806, "learning_rate": 7.7577818471997e-06, "loss": 0.0013, "step": 93580 }, { "epoch": 0.7653432555096701, "grad_norm": 0.06116903945803642, "learning_rate": 7.757186551994501e-06, "loss": 0.0016, "step": 93590 }, { "epoch": 0.7654250316882693, "grad_norm": 0.12376042455434799, "learning_rate": 7.756591200623176e-06, "loss": 0.0021, "step": 93600 }, { "epoch": 0.7655068078668684, "grad_norm": 0.00565542746335268, "learning_rate": 7.755995793097844e-06, "loss": 0.001, "step": 93610 }, { "epoch": 0.7655885840454676, "grad_norm": 0.036502450704574585, "learning_rate": 7.755400329430638e-06, "loss": 0.0015, "step": 93620 }, { "epoch": 0.7656703602240668, "grad_norm": 0.05002701282501221, "learning_rate": 7.754804809633688e-06, "loss": 0.0014, "step": 93630 }, { "epoch": 0.7657521364026659, "grad_norm": 0.06193016469478607, "learning_rate": 7.754209233719125e-06, "loss": 0.0022, "step": 93640 }, { "epoch": 0.7658339125812651, "grad_norm": 0.1069626733660698, "learning_rate": 7.75361360169908e-06, "loss": 0.0016, "step": 93650 }, { "epoch": 0.7659156887598643, "grad_norm": 0.05509350448846817, "learning_rate": 7.75301791358569e-06, "loss": 0.0017, "step": 93660 }, { "epoch": 0.7659974649384634, "grad_norm": 0.045202694833278656, "learning_rate": 7.752422169391084e-06, "loss": 0.002, "step": 93670 }, { "epoch": 0.7660792411170626, "grad_norm": 0.038077667355537415, "learning_rate": 7.751826369127406e-06, "loss": 0.0011, "step": 93680 }, { "epoch": 0.7661610172956618, "grad_norm": 0.08921518921852112, "learning_rate": 7.751230512806784e-06, "loss": 0.0018, "step": 93690 }, { "epoch": 0.7662427934742609, "grad_norm": 0.15674789249897003, "learning_rate": 7.750634600441363e-06, "loss": 0.0011, "step": 93700 }, { "epoch": 0.7663245696528601, "grad_norm": 0.020955989137291908, "learning_rate": 7.750038632043276e-06, "loss": 0.0015, "step": 93710 }, { "epoch": 0.7664063458314593, "grad_norm": 0.03946258872747421, "learning_rate": 7.749442607624669e-06, "loss": 0.0014, "step": 93720 }, { "epoch": 0.7664881220100584, "grad_norm": 0.131344273686409, "learning_rate": 7.748846527197679e-06, "loss": 0.0025, "step": 93730 }, { "epoch": 0.7665698981886576, "grad_norm": 0.021049777045845985, "learning_rate": 7.748250390774454e-06, "loss": 0.0024, "step": 93740 }, { "epoch": 0.7666516743672568, "grad_norm": 0.3443615734577179, "learning_rate": 7.747654198367133e-06, "loss": 0.0026, "step": 93750 }, { "epoch": 0.766733450545856, "grad_norm": 0.018820738419890404, "learning_rate": 7.747057949987864e-06, "loss": 0.0016, "step": 93760 }, { "epoch": 0.7668152267244551, "grad_norm": 0.03567107021808624, "learning_rate": 7.74646164564879e-06, "loss": 0.0017, "step": 93770 }, { "epoch": 0.7668970029030543, "grad_norm": 0.02389572188258171, "learning_rate": 7.74586528536206e-06, "loss": 0.0036, "step": 93780 }, { "epoch": 0.7669787790816535, "grad_norm": 0.052047014236450195, "learning_rate": 7.745268869139822e-06, "loss": 0.002, "step": 93790 }, { "epoch": 0.7670605552602527, "grad_norm": 0.12763240933418274, "learning_rate": 7.744672396994226e-06, "loss": 0.001, "step": 93800 }, { "epoch": 0.7671423314388519, "grad_norm": 0.056906361132860184, "learning_rate": 7.744075868937423e-06, "loss": 0.0015, "step": 93810 }, { "epoch": 0.7672241076174511, "grad_norm": 0.016017816960811615, "learning_rate": 7.743479284981562e-06, "loss": 0.0008, "step": 93820 }, { "epoch": 0.7673058837960502, "grad_norm": 0.018156487494707108, "learning_rate": 7.742882645138801e-06, "loss": 0.0015, "step": 93830 }, { "epoch": 0.7673876599746494, "grad_norm": 0.030724400654435158, "learning_rate": 7.742285949421288e-06, "loss": 0.0013, "step": 93840 }, { "epoch": 0.7674694361532486, "grad_norm": 0.02540716528892517, "learning_rate": 7.741689197841183e-06, "loss": 0.0011, "step": 93850 }, { "epoch": 0.7675512123318478, "grad_norm": 0.06814317405223846, "learning_rate": 7.74109239041064e-06, "loss": 0.0017, "step": 93860 }, { "epoch": 0.7676329885104469, "grad_norm": 0.038948312401771545, "learning_rate": 7.740495527141818e-06, "loss": 0.001, "step": 93870 }, { "epoch": 0.7677147646890461, "grad_norm": 0.0226537324488163, "learning_rate": 7.739898608046873e-06, "loss": 0.0016, "step": 93880 }, { "epoch": 0.7677965408676453, "grad_norm": 0.052640486508607864, "learning_rate": 7.739301633137966e-06, "loss": 0.0018, "step": 93890 }, { "epoch": 0.7678783170462444, "grad_norm": 0.15488125383853912, "learning_rate": 7.738704602427261e-06, "loss": 0.0026, "step": 93900 }, { "epoch": 0.7679600932248436, "grad_norm": 0.06314298510551453, "learning_rate": 7.738107515926913e-06, "loss": 0.0013, "step": 93910 }, { "epoch": 0.7680418694034428, "grad_norm": 0.07380931079387665, "learning_rate": 7.737510373649093e-06, "loss": 0.0018, "step": 93920 }, { "epoch": 0.7681236455820419, "grad_norm": 0.035246722400188446, "learning_rate": 7.736913175605959e-06, "loss": 0.0016, "step": 93930 }, { "epoch": 0.7682054217606411, "grad_norm": 0.09827050566673279, "learning_rate": 7.73631592180968e-06, "loss": 0.0017, "step": 93940 }, { "epoch": 0.7682871979392403, "grad_norm": 0.041565123945474625, "learning_rate": 7.73571861227242e-06, "loss": 0.0022, "step": 93950 }, { "epoch": 0.7683689741178394, "grad_norm": 0.1058872789144516, "learning_rate": 7.73512124700635e-06, "loss": 0.0047, "step": 93960 }, { "epoch": 0.7684507502964386, "grad_norm": 0.0486447699368, "learning_rate": 7.734523826023637e-06, "loss": 0.0012, "step": 93970 }, { "epoch": 0.7685325264750378, "grad_norm": 0.03879890963435173, "learning_rate": 7.73392634933645e-06, "loss": 0.002, "step": 93980 }, { "epoch": 0.768614302653637, "grad_norm": 0.027824977412819862, "learning_rate": 7.733328816956961e-06, "loss": 0.0018, "step": 93990 }, { "epoch": 0.7686960788322361, "grad_norm": 0.1488480418920517, "learning_rate": 7.732731228897343e-06, "loss": 0.003, "step": 94000 }, { "epoch": 0.7687778550108354, "grad_norm": 0.05486169457435608, "learning_rate": 7.732133585169768e-06, "loss": 0.0018, "step": 94010 }, { "epoch": 0.7688596311894346, "grad_norm": 0.04349156469106674, "learning_rate": 7.731535885786412e-06, "loss": 0.0027, "step": 94020 }, { "epoch": 0.7689414073680337, "grad_norm": 0.06506235152482986, "learning_rate": 7.730938130759449e-06, "loss": 0.002, "step": 94030 }, { "epoch": 0.7690231835466329, "grad_norm": 0.09945683181285858, "learning_rate": 7.730340320101054e-06, "loss": 0.0029, "step": 94040 }, { "epoch": 0.7691049597252321, "grad_norm": 0.038747210055589676, "learning_rate": 7.72974245382341e-06, "loss": 0.0017, "step": 94050 }, { "epoch": 0.7691867359038312, "grad_norm": 0.008482547476887703, "learning_rate": 7.729144531938696e-06, "loss": 0.0021, "step": 94060 }, { "epoch": 0.7692685120824304, "grad_norm": 0.02631450444459915, "learning_rate": 7.728546554459085e-06, "loss": 0.0035, "step": 94070 }, { "epoch": 0.7693502882610296, "grad_norm": 0.09960738569498062, "learning_rate": 7.727948521396764e-06, "loss": 0.0018, "step": 94080 }, { "epoch": 0.7694320644396287, "grad_norm": 0.024623967707157135, "learning_rate": 7.727350432763915e-06, "loss": 0.0015, "step": 94090 }, { "epoch": 0.7695138406182279, "grad_norm": 0.032676901668310165, "learning_rate": 7.726752288572721e-06, "loss": 0.0014, "step": 94100 }, { "epoch": 0.7695956167968271, "grad_norm": 0.08594653755426407, "learning_rate": 7.726154088835365e-06, "loss": 0.0026, "step": 94110 }, { "epoch": 0.7696773929754263, "grad_norm": 0.01830161362886429, "learning_rate": 7.725555833564035e-06, "loss": 0.0011, "step": 94120 }, { "epoch": 0.7697591691540254, "grad_norm": 0.08624549210071564, "learning_rate": 7.724957522770917e-06, "loss": 0.0014, "step": 94130 }, { "epoch": 0.7698409453326246, "grad_norm": 0.07217013090848923, "learning_rate": 7.724359156468201e-06, "loss": 0.0019, "step": 94140 }, { "epoch": 0.7699227215112238, "grad_norm": 0.0819297656416893, "learning_rate": 7.723760734668073e-06, "loss": 0.0024, "step": 94150 }, { "epoch": 0.7700044976898229, "grad_norm": 0.06040516495704651, "learning_rate": 7.723162257382727e-06, "loss": 0.0021, "step": 94160 }, { "epoch": 0.7700862738684221, "grad_norm": 0.05975990742444992, "learning_rate": 7.722563724624349e-06, "loss": 0.0015, "step": 94170 }, { "epoch": 0.7701680500470213, "grad_norm": 0.16109177470207214, "learning_rate": 7.721965136405136e-06, "loss": 0.0035, "step": 94180 }, { "epoch": 0.7702498262256204, "grad_norm": 0.11215870082378387, "learning_rate": 7.72136649273728e-06, "loss": 0.0014, "step": 94190 }, { "epoch": 0.7703316024042196, "grad_norm": 0.018840180709958076, "learning_rate": 7.720767793632979e-06, "loss": 0.0012, "step": 94200 }, { "epoch": 0.7704133785828188, "grad_norm": 0.10774307698011398, "learning_rate": 7.720169039104424e-06, "loss": 0.0014, "step": 94210 }, { "epoch": 0.7704951547614179, "grad_norm": 0.040503837168216705, "learning_rate": 7.719570229163815e-06, "loss": 0.0015, "step": 94220 }, { "epoch": 0.7705769309400172, "grad_norm": 0.07396022230386734, "learning_rate": 7.71897136382335e-06, "loss": 0.0021, "step": 94230 }, { "epoch": 0.7706587071186164, "grad_norm": 0.09452308714389801, "learning_rate": 7.71837244309523e-06, "loss": 0.0014, "step": 94240 }, { "epoch": 0.7707404832972156, "grad_norm": 0.06054185703396797, "learning_rate": 7.717773466991653e-06, "loss": 0.0033, "step": 94250 }, { "epoch": 0.7708222594758147, "grad_norm": 0.1335001289844513, "learning_rate": 7.717174435524821e-06, "loss": 0.0028, "step": 94260 }, { "epoch": 0.7709040356544139, "grad_norm": 0.08112150430679321, "learning_rate": 7.716575348706937e-06, "loss": 0.0024, "step": 94270 }, { "epoch": 0.7709858118330131, "grad_norm": 0.054855428636074066, "learning_rate": 7.715976206550204e-06, "loss": 0.0017, "step": 94280 }, { "epoch": 0.7710675880116122, "grad_norm": 0.009729926474392414, "learning_rate": 7.715377009066828e-06, "loss": 0.001, "step": 94290 }, { "epoch": 0.7711493641902114, "grad_norm": 0.09696294367313385, "learning_rate": 7.714777756269016e-06, "loss": 0.0015, "step": 94300 }, { "epoch": 0.7712311403688106, "grad_norm": 0.08722750097513199, "learning_rate": 7.714178448168976e-06, "loss": 0.0018, "step": 94310 }, { "epoch": 0.7713129165474097, "grad_norm": 0.06378839164972305, "learning_rate": 7.713579084778912e-06, "loss": 0.0018, "step": 94320 }, { "epoch": 0.7713946927260089, "grad_norm": 0.04874386638402939, "learning_rate": 7.712979666111038e-06, "loss": 0.0014, "step": 94330 }, { "epoch": 0.7714764689046081, "grad_norm": 0.15104462206363678, "learning_rate": 7.712380192177562e-06, "loss": 0.0019, "step": 94340 }, { "epoch": 0.7715582450832073, "grad_norm": 0.04849680885672569, "learning_rate": 7.711780662990697e-06, "loss": 0.0012, "step": 94350 }, { "epoch": 0.7716400212618064, "grad_norm": 0.06495711952447891, "learning_rate": 7.711181078562658e-06, "loss": 0.0017, "step": 94360 }, { "epoch": 0.7717217974404056, "grad_norm": 0.04757587984204292, "learning_rate": 7.710581438905655e-06, "loss": 0.0009, "step": 94370 }, { "epoch": 0.7718035736190048, "grad_norm": 0.018270200118422508, "learning_rate": 7.709981744031903e-06, "loss": 0.002, "step": 94380 }, { "epoch": 0.7718853497976039, "grad_norm": 0.03459206596016884, "learning_rate": 7.709381993953622e-06, "loss": 0.002, "step": 94390 }, { "epoch": 0.7719671259762031, "grad_norm": 0.14699624478816986, "learning_rate": 7.708782188683025e-06, "loss": 0.0018, "step": 94400 }, { "epoch": 0.7720489021548023, "grad_norm": 0.11613257229328156, "learning_rate": 7.708182328232336e-06, "loss": 0.0018, "step": 94410 }, { "epoch": 0.7721306783334014, "grad_norm": 0.0939592719078064, "learning_rate": 7.707582412613772e-06, "loss": 0.0025, "step": 94420 }, { "epoch": 0.7722124545120006, "grad_norm": 0.005867000203579664, "learning_rate": 7.706982441839553e-06, "loss": 0.0011, "step": 94430 }, { "epoch": 0.7722942306905999, "grad_norm": 0.16590791940689087, "learning_rate": 7.706382415921899e-06, "loss": 0.0029, "step": 94440 }, { "epoch": 0.772376006869199, "grad_norm": 0.057898249477148056, "learning_rate": 7.705782334873038e-06, "loss": 0.0023, "step": 94450 }, { "epoch": 0.7724577830477982, "grad_norm": 0.050791241228580475, "learning_rate": 7.70518219870519e-06, "loss": 0.0017, "step": 94460 }, { "epoch": 0.7725395592263974, "grad_norm": 0.04965295270085335, "learning_rate": 7.704582007430581e-06, "loss": 0.0013, "step": 94470 }, { "epoch": 0.7726213354049966, "grad_norm": 0.1315460056066513, "learning_rate": 7.703981761061438e-06, "loss": 0.0017, "step": 94480 }, { "epoch": 0.7727031115835957, "grad_norm": 0.06607314199209213, "learning_rate": 7.70338145960999e-06, "loss": 0.0024, "step": 94490 }, { "epoch": 0.7727848877621949, "grad_norm": 0.03365632891654968, "learning_rate": 7.702781103088465e-06, "loss": 0.0013, "step": 94500 }, { "epoch": 0.7728666639407941, "grad_norm": 0.05721358209848404, "learning_rate": 7.702180691509091e-06, "loss": 0.0013, "step": 94510 }, { "epoch": 0.7729484401193932, "grad_norm": 0.07363610714673996, "learning_rate": 7.7015802248841e-06, "loss": 0.0023, "step": 94520 }, { "epoch": 0.7730302162979924, "grad_norm": 0.009825981222093105, "learning_rate": 7.700979703225723e-06, "loss": 0.0009, "step": 94530 }, { "epoch": 0.7731119924765916, "grad_norm": 0.04206462204456329, "learning_rate": 7.700379126546195e-06, "loss": 0.0013, "step": 94540 }, { "epoch": 0.7731937686551907, "grad_norm": 0.10789625346660614, "learning_rate": 7.699778494857748e-06, "loss": 0.0019, "step": 94550 }, { "epoch": 0.7732755448337899, "grad_norm": 0.06436242908239365, "learning_rate": 7.69917780817262e-06, "loss": 0.001, "step": 94560 }, { "epoch": 0.7733573210123891, "grad_norm": 0.008676668629050255, "learning_rate": 7.698577066503045e-06, "loss": 0.0012, "step": 94570 }, { "epoch": 0.7734390971909882, "grad_norm": 0.10948450118303299, "learning_rate": 7.697976269861262e-06, "loss": 0.0021, "step": 94580 }, { "epoch": 0.7735208733695874, "grad_norm": 0.08332841843366623, "learning_rate": 7.697375418259511e-06, "loss": 0.0021, "step": 94590 }, { "epoch": 0.7736026495481866, "grad_norm": 0.05937745049595833, "learning_rate": 7.696774511710028e-06, "loss": 0.0014, "step": 94600 }, { "epoch": 0.7736844257267858, "grad_norm": 0.07923384010791779, "learning_rate": 7.696173550225058e-06, "loss": 0.0022, "step": 94610 }, { "epoch": 0.7737662019053849, "grad_norm": 0.025648372247815132, "learning_rate": 7.69557253381684e-06, "loss": 0.0024, "step": 94620 }, { "epoch": 0.7738479780839841, "grad_norm": 0.09549982845783234, "learning_rate": 7.694971462497618e-06, "loss": 0.0021, "step": 94630 }, { "epoch": 0.7739297542625833, "grad_norm": 0.022725261747837067, "learning_rate": 7.694370336279636e-06, "loss": 0.0017, "step": 94640 }, { "epoch": 0.7740115304411825, "grad_norm": 0.06778395175933838, "learning_rate": 7.693769155175141e-06, "loss": 0.002, "step": 94650 }, { "epoch": 0.7740933066197817, "grad_norm": 0.002170389983803034, "learning_rate": 7.693167919196381e-06, "loss": 0.0011, "step": 94660 }, { "epoch": 0.7741750827983809, "grad_norm": 0.05357588082551956, "learning_rate": 7.692566628355598e-06, "loss": 0.0011, "step": 94670 }, { "epoch": 0.77425685897698, "grad_norm": 0.030741574242711067, "learning_rate": 7.691965282665046e-06, "loss": 0.0024, "step": 94680 }, { "epoch": 0.7743386351555792, "grad_norm": 0.02541280910372734, "learning_rate": 7.691363882136974e-06, "loss": 0.0013, "step": 94690 }, { "epoch": 0.7744204113341784, "grad_norm": 0.24944618344306946, "learning_rate": 7.690762426783629e-06, "loss": 0.0023, "step": 94700 }, { "epoch": 0.7745021875127776, "grad_norm": 0.0785076767206192, "learning_rate": 7.690160916617268e-06, "loss": 0.0013, "step": 94710 }, { "epoch": 0.7745839636913767, "grad_norm": 0.05639972537755966, "learning_rate": 7.689559351650142e-06, "loss": 0.0029, "step": 94720 }, { "epoch": 0.7746657398699759, "grad_norm": 0.09211616218090057, "learning_rate": 7.688957731894506e-06, "loss": 0.0015, "step": 94730 }, { "epoch": 0.7747475160485751, "grad_norm": 0.02577664703130722, "learning_rate": 7.688356057362617e-06, "loss": 0.0023, "step": 94740 }, { "epoch": 0.7748292922271742, "grad_norm": 0.07006222754716873, "learning_rate": 7.687754328066728e-06, "loss": 0.002, "step": 94750 }, { "epoch": 0.7749110684057734, "grad_norm": 0.06375043094158173, "learning_rate": 7.6871525440191e-06, "loss": 0.0015, "step": 94760 }, { "epoch": 0.7749928445843726, "grad_norm": 0.07670892030000687, "learning_rate": 7.686550705231988e-06, "loss": 0.0014, "step": 94770 }, { "epoch": 0.7750746207629717, "grad_norm": 0.08490611612796783, "learning_rate": 7.685948811717657e-06, "loss": 0.0017, "step": 94780 }, { "epoch": 0.7751563969415709, "grad_norm": 0.035282429307699203, "learning_rate": 7.685346863488363e-06, "loss": 0.0024, "step": 94790 }, { "epoch": 0.7752381731201701, "grad_norm": 0.01250932551920414, "learning_rate": 7.684744860556372e-06, "loss": 0.0023, "step": 94800 }, { "epoch": 0.7753199492987692, "grad_norm": 0.07741154730319977, "learning_rate": 7.684142802933947e-06, "loss": 0.0011, "step": 94810 }, { "epoch": 0.7754017254773684, "grad_norm": 0.08994246274232864, "learning_rate": 7.683540690633348e-06, "loss": 0.0017, "step": 94820 }, { "epoch": 0.7754835016559676, "grad_norm": 0.05758268013596535, "learning_rate": 7.682938523666847e-06, "loss": 0.0019, "step": 94830 }, { "epoch": 0.7755652778345667, "grad_norm": 0.02011754736304283, "learning_rate": 7.682336302046707e-06, "loss": 0.0014, "step": 94840 }, { "epoch": 0.7756470540131659, "grad_norm": 0.005016016773879528, "learning_rate": 7.681734025785195e-06, "loss": 0.002, "step": 94850 }, { "epoch": 0.7757288301917651, "grad_norm": 0.03098401427268982, "learning_rate": 7.681131694894583e-06, "loss": 0.0008, "step": 94860 }, { "epoch": 0.7758106063703644, "grad_norm": 0.06384965777397156, "learning_rate": 7.680529309387137e-06, "loss": 0.001, "step": 94870 }, { "epoch": 0.7758923825489635, "grad_norm": 0.03971236199140549, "learning_rate": 7.679926869275131e-06, "loss": 0.0013, "step": 94880 }, { "epoch": 0.7759741587275627, "grad_norm": 0.1520373672246933, "learning_rate": 7.679324374570836e-06, "loss": 0.0021, "step": 94890 }, { "epoch": 0.7760559349061619, "grad_norm": 0.05601169168949127, "learning_rate": 7.678721825286526e-06, "loss": 0.0026, "step": 94900 }, { "epoch": 0.776137711084761, "grad_norm": 0.03531736135482788, "learning_rate": 7.678119221434474e-06, "loss": 0.0016, "step": 94910 }, { "epoch": 0.7762194872633602, "grad_norm": 0.039144620299339294, "learning_rate": 7.677516563026958e-06, "loss": 0.001, "step": 94920 }, { "epoch": 0.7763012634419594, "grad_norm": 0.00909395981580019, "learning_rate": 7.676913850076253e-06, "loss": 0.0013, "step": 94930 }, { "epoch": 0.7763830396205585, "grad_norm": 0.14153669774532318, "learning_rate": 7.676311082594636e-06, "loss": 0.0026, "step": 94940 }, { "epoch": 0.7764648157991577, "grad_norm": 0.07373111695051193, "learning_rate": 7.67570826059439e-06, "loss": 0.0019, "step": 94950 }, { "epoch": 0.7765465919777569, "grad_norm": 0.24155177175998688, "learning_rate": 7.675105384087787e-06, "loss": 0.0034, "step": 94960 }, { "epoch": 0.776628368156356, "grad_norm": 0.035333309322595596, "learning_rate": 7.674502453087117e-06, "loss": 0.0019, "step": 94970 }, { "epoch": 0.7767101443349552, "grad_norm": 0.030860621482133865, "learning_rate": 7.673899467604656e-06, "loss": 0.0012, "step": 94980 }, { "epoch": 0.7767919205135544, "grad_norm": 0.11406237632036209, "learning_rate": 7.67329642765269e-06, "loss": 0.0028, "step": 94990 }, { "epoch": 0.7768736966921536, "grad_norm": 0.06277390569448471, "learning_rate": 7.672693333243501e-06, "loss": 0.0031, "step": 95000 }, { "epoch": 0.7769554728707527, "grad_norm": 0.018474383279681206, "learning_rate": 7.672090184389377e-06, "loss": 0.0014, "step": 95010 }, { "epoch": 0.7770372490493519, "grad_norm": 0.05698617547750473, "learning_rate": 7.671486981102606e-06, "loss": 0.0021, "step": 95020 }, { "epoch": 0.7771190252279511, "grad_norm": 0.04917411133646965, "learning_rate": 7.670883723395473e-06, "loss": 0.0026, "step": 95030 }, { "epoch": 0.7772008014065502, "grad_norm": 0.09968341886997223, "learning_rate": 7.670280411280267e-06, "loss": 0.004, "step": 95040 }, { "epoch": 0.7772825775851494, "grad_norm": 0.022770613431930542, "learning_rate": 7.669677044769278e-06, "loss": 0.0019, "step": 95050 }, { "epoch": 0.7773643537637486, "grad_norm": 0.18443149328231812, "learning_rate": 7.6690736238748e-06, "loss": 0.0023, "step": 95060 }, { "epoch": 0.7774461299423477, "grad_norm": 0.019031105563044548, "learning_rate": 7.66847014860912e-06, "loss": 0.0017, "step": 95070 }, { "epoch": 0.777527906120947, "grad_norm": 0.0906432569026947, "learning_rate": 7.667866618984534e-06, "loss": 0.0012, "step": 95080 }, { "epoch": 0.7776096822995462, "grad_norm": 0.08771584182977676, "learning_rate": 7.667263035013337e-06, "loss": 0.0027, "step": 95090 }, { "epoch": 0.7776914584781454, "grad_norm": 0.1068883016705513, "learning_rate": 7.666659396707825e-06, "loss": 0.001, "step": 95100 }, { "epoch": 0.7777732346567445, "grad_norm": 0.06236838176846504, "learning_rate": 7.666055704080292e-06, "loss": 0.0041, "step": 95110 }, { "epoch": 0.7778550108353437, "grad_norm": 0.04818673059344292, "learning_rate": 7.665451957143038e-06, "loss": 0.001, "step": 95120 }, { "epoch": 0.7779367870139429, "grad_norm": 0.111143097281456, "learning_rate": 7.664848155908362e-06, "loss": 0.0017, "step": 95130 }, { "epoch": 0.778018563192542, "grad_norm": 0.1167408898472786, "learning_rate": 7.664244300388562e-06, "loss": 0.0027, "step": 95140 }, { "epoch": 0.7781003393711412, "grad_norm": 0.02085314691066742, "learning_rate": 7.663640390595938e-06, "loss": 0.0019, "step": 95150 }, { "epoch": 0.7781821155497404, "grad_norm": 0.2515203058719635, "learning_rate": 7.663036426542798e-06, "loss": 0.0023, "step": 95160 }, { "epoch": 0.7782638917283395, "grad_norm": 0.008562281727790833, "learning_rate": 7.66243240824144e-06, "loss": 0.0012, "step": 95170 }, { "epoch": 0.7783456679069387, "grad_norm": 0.08038897812366486, "learning_rate": 7.661828335704168e-06, "loss": 0.002, "step": 95180 }, { "epoch": 0.7784274440855379, "grad_norm": 0.2915063202381134, "learning_rate": 7.661224208943292e-06, "loss": 0.002, "step": 95190 }, { "epoch": 0.778509220264137, "grad_norm": 0.05536358058452606, "learning_rate": 7.660620027971112e-06, "loss": 0.0015, "step": 95200 }, { "epoch": 0.7785909964427362, "grad_norm": 0.04921213164925575, "learning_rate": 7.660015792799942e-06, "loss": 0.0024, "step": 95210 }, { "epoch": 0.7786727726213354, "grad_norm": 0.09884291887283325, "learning_rate": 7.659411503442087e-06, "loss": 0.0012, "step": 95220 }, { "epoch": 0.7787545487999346, "grad_norm": 0.029756607487797737, "learning_rate": 7.65880715990986e-06, "loss": 0.0012, "step": 95230 }, { "epoch": 0.7788363249785337, "grad_norm": 0.011701465584337711, "learning_rate": 7.658202762215567e-06, "loss": 0.0032, "step": 95240 }, { "epoch": 0.7789181011571329, "grad_norm": 0.06927911937236786, "learning_rate": 7.657598310371526e-06, "loss": 0.0037, "step": 95250 }, { "epoch": 0.7789998773357321, "grad_norm": 0.08389858901500702, "learning_rate": 7.656993804390045e-06, "loss": 0.0027, "step": 95260 }, { "epoch": 0.7790816535143312, "grad_norm": 0.044018879532814026, "learning_rate": 7.656389244283441e-06, "loss": 0.001, "step": 95270 }, { "epoch": 0.7791634296929304, "grad_norm": 0.012182146310806274, "learning_rate": 7.65578463006403e-06, "loss": 0.0018, "step": 95280 }, { "epoch": 0.7792452058715297, "grad_norm": 0.043207596987485886, "learning_rate": 7.655179961744126e-06, "loss": 0.0028, "step": 95290 }, { "epoch": 0.7793269820501288, "grad_norm": 0.08642412722110748, "learning_rate": 7.654575239336048e-06, "loss": 0.0022, "step": 95300 }, { "epoch": 0.779408758228728, "grad_norm": 0.054459478706121445, "learning_rate": 7.653970462852116e-06, "loss": 0.0024, "step": 95310 }, { "epoch": 0.7794905344073272, "grad_norm": 0.02639773301780224, "learning_rate": 7.653365632304645e-06, "loss": 0.0022, "step": 95320 }, { "epoch": 0.7795723105859264, "grad_norm": 0.04480118677020073, "learning_rate": 7.652760747705964e-06, "loss": 0.0009, "step": 95330 }, { "epoch": 0.7796540867645255, "grad_norm": 0.12311116605997086, "learning_rate": 7.652155809068388e-06, "loss": 0.0018, "step": 95340 }, { "epoch": 0.7797358629431247, "grad_norm": 0.0497109554708004, "learning_rate": 7.651550816404242e-06, "loss": 0.0012, "step": 95350 }, { "epoch": 0.7798176391217239, "grad_norm": 0.07054437696933746, "learning_rate": 7.65094576972585e-06, "loss": 0.0028, "step": 95360 }, { "epoch": 0.779899415300323, "grad_norm": 0.05310601368546486, "learning_rate": 7.65034066904554e-06, "loss": 0.0015, "step": 95370 }, { "epoch": 0.7799811914789222, "grad_norm": 0.05822691693902016, "learning_rate": 7.649735514375634e-06, "loss": 0.002, "step": 95380 }, { "epoch": 0.7800629676575214, "grad_norm": 0.026913531124591827, "learning_rate": 7.649130305728465e-06, "loss": 0.0026, "step": 95390 }, { "epoch": 0.7801447438361205, "grad_norm": 0.10797116905450821, "learning_rate": 7.648525043116355e-06, "loss": 0.0015, "step": 95400 }, { "epoch": 0.7802265200147197, "grad_norm": 0.026348888874053955, "learning_rate": 7.647919726551639e-06, "loss": 0.0023, "step": 95410 }, { "epoch": 0.7803082961933189, "grad_norm": 0.029609136283397675, "learning_rate": 7.647314356046644e-06, "loss": 0.0022, "step": 95420 }, { "epoch": 0.780390072371918, "grad_norm": 0.03877812623977661, "learning_rate": 7.646708931613706e-06, "loss": 0.0018, "step": 95430 }, { "epoch": 0.7804718485505172, "grad_norm": 0.022809019312262535, "learning_rate": 7.646103453265154e-06, "loss": 0.0017, "step": 95440 }, { "epoch": 0.7805536247291164, "grad_norm": 0.02784084901213646, "learning_rate": 7.645497921013325e-06, "loss": 0.0016, "step": 95450 }, { "epoch": 0.7806354009077155, "grad_norm": 0.088877834379673, "learning_rate": 7.644892334870552e-06, "loss": 0.0012, "step": 95460 }, { "epoch": 0.7807171770863147, "grad_norm": 0.06264762580394745, "learning_rate": 7.644286694849173e-06, "loss": 0.0023, "step": 95470 }, { "epoch": 0.7807989532649139, "grad_norm": 0.01996980793774128, "learning_rate": 7.643681000961525e-06, "loss": 0.0029, "step": 95480 }, { "epoch": 0.780880729443513, "grad_norm": 0.1212722659111023, "learning_rate": 7.643075253219945e-06, "loss": 0.0014, "step": 95490 }, { "epoch": 0.7809625056221122, "grad_norm": 0.03356470540165901, "learning_rate": 7.642469451636773e-06, "loss": 0.0017, "step": 95500 }, { "epoch": 0.7810442818007115, "grad_norm": 0.08196672052145004, "learning_rate": 7.641863596224353e-06, "loss": 0.003, "step": 95510 }, { "epoch": 0.7811260579793107, "grad_norm": 0.06058847904205322, "learning_rate": 7.641257686995021e-06, "loss": 0.0028, "step": 95520 }, { "epoch": 0.7812078341579098, "grad_norm": 0.05351461470127106, "learning_rate": 7.640651723961126e-06, "loss": 0.0021, "step": 95530 }, { "epoch": 0.781289610336509, "grad_norm": 0.016385763883590698, "learning_rate": 7.640045707135006e-06, "loss": 0.0019, "step": 95540 }, { "epoch": 0.7813713865151082, "grad_norm": 0.05375753715634346, "learning_rate": 7.639439636529012e-06, "loss": 0.0013, "step": 95550 }, { "epoch": 0.7814531626937073, "grad_norm": 0.0459863618016243, "learning_rate": 7.638833512155484e-06, "loss": 0.0024, "step": 95560 }, { "epoch": 0.7815349388723065, "grad_norm": 0.06121331453323364, "learning_rate": 7.638227334026775e-06, "loss": 0.0022, "step": 95570 }, { "epoch": 0.7816167150509057, "grad_norm": 0.0079013891518116, "learning_rate": 7.63762110215523e-06, "loss": 0.0019, "step": 95580 }, { "epoch": 0.7816984912295049, "grad_norm": 0.043043915182352066, "learning_rate": 7.637014816553198e-06, "loss": 0.0016, "step": 95590 }, { "epoch": 0.781780267408104, "grad_norm": 0.05545452609658241, "learning_rate": 7.636408477233032e-06, "loss": 0.0018, "step": 95600 }, { "epoch": 0.7818620435867032, "grad_norm": 0.031321801245212555, "learning_rate": 7.635802084207082e-06, "loss": 0.0021, "step": 95610 }, { "epoch": 0.7819438197653024, "grad_norm": 0.01757841557264328, "learning_rate": 7.635195637487702e-06, "loss": 0.0021, "step": 95620 }, { "epoch": 0.7820255959439015, "grad_norm": 0.06999394297599792, "learning_rate": 7.634589137087244e-06, "loss": 0.0014, "step": 95630 }, { "epoch": 0.7821073721225007, "grad_norm": 0.10838823765516281, "learning_rate": 7.633982583018064e-06, "loss": 0.0014, "step": 95640 }, { "epoch": 0.7821891483010999, "grad_norm": 0.020882295444607735, "learning_rate": 7.633375975292518e-06, "loss": 0.0016, "step": 95650 }, { "epoch": 0.782270924479699, "grad_norm": 0.06607289612293243, "learning_rate": 7.632769313922963e-06, "loss": 0.0014, "step": 95660 }, { "epoch": 0.7823527006582982, "grad_norm": 0.06818664073944092, "learning_rate": 7.632162598921754e-06, "loss": 0.0025, "step": 95670 }, { "epoch": 0.7824344768368974, "grad_norm": 0.055257368832826614, "learning_rate": 7.631555830301258e-06, "loss": 0.0015, "step": 95680 }, { "epoch": 0.7825162530154965, "grad_norm": 0.019711855798959732, "learning_rate": 7.630949008073827e-06, "loss": 0.0021, "step": 95690 }, { "epoch": 0.7825980291940957, "grad_norm": 0.08655599504709244, "learning_rate": 7.630342132251828e-06, "loss": 0.0016, "step": 95700 }, { "epoch": 0.7826798053726949, "grad_norm": 0.09525202959775925, "learning_rate": 7.629735202847623e-06, "loss": 0.0039, "step": 95710 }, { "epoch": 0.7827615815512942, "grad_norm": 0.11715386062860489, "learning_rate": 7.629128219873572e-06, "loss": 0.0015, "step": 95720 }, { "epoch": 0.7828433577298933, "grad_norm": 0.04248254746198654, "learning_rate": 7.628521183342042e-06, "loss": 0.0038, "step": 95730 }, { "epoch": 0.7829251339084925, "grad_norm": 0.01402530912309885, "learning_rate": 7.6279140932654005e-06, "loss": 0.0006, "step": 95740 }, { "epoch": 0.7830069100870917, "grad_norm": 0.11225849390029907, "learning_rate": 7.627306949656011e-06, "loss": 0.0017, "step": 95750 }, { "epoch": 0.7830886862656908, "grad_norm": 0.0201584380120039, "learning_rate": 7.626699752526246e-06, "loss": 0.0013, "step": 95760 }, { "epoch": 0.78317046244429, "grad_norm": 0.025296874344348907, "learning_rate": 7.62609250188847e-06, "loss": 0.0012, "step": 95770 }, { "epoch": 0.7832522386228892, "grad_norm": 0.04864996299147606, "learning_rate": 7.625485197755056e-06, "loss": 0.002, "step": 95780 }, { "epoch": 0.7833340148014883, "grad_norm": 0.06351787596940994, "learning_rate": 7.6248778401383735e-06, "loss": 0.0009, "step": 95790 }, { "epoch": 0.7834157909800875, "grad_norm": 0.05751701071858406, "learning_rate": 7.624270429050796e-06, "loss": 0.0011, "step": 95800 }, { "epoch": 0.7834975671586867, "grad_norm": 0.07505582273006439, "learning_rate": 7.623662964504696e-06, "loss": 0.0019, "step": 95810 }, { "epoch": 0.7835793433372858, "grad_norm": 0.032926734536886215, "learning_rate": 7.623055446512452e-06, "loss": 0.0011, "step": 95820 }, { "epoch": 0.783661119515885, "grad_norm": 0.16299152374267578, "learning_rate": 7.622447875086434e-06, "loss": 0.0016, "step": 95830 }, { "epoch": 0.7837428956944842, "grad_norm": 0.0548856146633625, "learning_rate": 7.6218402502390205e-06, "loss": 0.0015, "step": 95840 }, { "epoch": 0.7838246718730834, "grad_norm": 0.01919224113225937, "learning_rate": 7.621232571982591e-06, "loss": 0.0008, "step": 95850 }, { "epoch": 0.7839064480516825, "grad_norm": 0.039760809391736984, "learning_rate": 7.620624840329523e-06, "loss": 0.0012, "step": 95860 }, { "epoch": 0.7839882242302817, "grad_norm": 0.05329413339495659, "learning_rate": 7.620017055292197e-06, "loss": 0.0014, "step": 95870 }, { "epoch": 0.7840700004088809, "grad_norm": 0.014474403113126755, "learning_rate": 7.619409216882994e-06, "loss": 0.0007, "step": 95880 }, { "epoch": 0.78415177658748, "grad_norm": 0.04616173729300499, "learning_rate": 7.6188013251142955e-06, "loss": 0.0012, "step": 95890 }, { "epoch": 0.7842335527660792, "grad_norm": 0.008218633010983467, "learning_rate": 7.618193379998487e-06, "loss": 0.0022, "step": 95900 }, { "epoch": 0.7843153289446784, "grad_norm": 0.041444070637226105, "learning_rate": 7.61758538154795e-06, "loss": 0.0025, "step": 95910 }, { "epoch": 0.7843971051232775, "grad_norm": 0.03653478994965553, "learning_rate": 7.61697732977507e-06, "loss": 0.0014, "step": 95920 }, { "epoch": 0.7844788813018768, "grad_norm": 0.1634843647480011, "learning_rate": 7.616369224692236e-06, "loss": 0.0014, "step": 95930 }, { "epoch": 0.784560657480476, "grad_norm": 0.20729076862335205, "learning_rate": 7.615761066311835e-06, "loss": 0.0015, "step": 95940 }, { "epoch": 0.7846424336590752, "grad_norm": 0.06936967372894287, "learning_rate": 7.615152854646254e-06, "loss": 0.001, "step": 95950 }, { "epoch": 0.7847242098376743, "grad_norm": 0.3696775734424591, "learning_rate": 7.614544589707884e-06, "loss": 0.0014, "step": 95960 }, { "epoch": 0.7848059860162735, "grad_norm": 0.056957900524139404, "learning_rate": 7.613936271509115e-06, "loss": 0.0134, "step": 95970 }, { "epoch": 0.7848877621948727, "grad_norm": 0.1075117439031601, "learning_rate": 7.6133279000623395e-06, "loss": 0.0016, "step": 95980 }, { "epoch": 0.7849695383734718, "grad_norm": 0.042948801070451736, "learning_rate": 7.612719475379948e-06, "loss": 0.0014, "step": 95990 }, { "epoch": 0.785051314552071, "grad_norm": 0.03405385836958885, "learning_rate": 7.612110997474342e-06, "loss": 0.0024, "step": 96000 }, { "epoch": 0.7851330907306702, "grad_norm": 0.05070240795612335, "learning_rate": 7.611502466357909e-06, "loss": 0.0021, "step": 96010 }, { "epoch": 0.7852148669092693, "grad_norm": 0.04549655690789223, "learning_rate": 7.610893882043048e-06, "loss": 0.0016, "step": 96020 }, { "epoch": 0.7852966430878685, "grad_norm": 0.028646716848015785, "learning_rate": 7.610285244542156e-06, "loss": 0.0012, "step": 96030 }, { "epoch": 0.7853784192664677, "grad_norm": 0.2898423671722412, "learning_rate": 7.609676553867633e-06, "loss": 0.0017, "step": 96040 }, { "epoch": 0.7854601954450668, "grad_norm": 0.05696876719594002, "learning_rate": 7.609067810031876e-06, "loss": 0.0026, "step": 96050 }, { "epoch": 0.785541971623666, "grad_norm": 0.028529077768325806, "learning_rate": 7.608459013047289e-06, "loss": 0.001, "step": 96060 }, { "epoch": 0.7856237478022652, "grad_norm": 0.07614786177873611, "learning_rate": 7.607850162926269e-06, "loss": 0.0017, "step": 96070 }, { "epoch": 0.7857055239808644, "grad_norm": 0.0036876050289720297, "learning_rate": 7.607241259681224e-06, "loss": 0.0019, "step": 96080 }, { "epoch": 0.7857873001594635, "grad_norm": 0.050158869475126266, "learning_rate": 7.606632303324553e-06, "loss": 0.0018, "step": 96090 }, { "epoch": 0.7858690763380627, "grad_norm": 0.012729237787425518, "learning_rate": 7.606023293868665e-06, "loss": 0.0011, "step": 96100 }, { "epoch": 0.7859508525166619, "grad_norm": 0.09922077506780624, "learning_rate": 7.6054142313259625e-06, "loss": 0.0027, "step": 96110 }, { "epoch": 0.786032628695261, "grad_norm": 0.061386462301015854, "learning_rate": 7.604805115708856e-06, "loss": 0.0014, "step": 96120 }, { "epoch": 0.7861144048738602, "grad_norm": 0.006756200455129147, "learning_rate": 7.604195947029751e-06, "loss": 0.0014, "step": 96130 }, { "epoch": 0.7861961810524594, "grad_norm": 0.07816440612077713, "learning_rate": 7.603586725301059e-06, "loss": 0.001, "step": 96140 }, { "epoch": 0.7862779572310586, "grad_norm": 0.10520391166210175, "learning_rate": 7.602977450535187e-06, "loss": 0.0013, "step": 96150 }, { "epoch": 0.7863597334096578, "grad_norm": 0.06529128551483154, "learning_rate": 7.60236812274455e-06, "loss": 0.0013, "step": 96160 }, { "epoch": 0.786441509588257, "grad_norm": 0.10015774518251419, "learning_rate": 7.601758741941558e-06, "loss": 0.0011, "step": 96170 }, { "epoch": 0.7865232857668562, "grad_norm": 0.025015512481331825, "learning_rate": 7.601149308138628e-06, "loss": 0.002, "step": 96180 }, { "epoch": 0.7866050619454553, "grad_norm": 0.05055650323629379, "learning_rate": 7.600539821348169e-06, "loss": 0.0024, "step": 96190 }, { "epoch": 0.7866868381240545, "grad_norm": 0.05842667073011398, "learning_rate": 7.5999302815826036e-06, "loss": 0.0014, "step": 96200 }, { "epoch": 0.7867686143026537, "grad_norm": 0.022560181096196175, "learning_rate": 7.599320688854341e-06, "loss": 0.0015, "step": 96210 }, { "epoch": 0.7868503904812528, "grad_norm": 0.07198899984359741, "learning_rate": 7.598711043175806e-06, "loss": 0.0016, "step": 96220 }, { "epoch": 0.786932166659852, "grad_norm": 0.04838080704212189, "learning_rate": 7.598101344559414e-06, "loss": 0.0018, "step": 96230 }, { "epoch": 0.7870139428384512, "grad_norm": 0.012263840064406395, "learning_rate": 7.597491593017588e-06, "loss": 0.001, "step": 96240 }, { "epoch": 0.7870957190170503, "grad_norm": 0.10937926918268204, "learning_rate": 7.596881788562742e-06, "loss": 0.0022, "step": 96250 }, { "epoch": 0.7871774951956495, "grad_norm": 0.17280715703964233, "learning_rate": 7.596271931207307e-06, "loss": 0.0013, "step": 96260 }, { "epoch": 0.7872592713742487, "grad_norm": 0.49464163184165955, "learning_rate": 7.5956620209637e-06, "loss": 0.0024, "step": 96270 }, { "epoch": 0.7873410475528478, "grad_norm": 0.12303850054740906, "learning_rate": 7.59505205784435e-06, "loss": 0.0016, "step": 96280 }, { "epoch": 0.787422823731447, "grad_norm": 0.017176298424601555, "learning_rate": 7.594442041861678e-06, "loss": 0.001, "step": 96290 }, { "epoch": 0.7875045999100462, "grad_norm": 0.1953352689743042, "learning_rate": 7.593831973028114e-06, "loss": 0.0013, "step": 96300 }, { "epoch": 0.7875863760886453, "grad_norm": 0.13734310865402222, "learning_rate": 7.593221851356085e-06, "loss": 0.0021, "step": 96310 }, { "epoch": 0.7876681522672445, "grad_norm": 0.03276592865586281, "learning_rate": 7.592611676858017e-06, "loss": 0.002, "step": 96320 }, { "epoch": 0.7877499284458437, "grad_norm": 0.047885920852422714, "learning_rate": 7.592001449546342e-06, "loss": 0.0013, "step": 96330 }, { "epoch": 0.7878317046244429, "grad_norm": 0.03255777060985565, "learning_rate": 7.591391169433493e-06, "loss": 0.0013, "step": 96340 }, { "epoch": 0.787913480803042, "grad_norm": 0.03985992819070816, "learning_rate": 7.590780836531897e-06, "loss": 0.0027, "step": 96350 }, { "epoch": 0.7879952569816413, "grad_norm": 0.02710670232772827, "learning_rate": 7.590170450853992e-06, "loss": 0.0017, "step": 96360 }, { "epoch": 0.7880770331602405, "grad_norm": 0.06910136342048645, "learning_rate": 7.589560012412207e-06, "loss": 0.002, "step": 96370 }, { "epoch": 0.7881588093388396, "grad_norm": 0.045995477586984634, "learning_rate": 7.5889495212189816e-06, "loss": 0.0014, "step": 96380 }, { "epoch": 0.7882405855174388, "grad_norm": 0.058342862874269485, "learning_rate": 7.588338977286749e-06, "loss": 0.002, "step": 96390 }, { "epoch": 0.788322361696038, "grad_norm": 0.12953273952007294, "learning_rate": 7.587728380627948e-06, "loss": 0.0016, "step": 96400 }, { "epoch": 0.7884041378746371, "grad_norm": 0.15291564166545868, "learning_rate": 7.587117731255015e-06, "loss": 0.0016, "step": 96410 }, { "epoch": 0.7884859140532363, "grad_norm": 0.1180720180273056, "learning_rate": 7.586507029180392e-06, "loss": 0.0013, "step": 96420 }, { "epoch": 0.7885676902318355, "grad_norm": 0.01774776726961136, "learning_rate": 7.585896274416519e-06, "loss": 0.0013, "step": 96430 }, { "epoch": 0.7886494664104347, "grad_norm": 0.065824955701828, "learning_rate": 7.585285466975836e-06, "loss": 0.0008, "step": 96440 }, { "epoch": 0.7887312425890338, "grad_norm": 0.045210737735033035, "learning_rate": 7.584674606870786e-06, "loss": 0.0021, "step": 96450 }, { "epoch": 0.788813018767633, "grad_norm": 0.09738867729902267, "learning_rate": 7.584063694113815e-06, "loss": 0.0019, "step": 96460 }, { "epoch": 0.7888947949462322, "grad_norm": 0.12078002095222473, "learning_rate": 7.5834527287173665e-06, "loss": 0.0016, "step": 96470 }, { "epoch": 0.7889765711248313, "grad_norm": 0.044298913329839706, "learning_rate": 7.582841710693885e-06, "loss": 0.0062, "step": 96480 }, { "epoch": 0.7890583473034305, "grad_norm": 0.02848603017628193, "learning_rate": 7.582230640055818e-06, "loss": 0.0013, "step": 96490 }, { "epoch": 0.7891401234820297, "grad_norm": 0.07187865674495697, "learning_rate": 7.581619516815614e-06, "loss": 0.0013, "step": 96500 }, { "epoch": 0.7892218996606288, "grad_norm": 0.011565414257347584, "learning_rate": 7.581008340985722e-06, "loss": 0.0016, "step": 96510 }, { "epoch": 0.789303675839228, "grad_norm": 0.07250802963972092, "learning_rate": 7.580397112578592e-06, "loss": 0.0036, "step": 96520 }, { "epoch": 0.7893854520178272, "grad_norm": 0.04542141035199165, "learning_rate": 7.579785831606676e-06, "loss": 0.0021, "step": 96530 }, { "epoch": 0.7894672281964263, "grad_norm": 0.031179048120975494, "learning_rate": 7.579174498082426e-06, "loss": 0.0034, "step": 96540 }, { "epoch": 0.7895490043750255, "grad_norm": 0.037046995013952255, "learning_rate": 7.578563112018293e-06, "loss": 0.0013, "step": 96550 }, { "epoch": 0.7896307805536247, "grad_norm": 0.023311646655201912, "learning_rate": 7.577951673426736e-06, "loss": 0.0018, "step": 96560 }, { "epoch": 0.789712556732224, "grad_norm": 0.04632905498147011, "learning_rate": 7.577340182320206e-06, "loss": 0.001, "step": 96570 }, { "epoch": 0.7897943329108231, "grad_norm": 0.24743962287902832, "learning_rate": 7.576728638711163e-06, "loss": 0.0015, "step": 96580 }, { "epoch": 0.7898761090894223, "grad_norm": 0.022865990176796913, "learning_rate": 7.5761170426120634e-06, "loss": 0.0011, "step": 96590 }, { "epoch": 0.7899578852680215, "grad_norm": 0.07755618542432785, "learning_rate": 7.575505394035365e-06, "loss": 0.0021, "step": 96600 }, { "epoch": 0.7900396614466206, "grad_norm": 0.15906359255313873, "learning_rate": 7.574893692993527e-06, "loss": 0.0024, "step": 96610 }, { "epoch": 0.7901214376252198, "grad_norm": 0.055661190301179886, "learning_rate": 7.574281939499013e-06, "loss": 0.0013, "step": 96620 }, { "epoch": 0.790203213803819, "grad_norm": 0.2954411506652832, "learning_rate": 7.5736701335642815e-06, "loss": 0.002, "step": 96630 }, { "epoch": 0.7902849899824181, "grad_norm": 0.037490732967853546, "learning_rate": 7.573058275201799e-06, "loss": 0.0021, "step": 96640 }, { "epoch": 0.7903667661610173, "grad_norm": 0.04237654060125351, "learning_rate": 7.572446364424029e-06, "loss": 0.0015, "step": 96650 }, { "epoch": 0.7904485423396165, "grad_norm": 0.09827781468629837, "learning_rate": 7.5718344012434346e-06, "loss": 0.0038, "step": 96660 }, { "epoch": 0.7905303185182156, "grad_norm": 0.14635442197322845, "learning_rate": 7.571222385672481e-06, "loss": 0.0021, "step": 96670 }, { "epoch": 0.7906120946968148, "grad_norm": 0.02008740045130253, "learning_rate": 7.57061031772364e-06, "loss": 0.0022, "step": 96680 }, { "epoch": 0.790693870875414, "grad_norm": 0.06283927708864212, "learning_rate": 7.569998197409376e-06, "loss": 0.0011, "step": 96690 }, { "epoch": 0.7907756470540132, "grad_norm": 0.06635507941246033, "learning_rate": 7.56938602474216e-06, "loss": 0.0016, "step": 96700 }, { "epoch": 0.7908574232326123, "grad_norm": 0.06566780805587769, "learning_rate": 7.568773799734464e-06, "loss": 0.0014, "step": 96710 }, { "epoch": 0.7909391994112115, "grad_norm": 0.009248840622603893, "learning_rate": 7.568161522398755e-06, "loss": 0.0042, "step": 96720 }, { "epoch": 0.7910209755898107, "grad_norm": 0.12996014952659607, "learning_rate": 7.567549192747509e-06, "loss": 0.0023, "step": 96730 }, { "epoch": 0.7911027517684098, "grad_norm": 0.02198811247944832, "learning_rate": 7.5669368107932e-06, "loss": 0.0021, "step": 96740 }, { "epoch": 0.791184527947009, "grad_norm": 0.04449164494872093, "learning_rate": 7.5663243765483e-06, "loss": 0.0014, "step": 96750 }, { "epoch": 0.7912663041256082, "grad_norm": 0.1550726294517517, "learning_rate": 7.5657118900252865e-06, "loss": 0.0026, "step": 96760 }, { "epoch": 0.7913480803042073, "grad_norm": 0.02370004542171955, "learning_rate": 7.565099351236638e-06, "loss": 0.0015, "step": 96770 }, { "epoch": 0.7914298564828065, "grad_norm": 0.06908780336380005, "learning_rate": 7.56448676019483e-06, "loss": 0.0016, "step": 96780 }, { "epoch": 0.7915116326614058, "grad_norm": 0.03380589187145233, "learning_rate": 7.563874116912342e-06, "loss": 0.0011, "step": 96790 }, { "epoch": 0.791593408840005, "grad_norm": 0.037972159683704376, "learning_rate": 7.5632614214016535e-06, "loss": 0.0015, "step": 96800 }, { "epoch": 0.7916751850186041, "grad_norm": 0.09396262466907501, "learning_rate": 7.5626486736752454e-06, "loss": 0.0018, "step": 96810 }, { "epoch": 0.7917569611972033, "grad_norm": 0.03441665321588516, "learning_rate": 7.562035873745603e-06, "loss": 0.0012, "step": 96820 }, { "epoch": 0.7918387373758025, "grad_norm": 0.08683289587497711, "learning_rate": 7.561423021625207e-06, "loss": 0.0036, "step": 96830 }, { "epoch": 0.7919205135544016, "grad_norm": 0.017043769359588623, "learning_rate": 7.560810117326542e-06, "loss": 0.001, "step": 96840 }, { "epoch": 0.7920022897330008, "grad_norm": 0.016273178160190582, "learning_rate": 7.560197160862093e-06, "loss": 0.0014, "step": 96850 }, { "epoch": 0.7920840659116, "grad_norm": 0.057314690202474594, "learning_rate": 7.559584152244348e-06, "loss": 0.002, "step": 96860 }, { "epoch": 0.7921658420901991, "grad_norm": 0.047382038086652756, "learning_rate": 7.558971091485791e-06, "loss": 0.0018, "step": 96870 }, { "epoch": 0.7922476182687983, "grad_norm": 0.17501546442508698, "learning_rate": 7.558357978598915e-06, "loss": 0.0023, "step": 96880 }, { "epoch": 0.7923293944473975, "grad_norm": 0.059187304228544235, "learning_rate": 7.5577448135962065e-06, "loss": 0.0015, "step": 96890 }, { "epoch": 0.7924111706259966, "grad_norm": 0.003213284770026803, "learning_rate": 7.557131596490157e-06, "loss": 0.0012, "step": 96900 }, { "epoch": 0.7924929468045958, "grad_norm": 0.022320734336972237, "learning_rate": 7.5565183272932586e-06, "loss": 0.0007, "step": 96910 }, { "epoch": 0.792574722983195, "grad_norm": 0.03146963566541672, "learning_rate": 7.5559050060180025e-06, "loss": 0.0014, "step": 96920 }, { "epoch": 0.7926564991617941, "grad_norm": 0.019491873681545258, "learning_rate": 7.555291632676886e-06, "loss": 0.0014, "step": 96930 }, { "epoch": 0.7927382753403933, "grad_norm": 0.044917430728673935, "learning_rate": 7.5546782072824e-06, "loss": 0.0028, "step": 96940 }, { "epoch": 0.7928200515189925, "grad_norm": 0.0758703425526619, "learning_rate": 7.554064729847044e-06, "loss": 0.0026, "step": 96950 }, { "epoch": 0.7929018276975917, "grad_norm": 0.02159973792731762, "learning_rate": 7.553451200383313e-06, "loss": 0.0012, "step": 96960 }, { "epoch": 0.7929836038761908, "grad_norm": 0.07211658358573914, "learning_rate": 7.552837618903704e-06, "loss": 0.0017, "step": 96970 }, { "epoch": 0.79306538005479, "grad_norm": 0.29380491375923157, "learning_rate": 7.552223985420719e-06, "loss": 0.0026, "step": 96980 }, { "epoch": 0.7931471562333892, "grad_norm": 0.08375415951013565, "learning_rate": 7.551610299946857e-06, "loss": 0.002, "step": 96990 }, { "epoch": 0.7932289324119884, "grad_norm": 0.0793963074684143, "learning_rate": 7.550996562494619e-06, "loss": 0.0017, "step": 97000 }, { "epoch": 0.7933107085905876, "grad_norm": 0.04270555078983307, "learning_rate": 7.550382773076508e-06, "loss": 0.0014, "step": 97010 }, { "epoch": 0.7933924847691868, "grad_norm": 0.09374815225601196, "learning_rate": 7.549768931705026e-06, "loss": 0.0011, "step": 97020 }, { "epoch": 0.793474260947786, "grad_norm": 0.013902168720960617, "learning_rate": 7.549155038392678e-06, "loss": 0.0019, "step": 97030 }, { "epoch": 0.7935560371263851, "grad_norm": 0.1413252204656601, "learning_rate": 7.548541093151971e-06, "loss": 0.002, "step": 97040 }, { "epoch": 0.7936378133049843, "grad_norm": 0.023158902302384377, "learning_rate": 7.5479270959954085e-06, "loss": 0.0016, "step": 97050 }, { "epoch": 0.7937195894835835, "grad_norm": 0.06092539802193642, "learning_rate": 7.547313046935502e-06, "loss": 0.0017, "step": 97060 }, { "epoch": 0.7938013656621826, "grad_norm": 0.06917550414800644, "learning_rate": 7.546698945984757e-06, "loss": 0.0033, "step": 97070 }, { "epoch": 0.7938831418407818, "grad_norm": 0.030174946412444115, "learning_rate": 7.546084793155685e-06, "loss": 0.0034, "step": 97080 }, { "epoch": 0.793964918019381, "grad_norm": 0.03387168049812317, "learning_rate": 7.545470588460797e-06, "loss": 0.0007, "step": 97090 }, { "epoch": 0.7940466941979801, "grad_norm": 0.061235807836055756, "learning_rate": 7.544856331912602e-06, "loss": 0.0018, "step": 97100 }, { "epoch": 0.7941284703765793, "grad_norm": 0.0459212101995945, "learning_rate": 7.544242023523616e-06, "loss": 0.0015, "step": 97110 }, { "epoch": 0.7942102465551785, "grad_norm": 0.06670627743005753, "learning_rate": 7.543627663306351e-06, "loss": 0.0014, "step": 97120 }, { "epoch": 0.7942920227337776, "grad_norm": 0.08935212343931198, "learning_rate": 7.543013251273324e-06, "loss": 0.0023, "step": 97130 }, { "epoch": 0.7943737989123768, "grad_norm": 0.09223093092441559, "learning_rate": 7.542398787437051e-06, "loss": 0.0026, "step": 97140 }, { "epoch": 0.794455575090976, "grad_norm": 0.05362635478377342, "learning_rate": 7.541784271810046e-06, "loss": 0.0014, "step": 97150 }, { "epoch": 0.7945373512695751, "grad_norm": 0.12081380933523178, "learning_rate": 7.54116970440483e-06, "loss": 0.0019, "step": 97160 }, { "epoch": 0.7946191274481743, "grad_norm": 0.03358647972345352, "learning_rate": 7.540555085233922e-06, "loss": 0.0013, "step": 97170 }, { "epoch": 0.7947009036267735, "grad_norm": 0.01312971580773592, "learning_rate": 7.539940414309841e-06, "loss": 0.0022, "step": 97180 }, { "epoch": 0.7947826798053726, "grad_norm": 0.12093247473239899, "learning_rate": 7.5393256916451105e-06, "loss": 0.002, "step": 97190 }, { "epoch": 0.7948644559839718, "grad_norm": 0.03221052139997482, "learning_rate": 7.538710917252251e-06, "loss": 0.0025, "step": 97200 }, { "epoch": 0.7949462321625711, "grad_norm": 0.06482167541980743, "learning_rate": 7.5380960911437874e-06, "loss": 0.0009, "step": 97210 }, { "epoch": 0.7950280083411703, "grad_norm": 0.052912045270204544, "learning_rate": 7.537481213332242e-06, "loss": 0.002, "step": 97220 }, { "epoch": 0.7951097845197694, "grad_norm": 0.054058987647295, "learning_rate": 7.536866283830142e-06, "loss": 0.002, "step": 97230 }, { "epoch": 0.7951915606983686, "grad_norm": 0.05067291110754013, "learning_rate": 7.536251302650013e-06, "loss": 0.0023, "step": 97240 }, { "epoch": 0.7952733368769678, "grad_norm": 0.03725070506334305, "learning_rate": 7.535636269804385e-06, "loss": 0.0016, "step": 97250 }, { "epoch": 0.7953551130555669, "grad_norm": 0.07390396296977997, "learning_rate": 7.535021185305784e-06, "loss": 0.0006, "step": 97260 }, { "epoch": 0.7954368892341661, "grad_norm": 0.13684071600437164, "learning_rate": 7.534406049166742e-06, "loss": 0.0023, "step": 97270 }, { "epoch": 0.7955186654127653, "grad_norm": 0.004087713081389666, "learning_rate": 7.533790861399788e-06, "loss": 0.0014, "step": 97280 }, { "epoch": 0.7956004415913644, "grad_norm": 0.018316131085157394, "learning_rate": 7.533175622017455e-06, "loss": 0.0015, "step": 97290 }, { "epoch": 0.7956822177699636, "grad_norm": 0.03866387903690338, "learning_rate": 7.5325603310322735e-06, "loss": 0.0017, "step": 97300 }, { "epoch": 0.7957639939485628, "grad_norm": 0.12488853186368942, "learning_rate": 7.531944988456781e-06, "loss": 0.0019, "step": 97310 }, { "epoch": 0.795845770127162, "grad_norm": 0.050595588982105255, "learning_rate": 7.531329594303511e-06, "loss": 0.002, "step": 97320 }, { "epoch": 0.7959275463057611, "grad_norm": 0.11720416694879532, "learning_rate": 7.5307141485850014e-06, "loss": 0.0015, "step": 97330 }, { "epoch": 0.7960093224843603, "grad_norm": 0.04755751043558121, "learning_rate": 7.530098651313784e-06, "loss": 0.0016, "step": 97340 }, { "epoch": 0.7960910986629595, "grad_norm": 0.04863480105996132, "learning_rate": 7.529483102502404e-06, "loss": 0.0023, "step": 97350 }, { "epoch": 0.7961728748415586, "grad_norm": 0.1036110669374466, "learning_rate": 7.528867502163394e-06, "loss": 0.0019, "step": 97360 }, { "epoch": 0.7962546510201578, "grad_norm": 0.02220476232469082, "learning_rate": 7.528251850309299e-06, "loss": 0.001, "step": 97370 }, { "epoch": 0.796336427198757, "grad_norm": 0.019248127937316895, "learning_rate": 7.527636146952659e-06, "loss": 0.0018, "step": 97380 }, { "epoch": 0.7964182033773561, "grad_norm": 0.02431035228073597, "learning_rate": 7.527020392106017e-06, "loss": 0.0032, "step": 97390 }, { "epoch": 0.7964999795559553, "grad_norm": 0.3308769166469574, "learning_rate": 7.526404585781913e-06, "loss": 0.002, "step": 97400 }, { "epoch": 0.7965817557345545, "grad_norm": 0.021998198702931404, "learning_rate": 7.525788727992896e-06, "loss": 0.0013, "step": 97410 }, { "epoch": 0.7966635319131536, "grad_norm": 0.048361122608184814, "learning_rate": 7.525172818751507e-06, "loss": 0.0028, "step": 97420 }, { "epoch": 0.7967453080917529, "grad_norm": 0.08862130343914032, "learning_rate": 7.524556858070298e-06, "loss": 0.0027, "step": 97430 }, { "epoch": 0.7968270842703521, "grad_norm": 0.03358327969908714, "learning_rate": 7.523940845961813e-06, "loss": 0.0016, "step": 97440 }, { "epoch": 0.7969088604489513, "grad_norm": 0.06831242889165878, "learning_rate": 7.523324782438601e-06, "loss": 0.0027, "step": 97450 }, { "epoch": 0.7969906366275504, "grad_norm": 0.03229419142007828, "learning_rate": 7.522708667513212e-06, "loss": 0.0018, "step": 97460 }, { "epoch": 0.7970724128061496, "grad_norm": 0.03428957983851433, "learning_rate": 7.522092501198197e-06, "loss": 0.0013, "step": 97470 }, { "epoch": 0.7971541889847488, "grad_norm": 0.04639863595366478, "learning_rate": 7.521476283506108e-06, "loss": 0.0026, "step": 97480 }, { "epoch": 0.7972359651633479, "grad_norm": 0.05473144352436066, "learning_rate": 7.520860014449497e-06, "loss": 0.0019, "step": 97490 }, { "epoch": 0.7973177413419471, "grad_norm": 0.058347344398498535, "learning_rate": 7.520243694040918e-06, "loss": 0.0015, "step": 97500 }, { "epoch": 0.7973995175205463, "grad_norm": 0.07934046536684036, "learning_rate": 7.519627322292928e-06, "loss": 0.0015, "step": 97510 }, { "epoch": 0.7974812936991454, "grad_norm": 0.013533203862607479, "learning_rate": 7.51901089921808e-06, "loss": 0.0013, "step": 97520 }, { "epoch": 0.7975630698777446, "grad_norm": 0.01381996925920248, "learning_rate": 7.518394424828934e-06, "loss": 0.0021, "step": 97530 }, { "epoch": 0.7976448460563438, "grad_norm": 0.13711246848106384, "learning_rate": 7.517777899138046e-06, "loss": 0.0022, "step": 97540 }, { "epoch": 0.797726622234943, "grad_norm": 0.03501152619719505, "learning_rate": 7.517161322157974e-06, "loss": 0.0043, "step": 97550 }, { "epoch": 0.7978083984135421, "grad_norm": 0.04081515967845917, "learning_rate": 7.516544693901282e-06, "loss": 0.0026, "step": 97560 }, { "epoch": 0.7978901745921413, "grad_norm": 0.15264153480529785, "learning_rate": 7.515928014380528e-06, "loss": 0.0016, "step": 97570 }, { "epoch": 0.7979719507707405, "grad_norm": 0.0809212327003479, "learning_rate": 7.515311283608275e-06, "loss": 0.0018, "step": 97580 }, { "epoch": 0.7980537269493396, "grad_norm": 0.028668275102972984, "learning_rate": 7.514694501597086e-06, "loss": 0.0019, "step": 97590 }, { "epoch": 0.7981355031279388, "grad_norm": 0.04539119079709053, "learning_rate": 7.514077668359527e-06, "loss": 0.0016, "step": 97600 }, { "epoch": 0.798217279306538, "grad_norm": 0.02501104213297367, "learning_rate": 7.513460783908162e-06, "loss": 0.0009, "step": 97610 }, { "epoch": 0.7982990554851371, "grad_norm": 0.06215076893568039, "learning_rate": 7.5128438482555584e-06, "loss": 0.0016, "step": 97620 }, { "epoch": 0.7983808316637363, "grad_norm": 0.0414145328104496, "learning_rate": 7.5122268614142836e-06, "loss": 0.0013, "step": 97630 }, { "epoch": 0.7984626078423356, "grad_norm": 0.13902457058429718, "learning_rate": 7.5116098233969035e-06, "loss": 0.0014, "step": 97640 }, { "epoch": 0.7985443840209347, "grad_norm": 0.19963078200817108, "learning_rate": 7.5109927342159916e-06, "loss": 0.0026, "step": 97650 }, { "epoch": 0.7986261601995339, "grad_norm": 0.0069772908464074135, "learning_rate": 7.510375593884116e-06, "loss": 0.0013, "step": 97660 }, { "epoch": 0.7987079363781331, "grad_norm": 0.03012079745531082, "learning_rate": 7.5097584024138495e-06, "loss": 0.0023, "step": 97670 }, { "epoch": 0.7987897125567323, "grad_norm": 0.03597269207239151, "learning_rate": 7.5091411598177634e-06, "loss": 0.0032, "step": 97680 }, { "epoch": 0.7988714887353314, "grad_norm": 0.1215532049536705, "learning_rate": 7.508523866108432e-06, "loss": 0.0023, "step": 97690 }, { "epoch": 0.7989532649139306, "grad_norm": 0.1489132046699524, "learning_rate": 7.507906521298432e-06, "loss": 0.0018, "step": 97700 }, { "epoch": 0.7990350410925298, "grad_norm": 0.02331823855638504, "learning_rate": 7.507289125400337e-06, "loss": 0.0012, "step": 97710 }, { "epoch": 0.7991168172711289, "grad_norm": 0.10659895092248917, "learning_rate": 7.506671678426722e-06, "loss": 0.0032, "step": 97720 }, { "epoch": 0.7991985934497281, "grad_norm": 0.034801285713911057, "learning_rate": 7.506054180390171e-06, "loss": 0.003, "step": 97730 }, { "epoch": 0.7992803696283273, "grad_norm": 0.021061083301901817, "learning_rate": 7.505436631303258e-06, "loss": 0.0013, "step": 97740 }, { "epoch": 0.7993621458069264, "grad_norm": 0.050040844827890396, "learning_rate": 7.504819031178565e-06, "loss": 0.0017, "step": 97750 }, { "epoch": 0.7994439219855256, "grad_norm": 0.08443506807088852, "learning_rate": 7.504201380028671e-06, "loss": 0.0013, "step": 97760 }, { "epoch": 0.7995256981641248, "grad_norm": 0.06688939034938812, "learning_rate": 7.503583677866159e-06, "loss": 0.0022, "step": 97770 }, { "epoch": 0.799607474342724, "grad_norm": 0.03249901905655861, "learning_rate": 7.502965924703614e-06, "loss": 0.0011, "step": 97780 }, { "epoch": 0.7996892505213231, "grad_norm": 0.040841758251190186, "learning_rate": 7.502348120553618e-06, "loss": 0.0013, "step": 97790 }, { "epoch": 0.7997710266999223, "grad_norm": 0.06090517342090607, "learning_rate": 7.501730265428757e-06, "loss": 0.0014, "step": 97800 }, { "epoch": 0.7998528028785215, "grad_norm": 0.08616235852241516, "learning_rate": 7.501112359341618e-06, "loss": 0.0071, "step": 97810 }, { "epoch": 0.7999345790571206, "grad_norm": 0.026503030210733414, "learning_rate": 7.5004944023047856e-06, "loss": 0.0009, "step": 97820 }, { "epoch": 0.8000163552357198, "grad_norm": 0.04145899787545204, "learning_rate": 7.499876394330849e-06, "loss": 0.0011, "step": 97830 }, { "epoch": 0.800098131414319, "grad_norm": 0.05600389093160629, "learning_rate": 7.4992583354324e-06, "loss": 0.0022, "step": 97840 }, { "epoch": 0.8001799075929182, "grad_norm": 0.04798727110028267, "learning_rate": 7.498640225622027e-06, "loss": 0.0018, "step": 97850 }, { "epoch": 0.8002616837715174, "grad_norm": 0.030564863234758377, "learning_rate": 7.498022064912321e-06, "loss": 0.0011, "step": 97860 }, { "epoch": 0.8003434599501166, "grad_norm": 0.06322754919528961, "learning_rate": 7.497403853315875e-06, "loss": 0.0015, "step": 97870 }, { "epoch": 0.8004252361287157, "grad_norm": 0.029361464083194733, "learning_rate": 7.4967855908452825e-06, "loss": 0.0025, "step": 97880 }, { "epoch": 0.8005070123073149, "grad_norm": 0.05423925444483757, "learning_rate": 7.496167277513139e-06, "loss": 0.0028, "step": 97890 }, { "epoch": 0.8005887884859141, "grad_norm": 0.04723159596323967, "learning_rate": 7.495548913332038e-06, "loss": 0.0014, "step": 97900 }, { "epoch": 0.8006705646645133, "grad_norm": 0.04092499613761902, "learning_rate": 7.494930498314577e-06, "loss": 0.0016, "step": 97910 }, { "epoch": 0.8007523408431124, "grad_norm": 0.03353371098637581, "learning_rate": 7.494312032473355e-06, "loss": 0.0006, "step": 97920 }, { "epoch": 0.8008341170217116, "grad_norm": 0.049444861710071564, "learning_rate": 7.493693515820969e-06, "loss": 0.0051, "step": 97930 }, { "epoch": 0.8009158932003108, "grad_norm": 0.08934671431779861, "learning_rate": 7.493074948370018e-06, "loss": 0.0017, "step": 97940 }, { "epoch": 0.8009976693789099, "grad_norm": 0.01785139925777912, "learning_rate": 7.492456330133106e-06, "loss": 0.0015, "step": 97950 }, { "epoch": 0.8010794455575091, "grad_norm": 0.11896920949220657, "learning_rate": 7.491837661122829e-06, "loss": 0.0024, "step": 97960 }, { "epoch": 0.8011612217361083, "grad_norm": 0.11075820028781891, "learning_rate": 7.491218941351797e-06, "loss": 0.0019, "step": 97970 }, { "epoch": 0.8012429979147074, "grad_norm": 0.1958935707807541, "learning_rate": 7.490600170832609e-06, "loss": 0.0028, "step": 97980 }, { "epoch": 0.8013247740933066, "grad_norm": 0.024024948477745056, "learning_rate": 7.489981349577871e-06, "loss": 0.0017, "step": 97990 }, { "epoch": 0.8014065502719058, "grad_norm": 0.11627771705389023, "learning_rate": 7.489362477600188e-06, "loss": 0.0012, "step": 98000 }, { "epoch": 0.8014883264505049, "grad_norm": 0.08364278078079224, "learning_rate": 7.488743554912169e-06, "loss": 0.0015, "step": 98010 }, { "epoch": 0.8015701026291041, "grad_norm": 0.0026514807250350714, "learning_rate": 7.4881245815264204e-06, "loss": 0.0006, "step": 98020 }, { "epoch": 0.8016518788077033, "grad_norm": 0.058305807411670685, "learning_rate": 7.4875055574555526e-06, "loss": 0.0013, "step": 98030 }, { "epoch": 0.8017336549863024, "grad_norm": 0.14429299533367157, "learning_rate": 7.486886482712174e-06, "loss": 0.0021, "step": 98040 }, { "epoch": 0.8018154311649016, "grad_norm": 0.01125721912831068, "learning_rate": 7.486267357308896e-06, "loss": 0.0015, "step": 98050 }, { "epoch": 0.8018972073435008, "grad_norm": 0.07783470302820206, "learning_rate": 7.485648181258331e-06, "loss": 0.0008, "step": 98060 }, { "epoch": 0.8019789835221001, "grad_norm": 0.03377486765384674, "learning_rate": 7.485028954573092e-06, "loss": 0.0021, "step": 98070 }, { "epoch": 0.8020607597006992, "grad_norm": 0.08840518444776535, "learning_rate": 7.484409677265792e-06, "loss": 0.0014, "step": 98080 }, { "epoch": 0.8021425358792984, "grad_norm": 0.022498514503240585, "learning_rate": 7.4837903493490494e-06, "loss": 0.0017, "step": 98090 }, { "epoch": 0.8022243120578976, "grad_norm": 0.008316394872963428, "learning_rate": 7.483170970835478e-06, "loss": 0.0018, "step": 98100 }, { "epoch": 0.8023060882364967, "grad_norm": 0.047308869659900665, "learning_rate": 7.482551541737696e-06, "loss": 0.0036, "step": 98110 }, { "epoch": 0.8023878644150959, "grad_norm": 0.06824633479118347, "learning_rate": 7.48193206206832e-06, "loss": 0.0173, "step": 98120 }, { "epoch": 0.8024696405936951, "grad_norm": 0.037571173161268234, "learning_rate": 7.481312531839972e-06, "loss": 0.0013, "step": 98130 }, { "epoch": 0.8025514167722942, "grad_norm": 0.028896033763885498, "learning_rate": 7.4806929510652696e-06, "loss": 0.0012, "step": 98140 }, { "epoch": 0.8026331929508934, "grad_norm": 0.030673615634441376, "learning_rate": 7.480073319756835e-06, "loss": 0.0009, "step": 98150 }, { "epoch": 0.8027149691294926, "grad_norm": 0.007216393016278744, "learning_rate": 7.479453637927292e-06, "loss": 0.0012, "step": 98160 }, { "epoch": 0.8027967453080918, "grad_norm": 0.04039888083934784, "learning_rate": 7.478833905589263e-06, "loss": 0.0011, "step": 98170 }, { "epoch": 0.8028785214866909, "grad_norm": 0.06157094985246658, "learning_rate": 7.478214122755372e-06, "loss": 0.0016, "step": 98180 }, { "epoch": 0.8029602976652901, "grad_norm": 0.037096090614795685, "learning_rate": 7.477594289438245e-06, "loss": 0.0007, "step": 98190 }, { "epoch": 0.8030420738438893, "grad_norm": 0.13071461021900177, "learning_rate": 7.476974405650508e-06, "loss": 0.0017, "step": 98200 }, { "epoch": 0.8031238500224884, "grad_norm": 0.02195931412279606, "learning_rate": 7.47635447140479e-06, "loss": 0.0019, "step": 98210 }, { "epoch": 0.8032056262010876, "grad_norm": 0.028690064325928688, "learning_rate": 7.475734486713719e-06, "loss": 0.0018, "step": 98220 }, { "epoch": 0.8032874023796868, "grad_norm": 0.11352777481079102, "learning_rate": 7.475114451589922e-06, "loss": 0.0021, "step": 98230 }, { "epoch": 0.8033691785582859, "grad_norm": 0.011788670904934406, "learning_rate": 7.474494366046032e-06, "loss": 0.0011, "step": 98240 }, { "epoch": 0.8034509547368851, "grad_norm": 0.07476304471492767, "learning_rate": 7.473874230094682e-06, "loss": 0.0015, "step": 98250 }, { "epoch": 0.8035327309154843, "grad_norm": 0.00936722382903099, "learning_rate": 7.473254043748501e-06, "loss": 0.0008, "step": 98260 }, { "epoch": 0.8036145070940834, "grad_norm": 0.058936022222042084, "learning_rate": 7.472633807020128e-06, "loss": 0.0016, "step": 98270 }, { "epoch": 0.8036962832726827, "grad_norm": 0.02544388361275196, "learning_rate": 7.472013519922192e-06, "loss": 0.0015, "step": 98280 }, { "epoch": 0.8037780594512819, "grad_norm": 0.053343869745731354, "learning_rate": 7.471393182467333e-06, "loss": 0.0014, "step": 98290 }, { "epoch": 0.8038598356298811, "grad_norm": 0.1059168204665184, "learning_rate": 7.470772794668184e-06, "loss": 0.0035, "step": 98300 }, { "epoch": 0.8039416118084802, "grad_norm": 0.03217753395438194, "learning_rate": 7.470152356537385e-06, "loss": 0.0015, "step": 98310 }, { "epoch": 0.8040233879870794, "grad_norm": 0.07134006172418594, "learning_rate": 7.469531868087575e-06, "loss": 0.0016, "step": 98320 }, { "epoch": 0.8041051641656786, "grad_norm": 0.08676878362894058, "learning_rate": 7.4689113293313945e-06, "loss": 0.002, "step": 98330 }, { "epoch": 0.8041869403442777, "grad_norm": 0.018863074481487274, "learning_rate": 7.4682907402814835e-06, "loss": 0.0014, "step": 98340 }, { "epoch": 0.8042687165228769, "grad_norm": 0.018107891082763672, "learning_rate": 7.467670100950483e-06, "loss": 0.0024, "step": 98350 }, { "epoch": 0.8043504927014761, "grad_norm": 0.04779057949781418, "learning_rate": 7.467049411351036e-06, "loss": 0.0023, "step": 98360 }, { "epoch": 0.8044322688800752, "grad_norm": 0.14556875824928284, "learning_rate": 7.4664286714957876e-06, "loss": 0.0014, "step": 98370 }, { "epoch": 0.8045140450586744, "grad_norm": 0.08274409919977188, "learning_rate": 7.465807881397382e-06, "loss": 0.002, "step": 98380 }, { "epoch": 0.8045958212372736, "grad_norm": 0.002513034036383033, "learning_rate": 7.4651870410684665e-06, "loss": 0.0015, "step": 98390 }, { "epoch": 0.8046775974158727, "grad_norm": 0.00304386462084949, "learning_rate": 7.464566150521688e-06, "loss": 0.0019, "step": 98400 }, { "epoch": 0.8047593735944719, "grad_norm": 0.014302130788564682, "learning_rate": 7.463945209769692e-06, "loss": 0.0017, "step": 98410 }, { "epoch": 0.8048411497730711, "grad_norm": 0.11245591938495636, "learning_rate": 7.46332421882513e-06, "loss": 0.0016, "step": 98420 }, { "epoch": 0.8049229259516703, "grad_norm": 0.03281201049685478, "learning_rate": 7.462703177700651e-06, "loss": 0.0017, "step": 98430 }, { "epoch": 0.8050047021302694, "grad_norm": 0.034053754061460495, "learning_rate": 7.462082086408906e-06, "loss": 0.0008, "step": 98440 }, { "epoch": 0.8050864783088686, "grad_norm": 0.037057943642139435, "learning_rate": 7.461460944962548e-06, "loss": 0.0029, "step": 98450 }, { "epoch": 0.8051682544874678, "grad_norm": 0.44084781408309937, "learning_rate": 7.460839753374231e-06, "loss": 0.0012, "step": 98460 }, { "epoch": 0.8052500306660669, "grad_norm": 0.10404996573925018, "learning_rate": 7.460218511656607e-06, "loss": 0.0019, "step": 98470 }, { "epoch": 0.8053318068446661, "grad_norm": 0.06142672896385193, "learning_rate": 7.4595972198223325e-06, "loss": 0.0012, "step": 98480 }, { "epoch": 0.8054135830232654, "grad_norm": 0.033612653613090515, "learning_rate": 7.458975877884062e-06, "loss": 0.0035, "step": 98490 }, { "epoch": 0.8054953592018645, "grad_norm": 0.21335884928703308, "learning_rate": 7.458354485854457e-06, "loss": 0.0021, "step": 98500 }, { "epoch": 0.8055771353804637, "grad_norm": 0.03243471309542656, "learning_rate": 7.45773304374617e-06, "loss": 0.0016, "step": 98510 }, { "epoch": 0.8056589115590629, "grad_norm": 0.04174702614545822, "learning_rate": 7.457111551571865e-06, "loss": 0.0012, "step": 98520 }, { "epoch": 0.805740687737662, "grad_norm": 0.008883744478225708, "learning_rate": 7.4564900093442e-06, "loss": 0.0017, "step": 98530 }, { "epoch": 0.8058224639162612, "grad_norm": 0.018172891810536385, "learning_rate": 7.455868417075837e-06, "loss": 0.0006, "step": 98540 }, { "epoch": 0.8059042400948604, "grad_norm": 0.05031011253595352, "learning_rate": 7.455246774779436e-06, "loss": 0.0013, "step": 98550 }, { "epoch": 0.8059860162734596, "grad_norm": 0.026287518441677094, "learning_rate": 7.4546250824676646e-06, "loss": 0.0006, "step": 98560 }, { "epoch": 0.8060677924520587, "grad_norm": 0.06384099274873734, "learning_rate": 7.454003340153184e-06, "loss": 0.0033, "step": 98570 }, { "epoch": 0.8061495686306579, "grad_norm": 0.052956193685531616, "learning_rate": 7.4533815478486615e-06, "loss": 0.0015, "step": 98580 }, { "epoch": 0.8062313448092571, "grad_norm": 0.022024668753147125, "learning_rate": 7.452759705566762e-06, "loss": 0.0019, "step": 98590 }, { "epoch": 0.8063131209878562, "grad_norm": 0.07291548699140549, "learning_rate": 7.452137813320155e-06, "loss": 0.0013, "step": 98600 }, { "epoch": 0.8063948971664554, "grad_norm": 0.041512928903102875, "learning_rate": 7.451515871121505e-06, "loss": 0.0018, "step": 98610 }, { "epoch": 0.8064766733450546, "grad_norm": 0.11341299116611481, "learning_rate": 7.450893878983486e-06, "loss": 0.0017, "step": 98620 }, { "epoch": 0.8065584495236537, "grad_norm": 0.07963123172521591, "learning_rate": 7.450271836918765e-06, "loss": 0.0018, "step": 98630 }, { "epoch": 0.8066402257022529, "grad_norm": 0.05994035676121712, "learning_rate": 7.4496497449400175e-06, "loss": 0.0015, "step": 98640 }, { "epoch": 0.8067220018808521, "grad_norm": 0.033627402037382126, "learning_rate": 7.449027603059912e-06, "loss": 0.0016, "step": 98650 }, { "epoch": 0.8068037780594512, "grad_norm": 0.03664712607860565, "learning_rate": 7.448405411291124e-06, "loss": 0.0007, "step": 98660 }, { "epoch": 0.8068855542380504, "grad_norm": 0.023404238745570183, "learning_rate": 7.4477831696463254e-06, "loss": 0.0016, "step": 98670 }, { "epoch": 0.8069673304166496, "grad_norm": 0.0410505048930645, "learning_rate": 7.447160878138196e-06, "loss": 0.0013, "step": 98680 }, { "epoch": 0.8070491065952488, "grad_norm": 0.024804197251796722, "learning_rate": 7.446538536779411e-06, "loss": 0.0015, "step": 98690 }, { "epoch": 0.8071308827738479, "grad_norm": 0.037794459611177444, "learning_rate": 7.445916145582646e-06, "loss": 0.0019, "step": 98700 }, { "epoch": 0.8072126589524472, "grad_norm": 0.0894758552312851, "learning_rate": 7.445293704560581e-06, "loss": 0.0013, "step": 98710 }, { "epoch": 0.8072944351310464, "grad_norm": 0.014205535873770714, "learning_rate": 7.444671213725897e-06, "loss": 0.0015, "step": 98720 }, { "epoch": 0.8073762113096455, "grad_norm": 0.010636383667588234, "learning_rate": 7.444048673091271e-06, "loss": 0.0016, "step": 98730 }, { "epoch": 0.8074579874882447, "grad_norm": 0.06618047505617142, "learning_rate": 7.443426082669389e-06, "loss": 0.0015, "step": 98740 }, { "epoch": 0.8075397636668439, "grad_norm": 0.04672010615468025, "learning_rate": 7.44280344247293e-06, "loss": 0.0021, "step": 98750 }, { "epoch": 0.807621539845443, "grad_norm": 0.033887870609760284, "learning_rate": 7.44218075251458e-06, "loss": 0.0011, "step": 98760 }, { "epoch": 0.8077033160240422, "grad_norm": 0.07586242258548737, "learning_rate": 7.441558012807024e-06, "loss": 0.0019, "step": 98770 }, { "epoch": 0.8077850922026414, "grad_norm": 0.06950490921735764, "learning_rate": 7.440935223362946e-06, "loss": 0.0011, "step": 98780 }, { "epoch": 0.8078668683812406, "grad_norm": 0.15040096640586853, "learning_rate": 7.440312384195032e-06, "loss": 0.0013, "step": 98790 }, { "epoch": 0.8079486445598397, "grad_norm": 0.001823732629418373, "learning_rate": 7.439689495315973e-06, "loss": 0.0011, "step": 98800 }, { "epoch": 0.8080304207384389, "grad_norm": 0.015158092603087425, "learning_rate": 7.439066556738454e-06, "loss": 0.0015, "step": 98810 }, { "epoch": 0.8081121969170381, "grad_norm": 0.09400004148483276, "learning_rate": 7.438443568475168e-06, "loss": 0.0014, "step": 98820 }, { "epoch": 0.8081939730956372, "grad_norm": 0.032760847359895706, "learning_rate": 7.437820530538804e-06, "loss": 0.0013, "step": 98830 }, { "epoch": 0.8082757492742364, "grad_norm": 0.05170270428061485, "learning_rate": 7.437197442942056e-06, "loss": 0.0014, "step": 98840 }, { "epoch": 0.8083575254528356, "grad_norm": 0.08494019508361816, "learning_rate": 7.4365743056976125e-06, "loss": 0.0018, "step": 98850 }, { "epoch": 0.8084393016314347, "grad_norm": 0.04571569710969925, "learning_rate": 7.435951118818171e-06, "loss": 0.0018, "step": 98860 }, { "epoch": 0.8085210778100339, "grad_norm": 0.0764850527048111, "learning_rate": 7.435327882316424e-06, "loss": 0.0015, "step": 98870 }, { "epoch": 0.8086028539886331, "grad_norm": 0.03601120039820671, "learning_rate": 7.43470459620507e-06, "loss": 0.0016, "step": 98880 }, { "epoch": 0.8086846301672322, "grad_norm": 0.005814670119434595, "learning_rate": 7.434081260496804e-06, "loss": 0.0012, "step": 98890 }, { "epoch": 0.8087664063458314, "grad_norm": 0.07146141678094864, "learning_rate": 7.433457875204324e-06, "loss": 0.0018, "step": 98900 }, { "epoch": 0.8088481825244306, "grad_norm": 0.02817944623529911, "learning_rate": 7.432834440340328e-06, "loss": 0.0014, "step": 98910 }, { "epoch": 0.8089299587030299, "grad_norm": 0.13966608047485352, "learning_rate": 7.4322109559175194e-06, "loss": 0.0019, "step": 98920 }, { "epoch": 0.809011734881629, "grad_norm": 0.1448795646429062, "learning_rate": 7.431587421948594e-06, "loss": 0.002, "step": 98930 }, { "epoch": 0.8090935110602282, "grad_norm": 0.041050806641578674, "learning_rate": 7.430963838446259e-06, "loss": 0.0014, "step": 98940 }, { "epoch": 0.8091752872388274, "grad_norm": 0.04590952396392822, "learning_rate": 7.430340205423213e-06, "loss": 0.0013, "step": 98950 }, { "epoch": 0.8092570634174265, "grad_norm": 0.012505777180194855, "learning_rate": 7.429716522892162e-06, "loss": 0.0019, "step": 98960 }, { "epoch": 0.8093388395960257, "grad_norm": 0.11909721791744232, "learning_rate": 7.429092790865809e-06, "loss": 0.0013, "step": 98970 }, { "epoch": 0.8094206157746249, "grad_norm": 0.030832579359412193, "learning_rate": 7.428469009356863e-06, "loss": 0.0009, "step": 98980 }, { "epoch": 0.809502391953224, "grad_norm": 0.03740069642663002, "learning_rate": 7.4278451783780296e-06, "loss": 0.0014, "step": 98990 }, { "epoch": 0.8095841681318232, "grad_norm": 0.020027223974466324, "learning_rate": 7.427221297942015e-06, "loss": 0.0012, "step": 99000 }, { "epoch": 0.8096659443104224, "grad_norm": 0.05472042039036751, "learning_rate": 7.426597368061531e-06, "loss": 0.0071, "step": 99010 }, { "epoch": 0.8097477204890215, "grad_norm": 0.07404245436191559, "learning_rate": 7.4259733887492855e-06, "loss": 0.0008, "step": 99020 }, { "epoch": 0.8098294966676207, "grad_norm": 0.012453935109078884, "learning_rate": 7.42534936001799e-06, "loss": 0.0014, "step": 99030 }, { "epoch": 0.8099112728462199, "grad_norm": 0.028838584199547768, "learning_rate": 7.424725281880357e-06, "loss": 0.0014, "step": 99040 }, { "epoch": 0.8099930490248191, "grad_norm": 0.015653586015105247, "learning_rate": 7.424101154349098e-06, "loss": 0.001, "step": 99050 }, { "epoch": 0.8100748252034182, "grad_norm": 0.04518793895840645, "learning_rate": 7.42347697743693e-06, "loss": 0.0019, "step": 99060 }, { "epoch": 0.8101566013820174, "grad_norm": 0.033685389906167984, "learning_rate": 7.4228527511565655e-06, "loss": 0.0029, "step": 99070 }, { "epoch": 0.8102383775606166, "grad_norm": 0.0960293859243393, "learning_rate": 7.422228475520721e-06, "loss": 0.0014, "step": 99080 }, { "epoch": 0.8103201537392157, "grad_norm": 0.04415266588330269, "learning_rate": 7.421604150542112e-06, "loss": 0.0014, "step": 99090 }, { "epoch": 0.8104019299178149, "grad_norm": 0.24839000403881073, "learning_rate": 7.42097977623346e-06, "loss": 0.0021, "step": 99100 }, { "epoch": 0.8104837060964141, "grad_norm": 0.02508779615163803, "learning_rate": 7.42035535260748e-06, "loss": 0.0014, "step": 99110 }, { "epoch": 0.8105654822750132, "grad_norm": 0.018485601991415024, "learning_rate": 7.419730879676896e-06, "loss": 0.0021, "step": 99120 }, { "epoch": 0.8106472584536125, "grad_norm": 0.0679490715265274, "learning_rate": 7.419106357454427e-06, "loss": 0.0019, "step": 99130 }, { "epoch": 0.8107290346322117, "grad_norm": 0.03337160125374794, "learning_rate": 7.418481785952796e-06, "loss": 0.0013, "step": 99140 }, { "epoch": 0.8108108108108109, "grad_norm": 0.14098620414733887, "learning_rate": 7.417857165184723e-06, "loss": 0.0029, "step": 99150 }, { "epoch": 0.81089258698941, "grad_norm": 0.033714860677719116, "learning_rate": 7.417232495162935e-06, "loss": 0.0024, "step": 99160 }, { "epoch": 0.8109743631680092, "grad_norm": 0.015354262664914131, "learning_rate": 7.416607775900156e-06, "loss": 0.002, "step": 99170 }, { "epoch": 0.8110561393466084, "grad_norm": 0.034227751195430756, "learning_rate": 7.415983007409113e-06, "loss": 0.0011, "step": 99180 }, { "epoch": 0.8111379155252075, "grad_norm": 0.036530330777168274, "learning_rate": 7.415358189702532e-06, "loss": 0.0016, "step": 99190 }, { "epoch": 0.8112196917038067, "grad_norm": 0.16899792850017548, "learning_rate": 7.414733322793142e-06, "loss": 0.0016, "step": 99200 }, { "epoch": 0.8113014678824059, "grad_norm": 4.341464042663574, "learning_rate": 7.41410840669367e-06, "loss": 0.0024, "step": 99210 }, { "epoch": 0.811383244061005, "grad_norm": 0.1005290150642395, "learning_rate": 7.4134834414168484e-06, "loss": 0.002, "step": 99220 }, { "epoch": 0.8114650202396042, "grad_norm": 0.06814909726381302, "learning_rate": 7.412858426975407e-06, "loss": 0.0018, "step": 99230 }, { "epoch": 0.8115467964182034, "grad_norm": 0.004653541371226311, "learning_rate": 7.412233363382078e-06, "loss": 0.0009, "step": 99240 }, { "epoch": 0.8116285725968025, "grad_norm": 0.048203881829977036, "learning_rate": 7.411608250649596e-06, "loss": 0.0014, "step": 99250 }, { "epoch": 0.8117103487754017, "grad_norm": 0.06771823018789291, "learning_rate": 7.410983088790693e-06, "loss": 0.0013, "step": 99260 }, { "epoch": 0.8117921249540009, "grad_norm": 0.060682956129312515, "learning_rate": 7.410357877818103e-06, "loss": 0.002, "step": 99270 }, { "epoch": 0.8118739011326, "grad_norm": 0.02806716039776802, "learning_rate": 7.409732617744565e-06, "loss": 0.0042, "step": 99280 }, { "epoch": 0.8119556773111992, "grad_norm": 0.046989500522613525, "learning_rate": 7.409107308582814e-06, "loss": 0.0012, "step": 99290 }, { "epoch": 0.8120374534897984, "grad_norm": 0.11271841824054718, "learning_rate": 7.408481950345591e-06, "loss": 0.0011, "step": 99300 }, { "epoch": 0.8121192296683976, "grad_norm": 0.19013208150863647, "learning_rate": 7.4078565430456325e-06, "loss": 0.0018, "step": 99310 }, { "epoch": 0.8122010058469967, "grad_norm": 0.117242231965065, "learning_rate": 7.407231086695679e-06, "loss": 0.0012, "step": 99320 }, { "epoch": 0.8122827820255959, "grad_norm": 0.01463858038187027, "learning_rate": 7.40660558130847e-06, "loss": 0.0014, "step": 99330 }, { "epoch": 0.8123645582041951, "grad_norm": 0.07127523422241211, "learning_rate": 7.4059800268967505e-06, "loss": 0.0024, "step": 99340 }, { "epoch": 0.8124463343827943, "grad_norm": 0.09507807344198227, "learning_rate": 7.405354423473261e-06, "loss": 0.0016, "step": 99350 }, { "epoch": 0.8125281105613935, "grad_norm": 0.013767987489700317, "learning_rate": 7.404728771050747e-06, "loss": 0.0016, "step": 99360 }, { "epoch": 0.8126098867399927, "grad_norm": 0.08896959573030472, "learning_rate": 7.404103069641954e-06, "loss": 0.0012, "step": 99370 }, { "epoch": 0.8126916629185919, "grad_norm": 0.002786295022815466, "learning_rate": 7.403477319259629e-06, "loss": 0.0016, "step": 99380 }, { "epoch": 0.812773439097191, "grad_norm": 0.017485501244664192, "learning_rate": 7.402851519916514e-06, "loss": 0.0011, "step": 99390 }, { "epoch": 0.8128552152757902, "grad_norm": 1.9296342134475708, "learning_rate": 7.402225671625362e-06, "loss": 0.0017, "step": 99400 }, { "epoch": 0.8129369914543894, "grad_norm": 0.18589656054973602, "learning_rate": 7.40159977439892e-06, "loss": 0.0014, "step": 99410 }, { "epoch": 0.8130187676329885, "grad_norm": 0.058332327753305435, "learning_rate": 7.4009738282499386e-06, "loss": 0.0016, "step": 99420 }, { "epoch": 0.8131005438115877, "grad_norm": 0.04131993278861046, "learning_rate": 7.40034783319117e-06, "loss": 0.0015, "step": 99430 }, { "epoch": 0.8131823199901869, "grad_norm": 0.08702602237462997, "learning_rate": 7.399721789235363e-06, "loss": 0.0016, "step": 99440 }, { "epoch": 0.813264096168786, "grad_norm": 0.07165315747261047, "learning_rate": 7.3990956963952735e-06, "loss": 0.0024, "step": 99450 }, { "epoch": 0.8133458723473852, "grad_norm": 0.07754529267549515, "learning_rate": 7.3984695546836546e-06, "loss": 0.002, "step": 99460 }, { "epoch": 0.8134276485259844, "grad_norm": 0.0037920004688203335, "learning_rate": 7.397843364113261e-06, "loss": 0.0011, "step": 99470 }, { "epoch": 0.8135094247045835, "grad_norm": 0.011641736142337322, "learning_rate": 7.39721712469685e-06, "loss": 0.0012, "step": 99480 }, { "epoch": 0.8135912008831827, "grad_norm": 0.14328110218048096, "learning_rate": 7.3965908364471765e-06, "loss": 0.0011, "step": 99490 }, { "epoch": 0.8136729770617819, "grad_norm": 0.026018256321549416, "learning_rate": 7.395964499377e-06, "loss": 0.0031, "step": 99500 }, { "epoch": 0.813754753240381, "grad_norm": 0.06816237419843674, "learning_rate": 7.3953381134990776e-06, "loss": 0.0011, "step": 99510 }, { "epoch": 0.8138365294189802, "grad_norm": 0.015149150043725967, "learning_rate": 7.394711678826173e-06, "loss": 0.0016, "step": 99520 }, { "epoch": 0.8139183055975794, "grad_norm": 0.18346770107746124, "learning_rate": 7.3940851953710435e-06, "loss": 0.0009, "step": 99530 }, { "epoch": 0.8140000817761786, "grad_norm": 0.01433136872947216, "learning_rate": 7.393458663146453e-06, "loss": 0.0017, "step": 99540 }, { "epoch": 0.8140818579547777, "grad_norm": 0.04732279106974602, "learning_rate": 7.392832082165165e-06, "loss": 0.0035, "step": 99550 }, { "epoch": 0.814163634133377, "grad_norm": 0.03504921495914459, "learning_rate": 7.392205452439942e-06, "loss": 0.0009, "step": 99560 }, { "epoch": 0.8142454103119762, "grad_norm": 0.02202940359711647, "learning_rate": 7.391578773983549e-06, "loss": 0.0016, "step": 99570 }, { "epoch": 0.8143271864905753, "grad_norm": 0.05344052612781525, "learning_rate": 7.390952046808753e-06, "loss": 0.0017, "step": 99580 }, { "epoch": 0.8144089626691745, "grad_norm": 0.04002618044614792, "learning_rate": 7.390325270928319e-06, "loss": 0.0015, "step": 99590 }, { "epoch": 0.8144907388477737, "grad_norm": 0.03988203778862953, "learning_rate": 7.389698446355018e-06, "loss": 0.0011, "step": 99600 }, { "epoch": 0.8145725150263728, "grad_norm": 0.00869265291839838, "learning_rate": 7.389071573101616e-06, "loss": 0.0017, "step": 99610 }, { "epoch": 0.814654291204972, "grad_norm": 0.08272339403629303, "learning_rate": 7.388444651180885e-06, "loss": 0.0013, "step": 99620 }, { "epoch": 0.8147360673835712, "grad_norm": 0.0026003331877291203, "learning_rate": 7.387817680605594e-06, "loss": 0.001, "step": 99630 }, { "epoch": 0.8148178435621704, "grad_norm": 0.09458962827920914, "learning_rate": 7.387190661388517e-06, "loss": 0.0018, "step": 99640 }, { "epoch": 0.8148996197407695, "grad_norm": 0.0993826612830162, "learning_rate": 7.386563593542425e-06, "loss": 0.0047, "step": 99650 }, { "epoch": 0.8149813959193687, "grad_norm": 0.045993972569704056, "learning_rate": 7.385936477080094e-06, "loss": 0.0012, "step": 99660 }, { "epoch": 0.8150631720979679, "grad_norm": 0.018166126683354378, "learning_rate": 7.385309312014296e-06, "loss": 0.001, "step": 99670 }, { "epoch": 0.815144948276567, "grad_norm": 0.05419052019715309, "learning_rate": 7.3846820983578094e-06, "loss": 0.0017, "step": 99680 }, { "epoch": 0.8152267244551662, "grad_norm": 0.05904610827565193, "learning_rate": 7.384054836123409e-06, "loss": 0.0017, "step": 99690 }, { "epoch": 0.8153085006337654, "grad_norm": 0.0950470119714737, "learning_rate": 7.383427525323875e-06, "loss": 0.001, "step": 99700 }, { "epoch": 0.8153902768123645, "grad_norm": 0.04969453439116478, "learning_rate": 7.382800165971984e-06, "loss": 0.001, "step": 99710 }, { "epoch": 0.8154720529909637, "grad_norm": 0.011290113441646099, "learning_rate": 7.382172758080517e-06, "loss": 0.0012, "step": 99720 }, { "epoch": 0.8155538291695629, "grad_norm": 0.00497602391988039, "learning_rate": 7.3815453016622555e-06, "loss": 0.0009, "step": 99730 }, { "epoch": 0.815635605348162, "grad_norm": 0.034797850996255875, "learning_rate": 7.3809177967299795e-06, "loss": 0.0009, "step": 99740 }, { "epoch": 0.8157173815267612, "grad_norm": 0.07781219482421875, "learning_rate": 7.380290243296473e-06, "loss": 0.0012, "step": 99750 }, { "epoch": 0.8157991577053604, "grad_norm": 0.038503922522068024, "learning_rate": 7.379662641374518e-06, "loss": 0.0011, "step": 99760 }, { "epoch": 0.8158809338839595, "grad_norm": 0.07880889624357224, "learning_rate": 7.379034990976902e-06, "loss": 0.0025, "step": 99770 }, { "epoch": 0.8159627100625588, "grad_norm": 0.10480540990829468, "learning_rate": 7.37840729211641e-06, "loss": 0.0023, "step": 99780 }, { "epoch": 0.816044486241158, "grad_norm": 0.044019002467393875, "learning_rate": 7.377779544805828e-06, "loss": 0.0013, "step": 99790 }, { "epoch": 0.8161262624197572, "grad_norm": 0.0031665267888456583, "learning_rate": 7.377151749057944e-06, "loss": 0.0017, "step": 99800 }, { "epoch": 0.8162080385983563, "grad_norm": 0.10787748545408249, "learning_rate": 7.376523904885547e-06, "loss": 0.0023, "step": 99810 }, { "epoch": 0.8162898147769555, "grad_norm": 0.053645648062229156, "learning_rate": 7.375896012301425e-06, "loss": 0.0016, "step": 99820 }, { "epoch": 0.8163715909555547, "grad_norm": 0.007141947280615568, "learning_rate": 7.375268071318369e-06, "loss": 0.0034, "step": 99830 }, { "epoch": 0.8164533671341538, "grad_norm": 0.046848464757204056, "learning_rate": 7.374640081949174e-06, "loss": 0.0018, "step": 99840 }, { "epoch": 0.816535143312753, "grad_norm": 0.06115704029798508, "learning_rate": 7.3740120442066296e-06, "loss": 0.0041, "step": 99850 }, { "epoch": 0.8166169194913522, "grad_norm": 0.024208365008234978, "learning_rate": 7.37338395810353e-06, "loss": 0.0024, "step": 99860 }, { "epoch": 0.8166986956699513, "grad_norm": 0.03371575474739075, "learning_rate": 7.37275582365267e-06, "loss": 0.0011, "step": 99870 }, { "epoch": 0.8167804718485505, "grad_norm": 0.12302466481924057, "learning_rate": 7.372127640866846e-06, "loss": 0.0017, "step": 99880 }, { "epoch": 0.8168622480271497, "grad_norm": 0.0011795562459155917, "learning_rate": 7.371499409758852e-06, "loss": 0.001, "step": 99890 }, { "epoch": 0.8169440242057489, "grad_norm": 0.02777837961912155, "learning_rate": 7.370871130341489e-06, "loss": 0.0015, "step": 99900 }, { "epoch": 0.817025800384348, "grad_norm": 0.01225301530212164, "learning_rate": 7.370242802627554e-06, "loss": 0.001, "step": 99910 }, { "epoch": 0.8171075765629472, "grad_norm": 0.0589834488928318, "learning_rate": 7.369614426629845e-06, "loss": 0.0022, "step": 99920 }, { "epoch": 0.8171893527415464, "grad_norm": 0.05077548325061798, "learning_rate": 7.368986002361166e-06, "loss": 0.0017, "step": 99930 }, { "epoch": 0.8172711289201455, "grad_norm": 0.010837029665708542, "learning_rate": 7.368357529834315e-06, "loss": 0.0024, "step": 99940 }, { "epoch": 0.8173529050987447, "grad_norm": 0.00725974515080452, "learning_rate": 7.367729009062097e-06, "loss": 0.0021, "step": 99950 }, { "epoch": 0.8174346812773439, "grad_norm": 0.11364906281232834, "learning_rate": 7.367100440057313e-06, "loss": 0.0026, "step": 99960 }, { "epoch": 0.817516457455943, "grad_norm": 0.06155050918459892, "learning_rate": 7.36647182283277e-06, "loss": 0.0018, "step": 99970 }, { "epoch": 0.8175982336345422, "grad_norm": 0.05117125064134598, "learning_rate": 7.365843157401272e-06, "loss": 0.0018, "step": 99980 }, { "epoch": 0.8176800098131415, "grad_norm": 0.018102483823895454, "learning_rate": 7.365214443775627e-06, "loss": 0.0015, "step": 99990 }, { "epoch": 0.8177617859917407, "grad_norm": 0.11769493669271469, "learning_rate": 7.364585681968639e-06, "loss": 0.0023, "step": 100000 }, { "epoch": 0.8178435621703398, "grad_norm": 0.03793647512793541, "learning_rate": 7.363956871993121e-06, "loss": 0.0013, "step": 100010 }, { "epoch": 0.817925338348939, "grad_norm": 0.04270920157432556, "learning_rate": 7.363328013861878e-06, "loss": 0.0324, "step": 100020 }, { "epoch": 0.8180071145275382, "grad_norm": 0.009222193621098995, "learning_rate": 7.362699107587724e-06, "loss": 0.002, "step": 100030 }, { "epoch": 0.8180888907061373, "grad_norm": 0.01495645847171545, "learning_rate": 7.362070153183468e-06, "loss": 0.0016, "step": 100040 }, { "epoch": 0.8181706668847365, "grad_norm": 0.06970420479774475, "learning_rate": 7.361441150661923e-06, "loss": 0.0019, "step": 100050 }, { "epoch": 0.8182524430633357, "grad_norm": 0.14074382185935974, "learning_rate": 7.3608121000359025e-06, "loss": 0.0012, "step": 100060 }, { "epoch": 0.8183342192419348, "grad_norm": 0.08400578796863556, "learning_rate": 7.36018300131822e-06, "loss": 0.002, "step": 100070 }, { "epoch": 0.818415995420534, "grad_norm": 0.054517295211553574, "learning_rate": 7.359553854521691e-06, "loss": 0.003, "step": 100080 }, { "epoch": 0.8184977715991332, "grad_norm": 0.04210257530212402, "learning_rate": 7.358924659659134e-06, "loss": 0.0009, "step": 100090 }, { "epoch": 0.8185795477777323, "grad_norm": 0.05009615048766136, "learning_rate": 7.358295416743362e-06, "loss": 0.0015, "step": 100100 }, { "epoch": 0.8186613239563315, "grad_norm": 0.07373321056365967, "learning_rate": 7.357666125787197e-06, "loss": 0.0019, "step": 100110 }, { "epoch": 0.8187431001349307, "grad_norm": 0.1857115924358368, "learning_rate": 7.357036786803455e-06, "loss": 0.0016, "step": 100120 }, { "epoch": 0.8188248763135298, "grad_norm": 0.010356053709983826, "learning_rate": 7.356407399804959e-06, "loss": 0.001, "step": 100130 }, { "epoch": 0.818906652492129, "grad_norm": 0.026369694620370865, "learning_rate": 7.355777964804529e-06, "loss": 0.0015, "step": 100140 }, { "epoch": 0.8189884286707282, "grad_norm": 0.05611282214522362, "learning_rate": 7.355148481814986e-06, "loss": 0.0028, "step": 100150 }, { "epoch": 0.8190702048493274, "grad_norm": 0.05303254351019859, "learning_rate": 7.354518950849156e-06, "loss": 0.0012, "step": 100160 }, { "epoch": 0.8191519810279265, "grad_norm": 0.09096436947584152, "learning_rate": 7.35388937191986e-06, "loss": 0.0011, "step": 100170 }, { "epoch": 0.8192337572065257, "grad_norm": 0.04091178998351097, "learning_rate": 7.353259745039923e-06, "loss": 0.0016, "step": 100180 }, { "epoch": 0.8193155333851249, "grad_norm": 0.0323190800845623, "learning_rate": 7.352630070222173e-06, "loss": 0.002, "step": 100190 }, { "epoch": 0.8193973095637241, "grad_norm": 0.08029753714799881, "learning_rate": 7.352000347479436e-06, "loss": 0.0011, "step": 100200 }, { "epoch": 0.8194790857423233, "grad_norm": 0.01914237067103386, "learning_rate": 7.3513705768245404e-06, "loss": 0.0027, "step": 100210 }, { "epoch": 0.8195608619209225, "grad_norm": 0.06209805980324745, "learning_rate": 7.350740758270315e-06, "loss": 0.0017, "step": 100220 }, { "epoch": 0.8196426380995216, "grad_norm": 0.06549226492643356, "learning_rate": 7.350110891829589e-06, "loss": 0.0017, "step": 100230 }, { "epoch": 0.8197244142781208, "grad_norm": 0.07642751187086105, "learning_rate": 7.349480977515195e-06, "loss": 0.0011, "step": 100240 }, { "epoch": 0.81980619045672, "grad_norm": 0.03448014706373215, "learning_rate": 7.3488510153399625e-06, "loss": 0.0013, "step": 100250 }, { "epoch": 0.8198879666353192, "grad_norm": 0.0804133266210556, "learning_rate": 7.348221005316726e-06, "loss": 0.0017, "step": 100260 }, { "epoch": 0.8199697428139183, "grad_norm": 0.03930244967341423, "learning_rate": 7.347590947458319e-06, "loss": 0.0028, "step": 100270 }, { "epoch": 0.8200515189925175, "grad_norm": 0.022475987672805786, "learning_rate": 7.346960841777577e-06, "loss": 0.0025, "step": 100280 }, { "epoch": 0.8201332951711167, "grad_norm": 0.08195818960666656, "learning_rate": 7.3463306882873346e-06, "loss": 0.0012, "step": 100290 }, { "epoch": 0.8202150713497158, "grad_norm": 0.02415780909359455, "learning_rate": 7.3457004870004265e-06, "loss": 0.0042, "step": 100300 }, { "epoch": 0.820296847528315, "grad_norm": 0.04901761934161186, "learning_rate": 7.3450702379296954e-06, "loss": 0.0023, "step": 100310 }, { "epoch": 0.8203786237069142, "grad_norm": 0.05139680951833725, "learning_rate": 7.3444399410879765e-06, "loss": 0.0008, "step": 100320 }, { "epoch": 0.8204603998855133, "grad_norm": 0.023213177919387817, "learning_rate": 7.343809596488111e-06, "loss": 0.0021, "step": 100330 }, { "epoch": 0.8205421760641125, "grad_norm": 0.07887712121009827, "learning_rate": 7.343179204142938e-06, "loss": 0.0011, "step": 100340 }, { "epoch": 0.8206239522427117, "grad_norm": 0.03885149210691452, "learning_rate": 7.342548764065302e-06, "loss": 0.0012, "step": 100350 }, { "epoch": 0.8207057284213108, "grad_norm": 0.007533080410212278, "learning_rate": 7.341918276268041e-06, "loss": 0.0008, "step": 100360 }, { "epoch": 0.82078750459991, "grad_norm": 0.03671160340309143, "learning_rate": 7.341287740764001e-06, "loss": 0.0014, "step": 100370 }, { "epoch": 0.8208692807785092, "grad_norm": 0.026567207649350166, "learning_rate": 7.340657157566028e-06, "loss": 0.0013, "step": 100380 }, { "epoch": 0.8209510569571083, "grad_norm": 0.027618607506155968, "learning_rate": 7.3400265266869665e-06, "loss": 0.0018, "step": 100390 }, { "epoch": 0.8210328331357075, "grad_norm": 0.08367017656564713, "learning_rate": 7.339395848139662e-06, "loss": 0.0009, "step": 100400 }, { "epoch": 0.8211146093143067, "grad_norm": 0.03984853997826576, "learning_rate": 7.338765121936962e-06, "loss": 0.0008, "step": 100410 }, { "epoch": 0.821196385492906, "grad_norm": 0.08831505477428436, "learning_rate": 7.338134348091716e-06, "loss": 0.0028, "step": 100420 }, { "epoch": 0.8212781616715051, "grad_norm": 0.08569829910993576, "learning_rate": 7.337503526616773e-06, "loss": 0.0011, "step": 100430 }, { "epoch": 0.8213599378501043, "grad_norm": 0.0077294111251831055, "learning_rate": 7.336872657524982e-06, "loss": 0.002, "step": 100440 }, { "epoch": 0.8214417140287035, "grad_norm": 0.0913141593337059, "learning_rate": 7.3362417408291965e-06, "loss": 0.0185, "step": 100450 }, { "epoch": 0.8215234902073026, "grad_norm": 0.06143813207745552, "learning_rate": 7.335610776542268e-06, "loss": 0.0021, "step": 100460 }, { "epoch": 0.8216052663859018, "grad_norm": 0.1150851771235466, "learning_rate": 7.334979764677049e-06, "loss": 0.0019, "step": 100470 }, { "epoch": 0.821687042564501, "grad_norm": 0.0017398917116224766, "learning_rate": 7.334348705246394e-06, "loss": 0.0022, "step": 100480 }, { "epoch": 0.8217688187431001, "grad_norm": 0.0078072804026305676, "learning_rate": 7.333717598263158e-06, "loss": 0.0019, "step": 100490 }, { "epoch": 0.8218505949216993, "grad_norm": 0.07001884281635284, "learning_rate": 7.3330864437401986e-06, "loss": 0.0012, "step": 100500 }, { "epoch": 0.8219323711002985, "grad_norm": 0.023924650624394417, "learning_rate": 7.33245524169037e-06, "loss": 0.0013, "step": 100510 }, { "epoch": 0.8220141472788977, "grad_norm": 0.0428837351500988, "learning_rate": 7.3318239921265345e-06, "loss": 0.0009, "step": 100520 }, { "epoch": 0.8220959234574968, "grad_norm": 0.03430547937750816, "learning_rate": 7.3311926950615485e-06, "loss": 0.002, "step": 100530 }, { "epoch": 0.822177699636096, "grad_norm": 0.05460203438997269, "learning_rate": 7.330561350508272e-06, "loss": 0.0019, "step": 100540 }, { "epoch": 0.8222594758146952, "grad_norm": 0.08335592597723007, "learning_rate": 7.329929958479567e-06, "loss": 0.0013, "step": 100550 }, { "epoch": 0.8223412519932943, "grad_norm": 0.0031835995614528656, "learning_rate": 7.329298518988293e-06, "loss": 0.0012, "step": 100560 }, { "epoch": 0.8224230281718935, "grad_norm": 0.011360322125256062, "learning_rate": 7.328667032047317e-06, "loss": 0.0009, "step": 100570 }, { "epoch": 0.8225048043504927, "grad_norm": 0.020650992169976234, "learning_rate": 7.3280354976695e-06, "loss": 0.001, "step": 100580 }, { "epoch": 0.8225865805290918, "grad_norm": 0.018565906211733818, "learning_rate": 7.327403915867708e-06, "loss": 0.0011, "step": 100590 }, { "epoch": 0.822668356707691, "grad_norm": 0.008060033433139324, "learning_rate": 7.326772286654804e-06, "loss": 0.0009, "step": 100600 }, { "epoch": 0.8227501328862902, "grad_norm": 0.014577933587133884, "learning_rate": 7.326140610043659e-06, "loss": 0.0011, "step": 100610 }, { "epoch": 0.8228319090648893, "grad_norm": 0.048099398612976074, "learning_rate": 7.325508886047139e-06, "loss": 0.001, "step": 100620 }, { "epoch": 0.8229136852434886, "grad_norm": 0.04624094069004059, "learning_rate": 7.324877114678114e-06, "loss": 0.0022, "step": 100630 }, { "epoch": 0.8229954614220878, "grad_norm": 0.03899966925382614, "learning_rate": 7.324245295949452e-06, "loss": 0.0012, "step": 100640 }, { "epoch": 0.823077237600687, "grad_norm": 0.05071379616856575, "learning_rate": 7.323613429874023e-06, "loss": 0.001, "step": 100650 }, { "epoch": 0.8231590137792861, "grad_norm": 0.020107224583625793, "learning_rate": 7.322981516464699e-06, "loss": 0.0007, "step": 100660 }, { "epoch": 0.8232407899578853, "grad_norm": 0.02167273871600628, "learning_rate": 7.322349555734355e-06, "loss": 0.0028, "step": 100670 }, { "epoch": 0.8233225661364845, "grad_norm": 0.07582823932170868, "learning_rate": 7.321717547695861e-06, "loss": 0.0024, "step": 100680 }, { "epoch": 0.8234043423150836, "grad_norm": 0.011356541886925697, "learning_rate": 7.321085492362094e-06, "loss": 0.0016, "step": 100690 }, { "epoch": 0.8234861184936828, "grad_norm": 0.13242994248867035, "learning_rate": 7.320453389745931e-06, "loss": 0.0032, "step": 100700 }, { "epoch": 0.823567894672282, "grad_norm": 0.0430937334895134, "learning_rate": 7.319821239860245e-06, "loss": 0.0018, "step": 100710 }, { "epoch": 0.8236496708508811, "grad_norm": 0.04972162842750549, "learning_rate": 7.319189042717914e-06, "loss": 0.0022, "step": 100720 }, { "epoch": 0.8237314470294803, "grad_norm": 0.029491527006030083, "learning_rate": 7.318556798331817e-06, "loss": 0.0018, "step": 100730 }, { "epoch": 0.8238132232080795, "grad_norm": 0.0527539998292923, "learning_rate": 7.317924506714833e-06, "loss": 0.0011, "step": 100740 }, { "epoch": 0.8238949993866787, "grad_norm": 0.0647614598274231, "learning_rate": 7.317292167879845e-06, "loss": 0.0012, "step": 100750 }, { "epoch": 0.8239767755652778, "grad_norm": 0.18666748702526093, "learning_rate": 7.31665978183973e-06, "loss": 0.0014, "step": 100760 }, { "epoch": 0.824058551743877, "grad_norm": 0.09714909642934799, "learning_rate": 7.316027348607373e-06, "loss": 0.0017, "step": 100770 }, { "epoch": 0.8241403279224762, "grad_norm": 0.01171827781945467, "learning_rate": 7.315394868195655e-06, "loss": 0.0009, "step": 100780 }, { "epoch": 0.8242221041010753, "grad_norm": 0.0019314242526888847, "learning_rate": 7.3147623406174625e-06, "loss": 0.0036, "step": 100790 }, { "epoch": 0.8243038802796745, "grad_norm": 0.0844803899526596, "learning_rate": 7.314129765885679e-06, "loss": 0.0012, "step": 100800 }, { "epoch": 0.8243856564582737, "grad_norm": 0.07024659961462021, "learning_rate": 7.313497144013193e-06, "loss": 0.0017, "step": 100810 }, { "epoch": 0.8244674326368728, "grad_norm": 0.09614454209804535, "learning_rate": 7.312864475012888e-06, "loss": 0.0026, "step": 100820 }, { "epoch": 0.824549208815472, "grad_norm": 0.07505253702402115, "learning_rate": 7.3122317588976545e-06, "loss": 0.0021, "step": 100830 }, { "epoch": 0.8246309849940713, "grad_norm": 0.03496334329247475, "learning_rate": 7.311598995680379e-06, "loss": 0.0008, "step": 100840 }, { "epoch": 0.8247127611726704, "grad_norm": 0.07128652185201645, "learning_rate": 7.3109661853739545e-06, "loss": 0.0027, "step": 100850 }, { "epoch": 0.8247945373512696, "grad_norm": 0.02498781681060791, "learning_rate": 7.3103333279912695e-06, "loss": 0.0031, "step": 100860 }, { "epoch": 0.8248763135298688, "grad_norm": 0.05843578279018402, "learning_rate": 7.309700423545218e-06, "loss": 0.0021, "step": 100870 }, { "epoch": 0.824958089708468, "grad_norm": 0.041978947818279266, "learning_rate": 7.30906747204869e-06, "loss": 0.0016, "step": 100880 }, { "epoch": 0.8250398658870671, "grad_norm": 0.010852563194930553, "learning_rate": 7.308434473514582e-06, "loss": 0.002, "step": 100890 }, { "epoch": 0.8251216420656663, "grad_norm": 0.07449223846197128, "learning_rate": 7.307801427955786e-06, "loss": 0.0013, "step": 100900 }, { "epoch": 0.8252034182442655, "grad_norm": 0.03824128210544586, "learning_rate": 7.307168335385199e-06, "loss": 0.0018, "step": 100910 }, { "epoch": 0.8252851944228646, "grad_norm": 0.0332094207406044, "learning_rate": 7.306535195815718e-06, "loss": 0.0013, "step": 100920 }, { "epoch": 0.8253669706014638, "grad_norm": 0.017332587391138077, "learning_rate": 7.305902009260243e-06, "loss": 0.0012, "step": 100930 }, { "epoch": 0.825448746780063, "grad_norm": 0.15003293752670288, "learning_rate": 7.305268775731666e-06, "loss": 0.0026, "step": 100940 }, { "epoch": 0.8255305229586621, "grad_norm": 0.0578116849064827, "learning_rate": 7.304635495242892e-06, "loss": 0.0237, "step": 100950 }, { "epoch": 0.8256122991372613, "grad_norm": 0.09922733902931213, "learning_rate": 7.304002167806818e-06, "loss": 0.0024, "step": 100960 }, { "epoch": 0.8256940753158605, "grad_norm": 0.02358810231089592, "learning_rate": 7.30336879343635e-06, "loss": 0.0014, "step": 100970 }, { "epoch": 0.8257758514944596, "grad_norm": 0.05569781735539436, "learning_rate": 7.302735372144384e-06, "loss": 0.0025, "step": 100980 }, { "epoch": 0.8258576276730588, "grad_norm": 0.013182769529521465, "learning_rate": 7.30210190394383e-06, "loss": 0.0008, "step": 100990 }, { "epoch": 0.825939403851658, "grad_norm": 0.16072843968868256, "learning_rate": 7.3014683888475865e-06, "loss": 0.0034, "step": 101000 }, { "epoch": 0.8260211800302572, "grad_norm": 0.08777409791946411, "learning_rate": 7.300834826868562e-06, "loss": 0.0009, "step": 101010 }, { "epoch": 0.8261029562088563, "grad_norm": 0.17390258610248566, "learning_rate": 7.300201218019661e-06, "loss": 0.0011, "step": 101020 }, { "epoch": 0.8261847323874555, "grad_norm": 0.01747114025056362, "learning_rate": 7.299567562313793e-06, "loss": 0.0022, "step": 101030 }, { "epoch": 0.8262665085660547, "grad_norm": 0.005588173400610685, "learning_rate": 7.2989338597638634e-06, "loss": 0.001, "step": 101040 }, { "epoch": 0.8263482847446538, "grad_norm": 0.07670745998620987, "learning_rate": 7.298300110382784e-06, "loss": 0.0017, "step": 101050 }, { "epoch": 0.8264300609232531, "grad_norm": 0.010431366972625256, "learning_rate": 7.297666314183462e-06, "loss": 0.0006, "step": 101060 }, { "epoch": 0.8265118371018523, "grad_norm": 0.04595915973186493, "learning_rate": 7.29703247117881e-06, "loss": 0.0015, "step": 101070 }, { "epoch": 0.8265936132804514, "grad_norm": 0.0332406684756279, "learning_rate": 7.296398581381738e-06, "loss": 0.001, "step": 101080 }, { "epoch": 0.8266753894590506, "grad_norm": 0.11822092533111572, "learning_rate": 7.295764644805162e-06, "loss": 0.0016, "step": 101090 }, { "epoch": 0.8267571656376498, "grad_norm": 0.01929592341184616, "learning_rate": 7.295130661461993e-06, "loss": 0.0019, "step": 101100 }, { "epoch": 0.826838941816249, "grad_norm": 0.03148579224944115, "learning_rate": 7.294496631365149e-06, "loss": 0.0012, "step": 101110 }, { "epoch": 0.8269207179948481, "grad_norm": 0.07221879065036774, "learning_rate": 7.293862554527541e-06, "loss": 0.0011, "step": 101120 }, { "epoch": 0.8270024941734473, "grad_norm": 0.04031185805797577, "learning_rate": 7.2932284309620896e-06, "loss": 0.0017, "step": 101130 }, { "epoch": 0.8270842703520465, "grad_norm": 0.1008060947060585, "learning_rate": 7.29259426068171e-06, "loss": 0.0008, "step": 101140 }, { "epoch": 0.8271660465306456, "grad_norm": 0.001623708987608552, "learning_rate": 7.291960043699322e-06, "loss": 0.0019, "step": 101150 }, { "epoch": 0.8272478227092448, "grad_norm": 0.0776638314127922, "learning_rate": 7.291325780027844e-06, "loss": 0.0015, "step": 101160 }, { "epoch": 0.827329598887844, "grad_norm": 0.03886735066771507, "learning_rate": 7.290691469680201e-06, "loss": 0.001, "step": 101170 }, { "epoch": 0.8274113750664431, "grad_norm": 0.03750606253743172, "learning_rate": 7.290057112669308e-06, "loss": 0.0012, "step": 101180 }, { "epoch": 0.8274931512450423, "grad_norm": 0.04041677713394165, "learning_rate": 7.28942270900809e-06, "loss": 0.0012, "step": 101190 }, { "epoch": 0.8275749274236415, "grad_norm": 0.06349556893110275, "learning_rate": 7.288788258709471e-06, "loss": 0.0021, "step": 101200 }, { "epoch": 0.8276567036022406, "grad_norm": 0.09806328266859055, "learning_rate": 7.2881537617863755e-06, "loss": 0.0022, "step": 101210 }, { "epoch": 0.8277384797808398, "grad_norm": 0.022004663944244385, "learning_rate": 7.2875192182517265e-06, "loss": 0.0019, "step": 101220 }, { "epoch": 0.827820255959439, "grad_norm": 0.1439294070005417, "learning_rate": 7.2868846281184535e-06, "loss": 0.0013, "step": 101230 }, { "epoch": 0.8279020321380381, "grad_norm": 0.02865336649119854, "learning_rate": 7.2862499913994815e-06, "loss": 0.0014, "step": 101240 }, { "epoch": 0.8279838083166373, "grad_norm": 0.04839451238512993, "learning_rate": 7.285615308107738e-06, "loss": 0.0016, "step": 101250 }, { "epoch": 0.8280655844952365, "grad_norm": 0.019898805767297745, "learning_rate": 7.2849805782561514e-06, "loss": 0.002, "step": 101260 }, { "epoch": 0.8281473606738358, "grad_norm": 0.0652594119310379, "learning_rate": 7.284345801857655e-06, "loss": 0.0017, "step": 101270 }, { "epoch": 0.8282291368524349, "grad_norm": 0.04705861210823059, "learning_rate": 7.283710978925177e-06, "loss": 0.0012, "step": 101280 }, { "epoch": 0.8283109130310341, "grad_norm": 0.04362708702683449, "learning_rate": 7.283076109471652e-06, "loss": 0.0009, "step": 101290 }, { "epoch": 0.8283926892096333, "grad_norm": 0.024997998028993607, "learning_rate": 7.2824411935100085e-06, "loss": 0.0017, "step": 101300 }, { "epoch": 0.8284744653882324, "grad_norm": 0.10617388039827347, "learning_rate": 7.281806231053183e-06, "loss": 0.0025, "step": 101310 }, { "epoch": 0.8285562415668316, "grad_norm": 0.07439293712377548, "learning_rate": 7.28117122211411e-06, "loss": 0.0018, "step": 101320 }, { "epoch": 0.8286380177454308, "grad_norm": 0.019262094050645828, "learning_rate": 7.280536166705725e-06, "loss": 0.0016, "step": 101330 }, { "epoch": 0.82871979392403, "grad_norm": 0.06774985790252686, "learning_rate": 7.279901064840964e-06, "loss": 0.0012, "step": 101340 }, { "epoch": 0.8288015701026291, "grad_norm": 0.10610471665859222, "learning_rate": 7.279265916532767e-06, "loss": 0.0016, "step": 101350 }, { "epoch": 0.8288833462812283, "grad_norm": 0.04552796110510826, "learning_rate": 7.27863072179407e-06, "loss": 0.0046, "step": 101360 }, { "epoch": 0.8289651224598275, "grad_norm": 0.03232325240969658, "learning_rate": 7.2779954806378126e-06, "loss": 0.0013, "step": 101370 }, { "epoch": 0.8290468986384266, "grad_norm": 0.04902278631925583, "learning_rate": 7.277360193076936e-06, "loss": 0.0013, "step": 101380 }, { "epoch": 0.8291286748170258, "grad_norm": 0.05227605625987053, "learning_rate": 7.27672485912438e-06, "loss": 0.0009, "step": 101390 }, { "epoch": 0.829210450995625, "grad_norm": 0.07313884049654007, "learning_rate": 7.2760894787930895e-06, "loss": 0.003, "step": 101400 }, { "epoch": 0.8292922271742241, "grad_norm": 0.022101378068327904, "learning_rate": 7.275454052096005e-06, "loss": 0.0011, "step": 101410 }, { "epoch": 0.8293740033528233, "grad_norm": 0.1526128649711609, "learning_rate": 7.274818579046073e-06, "loss": 0.0062, "step": 101420 }, { "epoch": 0.8294557795314225, "grad_norm": 0.04477357864379883, "learning_rate": 7.274183059656236e-06, "loss": 0.0015, "step": 101430 }, { "epoch": 0.8295375557100216, "grad_norm": 0.04167131334543228, "learning_rate": 7.273547493939443e-06, "loss": 0.0034, "step": 101440 }, { "epoch": 0.8296193318886208, "grad_norm": 0.036407504230737686, "learning_rate": 7.272911881908637e-06, "loss": 0.0019, "step": 101450 }, { "epoch": 0.82970110806722, "grad_norm": 0.016885021701455116, "learning_rate": 7.272276223576771e-06, "loss": 0.0012, "step": 101460 }, { "epoch": 0.8297828842458191, "grad_norm": 0.032461103051900864, "learning_rate": 7.27164051895679e-06, "loss": 0.0014, "step": 101470 }, { "epoch": 0.8298646604244184, "grad_norm": 0.026396065950393677, "learning_rate": 7.271004768061647e-06, "loss": 0.0011, "step": 101480 }, { "epoch": 0.8299464366030176, "grad_norm": 0.02210584282875061, "learning_rate": 7.270368970904289e-06, "loss": 0.0018, "step": 101490 }, { "epoch": 0.8300282127816168, "grad_norm": 0.07803651690483093, "learning_rate": 7.269733127497671e-06, "loss": 0.0019, "step": 101500 }, { "epoch": 0.8301099889602159, "grad_norm": 0.05943732708692551, "learning_rate": 7.269097237854743e-06, "loss": 0.0013, "step": 101510 }, { "epoch": 0.8301917651388151, "grad_norm": 0.0476485900580883, "learning_rate": 7.2684613019884596e-06, "loss": 0.0025, "step": 101520 }, { "epoch": 0.8302735413174143, "grad_norm": 0.05255523696541786, "learning_rate": 7.267825319911778e-06, "loss": 0.0015, "step": 101530 }, { "epoch": 0.8303553174960134, "grad_norm": 0.06478128582239151, "learning_rate": 7.267189291637649e-06, "loss": 0.0023, "step": 101540 }, { "epoch": 0.8304370936746126, "grad_norm": 0.05433246120810509, "learning_rate": 7.266553217179031e-06, "loss": 0.0011, "step": 101550 }, { "epoch": 0.8305188698532118, "grad_norm": 0.12972979247570038, "learning_rate": 7.265917096548884e-06, "loss": 0.0031, "step": 101560 }, { "epoch": 0.8306006460318109, "grad_norm": 0.0317847803235054, "learning_rate": 7.2652809297601616e-06, "loss": 0.0014, "step": 101570 }, { "epoch": 0.8306824222104101, "grad_norm": 0.08325634151697159, "learning_rate": 7.264644716825826e-06, "loss": 0.0029, "step": 101580 }, { "epoch": 0.8307641983890093, "grad_norm": 0.02874918095767498, "learning_rate": 7.264008457758836e-06, "loss": 0.001, "step": 101590 }, { "epoch": 0.8308459745676084, "grad_norm": 0.027566466480493546, "learning_rate": 7.263372152572156e-06, "loss": 0.0017, "step": 101600 }, { "epoch": 0.8309277507462076, "grad_norm": 0.03640527278184891, "learning_rate": 7.262735801278743e-06, "loss": 0.0014, "step": 101610 }, { "epoch": 0.8310095269248068, "grad_norm": 0.03126810863614082, "learning_rate": 7.262099403891562e-06, "loss": 0.0031, "step": 101620 }, { "epoch": 0.831091303103406, "grad_norm": 0.02168644778430462, "learning_rate": 7.26146296042358e-06, "loss": 0.001, "step": 101630 }, { "epoch": 0.8311730792820051, "grad_norm": 0.0727812796831131, "learning_rate": 7.260826470887757e-06, "loss": 0.0019, "step": 101640 }, { "epoch": 0.8312548554606043, "grad_norm": 0.042244769632816315, "learning_rate": 7.260189935297063e-06, "loss": 0.002, "step": 101650 }, { "epoch": 0.8313366316392035, "grad_norm": 0.029246659949421883, "learning_rate": 7.259553353664462e-06, "loss": 0.0034, "step": 101660 }, { "epoch": 0.8314184078178026, "grad_norm": 0.029651181772351265, "learning_rate": 7.258916726002922e-06, "loss": 0.0021, "step": 101670 }, { "epoch": 0.8315001839964018, "grad_norm": 0.06856656819581985, "learning_rate": 7.258280052325413e-06, "loss": 0.0016, "step": 101680 }, { "epoch": 0.831581960175001, "grad_norm": 0.11421819031238556, "learning_rate": 7.257643332644903e-06, "loss": 0.0016, "step": 101690 }, { "epoch": 0.8316637363536002, "grad_norm": 0.13791821897029877, "learning_rate": 7.257006566974363e-06, "loss": 0.0021, "step": 101700 }, { "epoch": 0.8317455125321994, "grad_norm": 0.08169833570718765, "learning_rate": 7.256369755326767e-06, "loss": 0.0018, "step": 101710 }, { "epoch": 0.8318272887107986, "grad_norm": 0.006321663502603769, "learning_rate": 7.255732897715082e-06, "loss": 0.0011, "step": 101720 }, { "epoch": 0.8319090648893978, "grad_norm": 0.033458881080150604, "learning_rate": 7.255095994152286e-06, "loss": 0.0013, "step": 101730 }, { "epoch": 0.8319908410679969, "grad_norm": 0.0651574358344078, "learning_rate": 7.25445904465135e-06, "loss": 0.0012, "step": 101740 }, { "epoch": 0.8320726172465961, "grad_norm": 0.09332507103681564, "learning_rate": 7.253822049225252e-06, "loss": 0.0022, "step": 101750 }, { "epoch": 0.8321543934251953, "grad_norm": 0.2142844945192337, "learning_rate": 7.253185007886967e-06, "loss": 0.0033, "step": 101760 }, { "epoch": 0.8322361696037944, "grad_norm": 0.07292383164167404, "learning_rate": 7.252547920649472e-06, "loss": 0.0023, "step": 101770 }, { "epoch": 0.8323179457823936, "grad_norm": 0.11035557836294174, "learning_rate": 7.251910787525744e-06, "loss": 0.0013, "step": 101780 }, { "epoch": 0.8323997219609928, "grad_norm": 0.019222315400838852, "learning_rate": 7.251273608528762e-06, "loss": 0.0015, "step": 101790 }, { "epoch": 0.8324814981395919, "grad_norm": 0.1717061996459961, "learning_rate": 7.25063638367151e-06, "loss": 0.0013, "step": 101800 }, { "epoch": 0.8325632743181911, "grad_norm": 0.11201586574316025, "learning_rate": 7.249999112966962e-06, "loss": 0.0014, "step": 101810 }, { "epoch": 0.8326450504967903, "grad_norm": 0.12139135599136353, "learning_rate": 7.249361796428105e-06, "loss": 0.0019, "step": 101820 }, { "epoch": 0.8327268266753894, "grad_norm": 0.01865875907242298, "learning_rate": 7.248724434067919e-06, "loss": 0.0008, "step": 101830 }, { "epoch": 0.8328086028539886, "grad_norm": 0.009650206193327904, "learning_rate": 7.24808702589939e-06, "loss": 0.002, "step": 101840 }, { "epoch": 0.8328903790325878, "grad_norm": 0.01633201725780964, "learning_rate": 7.2474495719354985e-06, "loss": 0.0013, "step": 101850 }, { "epoch": 0.832972155211187, "grad_norm": 0.13955587148666382, "learning_rate": 7.246812072189234e-06, "loss": 0.001, "step": 101860 }, { "epoch": 0.8330539313897861, "grad_norm": 0.043783221393823624, "learning_rate": 7.24617452667358e-06, "loss": 0.0021, "step": 101870 }, { "epoch": 0.8331357075683853, "grad_norm": 0.05651950463652611, "learning_rate": 7.245536935401527e-06, "loss": 0.0014, "step": 101880 }, { "epoch": 0.8332174837469845, "grad_norm": 0.09842825680971146, "learning_rate": 7.244899298386061e-06, "loss": 0.0014, "step": 101890 }, { "epoch": 0.8332992599255836, "grad_norm": 0.03891829401254654, "learning_rate": 7.244261615640171e-06, "loss": 0.0012, "step": 101900 }, { "epoch": 0.8333810361041829, "grad_norm": 0.0440005399286747, "learning_rate": 7.243623887176847e-06, "loss": 0.0012, "step": 101910 }, { "epoch": 0.8334628122827821, "grad_norm": 0.07532274723052979, "learning_rate": 7.242986113009082e-06, "loss": 0.0019, "step": 101920 }, { "epoch": 0.8335445884613812, "grad_norm": 0.04747449979186058, "learning_rate": 7.242348293149865e-06, "loss": 0.0018, "step": 101930 }, { "epoch": 0.8336263646399804, "grad_norm": 0.053077634423971176, "learning_rate": 7.241710427612192e-06, "loss": 0.0011, "step": 101940 }, { "epoch": 0.8337081408185796, "grad_norm": 0.027584532275795937, "learning_rate": 7.241072516409056e-06, "loss": 0.002, "step": 101950 }, { "epoch": 0.8337899169971787, "grad_norm": 0.05649792402982712, "learning_rate": 7.2404345595534495e-06, "loss": 0.0012, "step": 101960 }, { "epoch": 0.8338716931757779, "grad_norm": 0.026723243296146393, "learning_rate": 7.2397965570583705e-06, "loss": 0.0013, "step": 101970 }, { "epoch": 0.8339534693543771, "grad_norm": 0.13853083550930023, "learning_rate": 7.239158508936816e-06, "loss": 0.0013, "step": 101980 }, { "epoch": 0.8340352455329763, "grad_norm": 0.102935791015625, "learning_rate": 7.2385204152017805e-06, "loss": 0.0019, "step": 101990 }, { "epoch": 0.8341170217115754, "grad_norm": 0.03127928078174591, "learning_rate": 7.237882275866266e-06, "loss": 0.0017, "step": 102000 }, { "epoch": 0.8341987978901746, "grad_norm": 0.1496502310037613, "learning_rate": 7.2372440909432715e-06, "loss": 0.0019, "step": 102010 }, { "epoch": 0.8342805740687738, "grad_norm": 0.06747673451900482, "learning_rate": 7.236605860445796e-06, "loss": 0.002, "step": 102020 }, { "epoch": 0.8343623502473729, "grad_norm": 0.013956922106444836, "learning_rate": 7.235967584386839e-06, "loss": 0.001, "step": 102030 }, { "epoch": 0.8344441264259721, "grad_norm": 0.03836566209793091, "learning_rate": 7.235329262779407e-06, "loss": 0.0018, "step": 102040 }, { "epoch": 0.8345259026045713, "grad_norm": 0.05822264403104782, "learning_rate": 7.2346908956365e-06, "loss": 0.0018, "step": 102050 }, { "epoch": 0.8346076787831704, "grad_norm": 0.0030739319045096636, "learning_rate": 7.2340524829711225e-06, "loss": 0.0015, "step": 102060 }, { "epoch": 0.8346894549617696, "grad_norm": 0.046854112297296524, "learning_rate": 7.233414024796281e-06, "loss": 0.0015, "step": 102070 }, { "epoch": 0.8347712311403688, "grad_norm": 0.06785255670547485, "learning_rate": 7.23277552112498e-06, "loss": 0.0032, "step": 102080 }, { "epoch": 0.8348530073189679, "grad_norm": 0.04610447958111763, "learning_rate": 7.232136971970226e-06, "loss": 0.0014, "step": 102090 }, { "epoch": 0.8349347834975671, "grad_norm": 0.03921421989798546, "learning_rate": 7.231498377345028e-06, "loss": 0.0011, "step": 102100 }, { "epoch": 0.8350165596761663, "grad_norm": 0.02891266532242298, "learning_rate": 7.230859737262394e-06, "loss": 0.0012, "step": 102110 }, { "epoch": 0.8350983358547656, "grad_norm": 0.06571493297815323, "learning_rate": 7.230221051735336e-06, "loss": 0.0013, "step": 102120 }, { "epoch": 0.8351801120333647, "grad_norm": 0.009271493181586266, "learning_rate": 7.22958232077686e-06, "loss": 0.0015, "step": 102130 }, { "epoch": 0.8352618882119639, "grad_norm": 0.07520284503698349, "learning_rate": 7.228943544399981e-06, "loss": 0.0011, "step": 102140 }, { "epoch": 0.8353436643905631, "grad_norm": 0.15395806729793549, "learning_rate": 7.22830472261771e-06, "loss": 0.002, "step": 102150 }, { "epoch": 0.8354254405691622, "grad_norm": 0.06996111571788788, "learning_rate": 7.22766585544306e-06, "loss": 0.0041, "step": 102160 }, { "epoch": 0.8355072167477614, "grad_norm": 0.018670054152607918, "learning_rate": 7.227026942889045e-06, "loss": 0.0011, "step": 102170 }, { "epoch": 0.8355889929263606, "grad_norm": 0.09038649499416351, "learning_rate": 7.2263879849686825e-06, "loss": 0.0012, "step": 102180 }, { "epoch": 0.8356707691049597, "grad_norm": 0.00148522958625108, "learning_rate": 7.225748981694987e-06, "loss": 0.0007, "step": 102190 }, { "epoch": 0.8357525452835589, "grad_norm": 0.05514383316040039, "learning_rate": 7.225109933080975e-06, "loss": 0.0012, "step": 102200 }, { "epoch": 0.8358343214621581, "grad_norm": 0.029661891981959343, "learning_rate": 7.2244708391396645e-06, "loss": 0.0042, "step": 102210 }, { "epoch": 0.8359160976407572, "grad_norm": 0.07182897627353668, "learning_rate": 7.223831699884076e-06, "loss": 0.0023, "step": 102220 }, { "epoch": 0.8359978738193564, "grad_norm": 0.0330551415681839, "learning_rate": 7.223192515327226e-06, "loss": 0.0019, "step": 102230 }, { "epoch": 0.8360796499979556, "grad_norm": 0.08489122241735458, "learning_rate": 7.22255328548214e-06, "loss": 0.0009, "step": 102240 }, { "epoch": 0.8361614261765548, "grad_norm": 0.0322849340736866, "learning_rate": 7.2219140103618364e-06, "loss": 0.0012, "step": 102250 }, { "epoch": 0.8362432023551539, "grad_norm": 0.07496284693479538, "learning_rate": 7.221274689979338e-06, "loss": 0.0017, "step": 102260 }, { "epoch": 0.8363249785337531, "grad_norm": 0.0723157525062561, "learning_rate": 7.220635324347668e-06, "loss": 0.0019, "step": 102270 }, { "epoch": 0.8364067547123523, "grad_norm": 0.043199531733989716, "learning_rate": 7.219995913479853e-06, "loss": 0.0014, "step": 102280 }, { "epoch": 0.8364885308909514, "grad_norm": 0.03447001799941063, "learning_rate": 7.219356457388914e-06, "loss": 0.0015, "step": 102290 }, { "epoch": 0.8365703070695506, "grad_norm": 0.0448235385119915, "learning_rate": 7.218716956087882e-06, "loss": 0.0011, "step": 102300 }, { "epoch": 0.8366520832481498, "grad_norm": 0.030025053769350052, "learning_rate": 7.218077409589783e-06, "loss": 0.001, "step": 102310 }, { "epoch": 0.8367338594267489, "grad_norm": 0.09518610686063766, "learning_rate": 7.217437817907643e-06, "loss": 0.0017, "step": 102320 }, { "epoch": 0.8368156356053481, "grad_norm": 0.07699653506278992, "learning_rate": 7.2167981810544915e-06, "loss": 0.0018, "step": 102330 }, { "epoch": 0.8368974117839474, "grad_norm": 0.01480898167937994, "learning_rate": 7.2161584990433596e-06, "loss": 0.0018, "step": 102340 }, { "epoch": 0.8369791879625466, "grad_norm": 0.04624556750059128, "learning_rate": 7.215518771887277e-06, "loss": 0.0018, "step": 102350 }, { "epoch": 0.8370609641411457, "grad_norm": 0.02921663038432598, "learning_rate": 7.214878999599278e-06, "loss": 0.0019, "step": 102360 }, { "epoch": 0.8371427403197449, "grad_norm": 0.0426996611058712, "learning_rate": 7.214239182192393e-06, "loss": 0.0017, "step": 102370 }, { "epoch": 0.8372245164983441, "grad_norm": 0.03595852851867676, "learning_rate": 7.213599319679656e-06, "loss": 0.0017, "step": 102380 }, { "epoch": 0.8373062926769432, "grad_norm": 0.04107136279344559, "learning_rate": 7.212959412074101e-06, "loss": 0.0019, "step": 102390 }, { "epoch": 0.8373880688555424, "grad_norm": 0.03725883364677429, "learning_rate": 7.212319459388764e-06, "loss": 0.0015, "step": 102400 }, { "epoch": 0.8374698450341416, "grad_norm": 0.029626816511154175, "learning_rate": 7.2116794616366815e-06, "loss": 0.002, "step": 102410 }, { "epoch": 0.8375516212127407, "grad_norm": 0.12422410398721695, "learning_rate": 7.211039418830891e-06, "loss": 0.0015, "step": 102420 }, { "epoch": 0.8376333973913399, "grad_norm": 0.013405713252723217, "learning_rate": 7.21039933098443e-06, "loss": 0.0012, "step": 102430 }, { "epoch": 0.8377151735699391, "grad_norm": 0.11251791566610336, "learning_rate": 7.2097591981103375e-06, "loss": 0.0037, "step": 102440 }, { "epoch": 0.8377969497485382, "grad_norm": 0.03835678845643997, "learning_rate": 7.209119020221654e-06, "loss": 0.0012, "step": 102450 }, { "epoch": 0.8378787259271374, "grad_norm": 0.05088243633508682, "learning_rate": 7.208478797331422e-06, "loss": 0.002, "step": 102460 }, { "epoch": 0.8379605021057366, "grad_norm": 0.024189606308937073, "learning_rate": 7.207838529452678e-06, "loss": 0.0011, "step": 102470 }, { "epoch": 0.8380422782843358, "grad_norm": 0.008208499290049076, "learning_rate": 7.2071982165984725e-06, "loss": 0.0007, "step": 102480 }, { "epoch": 0.8381240544629349, "grad_norm": 0.11095082014799118, "learning_rate": 7.206557858781844e-06, "loss": 0.0014, "step": 102490 }, { "epoch": 0.8382058306415341, "grad_norm": 0.060251638293266296, "learning_rate": 7.205917456015838e-06, "loss": 0.0014, "step": 102500 }, { "epoch": 0.8382876068201333, "grad_norm": 0.019221853464841843, "learning_rate": 7.2052770083135e-06, "loss": 0.001, "step": 102510 }, { "epoch": 0.8383693829987324, "grad_norm": 0.029601341113448143, "learning_rate": 7.204636515687877e-06, "loss": 0.0014, "step": 102520 }, { "epoch": 0.8384511591773316, "grad_norm": 0.034589529037475586, "learning_rate": 7.2039959781520166e-06, "loss": 0.0013, "step": 102530 }, { "epoch": 0.8385329353559308, "grad_norm": 0.04675985500216484, "learning_rate": 7.203355395718967e-06, "loss": 0.0013, "step": 102540 }, { "epoch": 0.83861471153453, "grad_norm": 0.08839640766382217, "learning_rate": 7.202714768401776e-06, "loss": 0.0024, "step": 102550 }, { "epoch": 0.8386964877131292, "grad_norm": 0.05301104485988617, "learning_rate": 7.2020740962134964e-06, "loss": 0.001, "step": 102560 }, { "epoch": 0.8387782638917284, "grad_norm": 0.06217478960752487, "learning_rate": 7.201433379167175e-06, "loss": 0.0017, "step": 102570 }, { "epoch": 0.8388600400703276, "grad_norm": 0.024561012163758278, "learning_rate": 7.200792617275868e-06, "loss": 0.0018, "step": 102580 }, { "epoch": 0.8389418162489267, "grad_norm": 0.016171535477042198, "learning_rate": 7.200151810552625e-06, "loss": 0.0018, "step": 102590 }, { "epoch": 0.8390235924275259, "grad_norm": 0.037150561809539795, "learning_rate": 7.199510959010503e-06, "loss": 0.0014, "step": 102600 }, { "epoch": 0.8391053686061251, "grad_norm": 0.15748636424541473, "learning_rate": 7.198870062662554e-06, "loss": 0.0009, "step": 102610 }, { "epoch": 0.8391871447847242, "grad_norm": 0.029421493411064148, "learning_rate": 7.198229121521836e-06, "loss": 0.0019, "step": 102620 }, { "epoch": 0.8392689209633234, "grad_norm": 0.03523409366607666, "learning_rate": 7.197588135601401e-06, "loss": 0.0022, "step": 102630 }, { "epoch": 0.8393506971419226, "grad_norm": 0.024597838521003723, "learning_rate": 7.196947104914311e-06, "loss": 0.0011, "step": 102640 }, { "epoch": 0.8394324733205217, "grad_norm": 0.05521765351295471, "learning_rate": 7.196306029473623e-06, "loss": 0.0015, "step": 102650 }, { "epoch": 0.8395142494991209, "grad_norm": 0.013712351210415363, "learning_rate": 7.195664909292396e-06, "loss": 0.0016, "step": 102660 }, { "epoch": 0.8395960256777201, "grad_norm": 0.01685906946659088, "learning_rate": 7.19502374438369e-06, "loss": 0.0014, "step": 102670 }, { "epoch": 0.8396778018563192, "grad_norm": 0.03120855987071991, "learning_rate": 7.194382534760566e-06, "loss": 0.0014, "step": 102680 }, { "epoch": 0.8397595780349184, "grad_norm": 0.009504144079983234, "learning_rate": 7.193741280436085e-06, "loss": 0.0018, "step": 102690 }, { "epoch": 0.8398413542135176, "grad_norm": 0.03353610262274742, "learning_rate": 7.193099981423313e-06, "loss": 0.002, "step": 102700 }, { "epoch": 0.8399231303921167, "grad_norm": 0.041744593530893326, "learning_rate": 7.192458637735309e-06, "loss": 0.0011, "step": 102710 }, { "epoch": 0.8400049065707159, "grad_norm": 0.06320015341043472, "learning_rate": 7.191817249385142e-06, "loss": 0.0022, "step": 102720 }, { "epoch": 0.8400866827493151, "grad_norm": 0.11297446489334106, "learning_rate": 7.191175816385877e-06, "loss": 0.0018, "step": 102730 }, { "epoch": 0.8401684589279143, "grad_norm": 0.08237769454717636, "learning_rate": 7.190534338750579e-06, "loss": 0.0023, "step": 102740 }, { "epoch": 0.8402502351065134, "grad_norm": 0.09585770219564438, "learning_rate": 7.189892816492316e-06, "loss": 0.0025, "step": 102750 }, { "epoch": 0.8403320112851127, "grad_norm": 0.04343237355351448, "learning_rate": 7.189251249624155e-06, "loss": 0.0032, "step": 102760 }, { "epoch": 0.8404137874637119, "grad_norm": 0.011486295610666275, "learning_rate": 7.188609638159167e-06, "loss": 0.0015, "step": 102770 }, { "epoch": 0.840495563642311, "grad_norm": 0.15922953188419342, "learning_rate": 7.187967982110422e-06, "loss": 0.0017, "step": 102780 }, { "epoch": 0.8405773398209102, "grad_norm": 0.18348029255867004, "learning_rate": 7.187326281490992e-06, "loss": 0.0028, "step": 102790 }, { "epoch": 0.8406591159995094, "grad_norm": 0.150659441947937, "learning_rate": 7.186684536313946e-06, "loss": 0.0014, "step": 102800 }, { "epoch": 0.8407408921781085, "grad_norm": 0.07159368693828583, "learning_rate": 7.18604274659236e-06, "loss": 0.0021, "step": 102810 }, { "epoch": 0.8408226683567077, "grad_norm": 0.05092545598745346, "learning_rate": 7.185400912339304e-06, "loss": 0.0013, "step": 102820 }, { "epoch": 0.8409044445353069, "grad_norm": 0.03474811837077141, "learning_rate": 7.184759033567858e-06, "loss": 0.0026, "step": 102830 }, { "epoch": 0.840986220713906, "grad_norm": 0.06329116970300674, "learning_rate": 7.184117110291092e-06, "loss": 0.0013, "step": 102840 }, { "epoch": 0.8410679968925052, "grad_norm": 0.020648999139666557, "learning_rate": 7.183475142522087e-06, "loss": 0.0015, "step": 102850 }, { "epoch": 0.8411497730711044, "grad_norm": 0.06415218859910965, "learning_rate": 7.1828331302739175e-06, "loss": 0.0019, "step": 102860 }, { "epoch": 0.8412315492497036, "grad_norm": 0.12363111972808838, "learning_rate": 7.182191073559663e-06, "loss": 0.0011, "step": 102870 }, { "epoch": 0.8413133254283027, "grad_norm": 0.07353710383176804, "learning_rate": 7.181548972392402e-06, "loss": 0.0013, "step": 102880 }, { "epoch": 0.8413951016069019, "grad_norm": 0.006535176187753677, "learning_rate": 7.180906826785217e-06, "loss": 0.0014, "step": 102890 }, { "epoch": 0.8414768777855011, "grad_norm": 0.0591895692050457, "learning_rate": 7.180264636751185e-06, "loss": 0.0024, "step": 102900 }, { "epoch": 0.8415586539641002, "grad_norm": 0.26580682396888733, "learning_rate": 7.179622402303392e-06, "loss": 0.0026, "step": 102910 }, { "epoch": 0.8416404301426994, "grad_norm": 0.01881428249180317, "learning_rate": 7.178980123454919e-06, "loss": 0.0008, "step": 102920 }, { "epoch": 0.8417222063212986, "grad_norm": 0.03796055167913437, "learning_rate": 7.178337800218849e-06, "loss": 0.0009, "step": 102930 }, { "epoch": 0.8418039824998977, "grad_norm": 0.04618750140070915, "learning_rate": 7.177695432608266e-06, "loss": 0.0016, "step": 102940 }, { "epoch": 0.8418857586784969, "grad_norm": 0.12048612534999847, "learning_rate": 7.177053020636259e-06, "loss": 0.0022, "step": 102950 }, { "epoch": 0.8419675348570961, "grad_norm": 0.029777204617857933, "learning_rate": 7.17641056431591e-06, "loss": 0.0019, "step": 102960 }, { "epoch": 0.8420493110356952, "grad_norm": 0.007920382544398308, "learning_rate": 7.175768063660311e-06, "loss": 0.002, "step": 102970 }, { "epoch": 0.8421310872142945, "grad_norm": 0.016709446907043457, "learning_rate": 7.175125518682547e-06, "loss": 0.0011, "step": 102980 }, { "epoch": 0.8422128633928937, "grad_norm": 0.04065863788127899, "learning_rate": 7.17448292939571e-06, "loss": 0.0017, "step": 102990 }, { "epoch": 0.8422946395714929, "grad_norm": 0.05864984914660454, "learning_rate": 7.173840295812885e-06, "loss": 0.0016, "step": 103000 }, { "epoch": 0.842376415750092, "grad_norm": 0.13303115963935852, "learning_rate": 7.173197617947169e-06, "loss": 0.0021, "step": 103010 }, { "epoch": 0.8424581919286912, "grad_norm": 0.11786720156669617, "learning_rate": 7.172554895811648e-06, "loss": 0.002, "step": 103020 }, { "epoch": 0.8425399681072904, "grad_norm": 0.08255334943532944, "learning_rate": 7.17191212941942e-06, "loss": 0.0011, "step": 103030 }, { "epoch": 0.8426217442858895, "grad_norm": 0.009506234899163246, "learning_rate": 7.171269318783577e-06, "loss": 0.0015, "step": 103040 }, { "epoch": 0.8427035204644887, "grad_norm": 0.021102724596858025, "learning_rate": 7.1706264639172126e-06, "loss": 0.0015, "step": 103050 }, { "epoch": 0.8427852966430879, "grad_norm": 0.06663418561220169, "learning_rate": 7.169983564833423e-06, "loss": 0.0014, "step": 103060 }, { "epoch": 0.842867072821687, "grad_norm": 0.07445157319307327, "learning_rate": 7.169340621545303e-06, "loss": 0.0014, "step": 103070 }, { "epoch": 0.8429488490002862, "grad_norm": 0.05160893127322197, "learning_rate": 7.168697634065951e-06, "loss": 0.0014, "step": 103080 }, { "epoch": 0.8430306251788854, "grad_norm": 0.04099003225564957, "learning_rate": 7.1680546024084675e-06, "loss": 0.0017, "step": 103090 }, { "epoch": 0.8431124013574846, "grad_norm": 0.07247602939605713, "learning_rate": 7.167411526585948e-06, "loss": 0.0033, "step": 103100 }, { "epoch": 0.8431941775360837, "grad_norm": 0.1281241476535797, "learning_rate": 7.166768406611495e-06, "loss": 0.0027, "step": 103110 }, { "epoch": 0.8432759537146829, "grad_norm": 0.06519928574562073, "learning_rate": 7.166125242498208e-06, "loss": 0.0028, "step": 103120 }, { "epoch": 0.8433577298932821, "grad_norm": 0.08704151213169098, "learning_rate": 7.165482034259188e-06, "loss": 0.0013, "step": 103130 }, { "epoch": 0.8434395060718812, "grad_norm": 0.016608504578471184, "learning_rate": 7.1648387819075394e-06, "loss": 0.0015, "step": 103140 }, { "epoch": 0.8435212822504804, "grad_norm": 0.069114089012146, "learning_rate": 7.164195485456365e-06, "loss": 0.0023, "step": 103150 }, { "epoch": 0.8436030584290796, "grad_norm": 0.05014804005622864, "learning_rate": 7.163552144918769e-06, "loss": 0.0047, "step": 103160 }, { "epoch": 0.8436848346076787, "grad_norm": 0.023549161851406097, "learning_rate": 7.1629087603078584e-06, "loss": 0.0011, "step": 103170 }, { "epoch": 0.8437666107862779, "grad_norm": 0.12852512300014496, "learning_rate": 7.162265331636737e-06, "loss": 0.0019, "step": 103180 }, { "epoch": 0.8438483869648772, "grad_norm": 0.03200376778841019, "learning_rate": 7.1616218589185145e-06, "loss": 0.002, "step": 103190 }, { "epoch": 0.8439301631434764, "grad_norm": 0.06908603012561798, "learning_rate": 7.160978342166297e-06, "loss": 0.0017, "step": 103200 }, { "epoch": 0.8440119393220755, "grad_norm": 0.02548893168568611, "learning_rate": 7.160334781393194e-06, "loss": 0.0013, "step": 103210 }, { "epoch": 0.8440937155006747, "grad_norm": 0.0031586012337356806, "learning_rate": 7.1596911766123155e-06, "loss": 0.0022, "step": 103220 }, { "epoch": 0.8441754916792739, "grad_norm": 0.12842395901679993, "learning_rate": 7.159047527836773e-06, "loss": 0.0009, "step": 103230 }, { "epoch": 0.844257267857873, "grad_norm": 0.05441722646355629, "learning_rate": 7.1584038350796766e-06, "loss": 0.0017, "step": 103240 }, { "epoch": 0.8443390440364722, "grad_norm": 0.013112293556332588, "learning_rate": 7.1577600983541404e-06, "loss": 0.0021, "step": 103250 }, { "epoch": 0.8444208202150714, "grad_norm": 0.018931709229946136, "learning_rate": 7.157116317673276e-06, "loss": 0.0013, "step": 103260 }, { "epoch": 0.8445025963936705, "grad_norm": 0.03656596317887306, "learning_rate": 7.1564724930501995e-06, "loss": 0.0013, "step": 103270 }, { "epoch": 0.8445843725722697, "grad_norm": 0.07064346224069595, "learning_rate": 7.1558286244980255e-06, "loss": 0.0022, "step": 103280 }, { "epoch": 0.8446661487508689, "grad_norm": 0.04295780882239342, "learning_rate": 7.15518471202987e-06, "loss": 0.0022, "step": 103290 }, { "epoch": 0.844747924929468, "grad_norm": 0.08440924435853958, "learning_rate": 7.15454075565885e-06, "loss": 0.0021, "step": 103300 }, { "epoch": 0.8448297011080672, "grad_norm": 0.029107939451932907, "learning_rate": 7.153896755398083e-06, "loss": 0.0009, "step": 103310 }, { "epoch": 0.8449114772866664, "grad_norm": 0.04120203107595444, "learning_rate": 7.153252711260689e-06, "loss": 0.0021, "step": 103320 }, { "epoch": 0.8449932534652655, "grad_norm": 0.016707317903637886, "learning_rate": 7.152608623259787e-06, "loss": 0.004, "step": 103330 }, { "epoch": 0.8450750296438647, "grad_norm": 0.0277677234262228, "learning_rate": 7.151964491408497e-06, "loss": 0.0009, "step": 103340 }, { "epoch": 0.8451568058224639, "grad_norm": 0.03345357999205589, "learning_rate": 7.151320315719943e-06, "loss": 0.0027, "step": 103350 }, { "epoch": 0.845238582001063, "grad_norm": 0.043454237282276154, "learning_rate": 7.150676096207243e-06, "loss": 0.0014, "step": 103360 }, { "epoch": 0.8453203581796622, "grad_norm": 0.039141517132520676, "learning_rate": 7.1500318328835236e-06, "loss": 0.0012, "step": 103370 }, { "epoch": 0.8454021343582614, "grad_norm": 0.13467442989349365, "learning_rate": 7.149387525761908e-06, "loss": 0.0024, "step": 103380 }, { "epoch": 0.8454839105368606, "grad_norm": 0.002896543126553297, "learning_rate": 7.148743174855523e-06, "loss": 0.0018, "step": 103390 }, { "epoch": 0.8455656867154598, "grad_norm": 0.04864015802741051, "learning_rate": 7.148098780177491e-06, "loss": 0.0021, "step": 103400 }, { "epoch": 0.845647462894059, "grad_norm": 0.06094222143292427, "learning_rate": 7.147454341740943e-06, "loss": 0.0013, "step": 103410 }, { "epoch": 0.8457292390726582, "grad_norm": 0.014470369555056095, "learning_rate": 7.1468098595590004e-06, "loss": 0.0007, "step": 103420 }, { "epoch": 0.8458110152512573, "grad_norm": 0.026148945093154907, "learning_rate": 7.1461653336448e-06, "loss": 0.002, "step": 103430 }, { "epoch": 0.8458927914298565, "grad_norm": 0.1253698170185089, "learning_rate": 7.1455207640114646e-06, "loss": 0.0017, "step": 103440 }, { "epoch": 0.8459745676084557, "grad_norm": 0.03107377514243126, "learning_rate": 7.144876150672128e-06, "loss": 0.0024, "step": 103450 }, { "epoch": 0.8460563437870549, "grad_norm": 0.02615460194647312, "learning_rate": 7.144231493639922e-06, "loss": 0.0023, "step": 103460 }, { "epoch": 0.846138119965654, "grad_norm": 0.12031330168247223, "learning_rate": 7.143586792927978e-06, "loss": 0.0015, "step": 103470 }, { "epoch": 0.8462198961442532, "grad_norm": 0.042769141495227814, "learning_rate": 7.142942048549426e-06, "loss": 0.0024, "step": 103480 }, { "epoch": 0.8463016723228524, "grad_norm": 0.05901133269071579, "learning_rate": 7.142297260517404e-06, "loss": 0.0021, "step": 103490 }, { "epoch": 0.8463834485014515, "grad_norm": 0.018897907808423042, "learning_rate": 7.141652428845045e-06, "loss": 0.0021, "step": 103500 }, { "epoch": 0.8464652246800507, "grad_norm": 0.03883230313658714, "learning_rate": 7.141007553545487e-06, "loss": 0.0024, "step": 103510 }, { "epoch": 0.8465470008586499, "grad_norm": 0.04581378027796745, "learning_rate": 7.140362634631863e-06, "loss": 0.0023, "step": 103520 }, { "epoch": 0.846628777037249, "grad_norm": 0.05969523265957832, "learning_rate": 7.139717672117313e-06, "loss": 0.0013, "step": 103530 }, { "epoch": 0.8467105532158482, "grad_norm": 0.06761202961206436, "learning_rate": 7.139072666014974e-06, "loss": 0.001, "step": 103540 }, { "epoch": 0.8467923293944474, "grad_norm": 0.14228805899620056, "learning_rate": 7.138427616337988e-06, "loss": 0.0017, "step": 103550 }, { "epoch": 0.8468741055730465, "grad_norm": 0.055182162672281265, "learning_rate": 7.13778252309949e-06, "loss": 0.0024, "step": 103560 }, { "epoch": 0.8469558817516457, "grad_norm": 0.011395049281418324, "learning_rate": 7.137137386312627e-06, "loss": 0.0017, "step": 103570 }, { "epoch": 0.8470376579302449, "grad_norm": 0.07324040681123734, "learning_rate": 7.136492205990539e-06, "loss": 0.0009, "step": 103580 }, { "epoch": 0.847119434108844, "grad_norm": 0.06871413439512253, "learning_rate": 7.135846982146366e-06, "loss": 0.0013, "step": 103590 }, { "epoch": 0.8472012102874432, "grad_norm": 0.06085021048784256, "learning_rate": 7.135201714793254e-06, "loss": 0.0013, "step": 103600 }, { "epoch": 0.8472829864660424, "grad_norm": 0.02527008205652237, "learning_rate": 7.134556403944348e-06, "loss": 0.0011, "step": 103610 }, { "epoch": 0.8473647626446417, "grad_norm": 0.07208900153636932, "learning_rate": 7.133911049612793e-06, "loss": 0.0015, "step": 103620 }, { "epoch": 0.8474465388232408, "grad_norm": 0.19036176800727844, "learning_rate": 7.133265651811736e-06, "loss": 0.0016, "step": 103630 }, { "epoch": 0.84752831500184, "grad_norm": 0.019476084038615227, "learning_rate": 7.132620210554323e-06, "loss": 0.0016, "step": 103640 }, { "epoch": 0.8476100911804392, "grad_norm": 0.05138067156076431, "learning_rate": 7.1319747258537035e-06, "loss": 0.0011, "step": 103650 }, { "epoch": 0.8476918673590383, "grad_norm": 0.13333916664123535, "learning_rate": 7.1313291977230245e-06, "loss": 0.0017, "step": 103660 }, { "epoch": 0.8477736435376375, "grad_norm": 0.051351167261600494, "learning_rate": 7.130683626175439e-06, "loss": 0.0013, "step": 103670 }, { "epoch": 0.8478554197162367, "grad_norm": 0.04541344195604324, "learning_rate": 7.130038011224094e-06, "loss": 0.0011, "step": 103680 }, { "epoch": 0.8479371958948358, "grad_norm": 0.03871338069438934, "learning_rate": 7.129392352882145e-06, "loss": 0.0008, "step": 103690 }, { "epoch": 0.848018972073435, "grad_norm": 0.14632394909858704, "learning_rate": 7.128746651162744e-06, "loss": 0.0024, "step": 103700 }, { "epoch": 0.8481007482520342, "grad_norm": 0.124860018491745, "learning_rate": 7.128100906079041e-06, "loss": 0.0011, "step": 103710 }, { "epoch": 0.8481825244306334, "grad_norm": 0.05748425051569939, "learning_rate": 7.127455117644193e-06, "loss": 0.0017, "step": 103720 }, { "epoch": 0.8482643006092325, "grad_norm": 0.0052126250229775906, "learning_rate": 7.126809285871355e-06, "loss": 0.001, "step": 103730 }, { "epoch": 0.8483460767878317, "grad_norm": 0.05342884361743927, "learning_rate": 7.126163410773683e-06, "loss": 0.0011, "step": 103740 }, { "epoch": 0.8484278529664309, "grad_norm": 0.049462828785181046, "learning_rate": 7.125517492364335e-06, "loss": 0.0015, "step": 103750 }, { "epoch": 0.84850962914503, "grad_norm": 0.02527948096394539, "learning_rate": 7.12487153065647e-06, "loss": 0.0014, "step": 103760 }, { "epoch": 0.8485914053236292, "grad_norm": 0.03583830967545509, "learning_rate": 7.124225525663242e-06, "loss": 0.0008, "step": 103770 }, { "epoch": 0.8486731815022284, "grad_norm": 0.04187038168311119, "learning_rate": 7.123579477397813e-06, "loss": 0.001, "step": 103780 }, { "epoch": 0.8487549576808275, "grad_norm": 0.008519892580807209, "learning_rate": 7.1229333858733455e-06, "loss": 0.0012, "step": 103790 }, { "epoch": 0.8488367338594267, "grad_norm": 0.0326998271048069, "learning_rate": 7.1222872511029975e-06, "loss": 0.0011, "step": 103800 }, { "epoch": 0.8489185100380259, "grad_norm": 0.10051427781581879, "learning_rate": 7.121641073099934e-06, "loss": 0.0008, "step": 103810 }, { "epoch": 0.849000286216625, "grad_norm": 0.005852695554494858, "learning_rate": 7.120994851877319e-06, "loss": 0.001, "step": 103820 }, { "epoch": 0.8490820623952243, "grad_norm": 0.07482270151376724, "learning_rate": 7.120348587448314e-06, "loss": 0.0014, "step": 103830 }, { "epoch": 0.8491638385738235, "grad_norm": 0.03160720691084862, "learning_rate": 7.1197022798260825e-06, "loss": 0.0017, "step": 103840 }, { "epoch": 0.8492456147524227, "grad_norm": 0.014697919599711895, "learning_rate": 7.119055929023795e-06, "loss": 0.0023, "step": 103850 }, { "epoch": 0.8493273909310218, "grad_norm": 0.017759891226887703, "learning_rate": 7.118409535054613e-06, "loss": 0.0012, "step": 103860 }, { "epoch": 0.849409167109621, "grad_norm": 0.11821889877319336, "learning_rate": 7.11776309793171e-06, "loss": 0.0019, "step": 103870 }, { "epoch": 0.8494909432882202, "grad_norm": 0.17002874612808228, "learning_rate": 7.11711661766825e-06, "loss": 0.002, "step": 103880 }, { "epoch": 0.8495727194668193, "grad_norm": 0.046623215079307556, "learning_rate": 7.116470094277405e-06, "loss": 0.0008, "step": 103890 }, { "epoch": 0.8496544956454185, "grad_norm": 0.006554634775966406, "learning_rate": 7.115823527772341e-06, "loss": 0.002, "step": 103900 }, { "epoch": 0.8497362718240177, "grad_norm": 0.029648438096046448, "learning_rate": 7.115176918166233e-06, "loss": 0.0014, "step": 103910 }, { "epoch": 0.8498180480026168, "grad_norm": 0.09095662832260132, "learning_rate": 7.1145302654722524e-06, "loss": 0.0028, "step": 103920 }, { "epoch": 0.849899824181216, "grad_norm": 0.05862424895167351, "learning_rate": 7.1138835697035724e-06, "loss": 0.0019, "step": 103930 }, { "epoch": 0.8499816003598152, "grad_norm": 0.04933278262615204, "learning_rate": 7.1132368308733646e-06, "loss": 0.0023, "step": 103940 }, { "epoch": 0.8500633765384144, "grad_norm": 0.03403124958276749, "learning_rate": 7.112590048994805e-06, "loss": 0.0019, "step": 103950 }, { "epoch": 0.8501451527170135, "grad_norm": 0.061516884714365005, "learning_rate": 7.111943224081069e-06, "loss": 0.0014, "step": 103960 }, { "epoch": 0.8502269288956127, "grad_norm": 0.03682940825819969, "learning_rate": 7.111296356145333e-06, "loss": 0.0009, "step": 103970 }, { "epoch": 0.8503087050742119, "grad_norm": 0.17470914125442505, "learning_rate": 7.110649445200774e-06, "loss": 0.0038, "step": 103980 }, { "epoch": 0.850390481252811, "grad_norm": 0.05351226031780243, "learning_rate": 7.1100024912605706e-06, "loss": 0.0009, "step": 103990 }, { "epoch": 0.8504722574314102, "grad_norm": 0.11291294544935226, "learning_rate": 7.109355494337901e-06, "loss": 0.0013, "step": 104000 }, { "epoch": 0.8505540336100094, "grad_norm": 0.0835283100605011, "learning_rate": 7.108708454445945e-06, "loss": 0.0011, "step": 104010 }, { "epoch": 0.8506358097886085, "grad_norm": 0.1179550290107727, "learning_rate": 7.108061371597883e-06, "loss": 0.0013, "step": 104020 }, { "epoch": 0.8507175859672077, "grad_norm": 0.012922324240207672, "learning_rate": 7.107414245806899e-06, "loss": 0.0016, "step": 104030 }, { "epoch": 0.850799362145807, "grad_norm": 0.05008261650800705, "learning_rate": 7.106767077086172e-06, "loss": 0.002, "step": 104040 }, { "epoch": 0.8508811383244061, "grad_norm": 0.00930931605398655, "learning_rate": 7.106119865448889e-06, "loss": 0.0043, "step": 104050 }, { "epoch": 0.8509629145030053, "grad_norm": 0.05080138146877289, "learning_rate": 7.105472610908233e-06, "loss": 0.0012, "step": 104060 }, { "epoch": 0.8510446906816045, "grad_norm": 0.03193739056587219, "learning_rate": 7.104825313477387e-06, "loss": 0.001, "step": 104070 }, { "epoch": 0.8511264668602037, "grad_norm": 0.06892012804746628, "learning_rate": 7.104177973169538e-06, "loss": 0.0014, "step": 104080 }, { "epoch": 0.8512082430388028, "grad_norm": 0.07392656058073044, "learning_rate": 7.103530589997873e-06, "loss": 0.0011, "step": 104090 }, { "epoch": 0.851290019217402, "grad_norm": 0.01607496663928032, "learning_rate": 7.1028831639755814e-06, "loss": 0.0011, "step": 104100 }, { "epoch": 0.8513717953960012, "grad_norm": 0.1615554839372635, "learning_rate": 7.102235695115851e-06, "loss": 0.0018, "step": 104110 }, { "epoch": 0.8514535715746003, "grad_norm": 0.014917544089257717, "learning_rate": 7.10158818343187e-06, "loss": 0.0015, "step": 104120 }, { "epoch": 0.8515353477531995, "grad_norm": 0.10418951511383057, "learning_rate": 7.10094062893683e-06, "loss": 0.0013, "step": 104130 }, { "epoch": 0.8516171239317987, "grad_norm": 0.05502527207136154, "learning_rate": 7.10029303164392e-06, "loss": 0.0012, "step": 104140 }, { "epoch": 0.8516989001103978, "grad_norm": 0.07301699370145798, "learning_rate": 7.0996453915663345e-06, "loss": 0.0023, "step": 104150 }, { "epoch": 0.851780676288997, "grad_norm": 0.019434833899140358, "learning_rate": 7.098997708717266e-06, "loss": 0.0011, "step": 104160 }, { "epoch": 0.8518624524675962, "grad_norm": 0.05461089685559273, "learning_rate": 7.098349983109908e-06, "loss": 0.0019, "step": 104170 }, { "epoch": 0.8519442286461953, "grad_norm": 0.08459483087062836, "learning_rate": 7.0977022147574546e-06, "loss": 0.0017, "step": 104180 }, { "epoch": 0.8520260048247945, "grad_norm": 0.25000619888305664, "learning_rate": 7.0970544036731024e-06, "loss": 0.0022, "step": 104190 }, { "epoch": 0.8521077810033937, "grad_norm": 0.057091124355793, "learning_rate": 7.096406549870047e-06, "loss": 0.0008, "step": 104200 }, { "epoch": 0.8521895571819929, "grad_norm": 0.06795026361942291, "learning_rate": 7.095758653361487e-06, "loss": 0.0018, "step": 104210 }, { "epoch": 0.852271333360592, "grad_norm": 0.01979052647948265, "learning_rate": 7.095110714160619e-06, "loss": 0.0039, "step": 104220 }, { "epoch": 0.8523531095391912, "grad_norm": 0.06287572532892227, "learning_rate": 7.0944627322806435e-06, "loss": 0.0015, "step": 104230 }, { "epoch": 0.8524348857177904, "grad_norm": 0.0992903783917427, "learning_rate": 7.093814707734759e-06, "loss": 0.0016, "step": 104240 }, { "epoch": 0.8525166618963895, "grad_norm": 0.031143201515078545, "learning_rate": 7.093166640536169e-06, "loss": 0.0027, "step": 104250 }, { "epoch": 0.8525984380749888, "grad_norm": 0.03784996271133423, "learning_rate": 7.0925185306980716e-06, "loss": 0.0012, "step": 104260 }, { "epoch": 0.852680214253588, "grad_norm": 0.09134666621685028, "learning_rate": 7.09187037823367e-06, "loss": 0.0016, "step": 104270 }, { "epoch": 0.8527619904321871, "grad_norm": 0.09689125418663025, "learning_rate": 7.091222183156169e-06, "loss": 0.0015, "step": 104280 }, { "epoch": 0.8528437666107863, "grad_norm": 0.02716798149049282, "learning_rate": 7.090573945478771e-06, "loss": 0.0018, "step": 104290 }, { "epoch": 0.8529255427893855, "grad_norm": 0.04843616485595703, "learning_rate": 7.0899256652146855e-06, "loss": 0.0019, "step": 104300 }, { "epoch": 0.8530073189679847, "grad_norm": 0.059281982481479645, "learning_rate": 7.089277342377114e-06, "loss": 0.0017, "step": 104310 }, { "epoch": 0.8530890951465838, "grad_norm": 0.04342102259397507, "learning_rate": 7.0886289769792635e-06, "loss": 0.0018, "step": 104320 }, { "epoch": 0.853170871325183, "grad_norm": 0.041008852422237396, "learning_rate": 7.087980569034344e-06, "loss": 0.001, "step": 104330 }, { "epoch": 0.8532526475037822, "grad_norm": 0.03844388201832771, "learning_rate": 7.0873321185555635e-06, "loss": 0.0012, "step": 104340 }, { "epoch": 0.8533344236823813, "grad_norm": 0.03497026115655899, "learning_rate": 7.08668362555613e-06, "loss": 0.0014, "step": 104350 }, { "epoch": 0.8534161998609805, "grad_norm": 0.009683799929916859, "learning_rate": 7.086035090049255e-06, "loss": 0.0017, "step": 104360 }, { "epoch": 0.8534979760395797, "grad_norm": 0.06587455421686172, "learning_rate": 7.0853865120481514e-06, "loss": 0.0012, "step": 104370 }, { "epoch": 0.8535797522181788, "grad_norm": 0.020222237333655357, "learning_rate": 7.084737891566029e-06, "loss": 0.0015, "step": 104380 }, { "epoch": 0.853661528396778, "grad_norm": 0.03356155380606651, "learning_rate": 7.0840892286161e-06, "loss": 0.0013, "step": 104390 }, { "epoch": 0.8537433045753772, "grad_norm": 0.03883408010005951, "learning_rate": 7.08344052321158e-06, "loss": 0.002, "step": 104400 }, { "epoch": 0.8538250807539763, "grad_norm": 0.22679030895233154, "learning_rate": 7.0827917753656824e-06, "loss": 0.0021, "step": 104410 }, { "epoch": 0.8539068569325755, "grad_norm": 0.07522869855165482, "learning_rate": 7.082142985091624e-06, "loss": 0.0024, "step": 104420 }, { "epoch": 0.8539886331111747, "grad_norm": 0.028547395020723343, "learning_rate": 7.08149415240262e-06, "loss": 0.001, "step": 104430 }, { "epoch": 0.8540704092897738, "grad_norm": 0.022187067195773125, "learning_rate": 7.080845277311889e-06, "loss": 0.0016, "step": 104440 }, { "epoch": 0.854152185468373, "grad_norm": 0.008608839474618435, "learning_rate": 7.080196359832647e-06, "loss": 0.0012, "step": 104450 }, { "epoch": 0.8542339616469722, "grad_norm": 0.03935929760336876, "learning_rate": 7.079547399978116e-06, "loss": 0.0022, "step": 104460 }, { "epoch": 0.8543157378255715, "grad_norm": 0.04422348365187645, "learning_rate": 7.078898397761513e-06, "loss": 0.0013, "step": 104470 }, { "epoch": 0.8543975140041706, "grad_norm": 0.03577479347586632, "learning_rate": 7.07824935319606e-06, "loss": 0.0017, "step": 104480 }, { "epoch": 0.8544792901827698, "grad_norm": 0.09444223344326019, "learning_rate": 7.07760026629498e-06, "loss": 0.0017, "step": 104490 }, { "epoch": 0.854561066361369, "grad_norm": 0.18719199299812317, "learning_rate": 7.0769511370714915e-06, "loss": 0.0026, "step": 104500 }, { "epoch": 0.8546428425399681, "grad_norm": 0.0093583595007658, "learning_rate": 7.076301965538821e-06, "loss": 0.0012, "step": 104510 }, { "epoch": 0.8547246187185673, "grad_norm": 0.046496596187353134, "learning_rate": 7.0756527517101915e-06, "loss": 0.0019, "step": 104520 }, { "epoch": 0.8548063948971665, "grad_norm": 0.08763566613197327, "learning_rate": 7.075003495598827e-06, "loss": 0.0012, "step": 104530 }, { "epoch": 0.8548881710757656, "grad_norm": 0.13857464492321014, "learning_rate": 7.074354197217958e-06, "loss": 0.0018, "step": 104540 }, { "epoch": 0.8549699472543648, "grad_norm": 0.07576869428157806, "learning_rate": 7.073704856580805e-06, "loss": 0.0015, "step": 104550 }, { "epoch": 0.855051723432964, "grad_norm": 0.019604744389653206, "learning_rate": 7.073055473700601e-06, "loss": 0.0008, "step": 104560 }, { "epoch": 0.8551334996115632, "grad_norm": 0.1182776540517807, "learning_rate": 7.0724060485905685e-06, "loss": 0.0023, "step": 104570 }, { "epoch": 0.8552152757901623, "grad_norm": 0.10844626277685165, "learning_rate": 7.071756581263944e-06, "loss": 0.0018, "step": 104580 }, { "epoch": 0.8552970519687615, "grad_norm": 0.021970391273498535, "learning_rate": 7.0711070717339505e-06, "loss": 0.0016, "step": 104590 }, { "epoch": 0.8553788281473607, "grad_norm": 0.01317379716783762, "learning_rate": 7.070457520013825e-06, "loss": 0.0013, "step": 104600 }, { "epoch": 0.8554606043259598, "grad_norm": 0.04278514161705971, "learning_rate": 7.069807926116796e-06, "loss": 0.0011, "step": 104610 }, { "epoch": 0.855542380504559, "grad_norm": 0.09806401282548904, "learning_rate": 7.069158290056097e-06, "loss": 0.0019, "step": 104620 }, { "epoch": 0.8556241566831582, "grad_norm": 0.07438260316848755, "learning_rate": 7.0685086118449604e-06, "loss": 0.0016, "step": 104630 }, { "epoch": 0.8557059328617573, "grad_norm": 0.039457544684410095, "learning_rate": 7.067858891496624e-06, "loss": 0.0015, "step": 104640 }, { "epoch": 0.8557877090403565, "grad_norm": 0.14848501980304718, "learning_rate": 7.06720912902432e-06, "loss": 0.0023, "step": 104650 }, { "epoch": 0.8558694852189557, "grad_norm": 0.10253055393695831, "learning_rate": 7.066559324441287e-06, "loss": 0.0019, "step": 104660 }, { "epoch": 0.8559512613975548, "grad_norm": 0.017209229990839958, "learning_rate": 7.065909477760761e-06, "loss": 0.0006, "step": 104670 }, { "epoch": 0.8560330375761541, "grad_norm": 0.06874877214431763, "learning_rate": 7.065259588995979e-06, "loss": 0.0016, "step": 104680 }, { "epoch": 0.8561148137547533, "grad_norm": 0.0067712124437093735, "learning_rate": 7.064609658160178e-06, "loss": 0.0011, "step": 104690 }, { "epoch": 0.8561965899333525, "grad_norm": 0.0676451027393341, "learning_rate": 7.0639596852666034e-06, "loss": 0.0016, "step": 104700 }, { "epoch": 0.8562783661119516, "grad_norm": 0.01647695153951645, "learning_rate": 7.063309670328491e-06, "loss": 0.0011, "step": 104710 }, { "epoch": 0.8563601422905508, "grad_norm": 0.05196654424071312, "learning_rate": 7.062659613359083e-06, "loss": 0.0014, "step": 104720 }, { "epoch": 0.85644191846915, "grad_norm": 0.08395159989595413, "learning_rate": 7.062009514371624e-06, "loss": 0.0009, "step": 104730 }, { "epoch": 0.8565236946477491, "grad_norm": 0.05882330238819122, "learning_rate": 7.061359373379354e-06, "loss": 0.0009, "step": 104740 }, { "epoch": 0.8566054708263483, "grad_norm": 0.09519201517105103, "learning_rate": 7.060709190395516e-06, "loss": 0.0014, "step": 104750 }, { "epoch": 0.8566872470049475, "grad_norm": 0.12582102417945862, "learning_rate": 7.0600589654333595e-06, "loss": 0.0018, "step": 104760 }, { "epoch": 0.8567690231835466, "grad_norm": 0.03134765103459358, "learning_rate": 7.0594086985061264e-06, "loss": 0.0012, "step": 104770 }, { "epoch": 0.8568507993621458, "grad_norm": 0.045739464461803436, "learning_rate": 7.058758389627064e-06, "loss": 0.0022, "step": 104780 }, { "epoch": 0.856932575540745, "grad_norm": 0.05406228452920914, "learning_rate": 7.05810803880942e-06, "loss": 0.0019, "step": 104790 }, { "epoch": 0.8570143517193441, "grad_norm": 0.12065661698579788, "learning_rate": 7.0574576460664435e-06, "loss": 0.0022, "step": 104800 }, { "epoch": 0.8570961278979433, "grad_norm": 0.049393802881240845, "learning_rate": 7.056807211411379e-06, "loss": 0.0014, "step": 104810 }, { "epoch": 0.8571779040765425, "grad_norm": 0.05321405082941055, "learning_rate": 7.056156734857483e-06, "loss": 0.0016, "step": 104820 }, { "epoch": 0.8572596802551417, "grad_norm": 0.09103573858737946, "learning_rate": 7.055506216418001e-06, "loss": 0.0026, "step": 104830 }, { "epoch": 0.8573414564337408, "grad_norm": 0.14236049354076385, "learning_rate": 7.0548556561061875e-06, "loss": 0.0023, "step": 104840 }, { "epoch": 0.85742323261234, "grad_norm": 0.035011738538742065, "learning_rate": 7.054205053935294e-06, "loss": 0.0024, "step": 104850 }, { "epoch": 0.8575050087909392, "grad_norm": 0.08696234226226807, "learning_rate": 7.053554409918575e-06, "loss": 0.0031, "step": 104860 }, { "epoch": 0.8575867849695383, "grad_norm": 0.020082613453269005, "learning_rate": 7.052903724069281e-06, "loss": 0.0017, "step": 104870 }, { "epoch": 0.8576685611481375, "grad_norm": 0.06855278462171555, "learning_rate": 7.0522529964006715e-06, "loss": 0.0032, "step": 104880 }, { "epoch": 0.8577503373267367, "grad_norm": 0.03544294461607933, "learning_rate": 7.051602226925999e-06, "loss": 0.0016, "step": 104890 }, { "epoch": 0.857832113505336, "grad_norm": 0.05791383609175682, "learning_rate": 7.050951415658523e-06, "loss": 0.0013, "step": 104900 }, { "epoch": 0.8579138896839351, "grad_norm": 0.029305394738912582, "learning_rate": 7.050300562611499e-06, "loss": 0.0016, "step": 104910 }, { "epoch": 0.8579956658625343, "grad_norm": 0.016563672572374344, "learning_rate": 7.0496496677981855e-06, "loss": 0.0008, "step": 104920 }, { "epoch": 0.8580774420411335, "grad_norm": 0.10684902966022491, "learning_rate": 7.048998731231841e-06, "loss": 0.0021, "step": 104930 }, { "epoch": 0.8581592182197326, "grad_norm": 0.2125598043203354, "learning_rate": 7.048347752925729e-06, "loss": 0.002, "step": 104940 }, { "epoch": 0.8582409943983318, "grad_norm": 0.151668518781662, "learning_rate": 7.0476967328931055e-06, "loss": 0.0025, "step": 104950 }, { "epoch": 0.858322770576931, "grad_norm": 0.02311839908361435, "learning_rate": 7.047045671147238e-06, "loss": 0.0017, "step": 104960 }, { "epoch": 0.8584045467555301, "grad_norm": 0.9013945460319519, "learning_rate": 7.046394567701386e-06, "loss": 0.0051, "step": 104970 }, { "epoch": 0.8584863229341293, "grad_norm": 0.0658651664853096, "learning_rate": 7.045743422568813e-06, "loss": 0.0015, "step": 104980 }, { "epoch": 0.8585680991127285, "grad_norm": 0.08547784388065338, "learning_rate": 7.0450922357627815e-06, "loss": 0.001, "step": 104990 }, { "epoch": 0.8586498752913276, "grad_norm": 0.02420874312520027, "learning_rate": 7.044441007296561e-06, "loss": 0.0022, "step": 105000 }, { "epoch": 0.8587316514699268, "grad_norm": 0.05778518691658974, "learning_rate": 7.043789737183414e-06, "loss": 0.0024, "step": 105010 }, { "epoch": 0.858813427648526, "grad_norm": 0.08636382967233658, "learning_rate": 7.043138425436609e-06, "loss": 0.0019, "step": 105020 }, { "epoch": 0.8588952038271251, "grad_norm": 0.08683478087186813, "learning_rate": 7.042487072069415e-06, "loss": 0.0014, "step": 105030 }, { "epoch": 0.8589769800057243, "grad_norm": 0.0340319387614727, "learning_rate": 7.041835677095099e-06, "loss": 0.0017, "step": 105040 }, { "epoch": 0.8590587561843235, "grad_norm": 0.20034238696098328, "learning_rate": 7.041184240526929e-06, "loss": 0.0025, "step": 105050 }, { "epoch": 0.8591405323629226, "grad_norm": 0.03881100192666054, "learning_rate": 7.040532762378178e-06, "loss": 0.0019, "step": 105060 }, { "epoch": 0.8592223085415218, "grad_norm": 0.10485556721687317, "learning_rate": 7.039881242662115e-06, "loss": 0.0024, "step": 105070 }, { "epoch": 0.859304084720121, "grad_norm": 0.01855313777923584, "learning_rate": 7.039229681392014e-06, "loss": 0.0011, "step": 105080 }, { "epoch": 0.8593858608987202, "grad_norm": 0.07057536393404007, "learning_rate": 7.038578078581148e-06, "loss": 0.0016, "step": 105090 }, { "epoch": 0.8594676370773193, "grad_norm": 0.10430701822042465, "learning_rate": 7.0379264342427875e-06, "loss": 0.0033, "step": 105100 }, { "epoch": 0.8595494132559186, "grad_norm": 0.03451748192310333, "learning_rate": 7.037274748390208e-06, "loss": 0.0016, "step": 105110 }, { "epoch": 0.8596311894345178, "grad_norm": 0.062094490975141525, "learning_rate": 7.0366230210366885e-06, "loss": 0.0019, "step": 105120 }, { "epoch": 0.8597129656131169, "grad_norm": 0.015532214194536209, "learning_rate": 7.0359712521955015e-06, "loss": 0.003, "step": 105130 }, { "epoch": 0.8597947417917161, "grad_norm": 0.03541145101189613, "learning_rate": 7.035319441879926e-06, "loss": 0.0008, "step": 105140 }, { "epoch": 0.8598765179703153, "grad_norm": 0.042288340628147125, "learning_rate": 7.0346675901032365e-06, "loss": 0.0013, "step": 105150 }, { "epoch": 0.8599582941489144, "grad_norm": 0.14740297198295593, "learning_rate": 7.034015696878718e-06, "loss": 0.002, "step": 105160 }, { "epoch": 0.8600400703275136, "grad_norm": 0.03622712567448616, "learning_rate": 7.033363762219642e-06, "loss": 0.002, "step": 105170 }, { "epoch": 0.8601218465061128, "grad_norm": 0.1427403688430786, "learning_rate": 7.032711786139297e-06, "loss": 0.0019, "step": 105180 }, { "epoch": 0.860203622684712, "grad_norm": 0.043212540447711945, "learning_rate": 7.03205976865096e-06, "loss": 0.0017, "step": 105190 }, { "epoch": 0.8602853988633111, "grad_norm": 0.08430377393960953, "learning_rate": 7.0314077097679125e-06, "loss": 0.0009, "step": 105200 }, { "epoch": 0.8603671750419103, "grad_norm": 0.10006046295166016, "learning_rate": 7.03075560950344e-06, "loss": 0.0022, "step": 105210 }, { "epoch": 0.8604489512205095, "grad_norm": 0.04755378141999245, "learning_rate": 7.030103467870826e-06, "loss": 0.001, "step": 105220 }, { "epoch": 0.8605307273991086, "grad_norm": 0.06047732010483742, "learning_rate": 7.029451284883352e-06, "loss": 0.0013, "step": 105230 }, { "epoch": 0.8606125035777078, "grad_norm": 0.08445196598768234, "learning_rate": 7.028799060554307e-06, "loss": 0.0014, "step": 105240 }, { "epoch": 0.860694279756307, "grad_norm": 0.020941127091646194, "learning_rate": 7.028146794896974e-06, "loss": 0.0007, "step": 105250 }, { "epoch": 0.8607760559349061, "grad_norm": 0.054313454777002335, "learning_rate": 7.027494487924644e-06, "loss": 0.0012, "step": 105260 }, { "epoch": 0.8608578321135053, "grad_norm": 0.01915002055466175, "learning_rate": 7.026842139650603e-06, "loss": 0.0009, "step": 105270 }, { "epoch": 0.8609396082921045, "grad_norm": 0.08402242511510849, "learning_rate": 7.0261897500881414e-06, "loss": 0.002, "step": 105280 }, { "epoch": 0.8610213844707036, "grad_norm": 0.07078076153993607, "learning_rate": 7.025537319250546e-06, "loss": 0.0013, "step": 105290 }, { "epoch": 0.8611031606493028, "grad_norm": 0.011109252460300922, "learning_rate": 7.024884847151109e-06, "loss": 0.0019, "step": 105300 }, { "epoch": 0.861184936827902, "grad_norm": 0.07291260361671448, "learning_rate": 7.024232333803122e-06, "loss": 0.0021, "step": 105310 }, { "epoch": 0.8612667130065013, "grad_norm": 0.0486454963684082, "learning_rate": 7.023579779219877e-06, "loss": 0.0011, "step": 105320 }, { "epoch": 0.8613484891851004, "grad_norm": 0.08477190881967545, "learning_rate": 7.022927183414668e-06, "loss": 0.0008, "step": 105330 }, { "epoch": 0.8614302653636996, "grad_norm": 0.031917620450258255, "learning_rate": 7.022274546400787e-06, "loss": 0.002, "step": 105340 }, { "epoch": 0.8615120415422988, "grad_norm": 0.06056647747755051, "learning_rate": 7.0216218681915285e-06, "loss": 0.0031, "step": 105350 }, { "epoch": 0.8615938177208979, "grad_norm": 0.23855441808700562, "learning_rate": 7.020969148800192e-06, "loss": 0.002, "step": 105360 }, { "epoch": 0.8616755938994971, "grad_norm": 0.06969080865383148, "learning_rate": 7.020316388240068e-06, "loss": 0.0014, "step": 105370 }, { "epoch": 0.8617573700780963, "grad_norm": 0.32066774368286133, "learning_rate": 7.019663586524459e-06, "loss": 0.0018, "step": 105380 }, { "epoch": 0.8618391462566954, "grad_norm": 0.09108544141054153, "learning_rate": 7.019010743666661e-06, "loss": 0.0016, "step": 105390 }, { "epoch": 0.8619209224352946, "grad_norm": 0.010744492523372173, "learning_rate": 7.018357859679974e-06, "loss": 0.0006, "step": 105400 }, { "epoch": 0.8620026986138938, "grad_norm": 0.24292267858982086, "learning_rate": 7.017704934577695e-06, "loss": 0.0016, "step": 105410 }, { "epoch": 0.862084474792493, "grad_norm": 0.010803901590406895, "learning_rate": 7.017051968373128e-06, "loss": 0.0011, "step": 105420 }, { "epoch": 0.8621662509710921, "grad_norm": 0.021723024547100067, "learning_rate": 7.0163989610795705e-06, "loss": 0.0017, "step": 105430 }, { "epoch": 0.8622480271496913, "grad_norm": 0.02279328741133213, "learning_rate": 7.015745912710329e-06, "loss": 0.0017, "step": 105440 }, { "epoch": 0.8623298033282905, "grad_norm": 0.016298700124025345, "learning_rate": 7.015092823278704e-06, "loss": 0.0011, "step": 105450 }, { "epoch": 0.8624115795068896, "grad_norm": 0.022487806156277657, "learning_rate": 7.0144396927980004e-06, "loss": 0.0019, "step": 105460 }, { "epoch": 0.8624933556854888, "grad_norm": 0.02699047513306141, "learning_rate": 7.0137865212815235e-06, "loss": 0.0011, "step": 105470 }, { "epoch": 0.862575131864088, "grad_norm": 0.03234937787055969, "learning_rate": 7.013133308742578e-06, "loss": 0.0009, "step": 105480 }, { "epoch": 0.8626569080426871, "grad_norm": 0.03273765742778778, "learning_rate": 7.012480055194469e-06, "loss": 0.0011, "step": 105490 }, { "epoch": 0.8627386842212863, "grad_norm": 0.05033639445900917, "learning_rate": 7.011826760650508e-06, "loss": 0.0016, "step": 105500 }, { "epoch": 0.8628204603998855, "grad_norm": 0.01778961904346943, "learning_rate": 7.011173425123999e-06, "loss": 0.0008, "step": 105510 }, { "epoch": 0.8629022365784846, "grad_norm": 0.06660252064466476, "learning_rate": 7.0105200486282544e-06, "loss": 0.0025, "step": 105520 }, { "epoch": 0.8629840127570838, "grad_norm": 0.054270852357149124, "learning_rate": 7.0098666311765795e-06, "loss": 0.0011, "step": 105530 }, { "epoch": 0.8630657889356831, "grad_norm": 0.043762583285570145, "learning_rate": 7.00921317278229e-06, "loss": 0.0011, "step": 105540 }, { "epoch": 0.8631475651142823, "grad_norm": 0.0814802423119545, "learning_rate": 7.008559673458693e-06, "loss": 0.0026, "step": 105550 }, { "epoch": 0.8632293412928814, "grad_norm": 0.11000844091176987, "learning_rate": 7.007906133219104e-06, "loss": 0.0022, "step": 105560 }, { "epoch": 0.8633111174714806, "grad_norm": 0.0476178340613842, "learning_rate": 7.007252552076836e-06, "loss": 0.0016, "step": 105570 }, { "epoch": 0.8633928936500798, "grad_norm": 0.13278774917125702, "learning_rate": 7.0065989300451995e-06, "loss": 0.0015, "step": 105580 }, { "epoch": 0.8634746698286789, "grad_norm": 0.20263399183750153, "learning_rate": 7.0059452671375125e-06, "loss": 0.0016, "step": 105590 }, { "epoch": 0.8635564460072781, "grad_norm": 0.04413658380508423, "learning_rate": 7.005291563367092e-06, "loss": 0.0026, "step": 105600 }, { "epoch": 0.8636382221858773, "grad_norm": 0.03199177607893944, "learning_rate": 7.004637818747249e-06, "loss": 0.0021, "step": 105610 }, { "epoch": 0.8637199983644764, "grad_norm": 0.11792347580194473, "learning_rate": 7.003984033291306e-06, "loss": 0.0015, "step": 105620 }, { "epoch": 0.8638017745430756, "grad_norm": 0.012258265167474747, "learning_rate": 7.00333020701258e-06, "loss": 0.0017, "step": 105630 }, { "epoch": 0.8638835507216748, "grad_norm": 0.03965618088841438, "learning_rate": 7.002676339924388e-06, "loss": 0.0015, "step": 105640 }, { "epoch": 0.8639653269002739, "grad_norm": 0.12876424193382263, "learning_rate": 7.002022432040051e-06, "loss": 0.0016, "step": 105650 }, { "epoch": 0.8640471030788731, "grad_norm": 0.00427526468411088, "learning_rate": 7.00136848337289e-06, "loss": 0.0014, "step": 105660 }, { "epoch": 0.8641288792574723, "grad_norm": 0.11389432847499847, "learning_rate": 7.000714493936227e-06, "loss": 0.0013, "step": 105670 }, { "epoch": 0.8642106554360715, "grad_norm": 0.055795818567276, "learning_rate": 7.000060463743384e-06, "loss": 0.0016, "step": 105680 }, { "epoch": 0.8642924316146706, "grad_norm": 0.03909457102417946, "learning_rate": 6.999406392807683e-06, "loss": 0.002, "step": 105690 }, { "epoch": 0.8643742077932698, "grad_norm": 0.04365752264857292, "learning_rate": 6.998752281142447e-06, "loss": 0.0015, "step": 105700 }, { "epoch": 0.864455983971869, "grad_norm": 0.06225830316543579, "learning_rate": 6.998098128761004e-06, "loss": 0.0013, "step": 105710 }, { "epoch": 0.8645377601504681, "grad_norm": 0.03396192938089371, "learning_rate": 6.997443935676678e-06, "loss": 0.0013, "step": 105720 }, { "epoch": 0.8646195363290673, "grad_norm": 0.051515962928533554, "learning_rate": 6.996789701902794e-06, "loss": 0.0013, "step": 105730 }, { "epoch": 0.8647013125076665, "grad_norm": 0.042619314044713974, "learning_rate": 6.996135427452681e-06, "loss": 0.0011, "step": 105740 }, { "epoch": 0.8647830886862657, "grad_norm": 0.0654740035533905, "learning_rate": 6.995481112339669e-06, "loss": 0.0011, "step": 105750 }, { "epoch": 0.8648648648648649, "grad_norm": 0.12641535699367523, "learning_rate": 6.994826756577082e-06, "loss": 0.0013, "step": 105760 }, { "epoch": 0.8649466410434641, "grad_norm": 0.06253847479820251, "learning_rate": 6.994172360178255e-06, "loss": 0.0016, "step": 105770 }, { "epoch": 0.8650284172220633, "grad_norm": 0.11866001784801483, "learning_rate": 6.993517923156514e-06, "loss": 0.0019, "step": 105780 }, { "epoch": 0.8651101934006624, "grad_norm": 0.1361132264137268, "learning_rate": 6.992863445525193e-06, "loss": 0.001, "step": 105790 }, { "epoch": 0.8651919695792616, "grad_norm": 0.10138031095266342, "learning_rate": 6.992208927297623e-06, "loss": 0.0019, "step": 105800 }, { "epoch": 0.8652737457578608, "grad_norm": 0.07113278657197952, "learning_rate": 6.991554368487141e-06, "loss": 0.001, "step": 105810 }, { "epoch": 0.8653555219364599, "grad_norm": 0.04339079186320305, "learning_rate": 6.990899769107075e-06, "loss": 0.0014, "step": 105820 }, { "epoch": 0.8654372981150591, "grad_norm": 0.020929748192429543, "learning_rate": 6.990245129170763e-06, "loss": 0.001, "step": 105830 }, { "epoch": 0.8655190742936583, "grad_norm": 0.04371660575270653, "learning_rate": 6.989590448691539e-06, "loss": 0.0009, "step": 105840 }, { "epoch": 0.8656008504722574, "grad_norm": 0.09783395379781723, "learning_rate": 6.988935727682742e-06, "loss": 0.0017, "step": 105850 }, { "epoch": 0.8656826266508566, "grad_norm": 0.056434743106365204, "learning_rate": 6.988280966157706e-06, "loss": 0.0015, "step": 105860 }, { "epoch": 0.8657644028294558, "grad_norm": 0.23063814640045166, "learning_rate": 6.9876261641297725e-06, "loss": 0.0024, "step": 105870 }, { "epoch": 0.8658461790080549, "grad_norm": 0.04734760522842407, "learning_rate": 6.986971321612277e-06, "loss": 0.0014, "step": 105880 }, { "epoch": 0.8659279551866541, "grad_norm": 0.08649162203073502, "learning_rate": 6.986316438618561e-06, "loss": 0.0015, "step": 105890 }, { "epoch": 0.8660097313652533, "grad_norm": 0.05617216229438782, "learning_rate": 6.985661515161963e-06, "loss": 0.0019, "step": 105900 }, { "epoch": 0.8660915075438524, "grad_norm": 0.10236205905675888, "learning_rate": 6.985006551255828e-06, "loss": 0.0013, "step": 105910 }, { "epoch": 0.8661732837224516, "grad_norm": 0.07676760107278824, "learning_rate": 6.984351546913495e-06, "loss": 0.0012, "step": 105920 }, { "epoch": 0.8662550599010508, "grad_norm": 0.009492137469351292, "learning_rate": 6.9836965021483095e-06, "loss": 0.0012, "step": 105930 }, { "epoch": 0.86633683607965, "grad_norm": 0.05444137379527092, "learning_rate": 6.983041416973611e-06, "loss": 0.0014, "step": 105940 }, { "epoch": 0.8664186122582491, "grad_norm": 0.014695867896080017, "learning_rate": 6.9823862914027495e-06, "loss": 0.0018, "step": 105950 }, { "epoch": 0.8665003884368484, "grad_norm": 0.02769613079726696, "learning_rate": 6.981731125449067e-06, "loss": 0.0013, "step": 105960 }, { "epoch": 0.8665821646154476, "grad_norm": 0.2131156325340271, "learning_rate": 6.98107591912591e-06, "loss": 0.0013, "step": 105970 }, { "epoch": 0.8666639407940467, "grad_norm": 0.011147445999085903, "learning_rate": 6.980420672446626e-06, "loss": 0.0021, "step": 105980 }, { "epoch": 0.8667457169726459, "grad_norm": 0.13587820529937744, "learning_rate": 6.9797653854245666e-06, "loss": 0.0016, "step": 105990 }, { "epoch": 0.8668274931512451, "grad_norm": 0.008726649917662144, "learning_rate": 6.979110058073073e-06, "loss": 0.0013, "step": 106000 }, { "epoch": 0.8669092693298442, "grad_norm": 0.003566359169781208, "learning_rate": 6.978454690405501e-06, "loss": 0.0007, "step": 106010 }, { "epoch": 0.8669910455084434, "grad_norm": 0.03883516043424606, "learning_rate": 6.977799282435198e-06, "loss": 0.0021, "step": 106020 }, { "epoch": 0.8670728216870426, "grad_norm": 0.04218843951821327, "learning_rate": 6.977143834175516e-06, "loss": 0.0011, "step": 106030 }, { "epoch": 0.8671545978656418, "grad_norm": 0.0778307095170021, "learning_rate": 6.976488345639807e-06, "loss": 0.0036, "step": 106040 }, { "epoch": 0.8672363740442409, "grad_norm": 0.09187952429056168, "learning_rate": 6.9758328168414245e-06, "loss": 0.0013, "step": 106050 }, { "epoch": 0.8673181502228401, "grad_norm": 0.02914910390973091, "learning_rate": 6.9751772477937206e-06, "loss": 0.0026, "step": 106060 }, { "epoch": 0.8673999264014393, "grad_norm": 0.24497227370738983, "learning_rate": 6.974521638510051e-06, "loss": 0.0023, "step": 106070 }, { "epoch": 0.8674817025800384, "grad_norm": 0.056209683418273926, "learning_rate": 6.97386598900377e-06, "loss": 0.0023, "step": 106080 }, { "epoch": 0.8675634787586376, "grad_norm": 0.025038592517375946, "learning_rate": 6.973210299288236e-06, "loss": 0.0017, "step": 106090 }, { "epoch": 0.8676452549372368, "grad_norm": 0.059201233088970184, "learning_rate": 6.9725545693768016e-06, "loss": 0.0018, "step": 106100 }, { "epoch": 0.8677270311158359, "grad_norm": 0.18088553845882416, "learning_rate": 6.97189879928283e-06, "loss": 0.0017, "step": 106110 }, { "epoch": 0.8678088072944351, "grad_norm": 0.10476038604974747, "learning_rate": 6.971242989019675e-06, "loss": 0.0017, "step": 106120 }, { "epoch": 0.8678905834730343, "grad_norm": 0.031828735023736954, "learning_rate": 6.970587138600699e-06, "loss": 0.0014, "step": 106130 }, { "epoch": 0.8679723596516334, "grad_norm": 0.05261782184243202, "learning_rate": 6.9699312480392615e-06, "loss": 0.001, "step": 106140 }, { "epoch": 0.8680541358302326, "grad_norm": 0.020153779536485672, "learning_rate": 6.969275317348722e-06, "loss": 0.0012, "step": 106150 }, { "epoch": 0.8681359120088318, "grad_norm": 0.02099648304283619, "learning_rate": 6.968619346542444e-06, "loss": 0.0012, "step": 106160 }, { "epoch": 0.868217688187431, "grad_norm": 0.0653328150510788, "learning_rate": 6.967963335633793e-06, "loss": 0.0015, "step": 106170 }, { "epoch": 0.8682994643660302, "grad_norm": 0.08802998811006546, "learning_rate": 6.967307284636124e-06, "loss": 0.001, "step": 106180 }, { "epoch": 0.8683812405446294, "grad_norm": 0.12329713255167007, "learning_rate": 6.96665119356281e-06, "loss": 0.0023, "step": 106190 }, { "epoch": 0.8684630167232286, "grad_norm": 0.05656733736395836, "learning_rate": 6.965995062427211e-06, "loss": 0.0016, "step": 106200 }, { "epoch": 0.8685447929018277, "grad_norm": 0.05102654919028282, "learning_rate": 6.965338891242696e-06, "loss": 0.0012, "step": 106210 }, { "epoch": 0.8686265690804269, "grad_norm": 0.10962910950183868, "learning_rate": 6.964682680022629e-06, "loss": 0.0015, "step": 106220 }, { "epoch": 0.8687083452590261, "grad_norm": 0.017431050539016724, "learning_rate": 6.964026428780379e-06, "loss": 0.0014, "step": 106230 }, { "epoch": 0.8687901214376252, "grad_norm": 0.2512877285480499, "learning_rate": 6.963370137529315e-06, "loss": 0.0036, "step": 106240 }, { "epoch": 0.8688718976162244, "grad_norm": 0.026913302019238472, "learning_rate": 6.9627138062828046e-06, "loss": 0.001, "step": 106250 }, { "epoch": 0.8689536737948236, "grad_norm": 0.06368813663721085, "learning_rate": 6.962057435054218e-06, "loss": 0.0016, "step": 106260 }, { "epoch": 0.8690354499734227, "grad_norm": 0.014411618001759052, "learning_rate": 6.961401023856927e-06, "loss": 0.0028, "step": 106270 }, { "epoch": 0.8691172261520219, "grad_norm": 0.10299868136644363, "learning_rate": 6.960744572704303e-06, "loss": 0.0013, "step": 106280 }, { "epoch": 0.8691990023306211, "grad_norm": 0.05358655005693436, "learning_rate": 6.9600880816097205e-06, "loss": 0.001, "step": 106290 }, { "epoch": 0.8692807785092203, "grad_norm": 0.09260594844818115, "learning_rate": 6.9594315505865476e-06, "loss": 0.0015, "step": 106300 }, { "epoch": 0.8693625546878194, "grad_norm": 0.03670576959848404, "learning_rate": 6.958774979648162e-06, "loss": 0.0011, "step": 106310 }, { "epoch": 0.8694443308664186, "grad_norm": 0.022146174684166908, "learning_rate": 6.958118368807938e-06, "loss": 0.0025, "step": 106320 }, { "epoch": 0.8695261070450178, "grad_norm": 0.0359870046377182, "learning_rate": 6.957461718079251e-06, "loss": 0.0013, "step": 106330 }, { "epoch": 0.8696078832236169, "grad_norm": 0.11976772546768188, "learning_rate": 6.9568050274754775e-06, "loss": 0.0022, "step": 106340 }, { "epoch": 0.8696896594022161, "grad_norm": 0.09285338222980499, "learning_rate": 6.956148297009997e-06, "loss": 0.0016, "step": 106350 }, { "epoch": 0.8697714355808153, "grad_norm": 0.012334546074271202, "learning_rate": 6.955491526696183e-06, "loss": 0.0011, "step": 106360 }, { "epoch": 0.8698532117594144, "grad_norm": 0.004827096126973629, "learning_rate": 6.95483471654742e-06, "loss": 0.001, "step": 106370 }, { "epoch": 0.8699349879380136, "grad_norm": 0.0465291365981102, "learning_rate": 6.954177866577082e-06, "loss": 0.0018, "step": 106380 }, { "epoch": 0.8700167641166129, "grad_norm": 0.06291325390338898, "learning_rate": 6.953520976798553e-06, "loss": 0.0011, "step": 106390 }, { "epoch": 0.870098540295212, "grad_norm": 0.041650597006082535, "learning_rate": 6.952864047225214e-06, "loss": 0.0014, "step": 106400 }, { "epoch": 0.8701803164738112, "grad_norm": 0.08150541037321091, "learning_rate": 6.952207077870449e-06, "loss": 0.0015, "step": 106410 }, { "epoch": 0.8702620926524104, "grad_norm": 0.07433758676052094, "learning_rate": 6.951550068747636e-06, "loss": 0.0016, "step": 106420 }, { "epoch": 0.8703438688310096, "grad_norm": 0.023246729746460915, "learning_rate": 6.950893019870164e-06, "loss": 0.0015, "step": 106430 }, { "epoch": 0.8704256450096087, "grad_norm": 0.04004980996251106, "learning_rate": 6.950235931251415e-06, "loss": 0.0014, "step": 106440 }, { "epoch": 0.8705074211882079, "grad_norm": 0.017322702333331108, "learning_rate": 6.949578802904774e-06, "loss": 0.002, "step": 106450 }, { "epoch": 0.8705891973668071, "grad_norm": 0.02320878766477108, "learning_rate": 6.948921634843627e-06, "loss": 0.0012, "step": 106460 }, { "epoch": 0.8706709735454062, "grad_norm": 0.087941013276577, "learning_rate": 6.948264427081366e-06, "loss": 0.0016, "step": 106470 }, { "epoch": 0.8707527497240054, "grad_norm": 0.02882285788655281, "learning_rate": 6.947607179631372e-06, "loss": 0.0016, "step": 106480 }, { "epoch": 0.8708345259026046, "grad_norm": 0.15495562553405762, "learning_rate": 6.946949892507037e-06, "loss": 0.0017, "step": 106490 }, { "epoch": 0.8709163020812037, "grad_norm": 0.04347090795636177, "learning_rate": 6.946292565721749e-06, "loss": 0.0014, "step": 106500 }, { "epoch": 0.8709980782598029, "grad_norm": 0.05425155162811279, "learning_rate": 6.945635199288901e-06, "loss": 0.0013, "step": 106510 }, { "epoch": 0.8710798544384021, "grad_norm": 0.10210223495960236, "learning_rate": 6.9449777932218835e-06, "loss": 0.0027, "step": 106520 }, { "epoch": 0.8711616306170012, "grad_norm": 0.026396598666906357, "learning_rate": 6.9443203475340856e-06, "loss": 0.0017, "step": 106530 }, { "epoch": 0.8712434067956004, "grad_norm": 0.08051260560750961, "learning_rate": 6.9436628622389025e-06, "loss": 0.0012, "step": 106540 }, { "epoch": 0.8713251829741996, "grad_norm": 0.04578210785984993, "learning_rate": 6.943005337349728e-06, "loss": 0.0012, "step": 106550 }, { "epoch": 0.8714069591527988, "grad_norm": 0.04979884251952171, "learning_rate": 6.9423477728799525e-06, "loss": 0.0017, "step": 106560 }, { "epoch": 0.8714887353313979, "grad_norm": 0.054618775844573975, "learning_rate": 6.941690168842976e-06, "loss": 0.0019, "step": 106570 }, { "epoch": 0.8715705115099971, "grad_norm": 0.06514421850442886, "learning_rate": 6.9410325252521925e-06, "loss": 0.0013, "step": 106580 }, { "epoch": 0.8716522876885963, "grad_norm": 0.04331188276410103, "learning_rate": 6.940374842121e-06, "loss": 0.0014, "step": 106590 }, { "epoch": 0.8717340638671955, "grad_norm": 0.049099113792181015, "learning_rate": 6.939717119462792e-06, "loss": 0.001, "step": 106600 }, { "epoch": 0.8718158400457947, "grad_norm": 0.04940013960003853, "learning_rate": 6.939059357290971e-06, "loss": 0.0013, "step": 106610 }, { "epoch": 0.8718976162243939, "grad_norm": 0.0806920975446701, "learning_rate": 6.938401555618935e-06, "loss": 0.0017, "step": 106620 }, { "epoch": 0.871979392402993, "grad_norm": 0.04218107461929321, "learning_rate": 6.937743714460083e-06, "loss": 0.0015, "step": 106630 }, { "epoch": 0.8720611685815922, "grad_norm": 0.019539669156074524, "learning_rate": 6.937085833827817e-06, "loss": 0.0013, "step": 106640 }, { "epoch": 0.8721429447601914, "grad_norm": 0.04159099608659744, "learning_rate": 6.936427913735538e-06, "loss": 0.0032, "step": 106650 }, { "epoch": 0.8722247209387906, "grad_norm": 0.07262604683637619, "learning_rate": 6.935769954196649e-06, "loss": 0.0021, "step": 106660 }, { "epoch": 0.8723064971173897, "grad_norm": 0.03712190315127373, "learning_rate": 6.935111955224552e-06, "loss": 0.0024, "step": 106670 }, { "epoch": 0.8723882732959889, "grad_norm": 0.09851130098104477, "learning_rate": 6.93445391683265e-06, "loss": 0.0018, "step": 106680 }, { "epoch": 0.8724700494745881, "grad_norm": 0.10552255809307098, "learning_rate": 6.933795839034351e-06, "loss": 0.007, "step": 106690 }, { "epoch": 0.8725518256531872, "grad_norm": 0.06383722275495529, "learning_rate": 6.9331377218430575e-06, "loss": 0.0013, "step": 106700 }, { "epoch": 0.8726336018317864, "grad_norm": 0.14700187742710114, "learning_rate": 6.93247956527218e-06, "loss": 0.0016, "step": 106710 }, { "epoch": 0.8727153780103856, "grad_norm": 0.06610540300607681, "learning_rate": 6.931821369335121e-06, "loss": 0.0012, "step": 106720 }, { "epoch": 0.8727971541889847, "grad_norm": 0.029498709365725517, "learning_rate": 6.931163134045291e-06, "loss": 0.0029, "step": 106730 }, { "epoch": 0.8728789303675839, "grad_norm": 0.03533543273806572, "learning_rate": 6.930504859416096e-06, "loss": 0.0012, "step": 106740 }, { "epoch": 0.8729607065461831, "grad_norm": 0.1333547979593277, "learning_rate": 6.929846545460951e-06, "loss": 0.0015, "step": 106750 }, { "epoch": 0.8730424827247822, "grad_norm": 0.11677832901477814, "learning_rate": 6.929188192193262e-06, "loss": 0.0019, "step": 106760 }, { "epoch": 0.8731242589033814, "grad_norm": 0.012479768134653568, "learning_rate": 6.928529799626442e-06, "loss": 0.0008, "step": 106770 }, { "epoch": 0.8732060350819806, "grad_norm": 0.039394546300172806, "learning_rate": 6.9278713677739e-06, "loss": 0.0016, "step": 106780 }, { "epoch": 0.8732878112605797, "grad_norm": 0.07971654832363129, "learning_rate": 6.927212896649054e-06, "loss": 0.0026, "step": 106790 }, { "epoch": 0.8733695874391789, "grad_norm": 0.047490574419498444, "learning_rate": 6.926554386265313e-06, "loss": 0.0021, "step": 106800 }, { "epoch": 0.8734513636177781, "grad_norm": 0.11424040049314499, "learning_rate": 6.925895836636094e-06, "loss": 0.0016, "step": 106810 }, { "epoch": 0.8735331397963774, "grad_norm": 0.05944684520363808, "learning_rate": 6.925237247774811e-06, "loss": 0.0016, "step": 106820 }, { "epoch": 0.8736149159749765, "grad_norm": 0.03721550852060318, "learning_rate": 6.9245786196948815e-06, "loss": 0.0027, "step": 106830 }, { "epoch": 0.8736966921535757, "grad_norm": 0.016549723222851753, "learning_rate": 6.92391995240972e-06, "loss": 0.0014, "step": 106840 }, { "epoch": 0.8737784683321749, "grad_norm": 0.03155979514122009, "learning_rate": 6.923261245932745e-06, "loss": 0.0014, "step": 106850 }, { "epoch": 0.873860244510774, "grad_norm": 0.03788137808442116, "learning_rate": 6.922602500277376e-06, "loss": 0.001, "step": 106860 }, { "epoch": 0.8739420206893732, "grad_norm": 0.02099214866757393, "learning_rate": 6.921943715457031e-06, "loss": 0.0014, "step": 106870 }, { "epoch": 0.8740237968679724, "grad_norm": 0.03877878561615944, "learning_rate": 6.921284891485129e-06, "loss": 0.0017, "step": 106880 }, { "epoch": 0.8741055730465715, "grad_norm": 0.15537628531455994, "learning_rate": 6.920626028375094e-06, "loss": 0.0014, "step": 106890 }, { "epoch": 0.8741873492251707, "grad_norm": 0.15543578565120697, "learning_rate": 6.919967126140346e-06, "loss": 0.0017, "step": 106900 }, { "epoch": 0.8742691254037699, "grad_norm": 0.02400175854563713, "learning_rate": 6.919308184794305e-06, "loss": 0.0011, "step": 106910 }, { "epoch": 0.8743509015823691, "grad_norm": 0.04946797341108322, "learning_rate": 6.918649204350396e-06, "loss": 0.0025, "step": 106920 }, { "epoch": 0.8744326777609682, "grad_norm": 0.06972391158342361, "learning_rate": 6.917990184822045e-06, "loss": 0.0027, "step": 106930 }, { "epoch": 0.8745144539395674, "grad_norm": 0.014867939986288548, "learning_rate": 6.917331126222674e-06, "loss": 0.0019, "step": 106940 }, { "epoch": 0.8745962301181666, "grad_norm": 0.15503479540348053, "learning_rate": 6.91667202856571e-06, "loss": 0.0017, "step": 106950 }, { "epoch": 0.8746780062967657, "grad_norm": 0.09061739593744278, "learning_rate": 6.916012891864578e-06, "loss": 0.0015, "step": 106960 }, { "epoch": 0.8747597824753649, "grad_norm": 0.15034295618534088, "learning_rate": 6.9153537161327065e-06, "loss": 0.0032, "step": 106970 }, { "epoch": 0.8748415586539641, "grad_norm": 0.005853181704878807, "learning_rate": 6.914694501383523e-06, "loss": 0.0008, "step": 106980 }, { "epoch": 0.8749233348325632, "grad_norm": 0.29597902297973633, "learning_rate": 6.914035247630456e-06, "loss": 0.0089, "step": 106990 }, { "epoch": 0.8750051110111624, "grad_norm": 0.008570664562284946, "learning_rate": 6.913375954886934e-06, "loss": 0.0014, "step": 107000 }, { "epoch": 0.8750868871897616, "grad_norm": 0.11203950643539429, "learning_rate": 6.912716623166391e-06, "loss": 0.003, "step": 107010 }, { "epoch": 0.8751686633683607, "grad_norm": 0.046602047979831696, "learning_rate": 6.912057252482254e-06, "loss": 0.0016, "step": 107020 }, { "epoch": 0.87525043954696, "grad_norm": 0.10259818285703659, "learning_rate": 6.911397842847958e-06, "loss": 0.0017, "step": 107030 }, { "epoch": 0.8753322157255592, "grad_norm": 0.0851420983672142, "learning_rate": 6.910738394276932e-06, "loss": 0.0015, "step": 107040 }, { "epoch": 0.8754139919041584, "grad_norm": 0.08363012224435806, "learning_rate": 6.910078906782613e-06, "loss": 0.0019, "step": 107050 }, { "epoch": 0.8754957680827575, "grad_norm": 0.026029760017991066, "learning_rate": 6.909419380378434e-06, "loss": 0.0013, "step": 107060 }, { "epoch": 0.8755775442613567, "grad_norm": 0.09031818807125092, "learning_rate": 6.908759815077832e-06, "loss": 0.0019, "step": 107070 }, { "epoch": 0.8756593204399559, "grad_norm": 0.08777855336666107, "learning_rate": 6.908100210894238e-06, "loss": 0.003, "step": 107080 }, { "epoch": 0.875741096618555, "grad_norm": 0.09939534217119217, "learning_rate": 6.907440567841093e-06, "loss": 0.0028, "step": 107090 }, { "epoch": 0.8758228727971542, "grad_norm": 0.03867356851696968, "learning_rate": 6.906780885931834e-06, "loss": 0.0012, "step": 107100 }, { "epoch": 0.8759046489757534, "grad_norm": 0.03278437256813049, "learning_rate": 6.9061211651798965e-06, "loss": 0.0017, "step": 107110 }, { "epoch": 0.8759864251543525, "grad_norm": 0.04027596861124039, "learning_rate": 6.905461405598723e-06, "loss": 0.0014, "step": 107120 }, { "epoch": 0.8760682013329517, "grad_norm": 0.08229346573352814, "learning_rate": 6.904801607201752e-06, "loss": 0.0015, "step": 107130 }, { "epoch": 0.8761499775115509, "grad_norm": 0.10616220533847809, "learning_rate": 6.9041417700024236e-06, "loss": 0.0032, "step": 107140 }, { "epoch": 0.87623175369015, "grad_norm": 0.034055959433317184, "learning_rate": 6.903481894014179e-06, "loss": 0.0091, "step": 107150 }, { "epoch": 0.8763135298687492, "grad_norm": 0.09114623069763184, "learning_rate": 6.902821979250461e-06, "loss": 0.0016, "step": 107160 }, { "epoch": 0.8763953060473484, "grad_norm": 0.09947092086076736, "learning_rate": 6.9021620257247124e-06, "loss": 0.0013, "step": 107170 }, { "epoch": 0.8764770822259476, "grad_norm": 0.09258384257555008, "learning_rate": 6.9015020334503776e-06, "loss": 0.0018, "step": 107180 }, { "epoch": 0.8765588584045467, "grad_norm": 0.0939384177327156, "learning_rate": 6.9008420024409016e-06, "loss": 0.0018, "step": 107190 }, { "epoch": 0.8766406345831459, "grad_norm": 0.006484206300228834, "learning_rate": 6.900181932709729e-06, "loss": 0.0023, "step": 107200 }, { "epoch": 0.8767224107617451, "grad_norm": 0.010097651742398739, "learning_rate": 6.8995218242703045e-06, "loss": 0.0011, "step": 107210 }, { "epoch": 0.8768041869403442, "grad_norm": 0.04558032378554344, "learning_rate": 6.8988616771360775e-06, "loss": 0.0024, "step": 107220 }, { "epoch": 0.8768859631189434, "grad_norm": 0.08070600777864456, "learning_rate": 6.8982014913204934e-06, "loss": 0.0029, "step": 107230 }, { "epoch": 0.8769677392975427, "grad_norm": 0.0377863310277462, "learning_rate": 6.897541266837003e-06, "loss": 0.0014, "step": 107240 }, { "epoch": 0.8770495154761418, "grad_norm": 0.008439616300165653, "learning_rate": 6.896881003699055e-06, "loss": 0.0031, "step": 107250 }, { "epoch": 0.877131291654741, "grad_norm": 0.033622559159994125, "learning_rate": 6.896220701920099e-06, "loss": 0.0031, "step": 107260 }, { "epoch": 0.8772130678333402, "grad_norm": 0.052456386387348175, "learning_rate": 6.895560361513585e-06, "loss": 0.0012, "step": 107270 }, { "epoch": 0.8772948440119394, "grad_norm": 0.07279093563556671, "learning_rate": 6.894899982492966e-06, "loss": 0.0022, "step": 107280 }, { "epoch": 0.8773766201905385, "grad_norm": 0.003215770237147808, "learning_rate": 6.894239564871694e-06, "loss": 0.0019, "step": 107290 }, { "epoch": 0.8774583963691377, "grad_norm": 0.1500951647758484, "learning_rate": 6.893579108663222e-06, "loss": 0.0039, "step": 107300 }, { "epoch": 0.8775401725477369, "grad_norm": 0.007459989748895168, "learning_rate": 6.892918613881006e-06, "loss": 0.0012, "step": 107310 }, { "epoch": 0.877621948726336, "grad_norm": 1.1529875993728638, "learning_rate": 6.892258080538498e-06, "loss": 0.0026, "step": 107320 }, { "epoch": 0.8777037249049352, "grad_norm": 0.06330493837594986, "learning_rate": 6.891597508649155e-06, "loss": 0.002, "step": 107330 }, { "epoch": 0.8777855010835344, "grad_norm": 0.0763983204960823, "learning_rate": 6.890936898226433e-06, "loss": 0.001, "step": 107340 }, { "epoch": 0.8778672772621335, "grad_norm": 0.0056594558991491795, "learning_rate": 6.8902762492837895e-06, "loss": 0.002, "step": 107350 }, { "epoch": 0.8779490534407327, "grad_norm": 0.04775898903608322, "learning_rate": 6.889615561834682e-06, "loss": 0.0021, "step": 107360 }, { "epoch": 0.8780308296193319, "grad_norm": 0.05055097118020058, "learning_rate": 6.8889548358925705e-06, "loss": 0.0011, "step": 107370 }, { "epoch": 0.878112605797931, "grad_norm": 0.13472293317317963, "learning_rate": 6.888294071470914e-06, "loss": 0.0013, "step": 107380 }, { "epoch": 0.8781943819765302, "grad_norm": 0.04042058438062668, "learning_rate": 6.887633268583169e-06, "loss": 0.0022, "step": 107390 }, { "epoch": 0.8782761581551294, "grad_norm": 0.09205321967601776, "learning_rate": 6.886972427242803e-06, "loss": 0.0011, "step": 107400 }, { "epoch": 0.8783579343337286, "grad_norm": 0.05775652453303337, "learning_rate": 6.886311547463274e-06, "loss": 0.0013, "step": 107410 }, { "epoch": 0.8784397105123277, "grad_norm": 0.04079585149884224, "learning_rate": 6.885650629258045e-06, "loss": 0.0017, "step": 107420 }, { "epoch": 0.8785214866909269, "grad_norm": 0.0214751735329628, "learning_rate": 6.884989672640582e-06, "loss": 0.0021, "step": 107430 }, { "epoch": 0.8786032628695261, "grad_norm": 0.11190391331911087, "learning_rate": 6.8843286776243455e-06, "loss": 0.002, "step": 107440 }, { "epoch": 0.8786850390481252, "grad_norm": 0.09001140296459198, "learning_rate": 6.8836676442228025e-06, "loss": 0.0008, "step": 107450 }, { "epoch": 0.8787668152267245, "grad_norm": 0.022385485470294952, "learning_rate": 6.883006572449418e-06, "loss": 0.002, "step": 107460 }, { "epoch": 0.8788485914053237, "grad_norm": 0.002282550325617194, "learning_rate": 6.882345462317659e-06, "loss": 0.0009, "step": 107470 }, { "epoch": 0.8789303675839228, "grad_norm": 0.08432559669017792, "learning_rate": 6.881684313840993e-06, "loss": 0.0011, "step": 107480 }, { "epoch": 0.879012143762522, "grad_norm": 0.162089541554451, "learning_rate": 6.881023127032889e-06, "loss": 0.0021, "step": 107490 }, { "epoch": 0.8790939199411212, "grad_norm": 0.09636601060628891, "learning_rate": 6.880361901906815e-06, "loss": 0.0016, "step": 107500 }, { "epoch": 0.8791756961197204, "grad_norm": 0.014723604544997215, "learning_rate": 6.879700638476239e-06, "loss": 0.0016, "step": 107510 }, { "epoch": 0.8792574722983195, "grad_norm": 0.03138267621397972, "learning_rate": 6.879039336754634e-06, "loss": 0.0028, "step": 107520 }, { "epoch": 0.8793392484769187, "grad_norm": 0.017434390261769295, "learning_rate": 6.87837799675547e-06, "loss": 0.0015, "step": 107530 }, { "epoch": 0.8794210246555179, "grad_norm": 0.09970039129257202, "learning_rate": 6.8777166184922195e-06, "loss": 0.0017, "step": 107540 }, { "epoch": 0.879502800834117, "grad_norm": 0.30239611864089966, "learning_rate": 6.8770552019783554e-06, "loss": 0.0013, "step": 107550 }, { "epoch": 0.8795845770127162, "grad_norm": 0.09534364938735962, "learning_rate": 6.876393747227352e-06, "loss": 0.0012, "step": 107560 }, { "epoch": 0.8796663531913154, "grad_norm": 0.033263467252254486, "learning_rate": 6.8757322542526805e-06, "loss": 0.0016, "step": 107570 }, { "epoch": 0.8797481293699145, "grad_norm": 0.04776487872004509, "learning_rate": 6.8750707230678205e-06, "loss": 0.0022, "step": 107580 }, { "epoch": 0.8798299055485137, "grad_norm": 0.06588605046272278, "learning_rate": 6.874409153686244e-06, "loss": 0.001, "step": 107590 }, { "epoch": 0.8799116817271129, "grad_norm": 0.039038002490997314, "learning_rate": 6.873747546121431e-06, "loss": 0.0012, "step": 107600 }, { "epoch": 0.879993457905712, "grad_norm": 0.025296377018094063, "learning_rate": 6.873085900386858e-06, "loss": 0.0007, "step": 107610 }, { "epoch": 0.8800752340843112, "grad_norm": 0.06784768402576447, "learning_rate": 6.872424216496001e-06, "loss": 0.0019, "step": 107620 }, { "epoch": 0.8801570102629104, "grad_norm": 0.04639407619833946, "learning_rate": 6.871762494462341e-06, "loss": 0.0011, "step": 107630 }, { "epoch": 0.8802387864415095, "grad_norm": 0.09806643426418304, "learning_rate": 6.871100734299357e-06, "loss": 0.0015, "step": 107640 }, { "epoch": 0.8803205626201087, "grad_norm": 0.05340098962187767, "learning_rate": 6.8704389360205305e-06, "loss": 0.0019, "step": 107650 }, { "epoch": 0.8804023387987079, "grad_norm": 0.017576009035110474, "learning_rate": 6.869777099639343e-06, "loss": 0.0012, "step": 107660 }, { "epoch": 0.8804841149773072, "grad_norm": 0.030575614422559738, "learning_rate": 6.869115225169276e-06, "loss": 0.0009, "step": 107670 }, { "epoch": 0.8805658911559063, "grad_norm": 0.0054940409027040005, "learning_rate": 6.868453312623814e-06, "loss": 0.0015, "step": 107680 }, { "epoch": 0.8806476673345055, "grad_norm": 0.02590702846646309, "learning_rate": 6.867791362016437e-06, "loss": 0.0021, "step": 107690 }, { "epoch": 0.8807294435131047, "grad_norm": 0.0014613046078011394, "learning_rate": 6.867129373360632e-06, "loss": 0.0015, "step": 107700 }, { "epoch": 0.8808112196917038, "grad_norm": 0.029047789052128792, "learning_rate": 6.866467346669885e-06, "loss": 0.0012, "step": 107710 }, { "epoch": 0.880892995870303, "grad_norm": 0.012704821303486824, "learning_rate": 6.86580528195768e-06, "loss": 0.0009, "step": 107720 }, { "epoch": 0.8809747720489022, "grad_norm": 0.11026623845100403, "learning_rate": 6.865143179237505e-06, "loss": 0.0014, "step": 107730 }, { "epoch": 0.8810565482275013, "grad_norm": 0.09908763319253922, "learning_rate": 6.864481038522849e-06, "loss": 0.0017, "step": 107740 }, { "epoch": 0.8811383244061005, "grad_norm": 0.01709401048719883, "learning_rate": 6.863818859827197e-06, "loss": 0.0023, "step": 107750 }, { "epoch": 0.8812201005846997, "grad_norm": 0.0704055055975914, "learning_rate": 6.863156643164041e-06, "loss": 0.001, "step": 107760 }, { "epoch": 0.8813018767632989, "grad_norm": 0.011621961370110512, "learning_rate": 6.862494388546868e-06, "loss": 0.0013, "step": 107770 }, { "epoch": 0.881383652941898, "grad_norm": 0.0770491287112236, "learning_rate": 6.861832095989171e-06, "loss": 0.0018, "step": 107780 }, { "epoch": 0.8814654291204972, "grad_norm": 0.11301235854625702, "learning_rate": 6.861169765504443e-06, "loss": 0.0015, "step": 107790 }, { "epoch": 0.8815472052990964, "grad_norm": 0.08191944658756256, "learning_rate": 6.860507397106172e-06, "loss": 0.0017, "step": 107800 }, { "epoch": 0.8816289814776955, "grad_norm": 0.004220790695399046, "learning_rate": 6.8598449908078525e-06, "loss": 0.0011, "step": 107810 }, { "epoch": 0.8817107576562947, "grad_norm": 0.04795852303504944, "learning_rate": 6.85918254662298e-06, "loss": 0.0019, "step": 107820 }, { "epoch": 0.8817925338348939, "grad_norm": 0.06339829415082932, "learning_rate": 6.858520064565046e-06, "loss": 0.0009, "step": 107830 }, { "epoch": 0.881874310013493, "grad_norm": 0.07598251849412918, "learning_rate": 6.85785754464755e-06, "loss": 0.0008, "step": 107840 }, { "epoch": 0.8819560861920922, "grad_norm": 0.04571510851383209, "learning_rate": 6.857194986883985e-06, "loss": 0.0022, "step": 107850 }, { "epoch": 0.8820378623706914, "grad_norm": 0.05076493322849274, "learning_rate": 6.856532391287849e-06, "loss": 0.0016, "step": 107860 }, { "epoch": 0.8821196385492905, "grad_norm": 0.049102310091257095, "learning_rate": 6.855869757872637e-06, "loss": 0.0014, "step": 107870 }, { "epoch": 0.8822014147278898, "grad_norm": 0.03326069191098213, "learning_rate": 6.855207086651852e-06, "loss": 0.0022, "step": 107880 }, { "epoch": 0.882283190906489, "grad_norm": 0.10892632603645325, "learning_rate": 6.854544377638989e-06, "loss": 0.0011, "step": 107890 }, { "epoch": 0.8823649670850882, "grad_norm": 0.06390148401260376, "learning_rate": 6.85388163084755e-06, "loss": 0.0018, "step": 107900 }, { "epoch": 0.8824467432636873, "grad_norm": 0.035280391573905945, "learning_rate": 6.853218846291035e-06, "loss": 0.0025, "step": 107910 }, { "epoch": 0.8825285194422865, "grad_norm": 0.03422386571764946, "learning_rate": 6.852556023982947e-06, "loss": 0.0009, "step": 107920 }, { "epoch": 0.8826102956208857, "grad_norm": 0.039611537009477615, "learning_rate": 6.851893163936785e-06, "loss": 0.0011, "step": 107930 }, { "epoch": 0.8826920717994848, "grad_norm": 0.01709209755063057, "learning_rate": 6.851230266166056e-06, "loss": 0.002, "step": 107940 }, { "epoch": 0.882773847978084, "grad_norm": 0.07011476904153824, "learning_rate": 6.8505673306842594e-06, "loss": 0.0009, "step": 107950 }, { "epoch": 0.8828556241566832, "grad_norm": 0.04662817344069481, "learning_rate": 6.849904357504904e-06, "loss": 0.0018, "step": 107960 }, { "epoch": 0.8829374003352823, "grad_norm": 0.0021804776042699814, "learning_rate": 6.849241346641493e-06, "loss": 0.0008, "step": 107970 }, { "epoch": 0.8830191765138815, "grad_norm": 0.022656898945569992, "learning_rate": 6.848578298107532e-06, "loss": 0.0019, "step": 107980 }, { "epoch": 0.8831009526924807, "grad_norm": 0.02936732955276966, "learning_rate": 6.847915211916528e-06, "loss": 0.001, "step": 107990 }, { "epoch": 0.8831827288710798, "grad_norm": 0.04178357124328613, "learning_rate": 6.847252088081991e-06, "loss": 0.0028, "step": 108000 }, { "epoch": 0.883264505049679, "grad_norm": 0.030610114336013794, "learning_rate": 6.846588926617425e-06, "loss": 0.0014, "step": 108010 }, { "epoch": 0.8833462812282782, "grad_norm": 0.0075703030452132225, "learning_rate": 6.845925727536342e-06, "loss": 0.003, "step": 108020 }, { "epoch": 0.8834280574068774, "grad_norm": 0.10424458980560303, "learning_rate": 6.845262490852254e-06, "loss": 0.0018, "step": 108030 }, { "epoch": 0.8835098335854765, "grad_norm": 0.04104026407003403, "learning_rate": 6.844599216578667e-06, "loss": 0.0026, "step": 108040 }, { "epoch": 0.8835916097640757, "grad_norm": 0.045358847826719284, "learning_rate": 6.843935904729093e-06, "loss": 0.0029, "step": 108050 }, { "epoch": 0.8836733859426749, "grad_norm": 0.0366031788289547, "learning_rate": 6.8432725553170485e-06, "loss": 0.0011, "step": 108060 }, { "epoch": 0.883755162121274, "grad_norm": 0.1742670238018036, "learning_rate": 6.842609168356042e-06, "loss": 0.0018, "step": 108070 }, { "epoch": 0.8838369382998732, "grad_norm": 0.07369711995124817, "learning_rate": 6.84194574385959e-06, "loss": 0.0028, "step": 108080 }, { "epoch": 0.8839187144784724, "grad_norm": 0.03872724622488022, "learning_rate": 6.8412822818412075e-06, "loss": 0.0013, "step": 108090 }, { "epoch": 0.8840004906570716, "grad_norm": 0.05002765357494354, "learning_rate": 6.840618782314407e-06, "loss": 0.001, "step": 108100 }, { "epoch": 0.8840822668356708, "grad_norm": 0.05756249651312828, "learning_rate": 6.839955245292704e-06, "loss": 0.0013, "step": 108110 }, { "epoch": 0.88416404301427, "grad_norm": 0.031669940799474716, "learning_rate": 6.83929167078962e-06, "loss": 0.0014, "step": 108120 }, { "epoch": 0.8842458191928692, "grad_norm": 0.016905268654227257, "learning_rate": 6.8386280588186675e-06, "loss": 0.0022, "step": 108130 }, { "epoch": 0.8843275953714683, "grad_norm": 0.048262983560562134, "learning_rate": 6.8379644093933676e-06, "loss": 0.0014, "step": 108140 }, { "epoch": 0.8844093715500675, "grad_norm": 0.0036413846537470818, "learning_rate": 6.8373007225272395e-06, "loss": 0.004, "step": 108150 }, { "epoch": 0.8844911477286667, "grad_norm": 0.01720391772687435, "learning_rate": 6.836636998233801e-06, "loss": 0.0011, "step": 108160 }, { "epoch": 0.8845729239072658, "grad_norm": 0.05665495991706848, "learning_rate": 6.835973236526574e-06, "loss": 0.0012, "step": 108170 }, { "epoch": 0.884654700085865, "grad_norm": 0.0654551312327385, "learning_rate": 6.835309437419081e-06, "loss": 0.0016, "step": 108180 }, { "epoch": 0.8847364762644642, "grad_norm": 0.0824538841843605, "learning_rate": 6.8346456009248405e-06, "loss": 0.0016, "step": 108190 }, { "epoch": 0.8848182524430633, "grad_norm": 0.026436805725097656, "learning_rate": 6.83398172705738e-06, "loss": 0.002, "step": 108200 }, { "epoch": 0.8849000286216625, "grad_norm": 0.03425956144928932, "learning_rate": 6.833317815830221e-06, "loss": 0.0019, "step": 108210 }, { "epoch": 0.8849818048002617, "grad_norm": 0.020913755521178246, "learning_rate": 6.832653867256888e-06, "loss": 0.0017, "step": 108220 }, { "epoch": 0.8850635809788608, "grad_norm": 0.034771937876939774, "learning_rate": 6.831989881350905e-06, "loss": 0.0015, "step": 108230 }, { "epoch": 0.88514535715746, "grad_norm": 0.05678741633892059, "learning_rate": 6.831325858125799e-06, "loss": 0.0015, "step": 108240 }, { "epoch": 0.8852271333360592, "grad_norm": 0.10171013325452805, "learning_rate": 6.830661797595097e-06, "loss": 0.0018, "step": 108250 }, { "epoch": 0.8853089095146583, "grad_norm": 0.007443380542099476, "learning_rate": 6.829997699772326e-06, "loss": 0.0019, "step": 108260 }, { "epoch": 0.8853906856932575, "grad_norm": 0.05402359738945961, "learning_rate": 6.8293335646710155e-06, "loss": 0.0018, "step": 108270 }, { "epoch": 0.8854724618718567, "grad_norm": 0.05578259751200676, "learning_rate": 6.828669392304693e-06, "loss": 0.0019, "step": 108280 }, { "epoch": 0.8855542380504559, "grad_norm": 0.03533506765961647, "learning_rate": 6.828005182686888e-06, "loss": 0.0009, "step": 108290 }, { "epoch": 0.885636014229055, "grad_norm": 0.08237937837839127, "learning_rate": 6.827340935831131e-06, "loss": 0.0016, "step": 108300 }, { "epoch": 0.8857177904076543, "grad_norm": 0.03180750831961632, "learning_rate": 6.826676651750954e-06, "loss": 0.0011, "step": 108310 }, { "epoch": 0.8857995665862535, "grad_norm": 0.08016997575759888, "learning_rate": 6.826012330459891e-06, "loss": 0.0018, "step": 108320 }, { "epoch": 0.8858813427648526, "grad_norm": 0.25658202171325684, "learning_rate": 6.82534797197147e-06, "loss": 0.0015, "step": 108330 }, { "epoch": 0.8859631189434518, "grad_norm": 0.032233137637376785, "learning_rate": 6.824683576299228e-06, "loss": 0.0039, "step": 108340 }, { "epoch": 0.886044895122051, "grad_norm": 0.010422333143651485, "learning_rate": 6.824019143456697e-06, "loss": 0.0032, "step": 108350 }, { "epoch": 0.8861266713006501, "grad_norm": 0.09022430330514908, "learning_rate": 6.8233546734574156e-06, "loss": 0.0015, "step": 108360 }, { "epoch": 0.8862084474792493, "grad_norm": 0.008982970379292965, "learning_rate": 6.822690166314916e-06, "loss": 0.001, "step": 108370 }, { "epoch": 0.8862902236578485, "grad_norm": 0.11160280555486679, "learning_rate": 6.822025622042736e-06, "loss": 0.0017, "step": 108380 }, { "epoch": 0.8863719998364477, "grad_norm": 0.03016880340874195, "learning_rate": 6.821361040654415e-06, "loss": 0.002, "step": 108390 }, { "epoch": 0.8864537760150468, "grad_norm": 0.047077883034944534, "learning_rate": 6.820696422163487e-06, "loss": 0.0008, "step": 108400 }, { "epoch": 0.886535552193646, "grad_norm": 0.15121954679489136, "learning_rate": 6.820031766583494e-06, "loss": 0.0017, "step": 108410 }, { "epoch": 0.8866173283722452, "grad_norm": 0.08199520409107208, "learning_rate": 6.8193670739279735e-06, "loss": 0.0013, "step": 108420 }, { "epoch": 0.8866991045508443, "grad_norm": 0.3133271634578705, "learning_rate": 6.818702344210469e-06, "loss": 0.0016, "step": 108430 }, { "epoch": 0.8867808807294435, "grad_norm": 0.05693161487579346, "learning_rate": 6.818037577444517e-06, "loss": 0.0014, "step": 108440 }, { "epoch": 0.8868626569080427, "grad_norm": 0.0772382840514183, "learning_rate": 6.817372773643663e-06, "loss": 0.002, "step": 108450 }, { "epoch": 0.8869444330866418, "grad_norm": 0.1447361260652542, "learning_rate": 6.816707932821449e-06, "loss": 0.0016, "step": 108460 }, { "epoch": 0.887026209265241, "grad_norm": 0.04565304145216942, "learning_rate": 6.816043054991416e-06, "loss": 0.0023, "step": 108470 }, { "epoch": 0.8871079854438402, "grad_norm": 0.05359319597482681, "learning_rate": 6.81537814016711e-06, "loss": 0.0021, "step": 108480 }, { "epoch": 0.8871897616224393, "grad_norm": 0.011251126416027546, "learning_rate": 6.814713188362076e-06, "loss": 0.0016, "step": 108490 }, { "epoch": 0.8872715378010385, "grad_norm": 0.02318955399096012, "learning_rate": 6.814048199589861e-06, "loss": 0.0015, "step": 108500 }, { "epoch": 0.8873533139796377, "grad_norm": 0.05216340348124504, "learning_rate": 6.813383173864008e-06, "loss": 0.0028, "step": 108510 }, { "epoch": 0.887435090158237, "grad_norm": 0.1389438807964325, "learning_rate": 6.812718111198067e-06, "loss": 0.0016, "step": 108520 }, { "epoch": 0.8875168663368361, "grad_norm": 0.14553602039813995, "learning_rate": 6.812053011605584e-06, "loss": 0.0017, "step": 108530 }, { "epoch": 0.8875986425154353, "grad_norm": 0.04376359283924103, "learning_rate": 6.811387875100107e-06, "loss": 0.0021, "step": 108540 }, { "epoch": 0.8876804186940345, "grad_norm": 0.028094567358493805, "learning_rate": 6.810722701695188e-06, "loss": 0.0013, "step": 108550 }, { "epoch": 0.8877621948726336, "grad_norm": 0.01702945865690708, "learning_rate": 6.810057491404374e-06, "loss": 0.0025, "step": 108560 }, { "epoch": 0.8878439710512328, "grad_norm": 0.04874099791049957, "learning_rate": 6.8093922442412195e-06, "loss": 0.0012, "step": 108570 }, { "epoch": 0.887925747229832, "grad_norm": 0.03389471396803856, "learning_rate": 6.808726960219274e-06, "loss": 0.0012, "step": 108580 }, { "epoch": 0.8880075234084311, "grad_norm": 0.026380641385912895, "learning_rate": 6.808061639352091e-06, "loss": 0.0014, "step": 108590 }, { "epoch": 0.8880892995870303, "grad_norm": 0.03817233070731163, "learning_rate": 6.807396281653221e-06, "loss": 0.0009, "step": 108600 }, { "epoch": 0.8881710757656295, "grad_norm": 0.07629255205392838, "learning_rate": 6.806730887136219e-06, "loss": 0.0017, "step": 108610 }, { "epoch": 0.8882528519442286, "grad_norm": 0.07690459489822388, "learning_rate": 6.806065455814641e-06, "loss": 0.0021, "step": 108620 }, { "epoch": 0.8883346281228278, "grad_norm": 0.04091205075383186, "learning_rate": 6.805399987702043e-06, "loss": 0.0017, "step": 108630 }, { "epoch": 0.888416404301427, "grad_norm": 0.15591378509998322, "learning_rate": 6.804734482811979e-06, "loss": 0.0019, "step": 108640 }, { "epoch": 0.8884981804800262, "grad_norm": 0.032364923506975174, "learning_rate": 6.804068941158006e-06, "loss": 0.001, "step": 108650 }, { "epoch": 0.8885799566586253, "grad_norm": 0.007394295185804367, "learning_rate": 6.803403362753682e-06, "loss": 0.001, "step": 108660 }, { "epoch": 0.8886617328372245, "grad_norm": 0.08517186343669891, "learning_rate": 6.8027377476125665e-06, "loss": 0.0022, "step": 108670 }, { "epoch": 0.8887435090158237, "grad_norm": 0.009413342922925949, "learning_rate": 6.802072095748217e-06, "loss": 0.0017, "step": 108680 }, { "epoch": 0.8888252851944228, "grad_norm": 0.04822613671422005, "learning_rate": 6.801406407174195e-06, "loss": 0.001, "step": 108690 }, { "epoch": 0.888907061373022, "grad_norm": 0.061079371720552444, "learning_rate": 6.80074068190406e-06, "loss": 0.001, "step": 108700 }, { "epoch": 0.8889888375516212, "grad_norm": 0.016518980264663696, "learning_rate": 6.8000749199513735e-06, "loss": 0.0011, "step": 108710 }, { "epoch": 0.8890706137302203, "grad_norm": 0.12386152893304825, "learning_rate": 6.799409121329697e-06, "loss": 0.0017, "step": 108720 }, { "epoch": 0.8891523899088195, "grad_norm": 0.011298500932753086, "learning_rate": 6.798743286052595e-06, "loss": 0.0015, "step": 108730 }, { "epoch": 0.8892341660874188, "grad_norm": 0.05943495035171509, "learning_rate": 6.798077414133629e-06, "loss": 0.0014, "step": 108740 }, { "epoch": 0.889315942266018, "grad_norm": 0.10687505453824997, "learning_rate": 6.797411505586365e-06, "loss": 0.001, "step": 108750 }, { "epoch": 0.8893977184446171, "grad_norm": 0.04907141253352165, "learning_rate": 6.796745560424369e-06, "loss": 0.001, "step": 108760 }, { "epoch": 0.8894794946232163, "grad_norm": 0.027003414928913116, "learning_rate": 6.796079578661204e-06, "loss": 0.0012, "step": 108770 }, { "epoch": 0.8895612708018155, "grad_norm": 0.09905006736516953, "learning_rate": 6.795413560310439e-06, "loss": 0.0021, "step": 108780 }, { "epoch": 0.8896430469804146, "grad_norm": 0.13808809220790863, "learning_rate": 6.794747505385639e-06, "loss": 0.0017, "step": 108790 }, { "epoch": 0.8897248231590138, "grad_norm": 0.0958363264799118, "learning_rate": 6.794081413900374e-06, "loss": 0.0008, "step": 108800 }, { "epoch": 0.889806599337613, "grad_norm": 0.019692549481987953, "learning_rate": 6.793415285868213e-06, "loss": 0.0016, "step": 108810 }, { "epoch": 0.8898883755162121, "grad_norm": 0.11747201532125473, "learning_rate": 6.792749121302725e-06, "loss": 0.002, "step": 108820 }, { "epoch": 0.8899701516948113, "grad_norm": 0.04416310042142868, "learning_rate": 6.7920829202174796e-06, "loss": 0.0016, "step": 108830 }, { "epoch": 0.8900519278734105, "grad_norm": 0.044928163290023804, "learning_rate": 6.791416682626049e-06, "loss": 0.0014, "step": 108840 }, { "epoch": 0.8901337040520096, "grad_norm": 0.1032390147447586, "learning_rate": 6.790750408542004e-06, "loss": 0.0012, "step": 108850 }, { "epoch": 0.8902154802306088, "grad_norm": 0.05794624984264374, "learning_rate": 6.790084097978916e-06, "loss": 0.0015, "step": 108860 }, { "epoch": 0.890297256409208, "grad_norm": 0.03224949538707733, "learning_rate": 6.7894177509503625e-06, "loss": 0.0022, "step": 108870 }, { "epoch": 0.8903790325878072, "grad_norm": 0.08457336574792862, "learning_rate": 6.788751367469913e-06, "loss": 0.0019, "step": 108880 }, { "epoch": 0.8904608087664063, "grad_norm": 0.1898285299539566, "learning_rate": 6.788084947551146e-06, "loss": 0.0021, "step": 108890 }, { "epoch": 0.8905425849450055, "grad_norm": 0.11127657443284988, "learning_rate": 6.787418491207634e-06, "loss": 0.0019, "step": 108900 }, { "epoch": 0.8906243611236047, "grad_norm": 0.12123270332813263, "learning_rate": 6.786751998452954e-06, "loss": 0.0013, "step": 108910 }, { "epoch": 0.8907061373022038, "grad_norm": 0.03643696755170822, "learning_rate": 6.786085469300684e-06, "loss": 0.0012, "step": 108920 }, { "epoch": 0.890787913480803, "grad_norm": 0.02137971855700016, "learning_rate": 6.7854189037644014e-06, "loss": 0.0013, "step": 108930 }, { "epoch": 0.8908696896594022, "grad_norm": 0.03099953942000866, "learning_rate": 6.784752301857685e-06, "loss": 0.002, "step": 108940 }, { "epoch": 0.8909514658380014, "grad_norm": 0.029305588454008102, "learning_rate": 6.784085663594113e-06, "loss": 0.0013, "step": 108950 }, { "epoch": 0.8910332420166006, "grad_norm": 0.06429821252822876, "learning_rate": 6.783418988987265e-06, "loss": 0.001, "step": 108960 }, { "epoch": 0.8911150181951998, "grad_norm": 0.04499805346131325, "learning_rate": 6.782752278050724e-06, "loss": 0.0023, "step": 108970 }, { "epoch": 0.891196794373799, "grad_norm": 0.004713966976851225, "learning_rate": 6.782085530798068e-06, "loss": 0.0005, "step": 108980 }, { "epoch": 0.8912785705523981, "grad_norm": 0.09274092316627502, "learning_rate": 6.781418747242883e-06, "loss": 0.0017, "step": 108990 }, { "epoch": 0.8913603467309973, "grad_norm": 0.071272112429142, "learning_rate": 6.78075192739875e-06, "loss": 0.0016, "step": 109000 }, { "epoch": 0.8914421229095965, "grad_norm": 0.043200574815273285, "learning_rate": 6.780085071279252e-06, "loss": 0.0012, "step": 109010 }, { "epoch": 0.8915238990881956, "grad_norm": 0.020173469558358192, "learning_rate": 6.779418178897974e-06, "loss": 0.0018, "step": 109020 }, { "epoch": 0.8916056752667948, "grad_norm": 0.07368070632219315, "learning_rate": 6.778751250268503e-06, "loss": 0.0021, "step": 109030 }, { "epoch": 0.891687451445394, "grad_norm": 0.03432858735322952, "learning_rate": 6.77808428540442e-06, "loss": 0.0011, "step": 109040 }, { "epoch": 0.8917692276239931, "grad_norm": 0.06042156368494034, "learning_rate": 6.777417284319317e-06, "loss": 0.0016, "step": 109050 }, { "epoch": 0.8918510038025923, "grad_norm": 0.020551394671201706, "learning_rate": 6.7767502470267795e-06, "loss": 0.0007, "step": 109060 }, { "epoch": 0.8919327799811915, "grad_norm": 0.05841290205717087, "learning_rate": 6.776083173540395e-06, "loss": 0.0013, "step": 109070 }, { "epoch": 0.8920145561597906, "grad_norm": 0.03314611315727234, "learning_rate": 6.775416063873752e-06, "loss": 0.0008, "step": 109080 }, { "epoch": 0.8920963323383898, "grad_norm": 0.021560491994023323, "learning_rate": 6.774748918040441e-06, "loss": 0.0006, "step": 109090 }, { "epoch": 0.892178108516989, "grad_norm": 0.037139687687158585, "learning_rate": 6.774081736054051e-06, "loss": 0.0019, "step": 109100 }, { "epoch": 0.8922598846955881, "grad_norm": 0.13133855164051056, "learning_rate": 6.7734145179281764e-06, "loss": 0.0013, "step": 109110 }, { "epoch": 0.8923416608741873, "grad_norm": 0.04083551466464996, "learning_rate": 6.7727472636764045e-06, "loss": 0.0015, "step": 109120 }, { "epoch": 0.8924234370527865, "grad_norm": 0.061887145042419434, "learning_rate": 6.772079973312331e-06, "loss": 0.0019, "step": 109130 }, { "epoch": 0.8925052132313857, "grad_norm": 0.03568841516971588, "learning_rate": 6.7714126468495465e-06, "loss": 0.0018, "step": 109140 }, { "epoch": 0.8925869894099848, "grad_norm": 0.0765315517783165, "learning_rate": 6.770745284301648e-06, "loss": 0.0014, "step": 109150 }, { "epoch": 0.892668765588584, "grad_norm": 0.06483536958694458, "learning_rate": 6.7700778856822265e-06, "loss": 0.0014, "step": 109160 }, { "epoch": 0.8927505417671833, "grad_norm": 0.03962337225675583, "learning_rate": 6.769410451004883e-06, "loss": 0.0023, "step": 109170 }, { "epoch": 0.8928323179457824, "grad_norm": 0.11636677384376526, "learning_rate": 6.768742980283209e-06, "loss": 0.0011, "step": 109180 }, { "epoch": 0.8929140941243816, "grad_norm": 0.017483120784163475, "learning_rate": 6.7680754735308016e-06, "loss": 0.001, "step": 109190 }, { "epoch": 0.8929958703029808, "grad_norm": 0.05173274502158165, "learning_rate": 6.76740793076126e-06, "loss": 0.0014, "step": 109200 }, { "epoch": 0.89307764648158, "grad_norm": 0.1273118257522583, "learning_rate": 6.766740351988184e-06, "loss": 0.0022, "step": 109210 }, { "epoch": 0.8931594226601791, "grad_norm": 0.07513117790222168, "learning_rate": 6.766072737225169e-06, "loss": 0.001, "step": 109220 }, { "epoch": 0.8932411988387783, "grad_norm": 0.032297369092702866, "learning_rate": 6.765405086485818e-06, "loss": 0.0011, "step": 109230 }, { "epoch": 0.8933229750173775, "grad_norm": 0.040493641048669815, "learning_rate": 6.7647373997837296e-06, "loss": 0.0012, "step": 109240 }, { "epoch": 0.8934047511959766, "grad_norm": 0.05025317519903183, "learning_rate": 6.764069677132507e-06, "loss": 0.0009, "step": 109250 }, { "epoch": 0.8934865273745758, "grad_norm": 0.005467443261295557, "learning_rate": 6.763401918545749e-06, "loss": 0.0007, "step": 109260 }, { "epoch": 0.893568303553175, "grad_norm": 0.057367291301488876, "learning_rate": 6.762734124037062e-06, "loss": 0.0021, "step": 109270 }, { "epoch": 0.8936500797317741, "grad_norm": 0.018397493287920952, "learning_rate": 6.762066293620047e-06, "loss": 0.0016, "step": 109280 }, { "epoch": 0.8937318559103733, "grad_norm": 0.045068833976984024, "learning_rate": 6.761398427308312e-06, "loss": 0.002, "step": 109290 }, { "epoch": 0.8938136320889725, "grad_norm": 0.07527024298906326, "learning_rate": 6.760730525115457e-06, "loss": 0.0019, "step": 109300 }, { "epoch": 0.8938954082675716, "grad_norm": 0.1107420027256012, "learning_rate": 6.760062587055092e-06, "loss": 0.0012, "step": 109310 }, { "epoch": 0.8939771844461708, "grad_norm": 0.041639748960733414, "learning_rate": 6.759394613140819e-06, "loss": 0.0014, "step": 109320 }, { "epoch": 0.89405896062477, "grad_norm": 0.04411737993359566, "learning_rate": 6.758726603386249e-06, "loss": 0.0024, "step": 109330 }, { "epoch": 0.8941407368033691, "grad_norm": 0.23438598215579987, "learning_rate": 6.758058557804987e-06, "loss": 0.0018, "step": 109340 }, { "epoch": 0.8942225129819683, "grad_norm": 0.025546450167894363, "learning_rate": 6.7573904764106454e-06, "loss": 0.0009, "step": 109350 }, { "epoch": 0.8943042891605675, "grad_norm": 0.03435976803302765, "learning_rate": 6.756722359216831e-06, "loss": 0.0015, "step": 109360 }, { "epoch": 0.8943860653391666, "grad_norm": 0.03780800849199295, "learning_rate": 6.756054206237154e-06, "loss": 0.0008, "step": 109370 }, { "epoch": 0.8944678415177659, "grad_norm": 0.018555760383605957, "learning_rate": 6.755386017485224e-06, "loss": 0.0011, "step": 109380 }, { "epoch": 0.8945496176963651, "grad_norm": 0.02987142652273178, "learning_rate": 6.754717792974655e-06, "loss": 0.001, "step": 109390 }, { "epoch": 0.8946313938749643, "grad_norm": 0.018772730603814125, "learning_rate": 6.754049532719057e-06, "loss": 0.0019, "step": 109400 }, { "epoch": 0.8947131700535634, "grad_norm": 0.0014739522011950612, "learning_rate": 6.753381236732046e-06, "loss": 0.0037, "step": 109410 }, { "epoch": 0.8947949462321626, "grad_norm": 0.048280976712703705, "learning_rate": 6.752712905027234e-06, "loss": 0.0011, "step": 109420 }, { "epoch": 0.8948767224107618, "grad_norm": 0.014816836453974247, "learning_rate": 6.752044537618235e-06, "loss": 0.0018, "step": 109430 }, { "epoch": 0.8949584985893609, "grad_norm": 0.048613984137773514, "learning_rate": 6.751376134518663e-06, "loss": 0.0011, "step": 109440 }, { "epoch": 0.8950402747679601, "grad_norm": 0.08087647706270218, "learning_rate": 6.750707695742138e-06, "loss": 0.0009, "step": 109450 }, { "epoch": 0.8951220509465593, "grad_norm": 0.18610556423664093, "learning_rate": 6.750039221302273e-06, "loss": 0.0024, "step": 109460 }, { "epoch": 0.8952038271251584, "grad_norm": 0.026479050517082214, "learning_rate": 6.749370711212687e-06, "loss": 0.002, "step": 109470 }, { "epoch": 0.8952856033037576, "grad_norm": 0.11541638523340225, "learning_rate": 6.748702165486998e-06, "loss": 0.0023, "step": 109480 }, { "epoch": 0.8953673794823568, "grad_norm": 0.07864056527614594, "learning_rate": 6.748033584138825e-06, "loss": 0.0014, "step": 109490 }, { "epoch": 0.895449155660956, "grad_norm": 0.054795317351818085, "learning_rate": 6.747364967181784e-06, "loss": 0.0019, "step": 109500 }, { "epoch": 0.8955309318395551, "grad_norm": 0.06653732806444168, "learning_rate": 6.746696314629501e-06, "loss": 0.0014, "step": 109510 }, { "epoch": 0.8956127080181543, "grad_norm": 0.06961508095264435, "learning_rate": 6.7460276264955925e-06, "loss": 0.0014, "step": 109520 }, { "epoch": 0.8956944841967535, "grad_norm": 0.00831299927085638, "learning_rate": 6.745358902793684e-06, "loss": 0.0011, "step": 109530 }, { "epoch": 0.8957762603753526, "grad_norm": 0.05330129340291023, "learning_rate": 6.7446901435373945e-06, "loss": 0.0025, "step": 109540 }, { "epoch": 0.8958580365539518, "grad_norm": 0.03962234780192375, "learning_rate": 6.744021348740349e-06, "loss": 0.0018, "step": 109550 }, { "epoch": 0.895939812732551, "grad_norm": 0.0862000435590744, "learning_rate": 6.743352518416171e-06, "loss": 0.0012, "step": 109560 }, { "epoch": 0.8960215889111501, "grad_norm": 0.11443120986223221, "learning_rate": 6.742683652578486e-06, "loss": 0.0025, "step": 109570 }, { "epoch": 0.8961033650897493, "grad_norm": 0.15915943682193756, "learning_rate": 6.742014751240916e-06, "loss": 0.002, "step": 109580 }, { "epoch": 0.8961851412683486, "grad_norm": 0.10712530463933945, "learning_rate": 6.741345814417091e-06, "loss": 0.0028, "step": 109590 }, { "epoch": 0.8962669174469478, "grad_norm": 0.006195442285388708, "learning_rate": 6.740676842120638e-06, "loss": 0.0006, "step": 109600 }, { "epoch": 0.8963486936255469, "grad_norm": 0.0199090875685215, "learning_rate": 6.740007834365182e-06, "loss": 0.0014, "step": 109610 }, { "epoch": 0.8964304698041461, "grad_norm": 0.039143431931734085, "learning_rate": 6.7393387911643504e-06, "loss": 0.0051, "step": 109620 }, { "epoch": 0.8965122459827453, "grad_norm": 0.045227162539958954, "learning_rate": 6.7386697125317755e-06, "loss": 0.002, "step": 109630 }, { "epoch": 0.8965940221613444, "grad_norm": 0.19436004757881165, "learning_rate": 6.738000598481084e-06, "loss": 0.0015, "step": 109640 }, { "epoch": 0.8966757983399436, "grad_norm": 0.05118144676089287, "learning_rate": 6.737331449025908e-06, "loss": 0.0015, "step": 109650 }, { "epoch": 0.8967575745185428, "grad_norm": 0.029014647006988525, "learning_rate": 6.73666226417988e-06, "loss": 0.0028, "step": 109660 }, { "epoch": 0.8968393506971419, "grad_norm": 0.028633881360292435, "learning_rate": 6.735993043956628e-06, "loss": 0.0018, "step": 109670 }, { "epoch": 0.8969211268757411, "grad_norm": 0.0036693448200821877, "learning_rate": 6.735323788369787e-06, "loss": 0.0007, "step": 109680 }, { "epoch": 0.8970029030543403, "grad_norm": 0.00181824981700629, "learning_rate": 6.734654497432989e-06, "loss": 0.002, "step": 109690 }, { "epoch": 0.8970846792329394, "grad_norm": 0.08938514441251755, "learning_rate": 6.733985171159871e-06, "loss": 0.0016, "step": 109700 }, { "epoch": 0.8971664554115386, "grad_norm": 0.03719092532992363, "learning_rate": 6.733315809564063e-06, "loss": 0.001, "step": 109710 }, { "epoch": 0.8972482315901378, "grad_norm": 0.08771036565303802, "learning_rate": 6.732646412659206e-06, "loss": 0.002, "step": 109720 }, { "epoch": 0.897330007768737, "grad_norm": 0.02035568282008171, "learning_rate": 6.731976980458932e-06, "loss": 0.0013, "step": 109730 }, { "epoch": 0.8974117839473361, "grad_norm": 0.14641587436199188, "learning_rate": 6.7313075129768776e-06, "loss": 0.0021, "step": 109740 }, { "epoch": 0.8974935601259353, "grad_norm": 0.028948772698640823, "learning_rate": 6.730638010226685e-06, "loss": 0.001, "step": 109750 }, { "epoch": 0.8975753363045345, "grad_norm": 0.047182779759168625, "learning_rate": 6.729968472221986e-06, "loss": 0.001, "step": 109760 }, { "epoch": 0.8976571124831336, "grad_norm": 0.043730370700359344, "learning_rate": 6.729298898976424e-06, "loss": 0.0012, "step": 109770 }, { "epoch": 0.8977388886617328, "grad_norm": 0.04033910483121872, "learning_rate": 6.728629290503639e-06, "loss": 0.0017, "step": 109780 }, { "epoch": 0.897820664840332, "grad_norm": 0.02690613828599453, "learning_rate": 6.72795964681727e-06, "loss": 0.0018, "step": 109790 }, { "epoch": 0.8979024410189311, "grad_norm": 0.01289381179958582, "learning_rate": 6.7272899679309575e-06, "loss": 0.0029, "step": 109800 }, { "epoch": 0.8979842171975304, "grad_norm": 0.04768380522727966, "learning_rate": 6.726620253858345e-06, "loss": 0.0014, "step": 109810 }, { "epoch": 0.8980659933761296, "grad_norm": 0.12303517758846283, "learning_rate": 6.7259505046130745e-06, "loss": 0.0014, "step": 109820 }, { "epoch": 0.8981477695547287, "grad_norm": 0.07711783051490784, "learning_rate": 6.725280720208789e-06, "loss": 0.0011, "step": 109830 }, { "epoch": 0.8982295457333279, "grad_norm": 0.1370539665222168, "learning_rate": 6.724610900659133e-06, "loss": 0.0026, "step": 109840 }, { "epoch": 0.8983113219119271, "grad_norm": 0.08734691143035889, "learning_rate": 6.723941045977752e-06, "loss": 0.0031, "step": 109850 }, { "epoch": 0.8983930980905263, "grad_norm": 0.03900478035211563, "learning_rate": 6.723271156178288e-06, "loss": 0.005, "step": 109860 }, { "epoch": 0.8984748742691254, "grad_norm": 0.07627961039543152, "learning_rate": 6.722601231274393e-06, "loss": 0.0012, "step": 109870 }, { "epoch": 0.8985566504477246, "grad_norm": 0.03451848030090332, "learning_rate": 6.721931271279708e-06, "loss": 0.0014, "step": 109880 }, { "epoch": 0.8986384266263238, "grad_norm": 0.00754318805411458, "learning_rate": 6.721261276207886e-06, "loss": 0.0016, "step": 109890 }, { "epoch": 0.8987202028049229, "grad_norm": 0.041254736483097076, "learning_rate": 6.720591246072571e-06, "loss": 0.0013, "step": 109900 }, { "epoch": 0.8988019789835221, "grad_norm": 0.02057648077607155, "learning_rate": 6.719921180887414e-06, "loss": 0.0015, "step": 109910 }, { "epoch": 0.8988837551621213, "grad_norm": 0.017496217042207718, "learning_rate": 6.719251080666064e-06, "loss": 0.0016, "step": 109920 }, { "epoch": 0.8989655313407204, "grad_norm": 0.03401012718677521, "learning_rate": 6.718580945422173e-06, "loss": 0.0011, "step": 109930 }, { "epoch": 0.8990473075193196, "grad_norm": 0.013531887903809547, "learning_rate": 6.717910775169389e-06, "loss": 0.0011, "step": 109940 }, { "epoch": 0.8991290836979188, "grad_norm": 0.058726027607917786, "learning_rate": 6.717240569921369e-06, "loss": 0.002, "step": 109950 }, { "epoch": 0.8992108598765179, "grad_norm": 0.03748275339603424, "learning_rate": 6.716570329691762e-06, "loss": 0.0035, "step": 109960 }, { "epoch": 0.8992926360551171, "grad_norm": 0.03941868618130684, "learning_rate": 6.7159000544942225e-06, "loss": 0.0013, "step": 109970 }, { "epoch": 0.8993744122337163, "grad_norm": 0.06656702607870102, "learning_rate": 6.715229744342403e-06, "loss": 0.003, "step": 109980 }, { "epoch": 0.8994561884123154, "grad_norm": 0.06063298135995865, "learning_rate": 6.714559399249959e-06, "loss": 0.0013, "step": 109990 }, { "epoch": 0.8995379645909146, "grad_norm": 0.003389491932466626, "learning_rate": 6.713889019230548e-06, "loss": 0.0014, "step": 110000 }, { "epoch": 0.8996197407695138, "grad_norm": 0.07054244726896286, "learning_rate": 6.713218604297823e-06, "loss": 0.0019, "step": 110010 }, { "epoch": 0.8997015169481131, "grad_norm": 0.05729660764336586, "learning_rate": 6.712548154465443e-06, "loss": 0.0018, "step": 110020 }, { "epoch": 0.8997832931267122, "grad_norm": 0.0210425928235054, "learning_rate": 6.7118776697470655e-06, "loss": 0.0016, "step": 110030 }, { "epoch": 0.8998650693053114, "grad_norm": 0.04132002964615822, "learning_rate": 6.711207150156349e-06, "loss": 0.001, "step": 110040 }, { "epoch": 0.8999468454839106, "grad_norm": 0.0471789613366127, "learning_rate": 6.71053659570695e-06, "loss": 0.0012, "step": 110050 }, { "epoch": 0.9000286216625097, "grad_norm": 0.040081411600112915, "learning_rate": 6.709866006412531e-06, "loss": 0.0011, "step": 110060 }, { "epoch": 0.9001103978411089, "grad_norm": 0.04577656090259552, "learning_rate": 6.709195382286752e-06, "loss": 0.0011, "step": 110070 }, { "epoch": 0.9001921740197081, "grad_norm": 0.023000871762633324, "learning_rate": 6.708524723343274e-06, "loss": 0.0015, "step": 110080 }, { "epoch": 0.9002739501983072, "grad_norm": 0.03749355673789978, "learning_rate": 6.707854029595759e-06, "loss": 0.0017, "step": 110090 }, { "epoch": 0.9003557263769064, "grad_norm": 0.05880194529891014, "learning_rate": 6.707183301057868e-06, "loss": 0.0007, "step": 110100 }, { "epoch": 0.9004375025555056, "grad_norm": 0.04799357429146767, "learning_rate": 6.706512537743265e-06, "loss": 0.002, "step": 110110 }, { "epoch": 0.9005192787341048, "grad_norm": 0.05566835775971413, "learning_rate": 6.705841739665615e-06, "loss": 0.0025, "step": 110120 }, { "epoch": 0.9006010549127039, "grad_norm": 0.09952142834663391, "learning_rate": 6.705170906838583e-06, "loss": 0.0011, "step": 110130 }, { "epoch": 0.9006828310913031, "grad_norm": 0.024717267602682114, "learning_rate": 6.704500039275833e-06, "loss": 0.0009, "step": 110140 }, { "epoch": 0.9007646072699023, "grad_norm": 0.07651036977767944, "learning_rate": 6.703829136991031e-06, "loss": 0.0016, "step": 110150 }, { "epoch": 0.9008463834485014, "grad_norm": 0.018848303705453873, "learning_rate": 6.7031581999978465e-06, "loss": 0.001, "step": 110160 }, { "epoch": 0.9009281596271006, "grad_norm": 0.019658923149108887, "learning_rate": 6.7024872283099425e-06, "loss": 0.0015, "step": 110170 }, { "epoch": 0.9010099358056998, "grad_norm": 0.11622994393110275, "learning_rate": 6.701816221940992e-06, "loss": 0.0013, "step": 110180 }, { "epoch": 0.9010917119842989, "grad_norm": 0.0408882237970829, "learning_rate": 6.70114518090466e-06, "loss": 0.0007, "step": 110190 }, { "epoch": 0.9011734881628981, "grad_norm": 0.013770630583167076, "learning_rate": 6.7004741052146195e-06, "loss": 0.0015, "step": 110200 }, { "epoch": 0.9012552643414973, "grad_norm": 0.05483337119221687, "learning_rate": 6.699802994884539e-06, "loss": 0.0015, "step": 110210 }, { "epoch": 0.9013370405200964, "grad_norm": 0.031488362699747086, "learning_rate": 6.69913184992809e-06, "loss": 0.0007, "step": 110220 }, { "epoch": 0.9014188166986957, "grad_norm": 0.36250969767570496, "learning_rate": 6.698460670358943e-06, "loss": 0.0019, "step": 110230 }, { "epoch": 0.9015005928772949, "grad_norm": 0.00847859401255846, "learning_rate": 6.697789456190773e-06, "loss": 0.0015, "step": 110240 }, { "epoch": 0.9015823690558941, "grad_norm": 0.04186883941292763, "learning_rate": 6.69711820743725e-06, "loss": 0.0015, "step": 110250 }, { "epoch": 0.9016641452344932, "grad_norm": 0.03199222683906555, "learning_rate": 6.69644692411205e-06, "loss": 0.0011, "step": 110260 }, { "epoch": 0.9017459214130924, "grad_norm": 0.10052941739559174, "learning_rate": 6.695775606228848e-06, "loss": 0.0013, "step": 110270 }, { "epoch": 0.9018276975916916, "grad_norm": 0.05150418356060982, "learning_rate": 6.695104253801319e-06, "loss": 0.0017, "step": 110280 }, { "epoch": 0.9019094737702907, "grad_norm": 0.06420738995075226, "learning_rate": 6.694432866843137e-06, "loss": 0.0019, "step": 110290 }, { "epoch": 0.9019912499488899, "grad_norm": 0.11724955588579178, "learning_rate": 6.693761445367981e-06, "loss": 0.0013, "step": 110300 }, { "epoch": 0.9020730261274891, "grad_norm": 0.07014112174510956, "learning_rate": 6.693089989389527e-06, "loss": 0.0014, "step": 110310 }, { "epoch": 0.9021548023060882, "grad_norm": 0.07177947461605072, "learning_rate": 6.692418498921455e-06, "loss": 0.0017, "step": 110320 }, { "epoch": 0.9022365784846874, "grad_norm": 0.07306567579507828, "learning_rate": 6.691746973977442e-06, "loss": 0.0027, "step": 110330 }, { "epoch": 0.9023183546632866, "grad_norm": 0.10916212201118469, "learning_rate": 6.691075414571168e-06, "loss": 0.0014, "step": 110340 }, { "epoch": 0.9024001308418858, "grad_norm": 0.040584102272987366, "learning_rate": 6.6904038207163114e-06, "loss": 0.0015, "step": 110350 }, { "epoch": 0.9024819070204849, "grad_norm": 0.023320326581597328, "learning_rate": 6.689732192426557e-06, "loss": 0.0006, "step": 110360 }, { "epoch": 0.9025636831990841, "grad_norm": 0.09289339929819107, "learning_rate": 6.689060529715583e-06, "loss": 0.0016, "step": 110370 }, { "epoch": 0.9026454593776833, "grad_norm": 0.05499417334794998, "learning_rate": 6.688388832597075e-06, "loss": 0.0021, "step": 110380 }, { "epoch": 0.9027272355562824, "grad_norm": 0.14779795706272125, "learning_rate": 6.687717101084713e-06, "loss": 0.0023, "step": 110390 }, { "epoch": 0.9028090117348816, "grad_norm": 0.039462216198444366, "learning_rate": 6.687045335192183e-06, "loss": 0.0011, "step": 110400 }, { "epoch": 0.9028907879134808, "grad_norm": 0.05290451645851135, "learning_rate": 6.686373534933166e-06, "loss": 0.0014, "step": 110410 }, { "epoch": 0.9029725640920799, "grad_norm": 0.053862620145082474, "learning_rate": 6.685701700321351e-06, "loss": 0.0009, "step": 110420 }, { "epoch": 0.9030543402706791, "grad_norm": 0.06412267684936523, "learning_rate": 6.68502983137042e-06, "loss": 0.0014, "step": 110430 }, { "epoch": 0.9031361164492783, "grad_norm": 0.1018972098827362, "learning_rate": 6.684357928094064e-06, "loss": 0.0018, "step": 110440 }, { "epoch": 0.9032178926278775, "grad_norm": 0.01305451150983572, "learning_rate": 6.683685990505968e-06, "loss": 0.0004, "step": 110450 }, { "epoch": 0.9032996688064767, "grad_norm": 0.011189252138137817, "learning_rate": 6.683014018619819e-06, "loss": 0.0012, "step": 110460 }, { "epoch": 0.9033814449850759, "grad_norm": 0.018602833151817322, "learning_rate": 6.682342012449305e-06, "loss": 0.0017, "step": 110470 }, { "epoch": 0.9034632211636751, "grad_norm": 0.05453198030591011, "learning_rate": 6.6816699720081195e-06, "loss": 0.0014, "step": 110480 }, { "epoch": 0.9035449973422742, "grad_norm": 0.04672149196267128, "learning_rate": 6.680997897309947e-06, "loss": 0.0019, "step": 110490 }, { "epoch": 0.9036267735208734, "grad_norm": 0.031412273645401, "learning_rate": 6.680325788368482e-06, "loss": 0.0025, "step": 110500 }, { "epoch": 0.9037085496994726, "grad_norm": 0.014352823607623577, "learning_rate": 6.6796536451974155e-06, "loss": 0.0014, "step": 110510 }, { "epoch": 0.9037903258780717, "grad_norm": 0.03211761265993118, "learning_rate": 6.678981467810439e-06, "loss": 0.0012, "step": 110520 }, { "epoch": 0.9038721020566709, "grad_norm": 0.012761754915118217, "learning_rate": 6.678309256221245e-06, "loss": 0.001, "step": 110530 }, { "epoch": 0.9039538782352701, "grad_norm": 0.006219740025699139, "learning_rate": 6.677637010443527e-06, "loss": 0.0016, "step": 110540 }, { "epoch": 0.9040356544138692, "grad_norm": 0.10375582426786423, "learning_rate": 6.676964730490979e-06, "loss": 0.0014, "step": 110550 }, { "epoch": 0.9041174305924684, "grad_norm": 0.029505234211683273, "learning_rate": 6.676292416377297e-06, "loss": 0.0008, "step": 110560 }, { "epoch": 0.9041992067710676, "grad_norm": 0.15401901304721832, "learning_rate": 6.675620068116175e-06, "loss": 0.0018, "step": 110570 }, { "epoch": 0.9042809829496667, "grad_norm": 0.024744689464569092, "learning_rate": 6.674947685721312e-06, "loss": 0.0013, "step": 110580 }, { "epoch": 0.9043627591282659, "grad_norm": 0.10077838599681854, "learning_rate": 6.674275269206401e-06, "loss": 0.0011, "step": 110590 }, { "epoch": 0.9044445353068651, "grad_norm": 0.031017178669571877, "learning_rate": 6.6736028185851445e-06, "loss": 0.0009, "step": 110600 }, { "epoch": 0.9045263114854643, "grad_norm": 0.05220730975270271, "learning_rate": 6.672930333871236e-06, "loss": 0.0012, "step": 110610 }, { "epoch": 0.9046080876640634, "grad_norm": 0.08788339048624039, "learning_rate": 6.672257815078378e-06, "loss": 0.0014, "step": 110620 }, { "epoch": 0.9046898638426626, "grad_norm": 0.38484811782836914, "learning_rate": 6.671585262220268e-06, "loss": 0.0015, "step": 110630 }, { "epoch": 0.9047716400212618, "grad_norm": 0.014144046232104301, "learning_rate": 6.670912675310608e-06, "loss": 0.0012, "step": 110640 }, { "epoch": 0.9048534161998609, "grad_norm": 0.5639244318008423, "learning_rate": 6.670240054363098e-06, "loss": 0.0027, "step": 110650 }, { "epoch": 0.9049351923784602, "grad_norm": 0.03394836187362671, "learning_rate": 6.669567399391442e-06, "loss": 0.0009, "step": 110660 }, { "epoch": 0.9050169685570594, "grad_norm": 0.11818312853574753, "learning_rate": 6.668894710409339e-06, "loss": 0.0026, "step": 110670 }, { "epoch": 0.9050987447356585, "grad_norm": 0.06669893860816956, "learning_rate": 6.668221987430495e-06, "loss": 0.0019, "step": 110680 }, { "epoch": 0.9051805209142577, "grad_norm": 0.06361077725887299, "learning_rate": 6.667549230468615e-06, "loss": 0.0016, "step": 110690 }, { "epoch": 0.9052622970928569, "grad_norm": 0.07479546219110489, "learning_rate": 6.6668764395373995e-06, "loss": 0.0013, "step": 110700 }, { "epoch": 0.905344073271456, "grad_norm": 0.04690024256706238, "learning_rate": 6.666203614650556e-06, "loss": 0.0012, "step": 110710 }, { "epoch": 0.9054258494500552, "grad_norm": 0.025750331580638885, "learning_rate": 6.665530755821792e-06, "loss": 0.0009, "step": 110720 }, { "epoch": 0.9055076256286544, "grad_norm": 0.049564022570848465, "learning_rate": 6.664857863064811e-06, "loss": 0.0019, "step": 110730 }, { "epoch": 0.9055894018072536, "grad_norm": 0.08046179264783859, "learning_rate": 6.664184936393323e-06, "loss": 0.0018, "step": 110740 }, { "epoch": 0.9056711779858527, "grad_norm": 0.02984550967812538, "learning_rate": 6.6635119758210355e-06, "loss": 0.0014, "step": 110750 }, { "epoch": 0.9057529541644519, "grad_norm": 0.02993766963481903, "learning_rate": 6.662838981361657e-06, "loss": 0.0014, "step": 110760 }, { "epoch": 0.9058347303430511, "grad_norm": 0.13944856822490692, "learning_rate": 6.662165953028895e-06, "loss": 0.0033, "step": 110770 }, { "epoch": 0.9059165065216502, "grad_norm": 0.08667682111263275, "learning_rate": 6.661492890836463e-06, "loss": 0.0008, "step": 110780 }, { "epoch": 0.9059982827002494, "grad_norm": 0.011525130830705166, "learning_rate": 6.660819794798069e-06, "loss": 0.0007, "step": 110790 }, { "epoch": 0.9060800588788486, "grad_norm": 0.1490626484155655, "learning_rate": 6.660146664927427e-06, "loss": 0.002, "step": 110800 }, { "epoch": 0.9061618350574477, "grad_norm": 0.08423575013875961, "learning_rate": 6.659473501238248e-06, "loss": 0.0013, "step": 110810 }, { "epoch": 0.9062436112360469, "grad_norm": 0.12713715434074402, "learning_rate": 6.658800303744246e-06, "loss": 0.0012, "step": 110820 }, { "epoch": 0.9063253874146461, "grad_norm": 0.051415566354990005, "learning_rate": 6.658127072459131e-06, "loss": 0.0008, "step": 110830 }, { "epoch": 0.9064071635932452, "grad_norm": 0.037952810525894165, "learning_rate": 6.657453807396621e-06, "loss": 0.0008, "step": 110840 }, { "epoch": 0.9064889397718444, "grad_norm": 0.03347018361091614, "learning_rate": 6.656780508570429e-06, "loss": 0.0015, "step": 110850 }, { "epoch": 0.9065707159504436, "grad_norm": 0.01451737992465496, "learning_rate": 6.656107175994272e-06, "loss": 0.0008, "step": 110860 }, { "epoch": 0.9066524921290429, "grad_norm": 0.05115540325641632, "learning_rate": 6.655433809681866e-06, "loss": 0.0017, "step": 110870 }, { "epoch": 0.906734268307642, "grad_norm": 0.0782671794295311, "learning_rate": 6.654760409646927e-06, "loss": 0.0031, "step": 110880 }, { "epoch": 0.9068160444862412, "grad_norm": 0.02558884210884571, "learning_rate": 6.654086975903174e-06, "loss": 0.0032, "step": 110890 }, { "epoch": 0.9068978206648404, "grad_norm": 0.03027796931564808, "learning_rate": 6.653413508464325e-06, "loss": 0.0011, "step": 110900 }, { "epoch": 0.9069795968434395, "grad_norm": 0.030233215540647507, "learning_rate": 6.652740007344098e-06, "loss": 0.0008, "step": 110910 }, { "epoch": 0.9070613730220387, "grad_norm": 0.018220242112874985, "learning_rate": 6.652066472556213e-06, "loss": 0.0019, "step": 110920 }, { "epoch": 0.9071431492006379, "grad_norm": 0.09440448880195618, "learning_rate": 6.6513929041143936e-06, "loss": 0.0012, "step": 110930 }, { "epoch": 0.907224925379237, "grad_norm": 0.05704875662922859, "learning_rate": 6.6507193020323554e-06, "loss": 0.0013, "step": 110940 }, { "epoch": 0.9073067015578362, "grad_norm": 0.013922121375799179, "learning_rate": 6.650045666323826e-06, "loss": 0.0011, "step": 110950 }, { "epoch": 0.9073884777364354, "grad_norm": 0.128306046128273, "learning_rate": 6.649371997002523e-06, "loss": 0.0018, "step": 110960 }, { "epoch": 0.9074702539150346, "grad_norm": 0.08322174847126007, "learning_rate": 6.648698294082172e-06, "loss": 0.0037, "step": 110970 }, { "epoch": 0.9075520300936337, "grad_norm": 0.035568393766880035, "learning_rate": 6.648024557576498e-06, "loss": 0.0021, "step": 110980 }, { "epoch": 0.9076338062722329, "grad_norm": 0.034515250474214554, "learning_rate": 6.647350787499224e-06, "loss": 0.0037, "step": 110990 }, { "epoch": 0.9077155824508321, "grad_norm": 0.15685737133026123, "learning_rate": 6.646676983864075e-06, "loss": 0.0019, "step": 111000 }, { "epoch": 0.9077973586294312, "grad_norm": 0.12379888445138931, "learning_rate": 6.646003146684776e-06, "loss": 0.0031, "step": 111010 }, { "epoch": 0.9078791348080304, "grad_norm": 0.04250122606754303, "learning_rate": 6.645329275975056e-06, "loss": 0.001, "step": 111020 }, { "epoch": 0.9079609109866296, "grad_norm": 0.09966003894805908, "learning_rate": 6.644655371748641e-06, "loss": 0.0014, "step": 111030 }, { "epoch": 0.9080426871652287, "grad_norm": 0.05146842822432518, "learning_rate": 6.64398143401926e-06, "loss": 0.001, "step": 111040 }, { "epoch": 0.9081244633438279, "grad_norm": 0.07056836038827896, "learning_rate": 6.643307462800641e-06, "loss": 0.0024, "step": 111050 }, { "epoch": 0.9082062395224271, "grad_norm": 0.024287888780236244, "learning_rate": 6.642633458106513e-06, "loss": 0.0011, "step": 111060 }, { "epoch": 0.9082880157010262, "grad_norm": 0.029019275680184364, "learning_rate": 6.641959419950605e-06, "loss": 0.0019, "step": 111070 }, { "epoch": 0.9083697918796254, "grad_norm": 0.051547806710004807, "learning_rate": 6.641285348346651e-06, "loss": 0.0016, "step": 111080 }, { "epoch": 0.9084515680582247, "grad_norm": 0.05223840847611427, "learning_rate": 6.640611243308378e-06, "loss": 0.0007, "step": 111090 }, { "epoch": 0.9085333442368239, "grad_norm": 0.05157272517681122, "learning_rate": 6.639937104849523e-06, "loss": 0.0019, "step": 111100 }, { "epoch": 0.908615120415423, "grad_norm": 0.0355159193277359, "learning_rate": 6.639262932983814e-06, "loss": 0.0016, "step": 111110 }, { "epoch": 0.9086968965940222, "grad_norm": 0.18942905962467194, "learning_rate": 6.638588727724988e-06, "loss": 0.0015, "step": 111120 }, { "epoch": 0.9087786727726214, "grad_norm": 0.06698083877563477, "learning_rate": 6.637914489086778e-06, "loss": 0.0017, "step": 111130 }, { "epoch": 0.9088604489512205, "grad_norm": 0.006957830861210823, "learning_rate": 6.637240217082917e-06, "loss": 0.001, "step": 111140 }, { "epoch": 0.9089422251298197, "grad_norm": 0.047552939504384995, "learning_rate": 6.636565911727142e-06, "loss": 0.001, "step": 111150 }, { "epoch": 0.9090240013084189, "grad_norm": 0.038129113614559174, "learning_rate": 6.635891573033191e-06, "loss": 0.0009, "step": 111160 }, { "epoch": 0.909105777487018, "grad_norm": 0.10740208625793457, "learning_rate": 6.635217201014797e-06, "loss": 0.0014, "step": 111170 }, { "epoch": 0.9091875536656172, "grad_norm": 0.03356509655714035, "learning_rate": 6.634542795685701e-06, "loss": 0.0014, "step": 111180 }, { "epoch": 0.9092693298442164, "grad_norm": 0.16453762352466583, "learning_rate": 6.633868357059638e-06, "loss": 0.0013, "step": 111190 }, { "epoch": 0.9093511060228155, "grad_norm": 0.16868655383586884, "learning_rate": 6.63319388515035e-06, "loss": 0.0025, "step": 111200 }, { "epoch": 0.9094328822014147, "grad_norm": 0.030253160744905472, "learning_rate": 6.632519379971574e-06, "loss": 0.0015, "step": 111210 }, { "epoch": 0.9095146583800139, "grad_norm": 0.012824158184230328, "learning_rate": 6.631844841537052e-06, "loss": 0.0014, "step": 111220 }, { "epoch": 0.909596434558613, "grad_norm": 0.03143121302127838, "learning_rate": 6.6311702698605226e-06, "loss": 0.0014, "step": 111230 }, { "epoch": 0.9096782107372122, "grad_norm": 0.09555386006832123, "learning_rate": 6.630495664955731e-06, "loss": 0.0008, "step": 111240 }, { "epoch": 0.9097599869158114, "grad_norm": 0.03519536182284355, "learning_rate": 6.629821026836416e-06, "loss": 0.0015, "step": 111250 }, { "epoch": 0.9098417630944106, "grad_norm": 0.0035886294208467007, "learning_rate": 6.629146355516323e-06, "loss": 0.0013, "step": 111260 }, { "epoch": 0.9099235392730097, "grad_norm": 0.03847942873835564, "learning_rate": 6.628471651009192e-06, "loss": 0.0012, "step": 111270 }, { "epoch": 0.9100053154516089, "grad_norm": 0.03393099084496498, "learning_rate": 6.627796913328772e-06, "loss": 0.0024, "step": 111280 }, { "epoch": 0.9100870916302081, "grad_norm": 0.03682620823383331, "learning_rate": 6.627122142488806e-06, "loss": 0.001, "step": 111290 }, { "epoch": 0.9101688678088073, "grad_norm": 0.11027471721172333, "learning_rate": 6.6264473385030395e-06, "loss": 0.001, "step": 111300 }, { "epoch": 0.9102506439874065, "grad_norm": 0.08486850559711456, "learning_rate": 6.625772501385217e-06, "loss": 0.0016, "step": 111310 }, { "epoch": 0.9103324201660057, "grad_norm": 0.02837400883436203, "learning_rate": 6.625097631149088e-06, "loss": 0.0019, "step": 111320 }, { "epoch": 0.9104141963446049, "grad_norm": 0.026338059455156326, "learning_rate": 6.6244227278083985e-06, "loss": 0.0012, "step": 111330 }, { "epoch": 0.910495972523204, "grad_norm": 0.05531701073050499, "learning_rate": 6.623747791376899e-06, "loss": 0.0014, "step": 111340 }, { "epoch": 0.9105777487018032, "grad_norm": 0.07565833628177643, "learning_rate": 6.623072821868337e-06, "loss": 0.0022, "step": 111350 }, { "epoch": 0.9106595248804024, "grad_norm": 0.007059028372168541, "learning_rate": 6.6223978192964625e-06, "loss": 0.0033, "step": 111360 }, { "epoch": 0.9107413010590015, "grad_norm": 0.04992915317416191, "learning_rate": 6.621722783675024e-06, "loss": 0.0015, "step": 111370 }, { "epoch": 0.9108230772376007, "grad_norm": 0.10829342901706696, "learning_rate": 6.621047715017775e-06, "loss": 0.0016, "step": 111380 }, { "epoch": 0.9109048534161999, "grad_norm": 0.0018398426473140717, "learning_rate": 6.620372613338468e-06, "loss": 0.0016, "step": 111390 }, { "epoch": 0.910986629594799, "grad_norm": 0.12879185378551483, "learning_rate": 6.6196974786508515e-06, "loss": 0.0031, "step": 111400 }, { "epoch": 0.9110684057733982, "grad_norm": 0.018699264153838158, "learning_rate": 6.619022310968683e-06, "loss": 0.0013, "step": 111410 }, { "epoch": 0.9111501819519974, "grad_norm": 0.04253331944346428, "learning_rate": 6.618347110305712e-06, "loss": 0.0024, "step": 111420 }, { "epoch": 0.9112319581305965, "grad_norm": 0.023889079689979553, "learning_rate": 6.617671876675696e-06, "loss": 0.0024, "step": 111430 }, { "epoch": 0.9113137343091957, "grad_norm": 0.11973873525857925, "learning_rate": 6.616996610092388e-06, "loss": 0.0019, "step": 111440 }, { "epoch": 0.9113955104877949, "grad_norm": 0.056629203259944916, "learning_rate": 6.616321310569546e-06, "loss": 0.0012, "step": 111450 }, { "epoch": 0.911477286666394, "grad_norm": 0.1792614758014679, "learning_rate": 6.615645978120924e-06, "loss": 0.0016, "step": 111460 }, { "epoch": 0.9115590628449932, "grad_norm": 0.03370242193341255, "learning_rate": 6.6149706127602806e-06, "loss": 0.0015, "step": 111470 }, { "epoch": 0.9116408390235924, "grad_norm": 0.033265918493270874, "learning_rate": 6.6142952145013725e-06, "loss": 0.001, "step": 111480 }, { "epoch": 0.9117226152021916, "grad_norm": 0.15239554643630981, "learning_rate": 6.61361978335796e-06, "loss": 0.0018, "step": 111490 }, { "epoch": 0.9118043913807907, "grad_norm": 0.07085265964269638, "learning_rate": 6.612944319343799e-06, "loss": 0.002, "step": 111500 }, { "epoch": 0.91188616755939, "grad_norm": 0.05541121959686279, "learning_rate": 6.612268822472654e-06, "loss": 0.0029, "step": 111510 }, { "epoch": 0.9119679437379892, "grad_norm": 0.02033992111682892, "learning_rate": 6.611593292758279e-06, "loss": 0.0009, "step": 111520 }, { "epoch": 0.9120497199165883, "grad_norm": 0.05043129622936249, "learning_rate": 6.610917730214442e-06, "loss": 0.0038, "step": 111530 }, { "epoch": 0.9121314960951875, "grad_norm": 0.049064550548791885, "learning_rate": 6.6102421348549e-06, "loss": 0.0049, "step": 111540 }, { "epoch": 0.9122132722737867, "grad_norm": 0.12338969856500626, "learning_rate": 6.609566506693416e-06, "loss": 0.0044, "step": 111550 }, { "epoch": 0.9122950484523858, "grad_norm": 0.018028611317276955, "learning_rate": 6.608890845743755e-06, "loss": 0.0021, "step": 111560 }, { "epoch": 0.912376824630985, "grad_norm": 0.028184941038489342, "learning_rate": 6.608215152019679e-06, "loss": 0.0017, "step": 111570 }, { "epoch": 0.9124586008095842, "grad_norm": 0.050243813544511795, "learning_rate": 6.607539425534953e-06, "loss": 0.0011, "step": 111580 }, { "epoch": 0.9125403769881834, "grad_norm": 0.03178197145462036, "learning_rate": 6.606863666303345e-06, "loss": 0.0014, "step": 111590 }, { "epoch": 0.9126221531667825, "grad_norm": 0.04936854913830757, "learning_rate": 6.606187874338614e-06, "loss": 0.0008, "step": 111600 }, { "epoch": 0.9127039293453817, "grad_norm": 0.04993860796093941, "learning_rate": 6.605512049654533e-06, "loss": 0.0019, "step": 111610 }, { "epoch": 0.9127857055239809, "grad_norm": 0.028452962636947632, "learning_rate": 6.604836192264865e-06, "loss": 0.001, "step": 111620 }, { "epoch": 0.91286748170258, "grad_norm": 0.033398762345314026, "learning_rate": 6.604160302183381e-06, "loss": 0.0017, "step": 111630 }, { "epoch": 0.9129492578811792, "grad_norm": 0.014421354047954082, "learning_rate": 6.603484379423846e-06, "loss": 0.0011, "step": 111640 }, { "epoch": 0.9130310340597784, "grad_norm": 0.04187149927020073, "learning_rate": 6.602808424000034e-06, "loss": 0.0011, "step": 111650 }, { "epoch": 0.9131128102383775, "grad_norm": 0.325687050819397, "learning_rate": 6.602132435925708e-06, "loss": 0.0025, "step": 111660 }, { "epoch": 0.9131945864169767, "grad_norm": 0.10453324764966965, "learning_rate": 6.601456415214642e-06, "loss": 0.0013, "step": 111670 }, { "epoch": 0.9132763625955759, "grad_norm": 0.0031333081424236298, "learning_rate": 6.600780361880607e-06, "loss": 0.0006, "step": 111680 }, { "epoch": 0.913358138774175, "grad_norm": 0.07245223969221115, "learning_rate": 6.6001042759373776e-06, "loss": 0.0023, "step": 111690 }, { "epoch": 0.9134399149527742, "grad_norm": 0.0668063759803772, "learning_rate": 6.5994281573987215e-06, "loss": 0.0023, "step": 111700 }, { "epoch": 0.9135216911313734, "grad_norm": 0.025091836228966713, "learning_rate": 6.598752006278415e-06, "loss": 0.001, "step": 111710 }, { "epoch": 0.9136034673099726, "grad_norm": 0.06994844973087311, "learning_rate": 6.59807582259023e-06, "loss": 0.0019, "step": 111720 }, { "epoch": 0.9136852434885718, "grad_norm": 0.04752924665808678, "learning_rate": 6.597399606347941e-06, "loss": 0.0018, "step": 111730 }, { "epoch": 0.913767019667171, "grad_norm": 0.049297917634248734, "learning_rate": 6.596723357565323e-06, "loss": 0.0013, "step": 111740 }, { "epoch": 0.9138487958457702, "grad_norm": 0.05081038177013397, "learning_rate": 6.596047076256154e-06, "loss": 0.0012, "step": 111750 }, { "epoch": 0.9139305720243693, "grad_norm": 0.10552426427602768, "learning_rate": 6.595370762434208e-06, "loss": 0.0014, "step": 111760 }, { "epoch": 0.9140123482029685, "grad_norm": 0.04032139480113983, "learning_rate": 6.594694416113263e-06, "loss": 0.0018, "step": 111770 }, { "epoch": 0.9140941243815677, "grad_norm": 0.018434811383485794, "learning_rate": 6.594018037307096e-06, "loss": 0.0012, "step": 111780 }, { "epoch": 0.9141759005601668, "grad_norm": 0.0359150692820549, "learning_rate": 6.593341626029486e-06, "loss": 0.0012, "step": 111790 }, { "epoch": 0.914257676738766, "grad_norm": 0.07960968464612961, "learning_rate": 6.5926651822942115e-06, "loss": 0.0015, "step": 111800 }, { "epoch": 0.9143394529173652, "grad_norm": 0.009563969448208809, "learning_rate": 6.591988706115053e-06, "loss": 0.0011, "step": 111810 }, { "epoch": 0.9144212290959643, "grad_norm": 0.061575666069984436, "learning_rate": 6.5913121975057905e-06, "loss": 0.0011, "step": 111820 }, { "epoch": 0.9145030052745635, "grad_norm": 0.01918899081647396, "learning_rate": 6.5906356564802065e-06, "loss": 0.0022, "step": 111830 }, { "epoch": 0.9145847814531627, "grad_norm": 0.04903915897011757, "learning_rate": 6.58995908305208e-06, "loss": 0.0015, "step": 111840 }, { "epoch": 0.9146665576317619, "grad_norm": 0.07230972498655319, "learning_rate": 6.589282477235195e-06, "loss": 0.0008, "step": 111850 }, { "epoch": 0.914748333810361, "grad_norm": 0.141591414809227, "learning_rate": 6.588605839043333e-06, "loss": 0.0012, "step": 111860 }, { "epoch": 0.9148301099889602, "grad_norm": 0.055342599749565125, "learning_rate": 6.5879291684902804e-06, "loss": 0.0023, "step": 111870 }, { "epoch": 0.9149118861675594, "grad_norm": 0.021372053772211075, "learning_rate": 6.587252465589817e-06, "loss": 0.0009, "step": 111880 }, { "epoch": 0.9149936623461585, "grad_norm": 0.030558830127120018, "learning_rate": 6.586575730355736e-06, "loss": 0.0017, "step": 111890 }, { "epoch": 0.9150754385247577, "grad_norm": 0.08453860878944397, "learning_rate": 6.585898962801813e-06, "loss": 0.0025, "step": 111900 }, { "epoch": 0.9151572147033569, "grad_norm": 0.03330855071544647, "learning_rate": 6.58522216294184e-06, "loss": 0.0013, "step": 111910 }, { "epoch": 0.915238990881956, "grad_norm": 0.07210449129343033, "learning_rate": 6.584545330789603e-06, "loss": 0.0014, "step": 111920 }, { "epoch": 0.9153207670605552, "grad_norm": 0.02783016487956047, "learning_rate": 6.583868466358892e-06, "loss": 0.0014, "step": 111930 }, { "epoch": 0.9154025432391545, "grad_norm": 0.3219718039035797, "learning_rate": 6.5831915696634895e-06, "loss": 0.0018, "step": 111940 }, { "epoch": 0.9154843194177537, "grad_norm": 0.01684548147022724, "learning_rate": 6.58251464071719e-06, "loss": 0.0012, "step": 111950 }, { "epoch": 0.9155660955963528, "grad_norm": 0.09533952176570892, "learning_rate": 6.58183767953378e-06, "loss": 0.0016, "step": 111960 }, { "epoch": 0.915647871774952, "grad_norm": 0.033649832010269165, "learning_rate": 6.581160686127051e-06, "loss": 0.0008, "step": 111970 }, { "epoch": 0.9157296479535512, "grad_norm": 0.07909967005252838, "learning_rate": 6.5804836605107934e-06, "loss": 0.0016, "step": 111980 }, { "epoch": 0.9158114241321503, "grad_norm": 0.03030361793935299, "learning_rate": 6.5798066026988e-06, "loss": 0.0023, "step": 111990 }, { "epoch": 0.9158932003107495, "grad_norm": 0.02087913639843464, "learning_rate": 6.579129512704861e-06, "loss": 0.0017, "step": 112000 }, { "epoch": 0.9159749764893487, "grad_norm": 0.03729262575507164, "learning_rate": 6.578452390542771e-06, "loss": 0.0003, "step": 112010 }, { "epoch": 0.9160567526679478, "grad_norm": 0.039097439497709274, "learning_rate": 6.577775236226322e-06, "loss": 0.0018, "step": 112020 }, { "epoch": 0.916138528846547, "grad_norm": 0.015076865442097187, "learning_rate": 6.5770980497693085e-06, "loss": 0.0023, "step": 112030 }, { "epoch": 0.9162203050251462, "grad_norm": 0.04324567690491676, "learning_rate": 6.576420831185526e-06, "loss": 0.0016, "step": 112040 }, { "epoch": 0.9163020812037453, "grad_norm": 0.04793090000748634, "learning_rate": 6.575743580488771e-06, "loss": 0.0019, "step": 112050 }, { "epoch": 0.9163838573823445, "grad_norm": 0.03242390230298042, "learning_rate": 6.575066297692837e-06, "loss": 0.0039, "step": 112060 }, { "epoch": 0.9164656335609437, "grad_norm": 0.07995034754276276, "learning_rate": 6.574388982811524e-06, "loss": 0.0014, "step": 112070 }, { "epoch": 0.9165474097395429, "grad_norm": 0.03668982908129692, "learning_rate": 6.573711635858625e-06, "loss": 0.0017, "step": 112080 }, { "epoch": 0.916629185918142, "grad_norm": 0.030047820881009102, "learning_rate": 6.573034256847944e-06, "loss": 0.0018, "step": 112090 }, { "epoch": 0.9167109620967412, "grad_norm": 0.05223666504025459, "learning_rate": 6.572356845793273e-06, "loss": 0.0015, "step": 112100 }, { "epoch": 0.9167927382753404, "grad_norm": 0.010479618795216084, "learning_rate": 6.5716794027084174e-06, "loss": 0.0008, "step": 112110 }, { "epoch": 0.9168745144539395, "grad_norm": 0.044797614216804504, "learning_rate": 6.5710019276071746e-06, "loss": 0.0013, "step": 112120 }, { "epoch": 0.9169562906325387, "grad_norm": 0.030262209475040436, "learning_rate": 6.570324420503346e-06, "loss": 0.0013, "step": 112130 }, { "epoch": 0.9170380668111379, "grad_norm": 0.04375317320227623, "learning_rate": 6.569646881410732e-06, "loss": 0.0024, "step": 112140 }, { "epoch": 0.9171198429897371, "grad_norm": 0.041840337216854095, "learning_rate": 6.568969310343134e-06, "loss": 0.0009, "step": 112150 }, { "epoch": 0.9172016191683363, "grad_norm": 0.019095513969659805, "learning_rate": 6.568291707314356e-06, "loss": 0.0018, "step": 112160 }, { "epoch": 0.9172833953469355, "grad_norm": 0.045075930655002594, "learning_rate": 6.567614072338203e-06, "loss": 0.0017, "step": 112170 }, { "epoch": 0.9173651715255347, "grad_norm": 0.049775153398513794, "learning_rate": 6.566936405428477e-06, "loss": 0.0008, "step": 112180 }, { "epoch": 0.9174469477041338, "grad_norm": 0.03530993312597275, "learning_rate": 6.566258706598981e-06, "loss": 0.0015, "step": 112190 }, { "epoch": 0.917528723882733, "grad_norm": 0.026903372257947922, "learning_rate": 6.5655809758635226e-06, "loss": 0.0024, "step": 112200 }, { "epoch": 0.9176105000613322, "grad_norm": 0.04127797856926918, "learning_rate": 6.564903213235908e-06, "loss": 0.0018, "step": 112210 }, { "epoch": 0.9176922762399313, "grad_norm": 0.05790995806455612, "learning_rate": 6.564225418729941e-06, "loss": 0.0021, "step": 112220 }, { "epoch": 0.9177740524185305, "grad_norm": 0.04616306349635124, "learning_rate": 6.563547592359433e-06, "loss": 0.0009, "step": 112230 }, { "epoch": 0.9178558285971297, "grad_norm": 0.049254409968853, "learning_rate": 6.562869734138188e-06, "loss": 0.0018, "step": 112240 }, { "epoch": 0.9179376047757288, "grad_norm": 0.041398994624614716, "learning_rate": 6.5621918440800185e-06, "loss": 0.0027, "step": 112250 }, { "epoch": 0.918019380954328, "grad_norm": 0.0606796033680439, "learning_rate": 6.561513922198729e-06, "loss": 0.0014, "step": 112260 }, { "epoch": 0.9181011571329272, "grad_norm": 0.003738312516361475, "learning_rate": 6.5608359685081325e-06, "loss": 0.0007, "step": 112270 }, { "epoch": 0.9181829333115263, "grad_norm": 0.04480670765042305, "learning_rate": 6.560157983022037e-06, "loss": 0.0011, "step": 112280 }, { "epoch": 0.9182647094901255, "grad_norm": 0.05771587789058685, "learning_rate": 6.559479965754257e-06, "loss": 0.0014, "step": 112290 }, { "epoch": 0.9183464856687247, "grad_norm": 0.24186107516288757, "learning_rate": 6.558801916718604e-06, "loss": 0.0016, "step": 112300 }, { "epoch": 0.9184282618473238, "grad_norm": 0.04693204164505005, "learning_rate": 6.558123835928887e-06, "loss": 0.0013, "step": 112310 }, { "epoch": 0.918510038025923, "grad_norm": 0.08583194762468338, "learning_rate": 6.5574457233989195e-06, "loss": 0.001, "step": 112320 }, { "epoch": 0.9185918142045222, "grad_norm": 0.01726595126092434, "learning_rate": 6.556767579142519e-06, "loss": 0.0033, "step": 112330 }, { "epoch": 0.9186735903831214, "grad_norm": 0.04234429448843002, "learning_rate": 6.556089403173496e-06, "loss": 0.0012, "step": 112340 }, { "epoch": 0.9187553665617205, "grad_norm": 0.02575475536286831, "learning_rate": 6.555411195505668e-06, "loss": 0.0019, "step": 112350 }, { "epoch": 0.9188371427403197, "grad_norm": 0.09427446126937866, "learning_rate": 6.5547329561528494e-06, "loss": 0.0015, "step": 112360 }, { "epoch": 0.918918918918919, "grad_norm": 0.036925867199897766, "learning_rate": 6.554054685128857e-06, "loss": 0.0018, "step": 112370 }, { "epoch": 0.9190006950975181, "grad_norm": 0.023036226630210876, "learning_rate": 6.553376382447507e-06, "loss": 0.0014, "step": 112380 }, { "epoch": 0.9190824712761173, "grad_norm": 0.06215042993426323, "learning_rate": 6.552698048122618e-06, "loss": 0.001, "step": 112390 }, { "epoch": 0.9191642474547165, "grad_norm": 0.03288661688566208, "learning_rate": 6.552019682168006e-06, "loss": 0.0008, "step": 112400 }, { "epoch": 0.9192460236333156, "grad_norm": 0.07240336388349533, "learning_rate": 6.551341284597493e-06, "loss": 0.0014, "step": 112410 }, { "epoch": 0.9193277998119148, "grad_norm": 0.11046160757541656, "learning_rate": 6.550662855424897e-06, "loss": 0.0015, "step": 112420 }, { "epoch": 0.919409575990514, "grad_norm": 0.057141825556755066, "learning_rate": 6.549984394664038e-06, "loss": 0.0014, "step": 112430 }, { "epoch": 0.9194913521691132, "grad_norm": 0.015401741489768028, "learning_rate": 6.5493059023287374e-06, "loss": 0.0011, "step": 112440 }, { "epoch": 0.9195731283477123, "grad_norm": 0.022951334714889526, "learning_rate": 6.5486273784328155e-06, "loss": 0.0015, "step": 112450 }, { "epoch": 0.9196549045263115, "grad_norm": 0.038073159754276276, "learning_rate": 6.547948822990094e-06, "loss": 0.0011, "step": 112460 }, { "epoch": 0.9197366807049107, "grad_norm": 0.009690165519714355, "learning_rate": 6.5472702360143996e-06, "loss": 0.0007, "step": 112470 }, { "epoch": 0.9198184568835098, "grad_norm": 0.09841857105493546, "learning_rate": 6.546591617519551e-06, "loss": 0.0022, "step": 112480 }, { "epoch": 0.919900233062109, "grad_norm": 0.0009376408997923136, "learning_rate": 6.545912967519376e-06, "loss": 0.0008, "step": 112490 }, { "epoch": 0.9199820092407082, "grad_norm": 0.03192472457885742, "learning_rate": 6.545234286027694e-06, "loss": 0.0022, "step": 112500 }, { "epoch": 0.9200637854193073, "grad_norm": 0.008728871122002602, "learning_rate": 6.544555573058336e-06, "loss": 0.0014, "step": 112510 }, { "epoch": 0.9201455615979065, "grad_norm": 0.28296443819999695, "learning_rate": 6.543876828625124e-06, "loss": 0.0016, "step": 112520 }, { "epoch": 0.9202273377765057, "grad_norm": 0.15180324018001556, "learning_rate": 6.543198052741887e-06, "loss": 0.0023, "step": 112530 }, { "epoch": 0.9203091139551048, "grad_norm": 0.0440109521150589, "learning_rate": 6.542519245422452e-06, "loss": 0.0014, "step": 112540 }, { "epoch": 0.920390890133704, "grad_norm": 0.13003048300743103, "learning_rate": 6.541840406680645e-06, "loss": 0.0021, "step": 112550 }, { "epoch": 0.9204726663123032, "grad_norm": 0.09474939852952957, "learning_rate": 6.541161536530295e-06, "loss": 0.0012, "step": 112560 }, { "epoch": 0.9205544424909023, "grad_norm": 0.05757734924554825, "learning_rate": 6.540482634985234e-06, "loss": 0.002, "step": 112570 }, { "epoch": 0.9206362186695016, "grad_norm": 0.09393919259309769, "learning_rate": 6.539803702059286e-06, "loss": 0.0018, "step": 112580 }, { "epoch": 0.9207179948481008, "grad_norm": 0.02873852290213108, "learning_rate": 6.539124737766288e-06, "loss": 0.0016, "step": 112590 }, { "epoch": 0.9207997710267, "grad_norm": 0.08207271248102188, "learning_rate": 6.538445742120068e-06, "loss": 0.0022, "step": 112600 }, { "epoch": 0.9208815472052991, "grad_norm": 0.021179012954235077, "learning_rate": 6.537766715134457e-06, "loss": 0.0013, "step": 112610 }, { "epoch": 0.9209633233838983, "grad_norm": 0.03436599299311638, "learning_rate": 6.537087656823288e-06, "loss": 0.0015, "step": 112620 }, { "epoch": 0.9210450995624975, "grad_norm": 0.29361552000045776, "learning_rate": 6.536408567200394e-06, "loss": 0.0024, "step": 112630 }, { "epoch": 0.9211268757410966, "grad_norm": 0.09641070663928986, "learning_rate": 6.535729446279608e-06, "loss": 0.0014, "step": 112640 }, { "epoch": 0.9212086519196958, "grad_norm": 0.11003749817609787, "learning_rate": 6.535050294074765e-06, "loss": 0.0013, "step": 112650 }, { "epoch": 0.921290428098295, "grad_norm": 0.062059804797172546, "learning_rate": 6.534371110599701e-06, "loss": 0.002, "step": 112660 }, { "epoch": 0.9213722042768941, "grad_norm": 0.028417207300662994, "learning_rate": 6.533691895868251e-06, "loss": 0.0015, "step": 112670 }, { "epoch": 0.9214539804554933, "grad_norm": 0.045715149492025375, "learning_rate": 6.533012649894248e-06, "loss": 0.0017, "step": 112680 }, { "epoch": 0.9215357566340925, "grad_norm": 0.12429045140743256, "learning_rate": 6.5323333726915326e-06, "loss": 0.0024, "step": 112690 }, { "epoch": 0.9216175328126917, "grad_norm": 0.0024129494559019804, "learning_rate": 6.531654064273939e-06, "loss": 0.0011, "step": 112700 }, { "epoch": 0.9216993089912908, "grad_norm": 0.03176503628492355, "learning_rate": 6.530974724655309e-06, "loss": 0.0011, "step": 112710 }, { "epoch": 0.92178108516989, "grad_norm": 0.05712536349892616, "learning_rate": 6.5302953538494796e-06, "loss": 0.0013, "step": 112720 }, { "epoch": 0.9218628613484892, "grad_norm": 0.1334579735994339, "learning_rate": 6.529615951870289e-06, "loss": 0.0011, "step": 112730 }, { "epoch": 0.9219446375270883, "grad_norm": 0.07498221099376678, "learning_rate": 6.528936518731579e-06, "loss": 0.0026, "step": 112740 }, { "epoch": 0.9220264137056875, "grad_norm": 0.007187316194176674, "learning_rate": 6.5282570544471895e-06, "loss": 0.0017, "step": 112750 }, { "epoch": 0.9221081898842867, "grad_norm": 0.020372239872813225, "learning_rate": 6.5275775590309595e-06, "loss": 0.0012, "step": 112760 }, { "epoch": 0.9221899660628858, "grad_norm": 0.05088583379983902, "learning_rate": 6.526898032496735e-06, "loss": 0.0025, "step": 112770 }, { "epoch": 0.922271742241485, "grad_norm": 0.027148766443133354, "learning_rate": 6.526218474858355e-06, "loss": 0.0022, "step": 112780 }, { "epoch": 0.9223535184200843, "grad_norm": 0.0514717772603035, "learning_rate": 6.525538886129666e-06, "loss": 0.0018, "step": 112790 }, { "epoch": 0.9224352945986835, "grad_norm": 0.04976190999150276, "learning_rate": 6.524859266324508e-06, "loss": 0.002, "step": 112800 }, { "epoch": 0.9225170707772826, "grad_norm": 0.02169678919017315, "learning_rate": 6.524179615456728e-06, "loss": 0.0006, "step": 112810 }, { "epoch": 0.9225988469558818, "grad_norm": 0.04115292802453041, "learning_rate": 6.5234999335401715e-06, "loss": 0.0009, "step": 112820 }, { "epoch": 0.922680623134481, "grad_norm": 0.10519489645957947, "learning_rate": 6.522820220588682e-06, "loss": 0.0024, "step": 112830 }, { "epoch": 0.9227623993130801, "grad_norm": 0.07495833933353424, "learning_rate": 6.522140476616106e-06, "loss": 0.0013, "step": 112840 }, { "epoch": 0.9228441754916793, "grad_norm": 0.04673561453819275, "learning_rate": 6.521460701636292e-06, "loss": 0.0012, "step": 112850 }, { "epoch": 0.9229259516702785, "grad_norm": 0.025135163217782974, "learning_rate": 6.520780895663087e-06, "loss": 0.0012, "step": 112860 }, { "epoch": 0.9230077278488776, "grad_norm": 0.05740809440612793, "learning_rate": 6.520101058710338e-06, "loss": 0.001, "step": 112870 }, { "epoch": 0.9230895040274768, "grad_norm": 0.002061534207314253, "learning_rate": 6.519421190791896e-06, "loss": 0.0023, "step": 112880 }, { "epoch": 0.923171280206076, "grad_norm": 0.13883210718631744, "learning_rate": 6.518741291921609e-06, "loss": 0.002, "step": 112890 }, { "epoch": 0.9232530563846751, "grad_norm": 0.04547727853059769, "learning_rate": 6.518061362113326e-06, "loss": 0.0015, "step": 112900 }, { "epoch": 0.9233348325632743, "grad_norm": 0.06421004235744476, "learning_rate": 6.517381401380901e-06, "loss": 0.0009, "step": 112910 }, { "epoch": 0.9234166087418735, "grad_norm": 0.0027797261718660593, "learning_rate": 6.5167014097381834e-06, "loss": 0.0012, "step": 112920 }, { "epoch": 0.9234983849204726, "grad_norm": 0.04803290218114853, "learning_rate": 6.5160213871990245e-06, "loss": 0.0018, "step": 112930 }, { "epoch": 0.9235801610990718, "grad_norm": 0.00441631767898798, "learning_rate": 6.515341333777277e-06, "loss": 0.0016, "step": 112940 }, { "epoch": 0.923661937277671, "grad_norm": 0.0191956739872694, "learning_rate": 6.514661249486796e-06, "loss": 0.0014, "step": 112950 }, { "epoch": 0.9237437134562702, "grad_norm": 0.05815044790506363, "learning_rate": 6.513981134341434e-06, "loss": 0.0033, "step": 112960 }, { "epoch": 0.9238254896348693, "grad_norm": 0.07054898887872696, "learning_rate": 6.513300988355045e-06, "loss": 0.0017, "step": 112970 }, { "epoch": 0.9239072658134685, "grad_norm": 0.008930395357310772, "learning_rate": 6.512620811541486e-06, "loss": 0.0015, "step": 112980 }, { "epoch": 0.9239890419920677, "grad_norm": 0.03870313987135887, "learning_rate": 6.51194060391461e-06, "loss": 0.0008, "step": 112990 }, { "epoch": 0.9240708181706668, "grad_norm": 0.04539823532104492, "learning_rate": 6.5112603654882755e-06, "loss": 0.0009, "step": 113000 }, { "epoch": 0.9241525943492661, "grad_norm": 0.0716259628534317, "learning_rate": 6.510580096276339e-06, "loss": 0.0011, "step": 113010 }, { "epoch": 0.9242343705278653, "grad_norm": 0.04285624250769615, "learning_rate": 6.509899796292658e-06, "loss": 0.0003, "step": 113020 }, { "epoch": 0.9243161467064644, "grad_norm": 0.024097876623272896, "learning_rate": 6.509219465551092e-06, "loss": 0.0006, "step": 113030 }, { "epoch": 0.9243979228850636, "grad_norm": 0.028539424762129784, "learning_rate": 6.508539104065498e-06, "loss": 0.0016, "step": 113040 }, { "epoch": 0.9244796990636628, "grad_norm": 0.03888820856809616, "learning_rate": 6.507858711849735e-06, "loss": 0.0011, "step": 113050 }, { "epoch": 0.924561475242262, "grad_norm": 0.019454432651400566, "learning_rate": 6.507178288917666e-06, "loss": 0.0015, "step": 113060 }, { "epoch": 0.9246432514208611, "grad_norm": 0.0329718217253685, "learning_rate": 6.506497835283149e-06, "loss": 0.0014, "step": 113070 }, { "epoch": 0.9247250275994603, "grad_norm": 0.028642039746046066, "learning_rate": 6.5058173509600476e-06, "loss": 0.0017, "step": 113080 }, { "epoch": 0.9248068037780595, "grad_norm": 0.12895067036151886, "learning_rate": 6.505136835962222e-06, "loss": 0.0031, "step": 113090 }, { "epoch": 0.9248885799566586, "grad_norm": 0.028695039451122284, "learning_rate": 6.5044562903035366e-06, "loss": 0.002, "step": 113100 }, { "epoch": 0.9249703561352578, "grad_norm": 0.0626087412238121, "learning_rate": 6.503775713997853e-06, "loss": 0.0017, "step": 113110 }, { "epoch": 0.925052132313857, "grad_norm": 0.018770139664411545, "learning_rate": 6.503095107059035e-06, "loss": 0.0018, "step": 113120 }, { "epoch": 0.9251339084924561, "grad_norm": 0.020036417990922928, "learning_rate": 6.5024144695009485e-06, "loss": 0.0014, "step": 113130 }, { "epoch": 0.9252156846710553, "grad_norm": 0.02486097253859043, "learning_rate": 6.501733801337459e-06, "loss": 0.0017, "step": 113140 }, { "epoch": 0.9252974608496545, "grad_norm": 0.030096057802438736, "learning_rate": 6.501053102582431e-06, "loss": 0.0016, "step": 113150 }, { "epoch": 0.9253792370282536, "grad_norm": 0.02918025106191635, "learning_rate": 6.50037237324973e-06, "loss": 0.0008, "step": 113160 }, { "epoch": 0.9254610132068528, "grad_norm": 0.022561676800251007, "learning_rate": 6.499691613353225e-06, "loss": 0.001, "step": 113170 }, { "epoch": 0.925542789385452, "grad_norm": 0.04892484471201897, "learning_rate": 6.499010822906783e-06, "loss": 0.0008, "step": 113180 }, { "epoch": 0.9256245655640511, "grad_norm": 0.04522506520152092, "learning_rate": 6.498330001924271e-06, "loss": 0.001, "step": 113190 }, { "epoch": 0.9257063417426503, "grad_norm": 0.13605915009975433, "learning_rate": 6.497649150419559e-06, "loss": 0.0011, "step": 113200 }, { "epoch": 0.9257881179212495, "grad_norm": 0.03146231174468994, "learning_rate": 6.4969682684065185e-06, "loss": 0.0017, "step": 113210 }, { "epoch": 0.9258698940998488, "grad_norm": 0.0725826546549797, "learning_rate": 6.496287355899016e-06, "loss": 0.0016, "step": 113220 }, { "epoch": 0.9259516702784479, "grad_norm": 0.024453870952129364, "learning_rate": 6.495606412910924e-06, "loss": 0.0026, "step": 113230 }, { "epoch": 0.9260334464570471, "grad_norm": 0.008881643414497375, "learning_rate": 6.494925439456114e-06, "loss": 0.001, "step": 113240 }, { "epoch": 0.9261152226356463, "grad_norm": 0.09905242919921875, "learning_rate": 6.494244435548457e-06, "loss": 0.0018, "step": 113250 }, { "epoch": 0.9261969988142454, "grad_norm": 0.035334158688783646, "learning_rate": 6.4935634012018264e-06, "loss": 0.0025, "step": 113260 }, { "epoch": 0.9262787749928446, "grad_norm": 0.01110609620809555, "learning_rate": 6.492882336430097e-06, "loss": 0.0012, "step": 113270 }, { "epoch": 0.9263605511714438, "grad_norm": 0.11816290020942688, "learning_rate": 6.492201241247139e-06, "loss": 0.0029, "step": 113280 }, { "epoch": 0.926442327350043, "grad_norm": 0.014194319024682045, "learning_rate": 6.491520115666828e-06, "loss": 0.0012, "step": 113290 }, { "epoch": 0.9265241035286421, "grad_norm": 0.08501524478197098, "learning_rate": 6.490838959703042e-06, "loss": 0.0023, "step": 113300 }, { "epoch": 0.9266058797072413, "grad_norm": 0.03916659206151962, "learning_rate": 6.490157773369653e-06, "loss": 0.0012, "step": 113310 }, { "epoch": 0.9266876558858405, "grad_norm": 0.03527222201228142, "learning_rate": 6.4894765566805405e-06, "loss": 0.0032, "step": 113320 }, { "epoch": 0.9267694320644396, "grad_norm": 0.02712135575711727, "learning_rate": 6.48879530964958e-06, "loss": 0.0017, "step": 113330 }, { "epoch": 0.9268512082430388, "grad_norm": 0.0741027444601059, "learning_rate": 6.4881140322906465e-06, "loss": 0.0019, "step": 113340 }, { "epoch": 0.926932984421638, "grad_norm": 0.012994709424674511, "learning_rate": 6.487432724617621e-06, "loss": 0.0014, "step": 113350 }, { "epoch": 0.9270147606002371, "grad_norm": 0.04589194059371948, "learning_rate": 6.4867513866443845e-06, "loss": 0.0019, "step": 113360 }, { "epoch": 0.9270965367788363, "grad_norm": 0.03282718360424042, "learning_rate": 6.486070018384811e-06, "loss": 0.0021, "step": 113370 }, { "epoch": 0.9271783129574355, "grad_norm": 0.0977344885468483, "learning_rate": 6.4853886198527836e-06, "loss": 0.0027, "step": 113380 }, { "epoch": 0.9272600891360346, "grad_norm": 0.12573082745075226, "learning_rate": 6.484707191062185e-06, "loss": 0.0011, "step": 113390 }, { "epoch": 0.9273418653146338, "grad_norm": 0.05056726932525635, "learning_rate": 6.484025732026893e-06, "loss": 0.0012, "step": 113400 }, { "epoch": 0.927423641493233, "grad_norm": 0.028318947181105614, "learning_rate": 6.4833442427607885e-06, "loss": 0.0029, "step": 113410 }, { "epoch": 0.9275054176718321, "grad_norm": 0.15796791017055511, "learning_rate": 6.482662723277758e-06, "loss": 0.0019, "step": 113420 }, { "epoch": 0.9275871938504314, "grad_norm": 0.0252036415040493, "learning_rate": 6.4819811735916815e-06, "loss": 0.0016, "step": 113430 }, { "epoch": 0.9276689700290306, "grad_norm": 0.04399068281054497, "learning_rate": 6.481299593716445e-06, "loss": 0.0008, "step": 113440 }, { "epoch": 0.9277507462076298, "grad_norm": 0.021156199276447296, "learning_rate": 6.480617983665931e-06, "loss": 0.002, "step": 113450 }, { "epoch": 0.9278325223862289, "grad_norm": 0.07269558310508728, "learning_rate": 6.479936343454026e-06, "loss": 0.0013, "step": 113460 }, { "epoch": 0.9279142985648281, "grad_norm": 0.04738078638911247, "learning_rate": 6.479254673094614e-06, "loss": 0.0009, "step": 113470 }, { "epoch": 0.9279960747434273, "grad_norm": 0.05385998636484146, "learning_rate": 6.478572972601582e-06, "loss": 0.0012, "step": 113480 }, { "epoch": 0.9280778509220264, "grad_norm": 0.18462255597114563, "learning_rate": 6.477891241988816e-06, "loss": 0.0013, "step": 113490 }, { "epoch": 0.9281596271006256, "grad_norm": 0.04482119530439377, "learning_rate": 6.477209481270206e-06, "loss": 0.001, "step": 113500 }, { "epoch": 0.9282414032792248, "grad_norm": 0.14964830875396729, "learning_rate": 6.476527690459637e-06, "loss": 0.0031, "step": 113510 }, { "epoch": 0.9283231794578239, "grad_norm": 0.050919853150844574, "learning_rate": 6.4758458695709995e-06, "loss": 0.0011, "step": 113520 }, { "epoch": 0.9284049556364231, "grad_norm": 0.09170804172754288, "learning_rate": 6.475164018618181e-06, "loss": 0.0015, "step": 113530 }, { "epoch": 0.9284867318150223, "grad_norm": 0.04973380267620087, "learning_rate": 6.4744821376150725e-06, "loss": 0.0015, "step": 113540 }, { "epoch": 0.9285685079936215, "grad_norm": 0.11664500087499619, "learning_rate": 6.473800226575564e-06, "loss": 0.0023, "step": 113550 }, { "epoch": 0.9286502841722206, "grad_norm": 0.013835393823683262, "learning_rate": 6.473118285513548e-06, "loss": 0.0015, "step": 113560 }, { "epoch": 0.9287320603508198, "grad_norm": 0.020802032202482224, "learning_rate": 6.472436314442915e-06, "loss": 0.0009, "step": 113570 }, { "epoch": 0.928813836529419, "grad_norm": 0.07606750726699829, "learning_rate": 6.4717543133775575e-06, "loss": 0.0009, "step": 113580 }, { "epoch": 0.9288956127080181, "grad_norm": 0.08720461279153824, "learning_rate": 6.471072282331367e-06, "loss": 0.0016, "step": 113590 }, { "epoch": 0.9289773888866173, "grad_norm": 0.05469295382499695, "learning_rate": 6.470390221318239e-06, "loss": 0.0021, "step": 113600 }, { "epoch": 0.9290591650652165, "grad_norm": 0.08922986686229706, "learning_rate": 6.4697081303520656e-06, "loss": 0.0024, "step": 113610 }, { "epoch": 0.9291409412438156, "grad_norm": 0.19633519649505615, "learning_rate": 6.469026009446746e-06, "loss": 0.0017, "step": 113620 }, { "epoch": 0.9292227174224148, "grad_norm": 0.12504105269908905, "learning_rate": 6.468343858616171e-06, "loss": 0.0009, "step": 113630 }, { "epoch": 0.929304493601014, "grad_norm": 0.03351587429642677, "learning_rate": 6.4676616778742385e-06, "loss": 0.0011, "step": 113640 }, { "epoch": 0.9293862697796132, "grad_norm": 0.07502948492765427, "learning_rate": 6.466979467234844e-06, "loss": 0.0012, "step": 113650 }, { "epoch": 0.9294680459582124, "grad_norm": 0.049213115125894547, "learning_rate": 6.4662972267118864e-06, "loss": 0.002, "step": 113660 }, { "epoch": 0.9295498221368116, "grad_norm": 0.05526502802968025, "learning_rate": 6.46561495631926e-06, "loss": 0.0019, "step": 113670 }, { "epoch": 0.9296315983154108, "grad_norm": 0.04291631653904915, "learning_rate": 6.464932656070868e-06, "loss": 0.0014, "step": 113680 }, { "epoch": 0.9297133744940099, "grad_norm": 0.045892391353845596, "learning_rate": 6.464250325980607e-06, "loss": 0.002, "step": 113690 }, { "epoch": 0.9297951506726091, "grad_norm": 0.15557989478111267, "learning_rate": 6.463567966062377e-06, "loss": 0.0044, "step": 113700 }, { "epoch": 0.9298769268512083, "grad_norm": 0.017277922481298447, "learning_rate": 6.462885576330075e-06, "loss": 0.0012, "step": 113710 }, { "epoch": 0.9299587030298074, "grad_norm": 0.11729079484939575, "learning_rate": 6.462203156797607e-06, "loss": 0.0012, "step": 113720 }, { "epoch": 0.9300404792084066, "grad_norm": 0.09947002679109573, "learning_rate": 6.46152070747887e-06, "loss": 0.0012, "step": 113730 }, { "epoch": 0.9301222553870058, "grad_norm": 0.02490793727338314, "learning_rate": 6.460838228387771e-06, "loss": 0.0016, "step": 113740 }, { "epoch": 0.9302040315656049, "grad_norm": 0.12093684077262878, "learning_rate": 6.460155719538209e-06, "loss": 0.0017, "step": 113750 }, { "epoch": 0.9302858077442041, "grad_norm": 0.05270720273256302, "learning_rate": 6.459473180944088e-06, "loss": 0.0011, "step": 113760 }, { "epoch": 0.9303675839228033, "grad_norm": 0.031390946358442307, "learning_rate": 6.458790612619311e-06, "loss": 0.001, "step": 113770 }, { "epoch": 0.9304493601014024, "grad_norm": 0.028879854828119278, "learning_rate": 6.4581080145777854e-06, "loss": 0.0014, "step": 113780 }, { "epoch": 0.9305311362800016, "grad_norm": 0.04294925555586815, "learning_rate": 6.4574253868334116e-06, "loss": 0.0018, "step": 113790 }, { "epoch": 0.9306129124586008, "grad_norm": 0.05408714711666107, "learning_rate": 6.456742729400101e-06, "loss": 0.0016, "step": 113800 }, { "epoch": 0.9306946886372, "grad_norm": 0.0845654085278511, "learning_rate": 6.456060042291755e-06, "loss": 0.0022, "step": 113810 }, { "epoch": 0.9307764648157991, "grad_norm": 0.1531200259923935, "learning_rate": 6.455377325522283e-06, "loss": 0.0012, "step": 113820 }, { "epoch": 0.9308582409943983, "grad_norm": 0.010244208388030529, "learning_rate": 6.454694579105591e-06, "loss": 0.0013, "step": 113830 }, { "epoch": 0.9309400171729975, "grad_norm": 0.05731968581676483, "learning_rate": 6.45401180305559e-06, "loss": 0.0013, "step": 113840 }, { "epoch": 0.9310217933515966, "grad_norm": 0.01848728023469448, "learning_rate": 6.453328997386185e-06, "loss": 0.0021, "step": 113850 }, { "epoch": 0.9311035695301959, "grad_norm": 0.07354427874088287, "learning_rate": 6.452646162111288e-06, "loss": 0.0013, "step": 113860 }, { "epoch": 0.9311853457087951, "grad_norm": 0.13504964113235474, "learning_rate": 6.451963297244807e-06, "loss": 0.0025, "step": 113870 }, { "epoch": 0.9312671218873942, "grad_norm": 0.025898782536387444, "learning_rate": 6.451280402800655e-06, "loss": 0.0021, "step": 113880 }, { "epoch": 0.9313488980659934, "grad_norm": 0.06300898641347885, "learning_rate": 6.450597478792739e-06, "loss": 0.0015, "step": 113890 }, { "epoch": 0.9314306742445926, "grad_norm": 0.0186481811106205, "learning_rate": 6.4499145252349745e-06, "loss": 0.0009, "step": 113900 }, { "epoch": 0.9315124504231918, "grad_norm": 0.03263134881854057, "learning_rate": 6.449231542141272e-06, "loss": 0.0016, "step": 113910 }, { "epoch": 0.9315942266017909, "grad_norm": 0.007054077927023172, "learning_rate": 6.448548529525547e-06, "loss": 0.0014, "step": 113920 }, { "epoch": 0.9316760027803901, "grad_norm": 0.10769878327846527, "learning_rate": 6.44786548740171e-06, "loss": 0.0015, "step": 113930 }, { "epoch": 0.9317577789589893, "grad_norm": 0.06280164420604706, "learning_rate": 6.447182415783676e-06, "loss": 0.0034, "step": 113940 }, { "epoch": 0.9318395551375884, "grad_norm": 0.08830609917640686, "learning_rate": 6.44649931468536e-06, "loss": 0.0011, "step": 113950 }, { "epoch": 0.9319213313161876, "grad_norm": 0.0028129001148045063, "learning_rate": 6.445816184120676e-06, "loss": 0.0017, "step": 113960 }, { "epoch": 0.9320031074947868, "grad_norm": 0.03293561562895775, "learning_rate": 6.445133024103541e-06, "loss": 0.0016, "step": 113970 }, { "epoch": 0.9320848836733859, "grad_norm": 0.038056716322898865, "learning_rate": 6.444449834647873e-06, "loss": 0.0013, "step": 113980 }, { "epoch": 0.9321666598519851, "grad_norm": 0.02787361852824688, "learning_rate": 6.443766615767589e-06, "loss": 0.0016, "step": 113990 }, { "epoch": 0.9322484360305843, "grad_norm": 0.008347095921635628, "learning_rate": 6.443083367476604e-06, "loss": 0.0005, "step": 114000 }, { "epoch": 0.9323302122091834, "grad_norm": 0.001816612551920116, "learning_rate": 6.442400089788836e-06, "loss": 0.0007, "step": 114010 }, { "epoch": 0.9324119883877826, "grad_norm": 0.019573574885725975, "learning_rate": 6.441716782718208e-06, "loss": 0.0016, "step": 114020 }, { "epoch": 0.9324937645663818, "grad_norm": 0.06672553718090057, "learning_rate": 6.441033446278636e-06, "loss": 0.0027, "step": 114030 }, { "epoch": 0.932575540744981, "grad_norm": 0.10872884094715118, "learning_rate": 6.4403500804840424e-06, "loss": 0.0009, "step": 114040 }, { "epoch": 0.9326573169235801, "grad_norm": 0.03730698674917221, "learning_rate": 6.439666685348347e-06, "loss": 0.0013, "step": 114050 }, { "epoch": 0.9327390931021793, "grad_norm": 0.06637415289878845, "learning_rate": 6.438983260885471e-06, "loss": 0.001, "step": 114060 }, { "epoch": 0.9328208692807786, "grad_norm": 0.08917161822319031, "learning_rate": 6.438299807109335e-06, "loss": 0.0015, "step": 114070 }, { "epoch": 0.9329026454593777, "grad_norm": 0.03827190399169922, "learning_rate": 6.437616324033864e-06, "loss": 0.0013, "step": 114080 }, { "epoch": 0.9329844216379769, "grad_norm": 0.03168809786438942, "learning_rate": 6.436932811672979e-06, "loss": 0.0005, "step": 114090 }, { "epoch": 0.9330661978165761, "grad_norm": 0.11145489662885666, "learning_rate": 6.436249270040606e-06, "loss": 0.0012, "step": 114100 }, { "epoch": 0.9331479739951752, "grad_norm": 0.3646372854709625, "learning_rate": 6.435565699150667e-06, "loss": 0.0033, "step": 114110 }, { "epoch": 0.9332297501737744, "grad_norm": 0.03205716609954834, "learning_rate": 6.434882099017088e-06, "loss": 0.0006, "step": 114120 }, { "epoch": 0.9333115263523736, "grad_norm": 0.05001559108495712, "learning_rate": 6.434198469653794e-06, "loss": 0.0015, "step": 114130 }, { "epoch": 0.9333933025309727, "grad_norm": 0.0296181607991457, "learning_rate": 6.433514811074712e-06, "loss": 0.0018, "step": 114140 }, { "epoch": 0.9334750787095719, "grad_norm": 0.23310361802577972, "learning_rate": 6.432831123293767e-06, "loss": 0.0032, "step": 114150 }, { "epoch": 0.9335568548881711, "grad_norm": 0.23011159896850586, "learning_rate": 6.432147406324888e-06, "loss": 0.0021, "step": 114160 }, { "epoch": 0.9336386310667703, "grad_norm": 0.013365563005208969, "learning_rate": 6.431463660182002e-06, "loss": 0.0014, "step": 114170 }, { "epoch": 0.9337204072453694, "grad_norm": 0.06770612299442291, "learning_rate": 6.4307798848790396e-06, "loss": 0.002, "step": 114180 }, { "epoch": 0.9338021834239686, "grad_norm": 0.03634801134467125, "learning_rate": 6.430096080429925e-06, "loss": 0.0017, "step": 114190 }, { "epoch": 0.9338839596025678, "grad_norm": 0.05051731318235397, "learning_rate": 6.429412246848593e-06, "loss": 0.0028, "step": 114200 }, { "epoch": 0.9339657357811669, "grad_norm": 0.07279819250106812, "learning_rate": 6.42872838414897e-06, "loss": 0.0013, "step": 114210 }, { "epoch": 0.9340475119597661, "grad_norm": 0.2052282989025116, "learning_rate": 6.428044492344989e-06, "loss": 0.0022, "step": 114220 }, { "epoch": 0.9341292881383653, "grad_norm": 0.005408533848822117, "learning_rate": 6.4273605714505824e-06, "loss": 0.0012, "step": 114230 }, { "epoch": 0.9342110643169644, "grad_norm": 0.06157290190458298, "learning_rate": 6.4266766214796794e-06, "loss": 0.0014, "step": 114240 }, { "epoch": 0.9342928404955636, "grad_norm": 0.0753219723701477, "learning_rate": 6.425992642446215e-06, "loss": 0.0016, "step": 114250 }, { "epoch": 0.9343746166741628, "grad_norm": 0.05823032557964325, "learning_rate": 6.425308634364119e-06, "loss": 0.001, "step": 114260 }, { "epoch": 0.9344563928527619, "grad_norm": 0.052769072353839874, "learning_rate": 6.42462459724733e-06, "loss": 0.0011, "step": 114270 }, { "epoch": 0.9345381690313611, "grad_norm": 0.01669558882713318, "learning_rate": 6.423940531109778e-06, "loss": 0.0013, "step": 114280 }, { "epoch": 0.9346199452099604, "grad_norm": 0.062474362552165985, "learning_rate": 6.423256435965401e-06, "loss": 0.0008, "step": 114290 }, { "epoch": 0.9347017213885596, "grad_norm": 0.023745128884911537, "learning_rate": 6.422572311828135e-06, "loss": 0.0022, "step": 114300 }, { "epoch": 0.9347834975671587, "grad_norm": 0.004595499951392412, "learning_rate": 6.421888158711913e-06, "loss": 0.0024, "step": 114310 }, { "epoch": 0.9348652737457579, "grad_norm": 0.027809781953692436, "learning_rate": 6.421203976630672e-06, "loss": 0.0014, "step": 114320 }, { "epoch": 0.9349470499243571, "grad_norm": 0.0845877081155777, "learning_rate": 6.420519765598353e-06, "loss": 0.0017, "step": 114330 }, { "epoch": 0.9350288261029562, "grad_norm": 0.019635608419775963, "learning_rate": 6.4198355256288906e-06, "loss": 0.0011, "step": 114340 }, { "epoch": 0.9351106022815554, "grad_norm": 0.014562639407813549, "learning_rate": 6.4191512567362255e-06, "loss": 0.0011, "step": 114350 }, { "epoch": 0.9351923784601546, "grad_norm": 0.043820250779390335, "learning_rate": 6.418466958934295e-06, "loss": 0.0023, "step": 114360 }, { "epoch": 0.9352741546387537, "grad_norm": 0.003904118202626705, "learning_rate": 6.417782632237041e-06, "loss": 0.0007, "step": 114370 }, { "epoch": 0.9353559308173529, "grad_norm": 0.09213554859161377, "learning_rate": 6.4170982766584004e-06, "loss": 0.0016, "step": 114380 }, { "epoch": 0.9354377069959521, "grad_norm": 0.03453688323497772, "learning_rate": 6.4164138922123165e-06, "loss": 0.0015, "step": 114390 }, { "epoch": 0.9355194831745512, "grad_norm": 0.06547275930643082, "learning_rate": 6.41572947891273e-06, "loss": 0.0023, "step": 114400 }, { "epoch": 0.9356012593531504, "grad_norm": 0.022656841203570366, "learning_rate": 6.415045036773584e-06, "loss": 0.0014, "step": 114410 }, { "epoch": 0.9356830355317496, "grad_norm": 0.05099387839436531, "learning_rate": 6.414360565808821e-06, "loss": 0.0008, "step": 114420 }, { "epoch": 0.9357648117103488, "grad_norm": 0.03160422295331955, "learning_rate": 6.413676066032382e-06, "loss": 0.0013, "step": 114430 }, { "epoch": 0.9358465878889479, "grad_norm": 0.007250691764056683, "learning_rate": 6.412991537458213e-06, "loss": 0.0016, "step": 114440 }, { "epoch": 0.9359283640675471, "grad_norm": 0.05980858579277992, "learning_rate": 6.412306980100259e-06, "loss": 0.0016, "step": 114450 }, { "epoch": 0.9360101402461463, "grad_norm": 0.07613560557365417, "learning_rate": 6.411622393972462e-06, "loss": 0.0014, "step": 114460 }, { "epoch": 0.9360919164247454, "grad_norm": 0.035054199397563934, "learning_rate": 6.4109377790887706e-06, "loss": 0.0014, "step": 114470 }, { "epoch": 0.9361736926033446, "grad_norm": 0.08304272592067719, "learning_rate": 6.410253135463129e-06, "loss": 0.0014, "step": 114480 }, { "epoch": 0.9362554687819438, "grad_norm": 0.025001585483551025, "learning_rate": 6.409568463109487e-06, "loss": 0.0031, "step": 114490 }, { "epoch": 0.936337244960543, "grad_norm": 0.04634769633412361, "learning_rate": 6.408883762041787e-06, "loss": 0.0013, "step": 114500 }, { "epoch": 0.9364190211391422, "grad_norm": 0.016831601038575172, "learning_rate": 6.408199032273981e-06, "loss": 0.0007, "step": 114510 }, { "epoch": 0.9365007973177414, "grad_norm": 0.012366901151835918, "learning_rate": 6.407514273820015e-06, "loss": 0.0008, "step": 114520 }, { "epoch": 0.9365825734963406, "grad_norm": 0.11112143099308014, "learning_rate": 6.406829486693839e-06, "loss": 0.0013, "step": 114530 }, { "epoch": 0.9366643496749397, "grad_norm": 0.07532721757888794, "learning_rate": 6.406144670909404e-06, "loss": 0.0016, "step": 114540 }, { "epoch": 0.9367461258535389, "grad_norm": 0.089744433760643, "learning_rate": 6.405459826480659e-06, "loss": 0.0016, "step": 114550 }, { "epoch": 0.9368279020321381, "grad_norm": 0.04427432641386986, "learning_rate": 6.404774953421553e-06, "loss": 0.0014, "step": 114560 }, { "epoch": 0.9369096782107372, "grad_norm": 0.031017867848277092, "learning_rate": 6.404090051746041e-06, "loss": 0.0024, "step": 114570 }, { "epoch": 0.9369914543893364, "grad_norm": 0.1318168193101883, "learning_rate": 6.403405121468074e-06, "loss": 0.0011, "step": 114580 }, { "epoch": 0.9370732305679356, "grad_norm": 0.06398452818393707, "learning_rate": 6.402720162601603e-06, "loss": 0.001, "step": 114590 }, { "epoch": 0.9371550067465347, "grad_norm": 0.043650250881910324, "learning_rate": 6.402035175160582e-06, "loss": 0.0009, "step": 114600 }, { "epoch": 0.9372367829251339, "grad_norm": 0.05765238776803017, "learning_rate": 6.401350159158966e-06, "loss": 0.0015, "step": 114610 }, { "epoch": 0.9373185591037331, "grad_norm": 0.04233906790614128, "learning_rate": 6.400665114610707e-06, "loss": 0.0016, "step": 114620 }, { "epoch": 0.9374003352823322, "grad_norm": 0.03229007497429848, "learning_rate": 6.399980041529761e-06, "loss": 0.0012, "step": 114630 }, { "epoch": 0.9374821114609314, "grad_norm": 0.03653326630592346, "learning_rate": 6.399294939930083e-06, "loss": 0.0007, "step": 114640 }, { "epoch": 0.9375638876395306, "grad_norm": 0.08727747201919556, "learning_rate": 6.398609809825631e-06, "loss": 0.002, "step": 114650 }, { "epoch": 0.9376456638181297, "grad_norm": 0.06519556045532227, "learning_rate": 6.397924651230361e-06, "loss": 0.0022, "step": 114660 }, { "epoch": 0.9377274399967289, "grad_norm": 0.07600785791873932, "learning_rate": 6.397239464158228e-06, "loss": 0.0017, "step": 114670 }, { "epoch": 0.9378092161753281, "grad_norm": 0.022014625370502472, "learning_rate": 6.396554248623192e-06, "loss": 0.0016, "step": 114680 }, { "epoch": 0.9378909923539273, "grad_norm": 0.014012739062309265, "learning_rate": 6.39586900463921e-06, "loss": 0.0011, "step": 114690 }, { "epoch": 0.9379727685325264, "grad_norm": 0.06532984226942062, "learning_rate": 6.395183732220242e-06, "loss": 0.001, "step": 114700 }, { "epoch": 0.9380545447111257, "grad_norm": 0.04617753252387047, "learning_rate": 6.394498431380249e-06, "loss": 0.0013, "step": 114710 }, { "epoch": 0.9381363208897249, "grad_norm": 0.062286585569381714, "learning_rate": 6.393813102133188e-06, "loss": 0.0021, "step": 114720 }, { "epoch": 0.938218097068324, "grad_norm": 0.044590242207050323, "learning_rate": 6.393127744493022e-06, "loss": 0.0009, "step": 114730 }, { "epoch": 0.9382998732469232, "grad_norm": 0.027867838740348816, "learning_rate": 6.39244235847371e-06, "loss": 0.0016, "step": 114740 }, { "epoch": 0.9383816494255224, "grad_norm": 0.015813227742910385, "learning_rate": 6.3917569440892155e-06, "loss": 0.0022, "step": 114750 }, { "epoch": 0.9384634256041215, "grad_norm": 0.1589936763048172, "learning_rate": 6.391071501353501e-06, "loss": 0.0024, "step": 114760 }, { "epoch": 0.9385452017827207, "grad_norm": 0.0821346715092659, "learning_rate": 6.39038603028053e-06, "loss": 0.001, "step": 114770 }, { "epoch": 0.9386269779613199, "grad_norm": 0.07839398831129074, "learning_rate": 6.389700530884265e-06, "loss": 0.001, "step": 114780 }, { "epoch": 0.938708754139919, "grad_norm": 0.2761627435684204, "learning_rate": 6.389015003178671e-06, "loss": 0.0016, "step": 114790 }, { "epoch": 0.9387905303185182, "grad_norm": 0.02576798014342785, "learning_rate": 6.388329447177713e-06, "loss": 0.0011, "step": 114800 }, { "epoch": 0.9388723064971174, "grad_norm": 0.02147071808576584, "learning_rate": 6.387643862895354e-06, "loss": 0.0011, "step": 114810 }, { "epoch": 0.9389540826757166, "grad_norm": 0.05323721468448639, "learning_rate": 6.386958250345561e-06, "loss": 0.0019, "step": 114820 }, { "epoch": 0.9390358588543157, "grad_norm": 0.10349570959806442, "learning_rate": 6.386272609542303e-06, "loss": 0.0013, "step": 114830 }, { "epoch": 0.9391176350329149, "grad_norm": 0.04069576412439346, "learning_rate": 6.385586940499545e-06, "loss": 0.0017, "step": 114840 }, { "epoch": 0.9391994112115141, "grad_norm": 0.029560932889580727, "learning_rate": 6.384901243231254e-06, "loss": 0.0015, "step": 114850 }, { "epoch": 0.9392811873901132, "grad_norm": 0.014805394224822521, "learning_rate": 6.384215517751399e-06, "loss": 0.0011, "step": 114860 }, { "epoch": 0.9393629635687124, "grad_norm": 0.01180979609489441, "learning_rate": 6.383529764073949e-06, "loss": 0.0023, "step": 114870 }, { "epoch": 0.9394447397473116, "grad_norm": 0.001972742145881057, "learning_rate": 6.382843982212872e-06, "loss": 0.0022, "step": 114880 }, { "epoch": 0.9395265159259107, "grad_norm": 0.06989099085330963, "learning_rate": 6.38215817218214e-06, "loss": 0.0012, "step": 114890 }, { "epoch": 0.9396082921045099, "grad_norm": 0.027290301397442818, "learning_rate": 6.381472333995724e-06, "loss": 0.0008, "step": 114900 }, { "epoch": 0.9396900682831091, "grad_norm": 0.08362311869859695, "learning_rate": 6.3807864676675925e-06, "loss": 0.0009, "step": 114910 }, { "epoch": 0.9397718444617083, "grad_norm": 0.01261112093925476, "learning_rate": 6.380100573211717e-06, "loss": 0.0015, "step": 114920 }, { "epoch": 0.9398536206403075, "grad_norm": 0.12806813418865204, "learning_rate": 6.379414650642073e-06, "loss": 0.0016, "step": 114930 }, { "epoch": 0.9399353968189067, "grad_norm": 0.09802709519863129, "learning_rate": 6.37872869997263e-06, "loss": 0.0016, "step": 114940 }, { "epoch": 0.9400171729975059, "grad_norm": 0.01740308851003647, "learning_rate": 6.378042721217363e-06, "loss": 0.001, "step": 114950 }, { "epoch": 0.940098949176105, "grad_norm": 0.016820546239614487, "learning_rate": 6.377356714390246e-06, "loss": 0.0022, "step": 114960 }, { "epoch": 0.9401807253547042, "grad_norm": 0.01767844893038273, "learning_rate": 6.376670679505254e-06, "loss": 0.0012, "step": 114970 }, { "epoch": 0.9402625015333034, "grad_norm": 0.28490838408470154, "learning_rate": 6.375984616576358e-06, "loss": 0.002, "step": 114980 }, { "epoch": 0.9403442777119025, "grad_norm": 0.03133266791701317, "learning_rate": 6.375298525617541e-06, "loss": 0.0013, "step": 114990 }, { "epoch": 0.9404260538905017, "grad_norm": 0.025076674297451973, "learning_rate": 6.374612406642772e-06, "loss": 0.003, "step": 115000 }, { "epoch": 0.9405078300691009, "grad_norm": 0.11665286868810654, "learning_rate": 6.373926259666032e-06, "loss": 0.003, "step": 115010 }, { "epoch": 0.9405896062477, "grad_norm": 0.07128658145666122, "learning_rate": 6.373240084701298e-06, "loss": 0.0012, "step": 115020 }, { "epoch": 0.9406713824262992, "grad_norm": 0.057501811534166336, "learning_rate": 6.372553881762546e-06, "loss": 0.0013, "step": 115030 }, { "epoch": 0.9407531586048984, "grad_norm": 0.0173567496240139, "learning_rate": 6.371867650863757e-06, "loss": 0.0018, "step": 115040 }, { "epoch": 0.9408349347834976, "grad_norm": 0.07979501038789749, "learning_rate": 6.371181392018908e-06, "loss": 0.0072, "step": 115050 }, { "epoch": 0.9409167109620967, "grad_norm": 0.007210724987089634, "learning_rate": 6.370495105241979e-06, "loss": 0.0007, "step": 115060 }, { "epoch": 0.9409984871406959, "grad_norm": 0.016147447749972343, "learning_rate": 6.3698087905469515e-06, "loss": 0.001, "step": 115070 }, { "epoch": 0.9410802633192951, "grad_norm": 0.06100485101342201, "learning_rate": 6.3691224479478065e-06, "loss": 0.0009, "step": 115080 }, { "epoch": 0.9411620394978942, "grad_norm": 0.058856796473264694, "learning_rate": 6.368436077458524e-06, "loss": 0.0013, "step": 115090 }, { "epoch": 0.9412438156764934, "grad_norm": 0.047695886343717575, "learning_rate": 6.367749679093085e-06, "loss": 0.001, "step": 115100 }, { "epoch": 0.9413255918550926, "grad_norm": 0.1254473477602005, "learning_rate": 6.367063252865474e-06, "loss": 0.0017, "step": 115110 }, { "epoch": 0.9414073680336917, "grad_norm": 0.03457029163837433, "learning_rate": 6.366376798789673e-06, "loss": 0.001, "step": 115120 }, { "epoch": 0.9414891442122909, "grad_norm": 0.13497748970985413, "learning_rate": 6.365690316879666e-06, "loss": 0.0024, "step": 115130 }, { "epoch": 0.9415709203908902, "grad_norm": 0.001896972768008709, "learning_rate": 6.365003807149437e-06, "loss": 0.0017, "step": 115140 }, { "epoch": 0.9416526965694894, "grad_norm": 0.0031054671853780746, "learning_rate": 6.3643172696129715e-06, "loss": 0.0018, "step": 115150 }, { "epoch": 0.9417344727480885, "grad_norm": 0.06389009207487106, "learning_rate": 6.363630704284254e-06, "loss": 0.001, "step": 115160 }, { "epoch": 0.9418162489266877, "grad_norm": 0.09895344823598862, "learning_rate": 6.362944111177271e-06, "loss": 0.0026, "step": 115170 }, { "epoch": 0.9418980251052869, "grad_norm": 0.024463623762130737, "learning_rate": 6.362257490306007e-06, "loss": 0.0018, "step": 115180 }, { "epoch": 0.941979801283886, "grad_norm": 0.06744742393493652, "learning_rate": 6.361570841684452e-06, "loss": 0.0017, "step": 115190 }, { "epoch": 0.9420615774624852, "grad_norm": 0.021567445248365402, "learning_rate": 6.360884165326593e-06, "loss": 0.0012, "step": 115200 }, { "epoch": 0.9421433536410844, "grad_norm": 0.11750642210245132, "learning_rate": 6.360197461246417e-06, "loss": 0.0014, "step": 115210 }, { "epoch": 0.9422251298196835, "grad_norm": 0.015098760835826397, "learning_rate": 6.359510729457912e-06, "loss": 0.0013, "step": 115220 }, { "epoch": 0.9423069059982827, "grad_norm": 0.01279840525239706, "learning_rate": 6.358823969975069e-06, "loss": 0.0021, "step": 115230 }, { "epoch": 0.9423886821768819, "grad_norm": 0.07084978371858597, "learning_rate": 6.3581371828118766e-06, "loss": 0.001, "step": 115240 }, { "epoch": 0.942470458355481, "grad_norm": 0.011780471540987492, "learning_rate": 6.357450367982326e-06, "loss": 0.0019, "step": 115250 }, { "epoch": 0.9425522345340802, "grad_norm": 0.006380585953593254, "learning_rate": 6.356763525500409e-06, "loss": 0.003, "step": 115260 }, { "epoch": 0.9426340107126794, "grad_norm": 0.04626069590449333, "learning_rate": 6.356076655380117e-06, "loss": 0.001, "step": 115270 }, { "epoch": 0.9427157868912786, "grad_norm": 0.04528703913092613, "learning_rate": 6.355389757635438e-06, "loss": 0.0013, "step": 115280 }, { "epoch": 0.9427975630698777, "grad_norm": 0.025756657123565674, "learning_rate": 6.354702832280371e-06, "loss": 0.0012, "step": 115290 }, { "epoch": 0.9428793392484769, "grad_norm": 0.03075864166021347, "learning_rate": 6.354015879328903e-06, "loss": 0.0008, "step": 115300 }, { "epoch": 0.9429611154270761, "grad_norm": 0.04002496972680092, "learning_rate": 6.353328898795034e-06, "loss": 0.0038, "step": 115310 }, { "epoch": 0.9430428916056752, "grad_norm": 0.10940422117710114, "learning_rate": 6.3526418906927545e-06, "loss": 0.0015, "step": 115320 }, { "epoch": 0.9431246677842744, "grad_norm": 0.05691441148519516, "learning_rate": 6.35195485503606e-06, "loss": 0.0011, "step": 115330 }, { "epoch": 0.9432064439628736, "grad_norm": 0.027117466554045677, "learning_rate": 6.351267791838946e-06, "loss": 0.0016, "step": 115340 }, { "epoch": 0.9432882201414728, "grad_norm": 0.052851203829050064, "learning_rate": 6.350580701115407e-06, "loss": 0.001, "step": 115350 }, { "epoch": 0.943369996320072, "grad_norm": 0.049886494874954224, "learning_rate": 6.349893582879443e-06, "loss": 0.0009, "step": 115360 }, { "epoch": 0.9434517724986712, "grad_norm": 0.039788223803043365, "learning_rate": 6.34920643714505e-06, "loss": 0.0014, "step": 115370 }, { "epoch": 0.9435335486772704, "grad_norm": 0.039769940078258514, "learning_rate": 6.348519263926225e-06, "loss": 0.0016, "step": 115380 }, { "epoch": 0.9436153248558695, "grad_norm": 0.08193966001272202, "learning_rate": 6.347832063236967e-06, "loss": 0.002, "step": 115390 }, { "epoch": 0.9436971010344687, "grad_norm": 0.062334418296813965, "learning_rate": 6.347144835091272e-06, "loss": 0.001, "step": 115400 }, { "epoch": 0.9437788772130679, "grad_norm": 0.03763753920793533, "learning_rate": 6.346457579503143e-06, "loss": 0.0016, "step": 115410 }, { "epoch": 0.943860653391667, "grad_norm": 0.048964884132146835, "learning_rate": 6.345770296486578e-06, "loss": 0.0011, "step": 115420 }, { "epoch": 0.9439424295702662, "grad_norm": 0.1784355342388153, "learning_rate": 6.345082986055579e-06, "loss": 0.0015, "step": 115430 }, { "epoch": 0.9440242057488654, "grad_norm": 0.04881017282605171, "learning_rate": 6.344395648224148e-06, "loss": 0.0014, "step": 115440 }, { "epoch": 0.9441059819274645, "grad_norm": 0.1412244588136673, "learning_rate": 6.343708283006284e-06, "loss": 0.0016, "step": 115450 }, { "epoch": 0.9441877581060637, "grad_norm": 0.13315892219543457, "learning_rate": 6.3430208904159875e-06, "loss": 0.0023, "step": 115460 }, { "epoch": 0.9442695342846629, "grad_norm": 0.05376588553190231, "learning_rate": 6.342333470467266e-06, "loss": 0.0016, "step": 115470 }, { "epoch": 0.944351310463262, "grad_norm": 0.11730486899614334, "learning_rate": 6.34164602317412e-06, "loss": 0.0033, "step": 115480 }, { "epoch": 0.9444330866418612, "grad_norm": 0.046453800052404404, "learning_rate": 6.3409585485505535e-06, "loss": 0.0018, "step": 115490 }, { "epoch": 0.9445148628204604, "grad_norm": 0.014851142652332783, "learning_rate": 6.340271046610572e-06, "loss": 0.0018, "step": 115500 }, { "epoch": 0.9445966389990595, "grad_norm": 0.05657714605331421, "learning_rate": 6.3395835173681795e-06, "loss": 0.0009, "step": 115510 }, { "epoch": 0.9446784151776587, "grad_norm": 0.015776531770825386, "learning_rate": 6.338895960837381e-06, "loss": 0.0014, "step": 115520 }, { "epoch": 0.9447601913562579, "grad_norm": 0.00874718651175499, "learning_rate": 6.3382083770321855e-06, "loss": 0.0009, "step": 115530 }, { "epoch": 0.944841967534857, "grad_norm": 0.03614165261387825, "learning_rate": 6.3375207659665964e-06, "loss": 0.0009, "step": 115540 }, { "epoch": 0.9449237437134562, "grad_norm": 0.026891054585576057, "learning_rate": 6.336833127654622e-06, "loss": 0.0014, "step": 115550 }, { "epoch": 0.9450055198920554, "grad_norm": 0.015691276639699936, "learning_rate": 6.336145462110271e-06, "loss": 0.0007, "step": 115560 }, { "epoch": 0.9450872960706547, "grad_norm": 0.015979086980223656, "learning_rate": 6.3354577693475505e-06, "loss": 0.001, "step": 115570 }, { "epoch": 0.9451690722492538, "grad_norm": 0.05563776567578316, "learning_rate": 6.334770049380469e-06, "loss": 0.001, "step": 115580 }, { "epoch": 0.945250848427853, "grad_norm": 0.00838090106844902, "learning_rate": 6.334082302223038e-06, "loss": 0.0016, "step": 115590 }, { "epoch": 0.9453326246064522, "grad_norm": 0.05581248924136162, "learning_rate": 6.333394527889265e-06, "loss": 0.0008, "step": 115600 }, { "epoch": 0.9454144007850513, "grad_norm": 0.06613742560148239, "learning_rate": 6.332706726393162e-06, "loss": 0.0019, "step": 115610 }, { "epoch": 0.9454961769636505, "grad_norm": 0.07728986442089081, "learning_rate": 6.33201889774874e-06, "loss": 0.0015, "step": 115620 }, { "epoch": 0.9455779531422497, "grad_norm": 0.0442599393427372, "learning_rate": 6.33133104197001e-06, "loss": 0.0012, "step": 115630 }, { "epoch": 0.9456597293208489, "grad_norm": 0.003551851725205779, "learning_rate": 6.3306431590709834e-06, "loss": 0.001, "step": 115640 }, { "epoch": 0.945741505499448, "grad_norm": 0.03302472084760666, "learning_rate": 6.329955249065676e-06, "loss": 0.0031, "step": 115650 }, { "epoch": 0.9458232816780472, "grad_norm": 0.14094187319278717, "learning_rate": 6.329267311968098e-06, "loss": 0.0016, "step": 115660 }, { "epoch": 0.9459050578566464, "grad_norm": 0.04073433578014374, "learning_rate": 6.328579347792265e-06, "loss": 0.0014, "step": 115670 }, { "epoch": 0.9459868340352455, "grad_norm": 0.19623631238937378, "learning_rate": 6.3278913565521905e-06, "loss": 0.0018, "step": 115680 }, { "epoch": 0.9460686102138447, "grad_norm": 0.051548369228839874, "learning_rate": 6.32720333826189e-06, "loss": 0.001, "step": 115690 }, { "epoch": 0.9461503863924439, "grad_norm": 0.04558957740664482, "learning_rate": 6.326515292935379e-06, "loss": 0.0014, "step": 115700 }, { "epoch": 0.946232162571043, "grad_norm": 0.038180623203516006, "learning_rate": 6.325827220586671e-06, "loss": 0.0014, "step": 115710 }, { "epoch": 0.9463139387496422, "grad_norm": 0.06398911774158478, "learning_rate": 6.325139121229787e-06, "loss": 0.0012, "step": 115720 }, { "epoch": 0.9463957149282414, "grad_norm": 0.0585104338824749, "learning_rate": 6.324450994878742e-06, "loss": 0.0015, "step": 115730 }, { "epoch": 0.9464774911068405, "grad_norm": 0.08595334738492966, "learning_rate": 6.323762841547553e-06, "loss": 0.001, "step": 115740 }, { "epoch": 0.9465592672854397, "grad_norm": 0.041650593280792236, "learning_rate": 6.323074661250239e-06, "loss": 0.0013, "step": 115750 }, { "epoch": 0.9466410434640389, "grad_norm": 0.3144663870334625, "learning_rate": 6.322386454000818e-06, "loss": 0.0014, "step": 115760 }, { "epoch": 0.946722819642638, "grad_norm": 0.0637480616569519, "learning_rate": 6.32169821981331e-06, "loss": 0.0013, "step": 115770 }, { "epoch": 0.9468045958212373, "grad_norm": 0.07098367810249329, "learning_rate": 6.321009958701737e-06, "loss": 0.0013, "step": 115780 }, { "epoch": 0.9468863719998365, "grad_norm": 0.04012155160307884, "learning_rate": 6.320321670680115e-06, "loss": 0.0013, "step": 115790 }, { "epoch": 0.9469681481784357, "grad_norm": 0.055398039519786835, "learning_rate": 6.319633355762469e-06, "loss": 0.0014, "step": 115800 }, { "epoch": 0.9470499243570348, "grad_norm": 0.010460293851792812, "learning_rate": 6.318945013962818e-06, "loss": 0.0009, "step": 115810 }, { "epoch": 0.947131700535634, "grad_norm": 0.20988808572292328, "learning_rate": 6.318256645295186e-06, "loss": 0.0009, "step": 115820 }, { "epoch": 0.9472134767142332, "grad_norm": 0.009259041398763657, "learning_rate": 6.317568249773593e-06, "loss": 0.0009, "step": 115830 }, { "epoch": 0.9472952528928323, "grad_norm": 0.03720977157354355, "learning_rate": 6.316879827412065e-06, "loss": 0.0036, "step": 115840 }, { "epoch": 0.9473770290714315, "grad_norm": 0.03883969411253929, "learning_rate": 6.3161913782246234e-06, "loss": 0.0006, "step": 115850 }, { "epoch": 0.9474588052500307, "grad_norm": 0.023785434663295746, "learning_rate": 6.315502902225294e-06, "loss": 0.0013, "step": 115860 }, { "epoch": 0.9475405814286298, "grad_norm": 0.05439053475856781, "learning_rate": 6.314814399428102e-06, "loss": 0.0008, "step": 115870 }, { "epoch": 0.947622357607229, "grad_norm": 0.07971154153347015, "learning_rate": 6.314125869847072e-06, "loss": 0.0013, "step": 115880 }, { "epoch": 0.9477041337858282, "grad_norm": 0.06235380470752716, "learning_rate": 6.313437313496229e-06, "loss": 0.002, "step": 115890 }, { "epoch": 0.9477859099644274, "grad_norm": 0.03952133283019066, "learning_rate": 6.3127487303896005e-06, "loss": 0.0022, "step": 115900 }, { "epoch": 0.9478676861430265, "grad_norm": 0.04836156964302063, "learning_rate": 6.312060120541213e-06, "loss": 0.0014, "step": 115910 }, { "epoch": 0.9479494623216257, "grad_norm": 0.12332258373498917, "learning_rate": 6.311371483965094e-06, "loss": 0.0012, "step": 115920 }, { "epoch": 0.9480312385002249, "grad_norm": 0.011486672796308994, "learning_rate": 6.310682820675274e-06, "loss": 0.0013, "step": 115930 }, { "epoch": 0.948113014678824, "grad_norm": 0.014378727413713932, "learning_rate": 6.3099941306857784e-06, "loss": 0.0018, "step": 115940 }, { "epoch": 0.9481947908574232, "grad_norm": 0.07026229798793793, "learning_rate": 6.309305414010637e-06, "loss": 0.002, "step": 115950 }, { "epoch": 0.9482765670360224, "grad_norm": 0.03372858837246895, "learning_rate": 6.3086166706638804e-06, "loss": 0.0011, "step": 115960 }, { "epoch": 0.9483583432146215, "grad_norm": 0.08057098835706711, "learning_rate": 6.307927900659538e-06, "loss": 0.0026, "step": 115970 }, { "epoch": 0.9484401193932207, "grad_norm": 0.0155758336186409, "learning_rate": 6.307239104011642e-06, "loss": 0.0007, "step": 115980 }, { "epoch": 0.94852189557182, "grad_norm": 0.12119333446025848, "learning_rate": 6.3065502807342225e-06, "loss": 0.0028, "step": 115990 }, { "epoch": 0.9486036717504192, "grad_norm": 0.026331907138228416, "learning_rate": 6.305861430841312e-06, "loss": 0.0015, "step": 116000 }, { "epoch": 0.9486854479290183, "grad_norm": 0.04396258667111397, "learning_rate": 6.3051725543469425e-06, "loss": 0.0016, "step": 116010 }, { "epoch": 0.9487672241076175, "grad_norm": 0.015882642939686775, "learning_rate": 6.304483651265147e-06, "loss": 0.0008, "step": 116020 }, { "epoch": 0.9488490002862167, "grad_norm": 0.04370403662323952, "learning_rate": 6.303794721609959e-06, "loss": 0.0022, "step": 116030 }, { "epoch": 0.9489307764648158, "grad_norm": 0.029250487685203552, "learning_rate": 6.303105765395413e-06, "loss": 0.0012, "step": 116040 }, { "epoch": 0.949012552643415, "grad_norm": 0.01945631392300129, "learning_rate": 6.302416782635544e-06, "loss": 0.0014, "step": 116050 }, { "epoch": 0.9490943288220142, "grad_norm": 0.10118228942155838, "learning_rate": 6.301727773344386e-06, "loss": 0.0016, "step": 116060 }, { "epoch": 0.9491761050006133, "grad_norm": 0.02234061248600483, "learning_rate": 6.301038737535975e-06, "loss": 0.0015, "step": 116070 }, { "epoch": 0.9492578811792125, "grad_norm": 0.05301297456026077, "learning_rate": 6.300349675224347e-06, "loss": 0.0021, "step": 116080 }, { "epoch": 0.9493396573578117, "grad_norm": 0.024300821125507355, "learning_rate": 6.299660586423539e-06, "loss": 0.0019, "step": 116090 }, { "epoch": 0.9494214335364108, "grad_norm": 0.05760015547275543, "learning_rate": 6.29897147114759e-06, "loss": 0.0019, "step": 116100 }, { "epoch": 0.94950320971501, "grad_norm": 0.045359835028648376, "learning_rate": 6.298282329410536e-06, "loss": 0.0012, "step": 116110 }, { "epoch": 0.9495849858936092, "grad_norm": 0.06562212854623795, "learning_rate": 6.297593161226415e-06, "loss": 0.001, "step": 116120 }, { "epoch": 0.9496667620722083, "grad_norm": 0.09477580338716507, "learning_rate": 6.296903966609266e-06, "loss": 0.0028, "step": 116130 }, { "epoch": 0.9497485382508075, "grad_norm": 0.06396538764238358, "learning_rate": 6.296214745573129e-06, "loss": 0.0022, "step": 116140 }, { "epoch": 0.9498303144294067, "grad_norm": 0.10371707379817963, "learning_rate": 6.295525498132045e-06, "loss": 0.0012, "step": 116150 }, { "epoch": 0.9499120906080059, "grad_norm": 0.1615855097770691, "learning_rate": 6.2948362243000525e-06, "loss": 0.0021, "step": 116160 }, { "epoch": 0.949993866786605, "grad_norm": 0.07904863357543945, "learning_rate": 6.294146924091195e-06, "loss": 0.0019, "step": 116170 }, { "epoch": 0.9500756429652042, "grad_norm": 0.07468724250793457, "learning_rate": 6.293457597519513e-06, "loss": 0.0013, "step": 116180 }, { "epoch": 0.9501574191438034, "grad_norm": 0.08346028625965118, "learning_rate": 6.292768244599046e-06, "loss": 0.0029, "step": 116190 }, { "epoch": 0.9502391953224025, "grad_norm": 0.0578484833240509, "learning_rate": 6.2920788653438405e-06, "loss": 0.0013, "step": 116200 }, { "epoch": 0.9503209715010018, "grad_norm": 0.030174631625413895, "learning_rate": 6.291389459767938e-06, "loss": 0.0011, "step": 116210 }, { "epoch": 0.950402747679601, "grad_norm": 0.029084082692861557, "learning_rate": 6.290700027885383e-06, "loss": 0.0011, "step": 116220 }, { "epoch": 0.9504845238582001, "grad_norm": 0.04602271318435669, "learning_rate": 6.290010569710219e-06, "loss": 0.0009, "step": 116230 }, { "epoch": 0.9505663000367993, "grad_norm": 0.028276827186346054, "learning_rate": 6.289321085256491e-06, "loss": 0.0017, "step": 116240 }, { "epoch": 0.9506480762153985, "grad_norm": 0.03506544977426529, "learning_rate": 6.288631574538244e-06, "loss": 0.0016, "step": 116250 }, { "epoch": 0.9507298523939977, "grad_norm": 0.023907756432890892, "learning_rate": 6.287942037569526e-06, "loss": 0.0009, "step": 116260 }, { "epoch": 0.9508116285725968, "grad_norm": 0.005827227607369423, "learning_rate": 6.287252474364379e-06, "loss": 0.0009, "step": 116270 }, { "epoch": 0.950893404751196, "grad_norm": 0.07257051765918732, "learning_rate": 6.286562884936856e-06, "loss": 0.0015, "step": 116280 }, { "epoch": 0.9509751809297952, "grad_norm": 0.04590883105993271, "learning_rate": 6.285873269301e-06, "loss": 0.0007, "step": 116290 }, { "epoch": 0.9510569571083943, "grad_norm": 0.044323354959487915, "learning_rate": 6.285183627470859e-06, "loss": 0.0017, "step": 116300 }, { "epoch": 0.9511387332869935, "grad_norm": 0.05756257474422455, "learning_rate": 6.284493959460484e-06, "loss": 0.0012, "step": 116310 }, { "epoch": 0.9512205094655927, "grad_norm": 0.00570911867544055, "learning_rate": 6.2838042652839225e-06, "loss": 0.0007, "step": 116320 }, { "epoch": 0.9513022856441918, "grad_norm": 0.030987931415438652, "learning_rate": 6.283114544955225e-06, "loss": 0.001, "step": 116330 }, { "epoch": 0.951384061822791, "grad_norm": 0.062217313796281815, "learning_rate": 6.282424798488442e-06, "loss": 0.0018, "step": 116340 }, { "epoch": 0.9514658380013902, "grad_norm": 0.09133443236351013, "learning_rate": 6.281735025897622e-06, "loss": 0.0016, "step": 116350 }, { "epoch": 0.9515476141799893, "grad_norm": 0.05256152153015137, "learning_rate": 6.281045227196818e-06, "loss": 0.0014, "step": 116360 }, { "epoch": 0.9516293903585885, "grad_norm": 0.11940018832683563, "learning_rate": 6.280355402400082e-06, "loss": 0.0014, "step": 116370 }, { "epoch": 0.9517111665371877, "grad_norm": 0.0644911676645279, "learning_rate": 6.279665551521465e-06, "loss": 0.0013, "step": 116380 }, { "epoch": 0.9517929427157868, "grad_norm": 0.024929504841566086, "learning_rate": 6.278975674575021e-06, "loss": 0.0009, "step": 116390 }, { "epoch": 0.951874718894386, "grad_norm": 0.18272076547145844, "learning_rate": 6.278285771574805e-06, "loss": 0.0017, "step": 116400 }, { "epoch": 0.9519564950729852, "grad_norm": 0.06970062851905823, "learning_rate": 6.277595842534865e-06, "loss": 0.0015, "step": 116410 }, { "epoch": 0.9520382712515845, "grad_norm": 0.08133266866207123, "learning_rate": 6.276905887469261e-06, "loss": 0.0019, "step": 116420 }, { "epoch": 0.9521200474301836, "grad_norm": 0.08116908371448517, "learning_rate": 6.276215906392046e-06, "loss": 0.0015, "step": 116430 }, { "epoch": 0.9522018236087828, "grad_norm": 0.04772324115037918, "learning_rate": 6.275525899317276e-06, "loss": 0.0012, "step": 116440 }, { "epoch": 0.952283599787382, "grad_norm": 0.009801807813346386, "learning_rate": 6.274835866259006e-06, "loss": 0.0009, "step": 116450 }, { "epoch": 0.9523653759659811, "grad_norm": 0.043431662023067474, "learning_rate": 6.274145807231295e-06, "loss": 0.0011, "step": 116460 }, { "epoch": 0.9524471521445803, "grad_norm": 0.036874935030937195, "learning_rate": 6.273455722248195e-06, "loss": 0.001, "step": 116470 }, { "epoch": 0.9525289283231795, "grad_norm": 0.03504765033721924, "learning_rate": 6.27276561132377e-06, "loss": 0.0008, "step": 116480 }, { "epoch": 0.9526107045017786, "grad_norm": 0.09151574224233627, "learning_rate": 6.272075474472072e-06, "loss": 0.0031, "step": 116490 }, { "epoch": 0.9526924806803778, "grad_norm": 0.02174736186861992, "learning_rate": 6.2713853117071645e-06, "loss": 0.0007, "step": 116500 }, { "epoch": 0.952774256858977, "grad_norm": 0.018610693514347076, "learning_rate": 6.270695123043103e-06, "loss": 0.0007, "step": 116510 }, { "epoch": 0.9528560330375762, "grad_norm": 0.026624998077750206, "learning_rate": 6.270004908493952e-06, "loss": 0.0012, "step": 116520 }, { "epoch": 0.9529378092161753, "grad_norm": 0.2417346090078354, "learning_rate": 6.269314668073767e-06, "loss": 0.0027, "step": 116530 }, { "epoch": 0.9530195853947745, "grad_norm": 0.020759375765919685, "learning_rate": 6.26862440179661e-06, "loss": 0.0074, "step": 116540 }, { "epoch": 0.9531013615733737, "grad_norm": 0.04910730570554733, "learning_rate": 6.2679341096765435e-06, "loss": 0.0015, "step": 116550 }, { "epoch": 0.9531831377519728, "grad_norm": 0.06172230839729309, "learning_rate": 6.2672437917276284e-06, "loss": 0.0008, "step": 116560 }, { "epoch": 0.953264913930572, "grad_norm": 0.015734726563096046, "learning_rate": 6.266553447963926e-06, "loss": 0.0018, "step": 116570 }, { "epoch": 0.9533466901091712, "grad_norm": 0.08307162672281265, "learning_rate": 6.265863078399504e-06, "loss": 0.0016, "step": 116580 }, { "epoch": 0.9534284662877703, "grad_norm": 0.027427351102232933, "learning_rate": 6.265172683048417e-06, "loss": 0.0008, "step": 116590 }, { "epoch": 0.9535102424663695, "grad_norm": 0.07566012442111969, "learning_rate": 6.264482261924738e-06, "loss": 0.0011, "step": 116600 }, { "epoch": 0.9535920186449687, "grad_norm": 0.21887394785881042, "learning_rate": 6.263791815042526e-06, "loss": 0.0036, "step": 116610 }, { "epoch": 0.9536737948235678, "grad_norm": 0.0030381090473383665, "learning_rate": 6.263101342415847e-06, "loss": 0.0013, "step": 116620 }, { "epoch": 0.9537555710021671, "grad_norm": 0.029851781204342842, "learning_rate": 6.2624108440587675e-06, "loss": 0.0011, "step": 116630 }, { "epoch": 0.9538373471807663, "grad_norm": 0.04865192994475365, "learning_rate": 6.261720319985354e-06, "loss": 0.0013, "step": 116640 }, { "epoch": 0.9539191233593655, "grad_norm": 0.023604482412338257, "learning_rate": 6.26102977020967e-06, "loss": 0.0021, "step": 116650 }, { "epoch": 0.9540008995379646, "grad_norm": 0.06005754694342613, "learning_rate": 6.260339194745786e-06, "loss": 0.0017, "step": 116660 }, { "epoch": 0.9540826757165638, "grad_norm": 0.06078211963176727, "learning_rate": 6.259648593607768e-06, "loss": 0.0014, "step": 116670 }, { "epoch": 0.954164451895163, "grad_norm": 0.05120062455534935, "learning_rate": 6.258957966809684e-06, "loss": 0.0007, "step": 116680 }, { "epoch": 0.9542462280737621, "grad_norm": 0.07447551935911179, "learning_rate": 6.258267314365603e-06, "loss": 0.0034, "step": 116690 }, { "epoch": 0.9543280042523613, "grad_norm": 0.02851862646639347, "learning_rate": 6.257576636289594e-06, "loss": 0.002, "step": 116700 }, { "epoch": 0.9544097804309605, "grad_norm": 0.03231555223464966, "learning_rate": 6.256885932595726e-06, "loss": 0.0024, "step": 116710 }, { "epoch": 0.9544915566095596, "grad_norm": 0.0043501765467226505, "learning_rate": 6.256195203298071e-06, "loss": 0.0018, "step": 116720 }, { "epoch": 0.9545733327881588, "grad_norm": 0.04669904336333275, "learning_rate": 6.255504448410697e-06, "loss": 0.0012, "step": 116730 }, { "epoch": 0.954655108966758, "grad_norm": 0.03602784872055054, "learning_rate": 6.254813667947679e-06, "loss": 0.001, "step": 116740 }, { "epoch": 0.9547368851453572, "grad_norm": 0.030175602063536644, "learning_rate": 6.2541228619230844e-06, "loss": 0.0014, "step": 116750 }, { "epoch": 0.9548186613239563, "grad_norm": 0.0653962790966034, "learning_rate": 6.253432030350991e-06, "loss": 0.0014, "step": 116760 }, { "epoch": 0.9549004375025555, "grad_norm": 0.09358455240726471, "learning_rate": 6.252741173245465e-06, "loss": 0.0023, "step": 116770 }, { "epoch": 0.9549822136811547, "grad_norm": 0.03014371544122696, "learning_rate": 6.252050290620585e-06, "loss": 0.0016, "step": 116780 }, { "epoch": 0.9550639898597538, "grad_norm": 0.053589995950460434, "learning_rate": 6.2513593824904205e-06, "loss": 0.002, "step": 116790 }, { "epoch": 0.955145766038353, "grad_norm": 0.004996279254555702, "learning_rate": 6.250668448869051e-06, "loss": 0.0013, "step": 116800 }, { "epoch": 0.9552275422169522, "grad_norm": 0.0471605621278286, "learning_rate": 6.249977489770547e-06, "loss": 0.0012, "step": 116810 }, { "epoch": 0.9553093183955513, "grad_norm": 0.03840157017111778, "learning_rate": 6.249286505208987e-06, "loss": 0.0019, "step": 116820 }, { "epoch": 0.9553910945741505, "grad_norm": 0.03836783766746521, "learning_rate": 6.248595495198444e-06, "loss": 0.0017, "step": 116830 }, { "epoch": 0.9554728707527497, "grad_norm": 0.05718247964978218, "learning_rate": 6.2479044597529955e-06, "loss": 0.0008, "step": 116840 }, { "epoch": 0.955554646931349, "grad_norm": 0.078152135014534, "learning_rate": 6.2472133988867205e-06, "loss": 0.0025, "step": 116850 }, { "epoch": 0.9556364231099481, "grad_norm": 0.0049774618819355965, "learning_rate": 6.246522312613693e-06, "loss": 0.0011, "step": 116860 }, { "epoch": 0.9557181992885473, "grad_norm": 0.07167283445596695, "learning_rate": 6.245831200947993e-06, "loss": 0.0013, "step": 116870 }, { "epoch": 0.9557999754671465, "grad_norm": 0.13094380497932434, "learning_rate": 6.245140063903701e-06, "loss": 0.0021, "step": 116880 }, { "epoch": 0.9558817516457456, "grad_norm": 0.039106328040361404, "learning_rate": 6.244448901494891e-06, "loss": 0.0015, "step": 116890 }, { "epoch": 0.9559635278243448, "grad_norm": 0.06473477184772491, "learning_rate": 6.243757713735648e-06, "loss": 0.0019, "step": 116900 }, { "epoch": 0.956045304002944, "grad_norm": 0.009371532127261162, "learning_rate": 6.243066500640046e-06, "loss": 0.0011, "step": 116910 }, { "epoch": 0.9561270801815431, "grad_norm": 0.04159796983003616, "learning_rate": 6.242375262222171e-06, "loss": 0.0008, "step": 116920 }, { "epoch": 0.9562088563601423, "grad_norm": 0.04316852614283562, "learning_rate": 6.241683998496102e-06, "loss": 0.0009, "step": 116930 }, { "epoch": 0.9562906325387415, "grad_norm": 0.11033610254526138, "learning_rate": 6.240992709475921e-06, "loss": 0.0015, "step": 116940 }, { "epoch": 0.9563724087173406, "grad_norm": 0.08578525483608246, "learning_rate": 6.24030139517571e-06, "loss": 0.001, "step": 116950 }, { "epoch": 0.9564541848959398, "grad_norm": 0.012313738465309143, "learning_rate": 6.23961005560955e-06, "loss": 0.0013, "step": 116960 }, { "epoch": 0.956535961074539, "grad_norm": 0.017619507387280464, "learning_rate": 6.2389186907915255e-06, "loss": 0.001, "step": 116970 }, { "epoch": 0.9566177372531381, "grad_norm": 0.04778284579515457, "learning_rate": 6.238227300735722e-06, "loss": 0.0017, "step": 116980 }, { "epoch": 0.9566995134317373, "grad_norm": 0.047668859362602234, "learning_rate": 6.237535885456219e-06, "loss": 0.0019, "step": 116990 }, { "epoch": 0.9567812896103365, "grad_norm": 0.03868691623210907, "learning_rate": 6.236844444967108e-06, "loss": 0.0019, "step": 117000 }, { "epoch": 0.9568630657889357, "grad_norm": 0.046670857816934586, "learning_rate": 6.236152979282468e-06, "loss": 0.0013, "step": 117010 }, { "epoch": 0.9569448419675348, "grad_norm": 0.044182002544403076, "learning_rate": 6.235461488416388e-06, "loss": 0.001, "step": 117020 }, { "epoch": 0.957026618146134, "grad_norm": 0.06728929281234741, "learning_rate": 6.234769972382951e-06, "loss": 0.0013, "step": 117030 }, { "epoch": 0.9571083943247332, "grad_norm": 0.02813738025724888, "learning_rate": 6.234078431196249e-06, "loss": 0.0008, "step": 117040 }, { "epoch": 0.9571901705033323, "grad_norm": 0.06758385896682739, "learning_rate": 6.233386864870363e-06, "loss": 0.0013, "step": 117050 }, { "epoch": 0.9572719466819316, "grad_norm": 0.06364341825246811, "learning_rate": 6.232695273419388e-06, "loss": 0.002, "step": 117060 }, { "epoch": 0.9573537228605308, "grad_norm": 0.0650518536567688, "learning_rate": 6.232003656857405e-06, "loss": 0.0012, "step": 117070 }, { "epoch": 0.95743549903913, "grad_norm": 0.010743817314505577, "learning_rate": 6.231312015198507e-06, "loss": 0.0014, "step": 117080 }, { "epoch": 0.9575172752177291, "grad_norm": 0.0763862282037735, "learning_rate": 6.230620348456783e-06, "loss": 0.0013, "step": 117090 }, { "epoch": 0.9575990513963283, "grad_norm": 0.044273048639297485, "learning_rate": 6.229928656646323e-06, "loss": 0.0032, "step": 117100 }, { "epoch": 0.9576808275749275, "grad_norm": 0.16527296602725983, "learning_rate": 6.229236939781213e-06, "loss": 0.0015, "step": 117110 }, { "epoch": 0.9577626037535266, "grad_norm": 0.04427790269255638, "learning_rate": 6.228545197875552e-06, "loss": 0.0015, "step": 117120 }, { "epoch": 0.9578443799321258, "grad_norm": 0.04124308004975319, "learning_rate": 6.227853430943424e-06, "loss": 0.0012, "step": 117130 }, { "epoch": 0.957926156110725, "grad_norm": 0.03319956362247467, "learning_rate": 6.227161638998923e-06, "loss": 0.0015, "step": 117140 }, { "epoch": 0.9580079322893241, "grad_norm": 0.10563656687736511, "learning_rate": 6.2264698220561435e-06, "loss": 0.0013, "step": 117150 }, { "epoch": 0.9580897084679233, "grad_norm": 0.09704191982746124, "learning_rate": 6.225777980129176e-06, "loss": 0.001, "step": 117160 }, { "epoch": 0.9581714846465225, "grad_norm": 0.050762951374053955, "learning_rate": 6.225086113232115e-06, "loss": 0.0011, "step": 117170 }, { "epoch": 0.9582532608251216, "grad_norm": 0.09678912162780762, "learning_rate": 6.224394221379054e-06, "loss": 0.0013, "step": 117180 }, { "epoch": 0.9583350370037208, "grad_norm": 0.013776657171547413, "learning_rate": 6.223702304584089e-06, "loss": 0.002, "step": 117190 }, { "epoch": 0.95841681318232, "grad_norm": 0.07316771894693375, "learning_rate": 6.22301036286131e-06, "loss": 0.0012, "step": 117200 }, { "epoch": 0.9584985893609191, "grad_norm": 0.011524772271513939, "learning_rate": 6.222318396224818e-06, "loss": 0.0018, "step": 117210 }, { "epoch": 0.9585803655395183, "grad_norm": 0.05413190275430679, "learning_rate": 6.221626404688705e-06, "loss": 0.0013, "step": 117220 }, { "epoch": 0.9586621417181175, "grad_norm": 0.026706399396061897, "learning_rate": 6.220934388267071e-06, "loss": 0.0012, "step": 117230 }, { "epoch": 0.9587439178967166, "grad_norm": 0.052704352885484695, "learning_rate": 6.220242346974011e-06, "loss": 0.003, "step": 117240 }, { "epoch": 0.9588256940753158, "grad_norm": 0.03908892348408699, "learning_rate": 6.219550280823623e-06, "loss": 0.0011, "step": 117250 }, { "epoch": 0.958907470253915, "grad_norm": 0.02618199773132801, "learning_rate": 6.218858189830003e-06, "loss": 0.001, "step": 117260 }, { "epoch": 0.9589892464325143, "grad_norm": 0.20929943025112152, "learning_rate": 6.218166074007251e-06, "loss": 0.0014, "step": 117270 }, { "epoch": 0.9590710226111134, "grad_norm": 0.041127365082502365, "learning_rate": 6.217473933369467e-06, "loss": 0.0009, "step": 117280 }, { "epoch": 0.9591527987897126, "grad_norm": 0.02647331915795803, "learning_rate": 6.2167817679307486e-06, "loss": 0.0011, "step": 117290 }, { "epoch": 0.9592345749683118, "grad_norm": 0.10188023000955582, "learning_rate": 6.216089577705197e-06, "loss": 0.0009, "step": 117300 }, { "epoch": 0.9593163511469109, "grad_norm": 0.10012245923280716, "learning_rate": 6.215397362706912e-06, "loss": 0.002, "step": 117310 }, { "epoch": 0.9593981273255101, "grad_norm": 0.022708900272846222, "learning_rate": 6.214705122949995e-06, "loss": 0.0021, "step": 117320 }, { "epoch": 0.9594799035041093, "grad_norm": 0.07342012226581573, "learning_rate": 6.214012858448546e-06, "loss": 0.0019, "step": 117330 }, { "epoch": 0.9595616796827084, "grad_norm": 0.023420076817274094, "learning_rate": 6.213320569216668e-06, "loss": 0.0009, "step": 117340 }, { "epoch": 0.9596434558613076, "grad_norm": 0.061573758721351624, "learning_rate": 6.212628255268467e-06, "loss": 0.001, "step": 117350 }, { "epoch": 0.9597252320399068, "grad_norm": 0.03879563882946968, "learning_rate": 6.21193591661804e-06, "loss": 0.0009, "step": 117360 }, { "epoch": 0.959807008218506, "grad_norm": 0.04333481937646866, "learning_rate": 6.2112435532794945e-06, "loss": 0.0011, "step": 117370 }, { "epoch": 0.9598887843971051, "grad_norm": 0.007775238249450922, "learning_rate": 6.210551165266932e-06, "loss": 0.0012, "step": 117380 }, { "epoch": 0.9599705605757043, "grad_norm": 0.027792448177933693, "learning_rate": 6.2098587525944595e-06, "loss": 0.0017, "step": 117390 }, { "epoch": 0.9600523367543035, "grad_norm": 0.067650206387043, "learning_rate": 6.209166315276181e-06, "loss": 0.0012, "step": 117400 }, { "epoch": 0.9601341129329026, "grad_norm": 0.044904716312885284, "learning_rate": 6.2084738533262016e-06, "loss": 0.0019, "step": 117410 }, { "epoch": 0.9602158891115018, "grad_norm": 0.060187116265296936, "learning_rate": 6.207781366758626e-06, "loss": 0.0021, "step": 117420 }, { "epoch": 0.960297665290101, "grad_norm": 0.12569300830364227, "learning_rate": 6.207088855587564e-06, "loss": 0.0043, "step": 117430 }, { "epoch": 0.9603794414687001, "grad_norm": 0.0293381679803133, "learning_rate": 6.2063963198271195e-06, "loss": 0.0016, "step": 117440 }, { "epoch": 0.9604612176472993, "grad_norm": 0.027405109256505966, "learning_rate": 6.205703759491403e-06, "loss": 0.0019, "step": 117450 }, { "epoch": 0.9605429938258985, "grad_norm": 0.038229282945394516, "learning_rate": 6.205011174594519e-06, "loss": 0.0009, "step": 117460 }, { "epoch": 0.9606247700044976, "grad_norm": 0.07264494150876999, "learning_rate": 6.204318565150579e-06, "loss": 0.001, "step": 117470 }, { "epoch": 0.9607065461830968, "grad_norm": 0.034220218658447266, "learning_rate": 6.203625931173691e-06, "loss": 0.0016, "step": 117480 }, { "epoch": 0.9607883223616961, "grad_norm": 0.01694486103951931, "learning_rate": 6.202933272677964e-06, "loss": 0.0018, "step": 117490 }, { "epoch": 0.9608700985402953, "grad_norm": 0.05328937992453575, "learning_rate": 6.202240589677508e-06, "loss": 0.0007, "step": 117500 }, { "epoch": 0.9609518747188944, "grad_norm": 0.014850745908915997, "learning_rate": 6.201547882186435e-06, "loss": 0.0013, "step": 117510 }, { "epoch": 0.9610336508974936, "grad_norm": 0.028343893587589264, "learning_rate": 6.200855150218854e-06, "loss": 0.0014, "step": 117520 }, { "epoch": 0.9611154270760928, "grad_norm": 0.01389115210622549, "learning_rate": 6.200162393788879e-06, "loss": 0.0014, "step": 117530 }, { "epoch": 0.9611972032546919, "grad_norm": 0.06867435574531555, "learning_rate": 6.199469612910619e-06, "loss": 0.0009, "step": 117540 }, { "epoch": 0.9612789794332911, "grad_norm": 0.003545168088749051, "learning_rate": 6.198776807598189e-06, "loss": 0.0009, "step": 117550 }, { "epoch": 0.9613607556118903, "grad_norm": 0.02017776481807232, "learning_rate": 6.1980839778657e-06, "loss": 0.0017, "step": 117560 }, { "epoch": 0.9614425317904894, "grad_norm": 0.047713104635477066, "learning_rate": 6.197391123727268e-06, "loss": 0.001, "step": 117570 }, { "epoch": 0.9615243079690886, "grad_norm": 0.029799828305840492, "learning_rate": 6.1966982451970035e-06, "loss": 0.0017, "step": 117580 }, { "epoch": 0.9616060841476878, "grad_norm": 0.0013819647720083594, "learning_rate": 6.196005342289024e-06, "loss": 0.0013, "step": 117590 }, { "epoch": 0.961687860326287, "grad_norm": 0.04201536625623703, "learning_rate": 6.195312415017443e-06, "loss": 0.0016, "step": 117600 }, { "epoch": 0.9617696365048861, "grad_norm": 0.006080368999391794, "learning_rate": 6.194619463396378e-06, "loss": 0.0015, "step": 117610 }, { "epoch": 0.9618514126834853, "grad_norm": 0.04672010615468025, "learning_rate": 6.1939264874399414e-06, "loss": 0.0012, "step": 117620 }, { "epoch": 0.9619331888620845, "grad_norm": 0.037864845246076584, "learning_rate": 6.1932334871622545e-06, "loss": 0.0011, "step": 117630 }, { "epoch": 0.9620149650406836, "grad_norm": 0.009046857245266438, "learning_rate": 6.192540462577429e-06, "loss": 0.0014, "step": 117640 }, { "epoch": 0.9620967412192828, "grad_norm": 0.06308820098638535, "learning_rate": 6.191847413699586e-06, "loss": 0.0019, "step": 117650 }, { "epoch": 0.962178517397882, "grad_norm": 0.05082124471664429, "learning_rate": 6.191154340542843e-06, "loss": 0.0012, "step": 117660 }, { "epoch": 0.9622602935764811, "grad_norm": 0.012021353468298912, "learning_rate": 6.1904612431213175e-06, "loss": 0.0014, "step": 117670 }, { "epoch": 0.9623420697550803, "grad_norm": 0.013428574427962303, "learning_rate": 6.189768121449128e-06, "loss": 0.0012, "step": 117680 }, { "epoch": 0.9624238459336795, "grad_norm": 0.015612087212502956, "learning_rate": 6.189074975540395e-06, "loss": 0.0025, "step": 117690 }, { "epoch": 0.9625056221122787, "grad_norm": 0.3248068392276764, "learning_rate": 6.188381805409238e-06, "loss": 0.002, "step": 117700 }, { "epoch": 0.9625873982908779, "grad_norm": 0.02059864066541195, "learning_rate": 6.187688611069779e-06, "loss": 0.0016, "step": 117710 }, { "epoch": 0.9626691744694771, "grad_norm": 0.004017376806586981, "learning_rate": 6.186995392536137e-06, "loss": 0.0018, "step": 117720 }, { "epoch": 0.9627509506480763, "grad_norm": 0.06106133386492729, "learning_rate": 6.186302149822434e-06, "loss": 0.0011, "step": 117730 }, { "epoch": 0.9628327268266754, "grad_norm": 0.024937927722930908, "learning_rate": 6.18560888294279e-06, "loss": 0.0013, "step": 117740 }, { "epoch": 0.9629145030052746, "grad_norm": 0.199965238571167, "learning_rate": 6.184915591911332e-06, "loss": 0.0011, "step": 117750 }, { "epoch": 0.9629962791838738, "grad_norm": 0.03477005288004875, "learning_rate": 6.184222276742179e-06, "loss": 0.0012, "step": 117760 }, { "epoch": 0.9630780553624729, "grad_norm": 0.009656672365963459, "learning_rate": 6.1835289374494564e-06, "loss": 0.0017, "step": 117770 }, { "epoch": 0.9631598315410721, "grad_norm": 0.0036837614607065916, "learning_rate": 6.182835574047288e-06, "loss": 0.0028, "step": 117780 }, { "epoch": 0.9632416077196713, "grad_norm": 0.1075076088309288, "learning_rate": 6.182142186549796e-06, "loss": 0.0008, "step": 117790 }, { "epoch": 0.9633233838982704, "grad_norm": 0.01158890314400196, "learning_rate": 6.181448774971106e-06, "loss": 0.0023, "step": 117800 }, { "epoch": 0.9634051600768696, "grad_norm": 0.02194402553141117, "learning_rate": 6.180755339325346e-06, "loss": 0.001, "step": 117810 }, { "epoch": 0.9634869362554688, "grad_norm": 0.04083224758505821, "learning_rate": 6.180061879626639e-06, "loss": 0.0024, "step": 117820 }, { "epoch": 0.9635687124340679, "grad_norm": 0.11541463434696198, "learning_rate": 6.179368395889113e-06, "loss": 0.0023, "step": 117830 }, { "epoch": 0.9636504886126671, "grad_norm": 0.06507251411676407, "learning_rate": 6.178674888126893e-06, "loss": 0.0012, "step": 117840 }, { "epoch": 0.9637322647912663, "grad_norm": 0.04544222354888916, "learning_rate": 6.1779813563541094e-06, "loss": 0.0012, "step": 117850 }, { "epoch": 0.9638140409698654, "grad_norm": 0.11659097671508789, "learning_rate": 6.1772878005848855e-06, "loss": 0.002, "step": 117860 }, { "epoch": 0.9638958171484646, "grad_norm": 0.05633866786956787, "learning_rate": 6.176594220833354e-06, "loss": 0.0013, "step": 117870 }, { "epoch": 0.9639775933270638, "grad_norm": 0.012188569642603397, "learning_rate": 6.17590061711364e-06, "loss": 0.0015, "step": 117880 }, { "epoch": 0.964059369505663, "grad_norm": 0.11472143977880478, "learning_rate": 6.175206989439877e-06, "loss": 0.001, "step": 117890 }, { "epoch": 0.9641411456842621, "grad_norm": 0.03329504653811455, "learning_rate": 6.174513337826191e-06, "loss": 0.001, "step": 117900 }, { "epoch": 0.9642229218628613, "grad_norm": 0.10410812497138977, "learning_rate": 6.173819662286715e-06, "loss": 0.0009, "step": 117910 }, { "epoch": 0.9643046980414606, "grad_norm": 0.061767637729644775, "learning_rate": 6.173125962835577e-06, "loss": 0.001, "step": 117920 }, { "epoch": 0.9643864742200597, "grad_norm": 0.0234026201069355, "learning_rate": 6.172432239486909e-06, "loss": 0.0014, "step": 117930 }, { "epoch": 0.9644682503986589, "grad_norm": 0.0689297616481781, "learning_rate": 6.171738492254844e-06, "loss": 0.0011, "step": 117940 }, { "epoch": 0.9645500265772581, "grad_norm": 0.03352544456720352, "learning_rate": 6.171044721153514e-06, "loss": 0.001, "step": 117950 }, { "epoch": 0.9646318027558572, "grad_norm": 0.06642263382673264, "learning_rate": 6.170350926197051e-06, "loss": 0.0025, "step": 117960 }, { "epoch": 0.9647135789344564, "grad_norm": 0.039695534855127335, "learning_rate": 6.169657107399588e-06, "loss": 0.0018, "step": 117970 }, { "epoch": 0.9647953551130556, "grad_norm": 0.06436241418123245, "learning_rate": 6.168963264775258e-06, "loss": 0.0016, "step": 117980 }, { "epoch": 0.9648771312916548, "grad_norm": 0.030698716640472412, "learning_rate": 6.168269398338198e-06, "loss": 0.0006, "step": 117990 }, { "epoch": 0.9649589074702539, "grad_norm": 0.03564858064055443, "learning_rate": 6.167575508102539e-06, "loss": 0.0009, "step": 118000 }, { "epoch": 0.9650406836488531, "grad_norm": 0.09497355669736862, "learning_rate": 6.166881594082419e-06, "loss": 0.0018, "step": 118010 }, { "epoch": 0.9651224598274523, "grad_norm": 0.027972575277090073, "learning_rate": 6.166187656291973e-06, "loss": 0.001, "step": 118020 }, { "epoch": 0.9652042360060514, "grad_norm": 0.04455135762691498, "learning_rate": 6.1654936947453355e-06, "loss": 0.0012, "step": 118030 }, { "epoch": 0.9652860121846506, "grad_norm": 0.10675613582134247, "learning_rate": 6.1647997094566434e-06, "loss": 0.0015, "step": 118040 }, { "epoch": 0.9653677883632498, "grad_norm": 0.10760918259620667, "learning_rate": 6.164105700440036e-06, "loss": 0.001, "step": 118050 }, { "epoch": 0.9654495645418489, "grad_norm": 0.0723041519522667, "learning_rate": 6.163411667709648e-06, "loss": 0.0013, "step": 118060 }, { "epoch": 0.9655313407204481, "grad_norm": 0.02268681302666664, "learning_rate": 6.162717611279619e-06, "loss": 0.0011, "step": 118070 }, { "epoch": 0.9656131168990473, "grad_norm": 0.002378934295848012, "learning_rate": 6.1620235311640884e-06, "loss": 0.0032, "step": 118080 }, { "epoch": 0.9656948930776464, "grad_norm": 0.05090107023715973, "learning_rate": 6.161329427377193e-06, "loss": 0.0021, "step": 118090 }, { "epoch": 0.9657766692562456, "grad_norm": 0.0477689765393734, "learning_rate": 6.160635299933072e-06, "loss": 0.0055, "step": 118100 }, { "epoch": 0.9658584454348448, "grad_norm": 0.06238953396677971, "learning_rate": 6.159941148845868e-06, "loss": 0.0017, "step": 118110 }, { "epoch": 0.965940221613444, "grad_norm": 0.017976941540837288, "learning_rate": 6.159246974129719e-06, "loss": 0.0005, "step": 118120 }, { "epoch": 0.9660219977920432, "grad_norm": 0.02829856052994728, "learning_rate": 6.158552775798767e-06, "loss": 0.002, "step": 118130 }, { "epoch": 0.9661037739706424, "grad_norm": 0.04979415237903595, "learning_rate": 6.157858553867156e-06, "loss": 0.0032, "step": 118140 }, { "epoch": 0.9661855501492416, "grad_norm": 0.02683733031153679, "learning_rate": 6.1571643083490214e-06, "loss": 0.0014, "step": 118150 }, { "epoch": 0.9662673263278407, "grad_norm": 0.05431805178523064, "learning_rate": 6.156470039258511e-06, "loss": 0.0012, "step": 118160 }, { "epoch": 0.9663491025064399, "grad_norm": 0.031375348567962646, "learning_rate": 6.155775746609766e-06, "loss": 0.0014, "step": 118170 }, { "epoch": 0.9664308786850391, "grad_norm": 0.05990069732069969, "learning_rate": 6.155081430416927e-06, "loss": 0.0016, "step": 118180 }, { "epoch": 0.9665126548636382, "grad_norm": 0.04120078310370445, "learning_rate": 6.154387090694143e-06, "loss": 0.0012, "step": 118190 }, { "epoch": 0.9665944310422374, "grad_norm": 0.03259221836924553, "learning_rate": 6.153692727455556e-06, "loss": 0.0013, "step": 118200 }, { "epoch": 0.9666762072208366, "grad_norm": 0.016001366078853607, "learning_rate": 6.15299834071531e-06, "loss": 0.0019, "step": 118210 }, { "epoch": 0.9667579833994357, "grad_norm": 0.010849523358047009, "learning_rate": 6.152303930487549e-06, "loss": 0.0013, "step": 118220 }, { "epoch": 0.9668397595780349, "grad_norm": 0.012711514718830585, "learning_rate": 6.151609496786421e-06, "loss": 0.0016, "step": 118230 }, { "epoch": 0.9669215357566341, "grad_norm": 0.020670559257268906, "learning_rate": 6.150915039626071e-06, "loss": 0.0017, "step": 118240 }, { "epoch": 0.9670033119352333, "grad_norm": 0.08737830072641373, "learning_rate": 6.150220559020648e-06, "loss": 0.0015, "step": 118250 }, { "epoch": 0.9670850881138324, "grad_norm": 0.029791930690407753, "learning_rate": 6.149526054984296e-06, "loss": 0.0016, "step": 118260 }, { "epoch": 0.9671668642924316, "grad_norm": 0.039872828871011734, "learning_rate": 6.148831527531165e-06, "loss": 0.0015, "step": 118270 }, { "epoch": 0.9672486404710308, "grad_norm": 0.0575275644659996, "learning_rate": 6.1481369766753994e-06, "loss": 0.002, "step": 118280 }, { "epoch": 0.9673304166496299, "grad_norm": 0.08687355369329453, "learning_rate": 6.1474424024311526e-06, "loss": 0.001, "step": 118290 }, { "epoch": 0.9674121928282291, "grad_norm": 0.050447016954422, "learning_rate": 6.14674780481257e-06, "loss": 0.0014, "step": 118300 }, { "epoch": 0.9674939690068283, "grad_norm": 0.06675112247467041, "learning_rate": 6.1460531838338036e-06, "loss": 0.001, "step": 118310 }, { "epoch": 0.9675757451854274, "grad_norm": 0.008011947385966778, "learning_rate": 6.145358539509002e-06, "loss": 0.0005, "step": 118320 }, { "epoch": 0.9676575213640266, "grad_norm": 0.02566443756222725, "learning_rate": 6.1446638718523165e-06, "loss": 0.0015, "step": 118330 }, { "epoch": 0.9677392975426259, "grad_norm": 0.07404148578643799, "learning_rate": 6.143969180877896e-06, "loss": 0.0018, "step": 118340 }, { "epoch": 0.9678210737212251, "grad_norm": 0.04000525549054146, "learning_rate": 6.143274466599895e-06, "loss": 0.001, "step": 118350 }, { "epoch": 0.9679028498998242, "grad_norm": 0.11339858919382095, "learning_rate": 6.142579729032462e-06, "loss": 0.001, "step": 118360 }, { "epoch": 0.9679846260784234, "grad_norm": 0.03561299666762352, "learning_rate": 6.141884968189753e-06, "loss": 0.0012, "step": 118370 }, { "epoch": 0.9680664022570226, "grad_norm": 0.01918567530810833, "learning_rate": 6.141190184085919e-06, "loss": 0.0011, "step": 118380 }, { "epoch": 0.9681481784356217, "grad_norm": 0.058565910905599594, "learning_rate": 6.140495376735113e-06, "loss": 0.0017, "step": 118390 }, { "epoch": 0.9682299546142209, "grad_norm": 0.12675614655017853, "learning_rate": 6.139800546151488e-06, "loss": 0.0025, "step": 118400 }, { "epoch": 0.9683117307928201, "grad_norm": 0.06272677332162857, "learning_rate": 6.1391056923492e-06, "loss": 0.0012, "step": 118410 }, { "epoch": 0.9683935069714192, "grad_norm": 0.012036609463393688, "learning_rate": 6.138410815342402e-06, "loss": 0.0025, "step": 118420 }, { "epoch": 0.9684752831500184, "grad_norm": 0.025698937475681305, "learning_rate": 6.1377159151452525e-06, "loss": 0.001, "step": 118430 }, { "epoch": 0.9685570593286176, "grad_norm": 0.05213986337184906, "learning_rate": 6.137020991771905e-06, "loss": 0.0023, "step": 118440 }, { "epoch": 0.9686388355072167, "grad_norm": 0.031756918877363205, "learning_rate": 6.136326045236515e-06, "loss": 0.0027, "step": 118450 }, { "epoch": 0.9687206116858159, "grad_norm": 0.03960719332098961, "learning_rate": 6.135631075553239e-06, "loss": 0.0034, "step": 118460 }, { "epoch": 0.9688023878644151, "grad_norm": 0.09577985852956772, "learning_rate": 6.134936082736236e-06, "loss": 0.0018, "step": 118470 }, { "epoch": 0.9688841640430143, "grad_norm": 0.030601147562265396, "learning_rate": 6.134241066799661e-06, "loss": 0.0023, "step": 118480 }, { "epoch": 0.9689659402216134, "grad_norm": 0.0569775328040123, "learning_rate": 6.133546027757675e-06, "loss": 0.0008, "step": 118490 }, { "epoch": 0.9690477164002126, "grad_norm": 0.0306679867208004, "learning_rate": 6.132850965624435e-06, "loss": 0.001, "step": 118500 }, { "epoch": 0.9691294925788118, "grad_norm": 0.012256946414709091, "learning_rate": 6.1321558804141e-06, "loss": 0.0018, "step": 118510 }, { "epoch": 0.9692112687574109, "grad_norm": 0.06629938632249832, "learning_rate": 6.131460772140828e-06, "loss": 0.0018, "step": 118520 }, { "epoch": 0.9692930449360101, "grad_norm": 0.17722569406032562, "learning_rate": 6.130765640818782e-06, "loss": 0.0038, "step": 118530 }, { "epoch": 0.9693748211146093, "grad_norm": 0.04649289324879646, "learning_rate": 6.13007048646212e-06, "loss": 0.0014, "step": 118540 }, { "epoch": 0.9694565972932084, "grad_norm": 0.07154955714941025, "learning_rate": 6.129375309085004e-06, "loss": 0.0017, "step": 118550 }, { "epoch": 0.9695383734718077, "grad_norm": 0.05852198600769043, "learning_rate": 6.1286801087015946e-06, "loss": 0.0015, "step": 118560 }, { "epoch": 0.9696201496504069, "grad_norm": 0.05363190174102783, "learning_rate": 6.127984885326055e-06, "loss": 0.0022, "step": 118570 }, { "epoch": 0.969701925829006, "grad_norm": 0.004610266070812941, "learning_rate": 6.127289638972546e-06, "loss": 0.0012, "step": 118580 }, { "epoch": 0.9697837020076052, "grad_norm": 0.06370856612920761, "learning_rate": 6.126594369655228e-06, "loss": 0.0013, "step": 118590 }, { "epoch": 0.9698654781862044, "grad_norm": 0.12864436209201813, "learning_rate": 6.125899077388271e-06, "loss": 0.0027, "step": 118600 }, { "epoch": 0.9699472543648036, "grad_norm": 0.03242245689034462, "learning_rate": 6.1252037621858315e-06, "loss": 0.0008, "step": 118610 }, { "epoch": 0.9700290305434027, "grad_norm": 0.09653852880001068, "learning_rate": 6.124508424062079e-06, "loss": 0.0019, "step": 118620 }, { "epoch": 0.9701108067220019, "grad_norm": 0.023744024336338043, "learning_rate": 6.123813063031174e-06, "loss": 0.0015, "step": 118630 }, { "epoch": 0.9701925829006011, "grad_norm": 0.0763510912656784, "learning_rate": 6.123117679107285e-06, "loss": 0.0027, "step": 118640 }, { "epoch": 0.9702743590792002, "grad_norm": 0.12509378790855408, "learning_rate": 6.122422272304575e-06, "loss": 0.0016, "step": 118650 }, { "epoch": 0.9703561352577994, "grad_norm": 0.12799204885959625, "learning_rate": 6.121726842637211e-06, "loss": 0.0016, "step": 118660 }, { "epoch": 0.9704379114363986, "grad_norm": 0.06870239973068237, "learning_rate": 6.12103139011936e-06, "loss": 0.0011, "step": 118670 }, { "epoch": 0.9705196876149977, "grad_norm": 0.12511461973190308, "learning_rate": 6.120335914765188e-06, "loss": 0.0024, "step": 118680 }, { "epoch": 0.9706014637935969, "grad_norm": 0.03725529462099075, "learning_rate": 6.119640416588863e-06, "loss": 0.0016, "step": 118690 }, { "epoch": 0.9706832399721961, "grad_norm": 0.0677080824971199, "learning_rate": 6.118944895604553e-06, "loss": 0.0015, "step": 118700 }, { "epoch": 0.9707650161507952, "grad_norm": 0.00741029204800725, "learning_rate": 6.118249351826424e-06, "loss": 0.0009, "step": 118710 }, { "epoch": 0.9708467923293944, "grad_norm": 0.05221608653664589, "learning_rate": 6.117553785268649e-06, "loss": 0.0021, "step": 118720 }, { "epoch": 0.9709285685079936, "grad_norm": 0.021639984101057053, "learning_rate": 6.116858195945393e-06, "loss": 0.0009, "step": 118730 }, { "epoch": 0.9710103446865928, "grad_norm": 0.03709079325199127, "learning_rate": 6.11616258387083e-06, "loss": 0.001, "step": 118740 }, { "epoch": 0.9710921208651919, "grad_norm": 0.014659546315670013, "learning_rate": 6.115466949059127e-06, "loss": 0.0022, "step": 118750 }, { "epoch": 0.9711738970437911, "grad_norm": 0.019561413675546646, "learning_rate": 6.114771291524456e-06, "loss": 0.0015, "step": 118760 }, { "epoch": 0.9712556732223904, "grad_norm": 0.04592665284872055, "learning_rate": 6.1140756112809864e-06, "loss": 0.0018, "step": 118770 }, { "epoch": 0.9713374494009895, "grad_norm": 0.01500186137855053, "learning_rate": 6.113379908342891e-06, "loss": 0.0011, "step": 118780 }, { "epoch": 0.9714192255795887, "grad_norm": 0.2184567153453827, "learning_rate": 6.112684182724342e-06, "loss": 0.0024, "step": 118790 }, { "epoch": 0.9715010017581879, "grad_norm": 0.16934287548065186, "learning_rate": 6.111988434439513e-06, "loss": 0.0019, "step": 118800 }, { "epoch": 0.971582777936787, "grad_norm": 0.022479061037302017, "learning_rate": 6.111292663502575e-06, "loss": 0.001, "step": 118810 }, { "epoch": 0.9716645541153862, "grad_norm": 0.040469616651535034, "learning_rate": 6.110596869927703e-06, "loss": 0.0012, "step": 118820 }, { "epoch": 0.9717463302939854, "grad_norm": 0.04014146327972412, "learning_rate": 6.109901053729068e-06, "loss": 0.0025, "step": 118830 }, { "epoch": 0.9718281064725846, "grad_norm": 0.03078174777328968, "learning_rate": 6.109205214920848e-06, "loss": 0.0016, "step": 118840 }, { "epoch": 0.9719098826511837, "grad_norm": 0.03373146429657936, "learning_rate": 6.108509353517215e-06, "loss": 0.0018, "step": 118850 }, { "epoch": 0.9719916588297829, "grad_norm": 0.03303525224328041, "learning_rate": 6.1078134695323465e-06, "loss": 0.0006, "step": 118860 }, { "epoch": 0.9720734350083821, "grad_norm": 0.10140520334243774, "learning_rate": 6.107117562980417e-06, "loss": 0.0029, "step": 118870 }, { "epoch": 0.9721552111869812, "grad_norm": 0.048001568764448166, "learning_rate": 6.106421633875603e-06, "loss": 0.0018, "step": 118880 }, { "epoch": 0.9722369873655804, "grad_norm": 0.03481404110789299, "learning_rate": 6.10572568223208e-06, "loss": 0.0016, "step": 118890 }, { "epoch": 0.9723187635441796, "grad_norm": 0.10145768523216248, "learning_rate": 6.105029708064026e-06, "loss": 0.0018, "step": 118900 }, { "epoch": 0.9724005397227787, "grad_norm": 0.02802492491900921, "learning_rate": 6.1043337113856195e-06, "loss": 0.0009, "step": 118910 }, { "epoch": 0.9724823159013779, "grad_norm": 0.05080891773104668, "learning_rate": 6.103637692211038e-06, "loss": 0.0014, "step": 118920 }, { "epoch": 0.9725640920799771, "grad_norm": 0.02535337395966053, "learning_rate": 6.102941650554459e-06, "loss": 0.0013, "step": 118930 }, { "epoch": 0.9726458682585762, "grad_norm": 0.04222891479730606, "learning_rate": 6.102245586430062e-06, "loss": 0.0014, "step": 118940 }, { "epoch": 0.9727276444371754, "grad_norm": 0.004256769083440304, "learning_rate": 6.101549499852026e-06, "loss": 0.0013, "step": 118950 }, { "epoch": 0.9728094206157746, "grad_norm": 0.02830694429576397, "learning_rate": 6.1008533908345324e-06, "loss": 0.0016, "step": 118960 }, { "epoch": 0.9728911967943737, "grad_norm": 0.030008789151906967, "learning_rate": 6.100157259391759e-06, "loss": 0.0006, "step": 118970 }, { "epoch": 0.972972972972973, "grad_norm": 0.06683377921581268, "learning_rate": 6.099461105537889e-06, "loss": 0.0008, "step": 118980 }, { "epoch": 0.9730547491515722, "grad_norm": 0.09773881733417511, "learning_rate": 6.098764929287103e-06, "loss": 0.0016, "step": 118990 }, { "epoch": 0.9731365253301714, "grad_norm": 0.03696797043085098, "learning_rate": 6.098068730653582e-06, "loss": 0.0015, "step": 119000 }, { "epoch": 0.9732183015087705, "grad_norm": 0.06669703125953674, "learning_rate": 6.097372509651507e-06, "loss": 0.0027, "step": 119010 }, { "epoch": 0.9733000776873697, "grad_norm": 0.027545372024178505, "learning_rate": 6.0966762662950644e-06, "loss": 0.0008, "step": 119020 }, { "epoch": 0.9733818538659689, "grad_norm": 0.008404785767197609, "learning_rate": 6.095980000598431e-06, "loss": 0.0011, "step": 119030 }, { "epoch": 0.973463630044568, "grad_norm": 0.050702955573797226, "learning_rate": 6.095283712575798e-06, "loss": 0.002, "step": 119040 }, { "epoch": 0.9735454062231672, "grad_norm": 0.05344754457473755, "learning_rate": 6.094587402241344e-06, "loss": 0.0029, "step": 119050 }, { "epoch": 0.9736271824017664, "grad_norm": 0.007957551628351212, "learning_rate": 6.0938910696092555e-06, "loss": 0.0009, "step": 119060 }, { "epoch": 0.9737089585803655, "grad_norm": 0.06542064249515533, "learning_rate": 6.093194714693715e-06, "loss": 0.0014, "step": 119070 }, { "epoch": 0.9737907347589647, "grad_norm": 0.16166894137859344, "learning_rate": 6.092498337508912e-06, "loss": 0.003, "step": 119080 }, { "epoch": 0.9738725109375639, "grad_norm": 0.02396336756646633, "learning_rate": 6.091801938069029e-06, "loss": 0.0007, "step": 119090 }, { "epoch": 0.973954287116163, "grad_norm": 0.04074249789118767, "learning_rate": 6.091105516388251e-06, "loss": 0.0017, "step": 119100 }, { "epoch": 0.9740360632947622, "grad_norm": 0.008730081841349602, "learning_rate": 6.09040907248077e-06, "loss": 0.0024, "step": 119110 }, { "epoch": 0.9741178394733614, "grad_norm": 0.046020831912755966, "learning_rate": 6.089712606360769e-06, "loss": 0.0016, "step": 119120 }, { "epoch": 0.9741996156519606, "grad_norm": 0.04207199439406395, "learning_rate": 6.0890161180424345e-06, "loss": 0.0011, "step": 119130 }, { "epoch": 0.9742813918305597, "grad_norm": 0.059715189039707184, "learning_rate": 6.088319607539958e-06, "loss": 0.0014, "step": 119140 }, { "epoch": 0.9743631680091589, "grad_norm": 0.011768062599003315, "learning_rate": 6.087623074867526e-06, "loss": 0.001, "step": 119150 }, { "epoch": 0.9744449441877581, "grad_norm": 0.09248387068510056, "learning_rate": 6.086926520039328e-06, "loss": 0.0009, "step": 119160 }, { "epoch": 0.9745267203663572, "grad_norm": 0.014865919947624207, "learning_rate": 6.086229943069553e-06, "loss": 0.002, "step": 119170 }, { "epoch": 0.9746084965449564, "grad_norm": 0.08026771247386932, "learning_rate": 6.085533343972393e-06, "loss": 0.0011, "step": 119180 }, { "epoch": 0.9746902727235556, "grad_norm": 0.03871900960803032, "learning_rate": 6.084836722762034e-06, "loss": 0.0008, "step": 119190 }, { "epoch": 0.9747720489021549, "grad_norm": 0.15172629058361053, "learning_rate": 6.084140079452671e-06, "loss": 0.0015, "step": 119200 }, { "epoch": 0.974853825080754, "grad_norm": 0.039196331053972244, "learning_rate": 6.083443414058492e-06, "loss": 0.0006, "step": 119210 }, { "epoch": 0.9749356012593532, "grad_norm": 0.02748965285718441, "learning_rate": 6.08274672659369e-06, "loss": 0.0016, "step": 119220 }, { "epoch": 0.9750173774379524, "grad_norm": 0.11767901480197906, "learning_rate": 6.082050017072459e-06, "loss": 0.0009, "step": 119230 }, { "epoch": 0.9750991536165515, "grad_norm": 0.0073121231980621815, "learning_rate": 6.081353285508988e-06, "loss": 0.0036, "step": 119240 }, { "epoch": 0.9751809297951507, "grad_norm": 0.08299683779478073, "learning_rate": 6.0806565319174706e-06, "loss": 0.0021, "step": 119250 }, { "epoch": 0.9752627059737499, "grad_norm": 0.04797539487481117, "learning_rate": 6.079959756312102e-06, "loss": 0.0017, "step": 119260 }, { "epoch": 0.975344482152349, "grad_norm": 0.005646319128572941, "learning_rate": 6.079262958707076e-06, "loss": 0.0018, "step": 119270 }, { "epoch": 0.9754262583309482, "grad_norm": 0.02465970255434513, "learning_rate": 6.078566139116586e-06, "loss": 0.0012, "step": 119280 }, { "epoch": 0.9755080345095474, "grad_norm": 0.03330414742231369, "learning_rate": 6.077869297554827e-06, "loss": 0.0008, "step": 119290 }, { "epoch": 0.9755898106881465, "grad_norm": 0.03659316897392273, "learning_rate": 6.077172434035995e-06, "loss": 0.0017, "step": 119300 }, { "epoch": 0.9756715868667457, "grad_norm": 0.009536856785416603, "learning_rate": 6.076475548574283e-06, "loss": 0.0012, "step": 119310 }, { "epoch": 0.9757533630453449, "grad_norm": 0.033899106085300446, "learning_rate": 6.075778641183891e-06, "loss": 0.0008, "step": 119320 }, { "epoch": 0.975835139223944, "grad_norm": 0.005173260811716318, "learning_rate": 6.075081711879013e-06, "loss": 0.0013, "step": 119330 }, { "epoch": 0.9759169154025432, "grad_norm": 0.036538101732730865, "learning_rate": 6.074384760673846e-06, "loss": 0.0029, "step": 119340 }, { "epoch": 0.9759986915811424, "grad_norm": 0.03279019519686699, "learning_rate": 6.073687787582589e-06, "loss": 0.0006, "step": 119350 }, { "epoch": 0.9760804677597416, "grad_norm": 0.21353240311145782, "learning_rate": 6.07299079261944e-06, "loss": 0.0018, "step": 119360 }, { "epoch": 0.9761622439383407, "grad_norm": 0.05093717202544212, "learning_rate": 6.072293775798595e-06, "loss": 0.0013, "step": 119370 }, { "epoch": 0.9762440201169399, "grad_norm": 0.0952310562133789, "learning_rate": 6.071596737134255e-06, "loss": 0.0018, "step": 119380 }, { "epoch": 0.9763257962955391, "grad_norm": 0.019269265234470367, "learning_rate": 6.070899676640616e-06, "loss": 0.0011, "step": 119390 }, { "epoch": 0.9764075724741382, "grad_norm": 0.02232486382126808, "learning_rate": 6.070202594331882e-06, "loss": 0.0013, "step": 119400 }, { "epoch": 0.9764893486527375, "grad_norm": 0.05473720654845238, "learning_rate": 6.069505490222252e-06, "loss": 0.0017, "step": 119410 }, { "epoch": 0.9765711248313367, "grad_norm": 0.055044811218976974, "learning_rate": 6.068808364325924e-06, "loss": 0.0006, "step": 119420 }, { "epoch": 0.9766529010099358, "grad_norm": 0.08420320600271225, "learning_rate": 6.068111216657102e-06, "loss": 0.0014, "step": 119430 }, { "epoch": 0.976734677188535, "grad_norm": 0.03710236772894859, "learning_rate": 6.067414047229987e-06, "loss": 0.0009, "step": 119440 }, { "epoch": 0.9768164533671342, "grad_norm": 0.044161200523376465, "learning_rate": 6.066716856058778e-06, "loss": 0.0012, "step": 119450 }, { "epoch": 0.9768982295457334, "grad_norm": 0.06518907845020294, "learning_rate": 6.066019643157681e-06, "loss": 0.0011, "step": 119460 }, { "epoch": 0.9769800057243325, "grad_norm": 0.079835906624794, "learning_rate": 6.065322408540898e-06, "loss": 0.0018, "step": 119470 }, { "epoch": 0.9770617819029317, "grad_norm": 0.016525687649846077, "learning_rate": 6.064625152222632e-06, "loss": 0.0016, "step": 119480 }, { "epoch": 0.9771435580815309, "grad_norm": 0.048909399658441544, "learning_rate": 6.063927874217084e-06, "loss": 0.001, "step": 119490 }, { "epoch": 0.97722533426013, "grad_norm": 0.0021933759562671185, "learning_rate": 6.063230574538461e-06, "loss": 0.0013, "step": 119500 }, { "epoch": 0.9773071104387292, "grad_norm": 0.0638638585805893, "learning_rate": 6.062533253200967e-06, "loss": 0.0015, "step": 119510 }, { "epoch": 0.9773888866173284, "grad_norm": 0.021107865497469902, "learning_rate": 6.061835910218808e-06, "loss": 0.0013, "step": 119520 }, { "epoch": 0.9774706627959275, "grad_norm": 0.0010262990836054087, "learning_rate": 6.061138545606189e-06, "loss": 0.0011, "step": 119530 }, { "epoch": 0.9775524389745267, "grad_norm": 0.16885767877101898, "learning_rate": 6.060441159377314e-06, "loss": 0.0015, "step": 119540 }, { "epoch": 0.9776342151531259, "grad_norm": 0.051877960562705994, "learning_rate": 6.059743751546389e-06, "loss": 0.0009, "step": 119550 }, { "epoch": 0.977715991331725, "grad_norm": 0.10194875299930573, "learning_rate": 6.059046322127625e-06, "loss": 0.0022, "step": 119560 }, { "epoch": 0.9777977675103242, "grad_norm": 0.09507175534963608, "learning_rate": 6.0583488711352254e-06, "loss": 0.002, "step": 119570 }, { "epoch": 0.9778795436889234, "grad_norm": 0.06079384684562683, "learning_rate": 6.0576513985834e-06, "loss": 0.0013, "step": 119580 }, { "epoch": 0.9779613198675225, "grad_norm": 0.03190896287560463, "learning_rate": 6.056953904486356e-06, "loss": 0.0014, "step": 119590 }, { "epoch": 0.9780430960461217, "grad_norm": 0.01983366720378399, "learning_rate": 6.056256388858301e-06, "loss": 0.001, "step": 119600 }, { "epoch": 0.9781248722247209, "grad_norm": 0.10261178761720657, "learning_rate": 6.055558851713443e-06, "loss": 0.0015, "step": 119610 }, { "epoch": 0.9782066484033202, "grad_norm": 0.005168362520635128, "learning_rate": 6.054861293065997e-06, "loss": 0.0006, "step": 119620 }, { "epoch": 0.9782884245819193, "grad_norm": 0.03155285492539406, "learning_rate": 6.054163712930166e-06, "loss": 0.001, "step": 119630 }, { "epoch": 0.9783702007605185, "grad_norm": 0.42537447810173035, "learning_rate": 6.053466111320164e-06, "loss": 0.0026, "step": 119640 }, { "epoch": 0.9784519769391177, "grad_norm": 0.04855590686202049, "learning_rate": 6.0527684882502024e-06, "loss": 0.0016, "step": 119650 }, { "epoch": 0.9785337531177168, "grad_norm": 0.05777241662144661, "learning_rate": 6.05207084373449e-06, "loss": 0.002, "step": 119660 }, { "epoch": 0.978615529296316, "grad_norm": 0.10027124732732773, "learning_rate": 6.051373177787239e-06, "loss": 0.0022, "step": 119670 }, { "epoch": 0.9786973054749152, "grad_norm": 0.020914802327752113, "learning_rate": 6.050675490422663e-06, "loss": 0.0015, "step": 119680 }, { "epoch": 0.9787790816535143, "grad_norm": 0.1584712713956833, "learning_rate": 6.049977781654971e-06, "loss": 0.0019, "step": 119690 }, { "epoch": 0.9788608578321135, "grad_norm": 0.0020166519097983837, "learning_rate": 6.049280051498379e-06, "loss": 0.002, "step": 119700 }, { "epoch": 0.9789426340107127, "grad_norm": 0.08776848763227463, "learning_rate": 6.048582299967101e-06, "loss": 0.0028, "step": 119710 }, { "epoch": 0.9790244101893119, "grad_norm": 0.019533682614564896, "learning_rate": 6.047884527075348e-06, "loss": 0.0011, "step": 119720 }, { "epoch": 0.979106186367911, "grad_norm": 0.17082765698432922, "learning_rate": 6.047186732837335e-06, "loss": 0.0014, "step": 119730 }, { "epoch": 0.9791879625465102, "grad_norm": 0.003190163290128112, "learning_rate": 6.046488917267278e-06, "loss": 0.002, "step": 119740 }, { "epoch": 0.9792697387251094, "grad_norm": 0.13622809946537018, "learning_rate": 6.045791080379391e-06, "loss": 0.0013, "step": 119750 }, { "epoch": 0.9793515149037085, "grad_norm": 0.07715527713298798, "learning_rate": 6.045093222187889e-06, "loss": 0.0008, "step": 119760 }, { "epoch": 0.9794332910823077, "grad_norm": 0.038035620003938675, "learning_rate": 6.044395342706989e-06, "loss": 0.0022, "step": 119770 }, { "epoch": 0.9795150672609069, "grad_norm": 0.005974447354674339, "learning_rate": 6.043697441950908e-06, "loss": 0.001, "step": 119780 }, { "epoch": 0.979596843439506, "grad_norm": 0.07190750539302826, "learning_rate": 6.042999519933859e-06, "loss": 0.0013, "step": 119790 }, { "epoch": 0.9796786196181052, "grad_norm": 0.007374091073870659, "learning_rate": 6.042301576670064e-06, "loss": 0.0016, "step": 119800 }, { "epoch": 0.9797603957967044, "grad_norm": 0.02889072895050049, "learning_rate": 6.041603612173737e-06, "loss": 0.0017, "step": 119810 }, { "epoch": 0.9798421719753035, "grad_norm": 0.0189172625541687, "learning_rate": 6.0409056264591e-06, "loss": 0.0006, "step": 119820 }, { "epoch": 0.9799239481539027, "grad_norm": 0.16356167197227478, "learning_rate": 6.040207619540369e-06, "loss": 0.0013, "step": 119830 }, { "epoch": 0.980005724332502, "grad_norm": 0.033994197845458984, "learning_rate": 6.0395095914317625e-06, "loss": 0.0014, "step": 119840 }, { "epoch": 0.9800875005111012, "grad_norm": 0.08785359561443329, "learning_rate": 6.0388115421475e-06, "loss": 0.0047, "step": 119850 }, { "epoch": 0.9801692766897003, "grad_norm": 0.030681882053613663, "learning_rate": 6.038113471701803e-06, "loss": 0.0014, "step": 119860 }, { "epoch": 0.9802510528682995, "grad_norm": 0.016966652125120163, "learning_rate": 6.03741538010889e-06, "loss": 0.0018, "step": 119870 }, { "epoch": 0.9803328290468987, "grad_norm": 0.09265690296888351, "learning_rate": 6.036717267382984e-06, "loss": 0.0015, "step": 119880 }, { "epoch": 0.9804146052254978, "grad_norm": 0.029227400198578835, "learning_rate": 6.036019133538303e-06, "loss": 0.0015, "step": 119890 }, { "epoch": 0.980496381404097, "grad_norm": 0.027290651574730873, "learning_rate": 6.035320978589071e-06, "loss": 0.0005, "step": 119900 }, { "epoch": 0.9805781575826962, "grad_norm": 0.046378135681152344, "learning_rate": 6.034622802549508e-06, "loss": 0.0012, "step": 119910 }, { "epoch": 0.9806599337612953, "grad_norm": 0.023667290806770325, "learning_rate": 6.0339246054338386e-06, "loss": 0.002, "step": 119920 }, { "epoch": 0.9807417099398945, "grad_norm": 0.06859437376260757, "learning_rate": 6.033226387256284e-06, "loss": 0.0016, "step": 119930 }, { "epoch": 0.9808234861184937, "grad_norm": 0.05718434229493141, "learning_rate": 6.032528148031069e-06, "loss": 0.0012, "step": 119940 }, { "epoch": 0.9809052622970929, "grad_norm": 0.03193407878279686, "learning_rate": 6.031829887772415e-06, "loss": 0.001, "step": 119950 }, { "epoch": 0.980987038475692, "grad_norm": 0.07623360306024551, "learning_rate": 6.031131606494549e-06, "loss": 0.0012, "step": 119960 }, { "epoch": 0.9810688146542912, "grad_norm": 0.048376213759183884, "learning_rate": 6.030433304211693e-06, "loss": 0.0011, "step": 119970 }, { "epoch": 0.9811505908328904, "grad_norm": 0.029947208240628242, "learning_rate": 6.029734980938073e-06, "loss": 0.0014, "step": 119980 }, { "epoch": 0.9812323670114895, "grad_norm": 0.014508059248328209, "learning_rate": 6.029036636687915e-06, "loss": 0.0008, "step": 119990 }, { "epoch": 0.9813141431900887, "grad_norm": 0.03877796605229378, "learning_rate": 6.028338271475444e-06, "loss": 0.0013, "step": 120000 }, { "epoch": 0.9813959193686879, "grad_norm": 0.1115129142999649, "learning_rate": 6.0276398853148865e-06, "loss": 0.0013, "step": 120010 }, { "epoch": 0.981477695547287, "grad_norm": 0.049899470061063766, "learning_rate": 6.02694147822047e-06, "loss": 0.0011, "step": 120020 }, { "epoch": 0.9815594717258862, "grad_norm": 0.02439580298960209, "learning_rate": 6.0262430502064195e-06, "loss": 0.0022, "step": 120030 }, { "epoch": 0.9816412479044854, "grad_norm": 0.015785135328769684, "learning_rate": 6.025544601286963e-06, "loss": 0.0017, "step": 120040 }, { "epoch": 0.9817230240830846, "grad_norm": 0.0239321980625391, "learning_rate": 6.0248461314763316e-06, "loss": 0.0011, "step": 120050 }, { "epoch": 0.9818048002616838, "grad_norm": 0.03740978240966797, "learning_rate": 6.02414764078875e-06, "loss": 0.0014, "step": 120060 }, { "epoch": 0.981886576440283, "grad_norm": 0.11160390079021454, "learning_rate": 6.023449129238449e-06, "loss": 0.0021, "step": 120070 }, { "epoch": 0.9819683526188822, "grad_norm": 0.025207985192537308, "learning_rate": 6.022750596839658e-06, "loss": 0.0018, "step": 120080 }, { "epoch": 0.9820501287974813, "grad_norm": 0.04845944792032242, "learning_rate": 6.022052043606605e-06, "loss": 0.0015, "step": 120090 }, { "epoch": 0.9821319049760805, "grad_norm": 0.03577496483922005, "learning_rate": 6.021353469553522e-06, "loss": 0.0007, "step": 120100 }, { "epoch": 0.9822136811546797, "grad_norm": 0.023669734597206116, "learning_rate": 6.020654874694638e-06, "loss": 0.001, "step": 120110 }, { "epoch": 0.9822954573332788, "grad_norm": 0.07328461110591888, "learning_rate": 6.019956259044184e-06, "loss": 0.0013, "step": 120120 }, { "epoch": 0.982377233511878, "grad_norm": 0.025065075606107712, "learning_rate": 6.019257622616393e-06, "loss": 0.001, "step": 120130 }, { "epoch": 0.9824590096904772, "grad_norm": 0.01662481389939785, "learning_rate": 6.018558965425496e-06, "loss": 0.001, "step": 120140 }, { "epoch": 0.9825407858690763, "grad_norm": 0.09219970554113388, "learning_rate": 6.017860287485725e-06, "loss": 0.0017, "step": 120150 }, { "epoch": 0.9826225620476755, "grad_norm": 0.04999057948589325, "learning_rate": 6.017161588811311e-06, "loss": 0.0012, "step": 120160 }, { "epoch": 0.9827043382262747, "grad_norm": 0.040255360305309296, "learning_rate": 6.016462869416491e-06, "loss": 0.0019, "step": 120170 }, { "epoch": 0.9827861144048738, "grad_norm": 0.017053430899977684, "learning_rate": 6.015764129315493e-06, "loss": 0.002, "step": 120180 }, { "epoch": 0.982867890583473, "grad_norm": 0.08996625244617462, "learning_rate": 6.015065368522557e-06, "loss": 0.0012, "step": 120190 }, { "epoch": 0.9829496667620722, "grad_norm": 0.05768732354044914, "learning_rate": 6.014366587051913e-06, "loss": 0.0013, "step": 120200 }, { "epoch": 0.9830314429406714, "grad_norm": 0.14900517463684082, "learning_rate": 6.013667784917797e-06, "loss": 0.0021, "step": 120210 }, { "epoch": 0.9831132191192705, "grad_norm": 0.03285551443696022, "learning_rate": 6.012968962134444e-06, "loss": 0.0015, "step": 120220 }, { "epoch": 0.9831949952978697, "grad_norm": 0.0708361491560936, "learning_rate": 6.01227011871609e-06, "loss": 0.0008, "step": 120230 }, { "epoch": 0.9832767714764689, "grad_norm": 0.011042413301765919, "learning_rate": 6.011571254676971e-06, "loss": 0.0011, "step": 120240 }, { "epoch": 0.983358547655068, "grad_norm": 0.0333956703543663, "learning_rate": 6.010872370031324e-06, "loss": 0.001, "step": 120250 }, { "epoch": 0.9834403238336673, "grad_norm": 0.001751255476847291, "learning_rate": 6.010173464793385e-06, "loss": 0.0011, "step": 120260 }, { "epoch": 0.9835221000122665, "grad_norm": 0.053721729665994644, "learning_rate": 6.009474538977391e-06, "loss": 0.0011, "step": 120270 }, { "epoch": 0.9836038761908656, "grad_norm": 0.07829448580741882, "learning_rate": 6.0087755925975796e-06, "loss": 0.0007, "step": 120280 }, { "epoch": 0.9836856523694648, "grad_norm": 0.0025027156807482243, "learning_rate": 6.008076625668191e-06, "loss": 0.0011, "step": 120290 }, { "epoch": 0.983767428548064, "grad_norm": 0.08130620419979095, "learning_rate": 6.00737763820346e-06, "loss": 0.0011, "step": 120300 }, { "epoch": 0.9838492047266632, "grad_norm": 0.05687863752245903, "learning_rate": 6.006678630217629e-06, "loss": 0.0009, "step": 120310 }, { "epoch": 0.9839309809052623, "grad_norm": 0.2061663120985031, "learning_rate": 6.005979601724936e-06, "loss": 0.0026, "step": 120320 }, { "epoch": 0.9840127570838615, "grad_norm": 0.07917072623968124, "learning_rate": 6.005280552739622e-06, "loss": 0.0012, "step": 120330 }, { "epoch": 0.9840945332624607, "grad_norm": 0.07101953774690628, "learning_rate": 6.0045814832759234e-06, "loss": 0.0011, "step": 120340 }, { "epoch": 0.9841763094410598, "grad_norm": 0.068425253033638, "learning_rate": 6.003882393348085e-06, "loss": 0.0012, "step": 120350 }, { "epoch": 0.984258085619659, "grad_norm": 0.04066820815205574, "learning_rate": 6.003183282970345e-06, "loss": 0.0017, "step": 120360 }, { "epoch": 0.9843398617982582, "grad_norm": 0.09956500679254532, "learning_rate": 6.002484152156947e-06, "loss": 0.0016, "step": 120370 }, { "epoch": 0.9844216379768573, "grad_norm": 0.04808436334133148, "learning_rate": 6.001785000922132e-06, "loss": 0.0008, "step": 120380 }, { "epoch": 0.9845034141554565, "grad_norm": 0.04592026397585869, "learning_rate": 6.001085829280144e-06, "loss": 0.001, "step": 120390 }, { "epoch": 0.9845851903340557, "grad_norm": 0.04002285748720169, "learning_rate": 6.00038663724522e-06, "loss": 0.0013, "step": 120400 }, { "epoch": 0.9846669665126548, "grad_norm": 0.030700860545039177, "learning_rate": 5.999687424831611e-06, "loss": 0.0013, "step": 120410 }, { "epoch": 0.984748742691254, "grad_norm": 0.022975191473960876, "learning_rate": 5.998988192053553e-06, "loss": 0.0023, "step": 120420 }, { "epoch": 0.9848305188698532, "grad_norm": 0.03745858371257782, "learning_rate": 5.998288938925296e-06, "loss": 0.0022, "step": 120430 }, { "epoch": 0.9849122950484523, "grad_norm": 0.007468884345144033, "learning_rate": 5.997589665461081e-06, "loss": 0.0018, "step": 120440 }, { "epoch": 0.9849940712270515, "grad_norm": 0.0035509688314050436, "learning_rate": 5.996890371675155e-06, "loss": 0.0009, "step": 120450 }, { "epoch": 0.9850758474056507, "grad_norm": 0.03589550778269768, "learning_rate": 5.9961910575817595e-06, "loss": 0.0014, "step": 120460 }, { "epoch": 0.9851576235842499, "grad_norm": 0.000747326819691807, "learning_rate": 5.995491723195144e-06, "loss": 0.0017, "step": 120470 }, { "epoch": 0.9852393997628491, "grad_norm": 0.03060908429324627, "learning_rate": 5.9947923685295515e-06, "loss": 0.0015, "step": 120480 }, { "epoch": 0.9853211759414483, "grad_norm": 0.06686169654130936, "learning_rate": 5.994092993599231e-06, "loss": 0.0022, "step": 120490 }, { "epoch": 0.9854029521200475, "grad_norm": 0.05716865509748459, "learning_rate": 5.9933935984184274e-06, "loss": 0.0018, "step": 120500 }, { "epoch": 0.9854847282986466, "grad_norm": 0.05452525243163109, "learning_rate": 5.992694183001391e-06, "loss": 0.002, "step": 120510 }, { "epoch": 0.9855665044772458, "grad_norm": 0.0066546290181577206, "learning_rate": 5.991994747362364e-06, "loss": 0.0016, "step": 120520 }, { "epoch": 0.985648280655845, "grad_norm": 0.011923052370548248, "learning_rate": 5.9912952915156e-06, "loss": 0.0035, "step": 120530 }, { "epoch": 0.9857300568344441, "grad_norm": 0.060750361531972885, "learning_rate": 5.990595815475343e-06, "loss": 0.0028, "step": 120540 }, { "epoch": 0.9858118330130433, "grad_norm": 0.04811937361955643, "learning_rate": 5.989896319255845e-06, "loss": 0.0016, "step": 120550 }, { "epoch": 0.9858936091916425, "grad_norm": 0.0679512619972229, "learning_rate": 5.989196802871356e-06, "loss": 0.0016, "step": 120560 }, { "epoch": 0.9859753853702417, "grad_norm": 0.15712673962116241, "learning_rate": 5.9884972663361225e-06, "loss": 0.0039, "step": 120570 }, { "epoch": 0.9860571615488408, "grad_norm": 0.007500902283936739, "learning_rate": 5.987797709664395e-06, "loss": 0.0015, "step": 120580 }, { "epoch": 0.98613893772744, "grad_norm": 0.041534967720508575, "learning_rate": 5.987098132870428e-06, "loss": 0.0025, "step": 120590 }, { "epoch": 0.9862207139060392, "grad_norm": 0.031145527958869934, "learning_rate": 5.986398535968467e-06, "loss": 0.0012, "step": 120600 }, { "epoch": 0.9863024900846383, "grad_norm": 0.07457441091537476, "learning_rate": 5.985698918972769e-06, "loss": 0.0013, "step": 120610 }, { "epoch": 0.9863842662632375, "grad_norm": 0.04673402011394501, "learning_rate": 5.984999281897581e-06, "loss": 0.0018, "step": 120620 }, { "epoch": 0.9864660424418367, "grad_norm": 0.12801355123519897, "learning_rate": 5.984299624757158e-06, "loss": 0.0011, "step": 120630 }, { "epoch": 0.9865478186204358, "grad_norm": 0.08107773959636688, "learning_rate": 5.983599947565749e-06, "loss": 0.0007, "step": 120640 }, { "epoch": 0.986629594799035, "grad_norm": 0.024562843143939972, "learning_rate": 5.982900250337612e-06, "loss": 0.001, "step": 120650 }, { "epoch": 0.9867113709776342, "grad_norm": 0.0033008866012096405, "learning_rate": 5.982200533086996e-06, "loss": 0.0032, "step": 120660 }, { "epoch": 0.9867931471562333, "grad_norm": 0.047706469893455505, "learning_rate": 5.981500795828159e-06, "loss": 0.0019, "step": 120670 }, { "epoch": 0.9868749233348325, "grad_norm": 0.11497076600790024, "learning_rate": 5.980801038575352e-06, "loss": 0.0017, "step": 120680 }, { "epoch": 0.9869566995134318, "grad_norm": 0.01661958359181881, "learning_rate": 5.9801012613428315e-06, "loss": 0.0008, "step": 120690 }, { "epoch": 0.987038475692031, "grad_norm": 0.028911376371979713, "learning_rate": 5.9794014641448495e-06, "loss": 0.0006, "step": 120700 }, { "epoch": 0.9871202518706301, "grad_norm": 0.07135674357414246, "learning_rate": 5.978701646995665e-06, "loss": 0.0016, "step": 120710 }, { "epoch": 0.9872020280492293, "grad_norm": 0.3338734805583954, "learning_rate": 5.9780018099095316e-06, "loss": 0.0012, "step": 120720 }, { "epoch": 0.9872838042278285, "grad_norm": 0.084747314453125, "learning_rate": 5.977301952900707e-06, "loss": 0.0028, "step": 120730 }, { "epoch": 0.9873655804064276, "grad_norm": 0.03147732838988304, "learning_rate": 5.976602075983447e-06, "loss": 0.001, "step": 120740 }, { "epoch": 0.9874473565850268, "grad_norm": 0.03488631919026375, "learning_rate": 5.975902179172011e-06, "loss": 0.0015, "step": 120750 }, { "epoch": 0.987529132763626, "grad_norm": 0.013499120250344276, "learning_rate": 5.9752022624806505e-06, "loss": 0.0044, "step": 120760 }, { "epoch": 0.9876109089422251, "grad_norm": 0.008028835989534855, "learning_rate": 5.97450232592363e-06, "loss": 0.0011, "step": 120770 }, { "epoch": 0.9876926851208243, "grad_norm": 0.1833779364824295, "learning_rate": 5.9738023695152035e-06, "loss": 0.0014, "step": 120780 }, { "epoch": 0.9877744612994235, "grad_norm": 0.2380089908838272, "learning_rate": 5.973102393269632e-06, "loss": 0.0034, "step": 120790 }, { "epoch": 0.9878562374780226, "grad_norm": 0.05806852504611015, "learning_rate": 5.9724023972011745e-06, "loss": 0.0016, "step": 120800 }, { "epoch": 0.9879380136566218, "grad_norm": 0.04061151668429375, "learning_rate": 5.971702381324089e-06, "loss": 0.0027, "step": 120810 }, { "epoch": 0.988019789835221, "grad_norm": 0.10001330822706223, "learning_rate": 5.971002345652636e-06, "loss": 0.0024, "step": 120820 }, { "epoch": 0.9881015660138202, "grad_norm": 0.022784773260354996, "learning_rate": 5.970302290201075e-06, "loss": 0.0017, "step": 120830 }, { "epoch": 0.9881833421924193, "grad_norm": 0.044711966067552567, "learning_rate": 5.969602214983669e-06, "loss": 0.0011, "step": 120840 }, { "epoch": 0.9882651183710185, "grad_norm": 0.07743814587593079, "learning_rate": 5.968902120014677e-06, "loss": 0.0014, "step": 120850 }, { "epoch": 0.9883468945496177, "grad_norm": 0.008698815479874611, "learning_rate": 5.968202005308363e-06, "loss": 0.0023, "step": 120860 }, { "epoch": 0.9884286707282168, "grad_norm": 0.10103078931570053, "learning_rate": 5.967501870878986e-06, "loss": 0.0019, "step": 120870 }, { "epoch": 0.988510446906816, "grad_norm": 0.03285948559641838, "learning_rate": 5.966801716740809e-06, "loss": 0.0019, "step": 120880 }, { "epoch": 0.9885922230854152, "grad_norm": 0.08544975519180298, "learning_rate": 5.966101542908095e-06, "loss": 0.0041, "step": 120890 }, { "epoch": 0.9886739992640144, "grad_norm": 0.0381406806409359, "learning_rate": 5.965401349395107e-06, "loss": 0.0012, "step": 120900 }, { "epoch": 0.9887557754426136, "grad_norm": 0.0046663060784339905, "learning_rate": 5.96470113621611e-06, "loss": 0.0012, "step": 120910 }, { "epoch": 0.9888375516212128, "grad_norm": 0.014375782571732998, "learning_rate": 5.9640009033853665e-06, "loss": 0.0017, "step": 120920 }, { "epoch": 0.988919327799812, "grad_norm": 0.04997723549604416, "learning_rate": 5.963300650917141e-06, "loss": 0.001, "step": 120930 }, { "epoch": 0.9890011039784111, "grad_norm": 0.05272151529788971, "learning_rate": 5.962600378825697e-06, "loss": 0.0052, "step": 120940 }, { "epoch": 0.9890828801570103, "grad_norm": 0.153398334980011, "learning_rate": 5.961900087125303e-06, "loss": 0.0024, "step": 120950 }, { "epoch": 0.9891646563356095, "grad_norm": 0.03518655151128769, "learning_rate": 5.96119977583022e-06, "loss": 0.0016, "step": 120960 }, { "epoch": 0.9892464325142086, "grad_norm": 0.001479673315770924, "learning_rate": 5.960499444954718e-06, "loss": 0.0008, "step": 120970 }, { "epoch": 0.9893282086928078, "grad_norm": 0.05059833452105522, "learning_rate": 5.959799094513061e-06, "loss": 0.0019, "step": 120980 }, { "epoch": 0.989409984871407, "grad_norm": 0.02361101657152176, "learning_rate": 5.959098724519516e-06, "loss": 0.0008, "step": 120990 }, { "epoch": 0.9894917610500061, "grad_norm": 0.014037755317986012, "learning_rate": 5.958398334988352e-06, "loss": 0.001, "step": 121000 }, { "epoch": 0.9895735372286053, "grad_norm": 0.10040776431560516, "learning_rate": 5.957697925933833e-06, "loss": 0.0015, "step": 121010 }, { "epoch": 0.9896553134072045, "grad_norm": 0.04417092725634575, "learning_rate": 5.956997497370228e-06, "loss": 0.0012, "step": 121020 }, { "epoch": 0.9897370895858036, "grad_norm": 0.10141853243112564, "learning_rate": 5.956297049311809e-06, "loss": 0.0012, "step": 121030 }, { "epoch": 0.9898188657644028, "grad_norm": 0.05370889604091644, "learning_rate": 5.95559658177284e-06, "loss": 0.0012, "step": 121040 }, { "epoch": 0.989900641943002, "grad_norm": 0.038928497582674026, "learning_rate": 5.954896094767593e-06, "loss": 0.0013, "step": 121050 }, { "epoch": 0.9899824181216011, "grad_norm": 0.06851954758167267, "learning_rate": 5.9541955883103344e-06, "loss": 0.0018, "step": 121060 }, { "epoch": 0.9900641943002003, "grad_norm": 0.04278092458844185, "learning_rate": 5.9534950624153374e-06, "loss": 0.0013, "step": 121070 }, { "epoch": 0.9901459704787995, "grad_norm": 0.1216086894273758, "learning_rate": 5.95279451709687e-06, "loss": 0.0017, "step": 121080 }, { "epoch": 0.9902277466573987, "grad_norm": 0.04158376529812813, "learning_rate": 5.952093952369204e-06, "loss": 0.0021, "step": 121090 }, { "epoch": 0.9903095228359978, "grad_norm": 0.0586681067943573, "learning_rate": 5.951393368246612e-06, "loss": 0.0014, "step": 121100 }, { "epoch": 0.990391299014597, "grad_norm": 0.06458186358213425, "learning_rate": 5.950692764743363e-06, "loss": 0.0005, "step": 121110 }, { "epoch": 0.9904730751931963, "grad_norm": 0.08800280839204788, "learning_rate": 5.949992141873728e-06, "loss": 0.0022, "step": 121120 }, { "epoch": 0.9905548513717954, "grad_norm": 0.029059652239084244, "learning_rate": 5.949291499651982e-06, "loss": 0.001, "step": 121130 }, { "epoch": 0.9906366275503946, "grad_norm": 0.022217826917767525, "learning_rate": 5.948590838092396e-06, "loss": 0.0015, "step": 121140 }, { "epoch": 0.9907184037289938, "grad_norm": 0.028701797127723694, "learning_rate": 5.947890157209245e-06, "loss": 0.001, "step": 121150 }, { "epoch": 0.990800179907593, "grad_norm": 0.05719681456685066, "learning_rate": 5.947189457016801e-06, "loss": 0.0007, "step": 121160 }, { "epoch": 0.9908819560861921, "grad_norm": 0.05964844673871994, "learning_rate": 5.946488737529337e-06, "loss": 0.0021, "step": 121170 }, { "epoch": 0.9909637322647913, "grad_norm": 0.10132652521133423, "learning_rate": 5.9457879987611275e-06, "loss": 0.0021, "step": 121180 }, { "epoch": 0.9910455084433905, "grad_norm": 0.014276833273470402, "learning_rate": 5.945087240726448e-06, "loss": 0.0017, "step": 121190 }, { "epoch": 0.9911272846219896, "grad_norm": 0.03409087285399437, "learning_rate": 5.944386463439573e-06, "loss": 0.0007, "step": 121200 }, { "epoch": 0.9912090608005888, "grad_norm": 0.021835744380950928, "learning_rate": 5.943685666914779e-06, "loss": 0.0012, "step": 121210 }, { "epoch": 0.991290836979188, "grad_norm": 0.012870382517576218, "learning_rate": 5.942984851166341e-06, "loss": 0.0008, "step": 121220 }, { "epoch": 0.9913726131577871, "grad_norm": 0.04150146618485451, "learning_rate": 5.942284016208535e-06, "loss": 0.0009, "step": 121230 }, { "epoch": 0.9914543893363863, "grad_norm": 0.03200765326619148, "learning_rate": 5.941583162055636e-06, "loss": 0.0023, "step": 121240 }, { "epoch": 0.9915361655149855, "grad_norm": 0.04988521337509155, "learning_rate": 5.940882288721926e-06, "loss": 0.0017, "step": 121250 }, { "epoch": 0.9916179416935846, "grad_norm": 0.04601968079805374, "learning_rate": 5.940181396221675e-06, "loss": 0.001, "step": 121260 }, { "epoch": 0.9916997178721838, "grad_norm": 0.059704117476940155, "learning_rate": 5.939480484569167e-06, "loss": 0.0013, "step": 121270 }, { "epoch": 0.991781494050783, "grad_norm": 0.06386008113622665, "learning_rate": 5.938779553778677e-06, "loss": 0.0022, "step": 121280 }, { "epoch": 0.9918632702293821, "grad_norm": 0.06316830217838287, "learning_rate": 5.9380786038644864e-06, "loss": 0.0013, "step": 121290 }, { "epoch": 0.9919450464079813, "grad_norm": 0.03160783648490906, "learning_rate": 5.93737763484087e-06, "loss": 0.0014, "step": 121300 }, { "epoch": 0.9920268225865805, "grad_norm": 0.020157650113105774, "learning_rate": 5.9366766467221105e-06, "loss": 0.0013, "step": 121310 }, { "epoch": 0.9921085987651797, "grad_norm": 0.02360030636191368, "learning_rate": 5.9359756395224845e-06, "loss": 0.0006, "step": 121320 }, { "epoch": 0.9921903749437789, "grad_norm": 0.06832371652126312, "learning_rate": 5.935274613256276e-06, "loss": 0.0027, "step": 121330 }, { "epoch": 0.9922721511223781, "grad_norm": 0.05071704834699631, "learning_rate": 5.934573567937762e-06, "loss": 0.0021, "step": 121340 }, { "epoch": 0.9923539273009773, "grad_norm": 0.05362255871295929, "learning_rate": 5.933872503581226e-06, "loss": 0.0013, "step": 121350 }, { "epoch": 0.9924357034795764, "grad_norm": 0.008863692171871662, "learning_rate": 5.933171420200946e-06, "loss": 0.0014, "step": 121360 }, { "epoch": 0.9925174796581756, "grad_norm": 0.02892731875181198, "learning_rate": 5.932470317811208e-06, "loss": 0.0011, "step": 121370 }, { "epoch": 0.9925992558367748, "grad_norm": 0.062020935118198395, "learning_rate": 5.931769196426289e-06, "loss": 0.0016, "step": 121380 }, { "epoch": 0.9926810320153739, "grad_norm": 0.05736176297068596, "learning_rate": 5.931068056060477e-06, "loss": 0.0018, "step": 121390 }, { "epoch": 0.9927628081939731, "grad_norm": 0.01721285656094551, "learning_rate": 5.9303668967280504e-06, "loss": 0.0015, "step": 121400 }, { "epoch": 0.9928445843725723, "grad_norm": 0.02720123715698719, "learning_rate": 5.929665718443295e-06, "loss": 0.0015, "step": 121410 }, { "epoch": 0.9929263605511714, "grad_norm": 0.2733965516090393, "learning_rate": 5.928964521220493e-06, "loss": 0.0022, "step": 121420 }, { "epoch": 0.9930081367297706, "grad_norm": 0.0945454090833664, "learning_rate": 5.928263305073927e-06, "loss": 0.0014, "step": 121430 }, { "epoch": 0.9930899129083698, "grad_norm": 0.005422467365860939, "learning_rate": 5.927562070017884e-06, "loss": 0.0017, "step": 121440 }, { "epoch": 0.993171689086969, "grad_norm": 0.029961807653307915, "learning_rate": 5.926860816066646e-06, "loss": 0.0007, "step": 121450 }, { "epoch": 0.9932534652655681, "grad_norm": 0.01434290874749422, "learning_rate": 5.926159543234503e-06, "loss": 0.0019, "step": 121460 }, { "epoch": 0.9933352414441673, "grad_norm": 0.08992908895015717, "learning_rate": 5.925458251535735e-06, "loss": 0.0011, "step": 121470 }, { "epoch": 0.9934170176227665, "grad_norm": 0.12520621716976166, "learning_rate": 5.924756940984631e-06, "loss": 0.0021, "step": 121480 }, { "epoch": 0.9934987938013656, "grad_norm": 0.026180196553468704, "learning_rate": 5.924055611595476e-06, "loss": 0.0008, "step": 121490 }, { "epoch": 0.9935805699799648, "grad_norm": 0.06631122529506683, "learning_rate": 5.923354263382558e-06, "loss": 0.0026, "step": 121500 }, { "epoch": 0.993662346158564, "grad_norm": 0.010995223186910152, "learning_rate": 5.922652896360161e-06, "loss": 0.0011, "step": 121510 }, { "epoch": 0.9937441223371631, "grad_norm": 0.043376438319683075, "learning_rate": 5.921951510542577e-06, "loss": 0.0018, "step": 121520 }, { "epoch": 0.9938258985157623, "grad_norm": 0.16599848866462708, "learning_rate": 5.9212501059440895e-06, "loss": 0.0019, "step": 121530 }, { "epoch": 0.9939076746943616, "grad_norm": 0.11862049996852875, "learning_rate": 5.920548682578991e-06, "loss": 0.0016, "step": 121540 }, { "epoch": 0.9939894508729608, "grad_norm": 0.09305332601070404, "learning_rate": 5.919847240461565e-06, "loss": 0.0012, "step": 121550 }, { "epoch": 0.9940712270515599, "grad_norm": 0.017125669866800308, "learning_rate": 5.919145779606104e-06, "loss": 0.0011, "step": 121560 }, { "epoch": 0.9941530032301591, "grad_norm": 0.04972057789564133, "learning_rate": 5.918444300026895e-06, "loss": 0.0021, "step": 121570 }, { "epoch": 0.9942347794087583, "grad_norm": 0.08398696035146713, "learning_rate": 5.917742801738232e-06, "loss": 0.0019, "step": 121580 }, { "epoch": 0.9943165555873574, "grad_norm": 0.005228096153587103, "learning_rate": 5.9170412847544e-06, "loss": 0.0009, "step": 121590 }, { "epoch": 0.9943983317659566, "grad_norm": 0.04116179421544075, "learning_rate": 5.916339749089692e-06, "loss": 0.0011, "step": 121600 }, { "epoch": 0.9944801079445558, "grad_norm": 0.011072034016251564, "learning_rate": 5.915638194758398e-06, "loss": 0.0011, "step": 121610 }, { "epoch": 0.9945618841231549, "grad_norm": 0.025000670924782753, "learning_rate": 5.91493662177481e-06, "loss": 0.0014, "step": 121620 }, { "epoch": 0.9946436603017541, "grad_norm": 0.04290807992219925, "learning_rate": 5.914235030153219e-06, "loss": 0.0016, "step": 121630 }, { "epoch": 0.9947254364803533, "grad_norm": 0.01842462085187435, "learning_rate": 5.91353341990792e-06, "loss": 0.0009, "step": 121640 }, { "epoch": 0.9948072126589524, "grad_norm": 0.014480570331215858, "learning_rate": 5.9128317910532e-06, "loss": 0.001, "step": 121650 }, { "epoch": 0.9948889888375516, "grad_norm": 0.005957846995443106, "learning_rate": 5.912130143603355e-06, "loss": 0.0002, "step": 121660 }, { "epoch": 0.9949707650161508, "grad_norm": 0.051568325608968735, "learning_rate": 5.911428477572676e-06, "loss": 0.0022, "step": 121670 }, { "epoch": 0.99505254119475, "grad_norm": 0.029355643317103386, "learning_rate": 5.910726792975461e-06, "loss": 0.001, "step": 121680 }, { "epoch": 0.9951343173733491, "grad_norm": 0.03299593925476074, "learning_rate": 5.910025089825998e-06, "loss": 0.002, "step": 121690 }, { "epoch": 0.9952160935519483, "grad_norm": 0.03843175992369652, "learning_rate": 5.909323368138588e-06, "loss": 0.0011, "step": 121700 }, { "epoch": 0.9952978697305475, "grad_norm": 0.025990096852183342, "learning_rate": 5.908621627927519e-06, "loss": 0.0008, "step": 121710 }, { "epoch": 0.9953796459091466, "grad_norm": 0.046160947531461716, "learning_rate": 5.907919869207089e-06, "loss": 0.0012, "step": 121720 }, { "epoch": 0.9954614220877458, "grad_norm": 0.3620782494544983, "learning_rate": 5.907218091991593e-06, "loss": 0.0011, "step": 121730 }, { "epoch": 0.995543198266345, "grad_norm": 0.09966219961643219, "learning_rate": 5.906516296295328e-06, "loss": 0.0017, "step": 121740 }, { "epoch": 0.9956249744449441, "grad_norm": 0.03490449860692024, "learning_rate": 5.9058144821325895e-06, "loss": 0.0024, "step": 121750 }, { "epoch": 0.9957067506235434, "grad_norm": 0.06730461865663528, "learning_rate": 5.9051126495176736e-06, "loss": 0.0023, "step": 121760 }, { "epoch": 0.9957885268021426, "grad_norm": 0.012174781411886215, "learning_rate": 5.904410798464877e-06, "loss": 0.001, "step": 121770 }, { "epoch": 0.9958703029807418, "grad_norm": 0.14397075772285461, "learning_rate": 5.903708928988498e-06, "loss": 0.0014, "step": 121780 }, { "epoch": 0.9959520791593409, "grad_norm": 0.010603148490190506, "learning_rate": 5.90300704110283e-06, "loss": 0.0021, "step": 121790 }, { "epoch": 0.9960338553379401, "grad_norm": 0.07291997224092484, "learning_rate": 5.902305134822178e-06, "loss": 0.001, "step": 121800 }, { "epoch": 0.9961156315165393, "grad_norm": 0.023215681314468384, "learning_rate": 5.901603210160837e-06, "loss": 0.0008, "step": 121810 }, { "epoch": 0.9961974076951384, "grad_norm": 0.1189185082912445, "learning_rate": 5.9009012671331055e-06, "loss": 0.0019, "step": 121820 }, { "epoch": 0.9962791838737376, "grad_norm": 0.0160224512219429, "learning_rate": 5.900199305753283e-06, "loss": 0.0018, "step": 121830 }, { "epoch": 0.9963609600523368, "grad_norm": 0.03866582363843918, "learning_rate": 5.899497326035668e-06, "loss": 0.0018, "step": 121840 }, { "epoch": 0.9964427362309359, "grad_norm": 0.15292823314666748, "learning_rate": 5.8987953279945605e-06, "loss": 0.0031, "step": 121850 }, { "epoch": 0.9965245124095351, "grad_norm": 0.020626051351428032, "learning_rate": 5.898093311644263e-06, "loss": 0.0008, "step": 121860 }, { "epoch": 0.9966062885881343, "grad_norm": 0.010717302560806274, "learning_rate": 5.8973912769990734e-06, "loss": 0.0005, "step": 121870 }, { "epoch": 0.9966880647667334, "grad_norm": 0.08777724206447601, "learning_rate": 5.896689224073298e-06, "loss": 0.0027, "step": 121880 }, { "epoch": 0.9967698409453326, "grad_norm": 0.014005398377776146, "learning_rate": 5.895987152881229e-06, "loss": 0.0014, "step": 121890 }, { "epoch": 0.9968516171239318, "grad_norm": 0.08277472108602524, "learning_rate": 5.895285063437177e-06, "loss": 0.0011, "step": 121900 }, { "epoch": 0.996933393302531, "grad_norm": 0.0027619448956102133, "learning_rate": 5.8945829557554394e-06, "loss": 0.001, "step": 121910 }, { "epoch": 0.9970151694811301, "grad_norm": 0.02782551199197769, "learning_rate": 5.893880829850319e-06, "loss": 0.0018, "step": 121920 }, { "epoch": 0.9970969456597293, "grad_norm": 0.08549346029758453, "learning_rate": 5.893178685736121e-06, "loss": 0.0016, "step": 121930 }, { "epoch": 0.9971787218383285, "grad_norm": 0.010066721588373184, "learning_rate": 5.892476523427147e-06, "loss": 0.0007, "step": 121940 }, { "epoch": 0.9972604980169276, "grad_norm": 0.03790212422609329, "learning_rate": 5.8917743429377e-06, "loss": 0.0013, "step": 121950 }, { "epoch": 0.9973422741955268, "grad_norm": 0.09011966735124588, "learning_rate": 5.891072144282085e-06, "loss": 0.0019, "step": 121960 }, { "epoch": 0.9974240503741261, "grad_norm": 0.04493482783436775, "learning_rate": 5.8903699274746055e-06, "loss": 0.0008, "step": 121970 }, { "epoch": 0.9975058265527252, "grad_norm": 0.11953932791948318, "learning_rate": 5.889667692529568e-06, "loss": 0.0021, "step": 121980 }, { "epoch": 0.9975876027313244, "grad_norm": 0.3251173794269562, "learning_rate": 5.888965439461275e-06, "loss": 0.0023, "step": 121990 }, { "epoch": 0.9976693789099236, "grad_norm": 0.04495628923177719, "learning_rate": 5.888263168284035e-06, "loss": 0.0009, "step": 122000 }, { "epoch": 0.9976693789099236, "eval_loss": 0.0014875316992402077, "eval_runtime": 5.3621, "eval_samples_per_second": 37.299, "eval_steps_per_second": 9.325, "step": 122000 }, { "epoch": 0.9977511550885227, "grad_norm": 0.01609574444591999, "learning_rate": 5.887560879012152e-06, "loss": 0.0019, "step": 122010 }, { "epoch": 0.9978329312671219, "grad_norm": 0.03879019245505333, "learning_rate": 5.886858571659931e-06, "loss": 0.0008, "step": 122020 }, { "epoch": 0.9979147074457211, "grad_norm": 0.020034292712807655, "learning_rate": 5.886156246241681e-06, "loss": 0.0015, "step": 122030 }, { "epoch": 0.9979964836243203, "grad_norm": 0.06330041587352753, "learning_rate": 5.8854539027717085e-06, "loss": 0.0016, "step": 122040 }, { "epoch": 0.9980782598029194, "grad_norm": 0.06349852681159973, "learning_rate": 5.884751541264319e-06, "loss": 0.0012, "step": 122050 }, { "epoch": 0.9981600359815186, "grad_norm": 0.07723531872034073, "learning_rate": 5.8840491617338234e-06, "loss": 0.0012, "step": 122060 }, { "epoch": 0.9982418121601178, "grad_norm": 0.05244052782654762, "learning_rate": 5.883346764194526e-06, "loss": 0.0013, "step": 122070 }, { "epoch": 0.9983235883387169, "grad_norm": 0.032875847071409225, "learning_rate": 5.882644348660738e-06, "loss": 0.0012, "step": 122080 }, { "epoch": 0.9984053645173161, "grad_norm": 0.031709522008895874, "learning_rate": 5.881941915146766e-06, "loss": 0.0014, "step": 122090 }, { "epoch": 0.9984871406959153, "grad_norm": 0.009434807114303112, "learning_rate": 5.881239463666921e-06, "loss": 0.0012, "step": 122100 }, { "epoch": 0.9985689168745144, "grad_norm": 0.05948904529213905, "learning_rate": 5.880536994235512e-06, "loss": 0.0013, "step": 122110 }, { "epoch": 0.9986506930531136, "grad_norm": 0.010785634629428387, "learning_rate": 5.87983450686685e-06, "loss": 0.0029, "step": 122120 }, { "epoch": 0.9987324692317128, "grad_norm": 0.01164211705327034, "learning_rate": 5.879132001575242e-06, "loss": 0.0011, "step": 122130 }, { "epoch": 0.9988142454103119, "grad_norm": 0.05314292013645172, "learning_rate": 5.878429478375001e-06, "loss": 0.0007, "step": 122140 }, { "epoch": 0.9988960215889111, "grad_norm": 0.058802444487810135, "learning_rate": 5.877726937280437e-06, "loss": 0.0017, "step": 122150 }, { "epoch": 0.9989777977675103, "grad_norm": 0.05952451750636101, "learning_rate": 5.877024378305862e-06, "loss": 0.0016, "step": 122160 }, { "epoch": 0.9990595739461094, "grad_norm": 0.039284899830818176, "learning_rate": 5.8763218014655875e-06, "loss": 0.0018, "step": 122170 }, { "epoch": 0.9991413501247087, "grad_norm": 0.12952680885791779, "learning_rate": 5.875619206773927e-06, "loss": 0.0029, "step": 122180 }, { "epoch": 0.9992231263033079, "grad_norm": 0.04465900734066963, "learning_rate": 5.87491659424519e-06, "loss": 0.002, "step": 122190 }, { "epoch": 0.9993049024819071, "grad_norm": 0.002696712501347065, "learning_rate": 5.874213963893692e-06, "loss": 0.0008, "step": 122200 }, { "epoch": 0.9993866786605062, "grad_norm": 0.008284921757876873, "learning_rate": 5.873511315733743e-06, "loss": 0.0023, "step": 122210 }, { "epoch": 0.9994684548391054, "grad_norm": 0.05624904856085777, "learning_rate": 5.872808649779661e-06, "loss": 0.0009, "step": 122220 }, { "epoch": 0.9995502310177046, "grad_norm": 0.07555404305458069, "learning_rate": 5.872105966045755e-06, "loss": 0.0014, "step": 122230 }, { "epoch": 0.9996320071963037, "grad_norm": 0.039931777864694595, "learning_rate": 5.871403264546344e-06, "loss": 0.002, "step": 122240 }, { "epoch": 0.9997137833749029, "grad_norm": 0.034880321472883224, "learning_rate": 5.8707005452957375e-06, "loss": 0.0007, "step": 122250 }, { "epoch": 0.9997955595535021, "grad_norm": 0.012509118765592575, "learning_rate": 5.869997808308255e-06, "loss": 0.0008, "step": 122260 }, { "epoch": 0.9998773357321012, "grad_norm": 0.053757891058921814, "learning_rate": 5.86929505359821e-06, "loss": 0.0015, "step": 122270 }, { "epoch": 0.9999591119107004, "grad_norm": 0.07155827432870865, "learning_rate": 5.868592281179918e-06, "loss": 0.0011, "step": 122280 }, { "epoch": 1.0000408880892997, "grad_norm": 0.04841756448149681, "learning_rate": 5.8678894910676935e-06, "loss": 0.001, "step": 122290 }, { "epoch": 1.0001226642678989, "grad_norm": 0.03694113343954086, "learning_rate": 5.867186683275857e-06, "loss": 0.0007, "step": 122300 }, { "epoch": 1.000204440446498, "grad_norm": 0.05382499098777771, "learning_rate": 5.866483857818721e-06, "loss": 0.001, "step": 122310 }, { "epoch": 1.0002862166250972, "grad_norm": 0.007254567928612232, "learning_rate": 5.865781014710606e-06, "loss": 0.0009, "step": 122320 }, { "epoch": 1.0003679928036964, "grad_norm": 0.014406873844563961, "learning_rate": 5.865078153965827e-06, "loss": 0.0008, "step": 122330 }, { "epoch": 1.0004497689822955, "grad_norm": 0.01864759437739849, "learning_rate": 5.8643752755987035e-06, "loss": 0.0008, "step": 122340 }, { "epoch": 1.0005315451608947, "grad_norm": 0.035292770713567734, "learning_rate": 5.863672379623552e-06, "loss": 0.0004, "step": 122350 }, { "epoch": 1.0006133213394939, "grad_norm": 0.0203461442142725, "learning_rate": 5.862969466054694e-06, "loss": 0.0007, "step": 122360 }, { "epoch": 1.000695097518093, "grad_norm": 0.12191912531852722, "learning_rate": 5.862266534906444e-06, "loss": 0.0012, "step": 122370 }, { "epoch": 1.0007768736966922, "grad_norm": 0.0014273332199081779, "learning_rate": 5.861563586193127e-06, "loss": 0.0011, "step": 122380 }, { "epoch": 1.0008586498752914, "grad_norm": 0.023561490699648857, "learning_rate": 5.860860619929057e-06, "loss": 0.001, "step": 122390 }, { "epoch": 1.0009404260538906, "grad_norm": 0.03264275938272476, "learning_rate": 5.860157636128556e-06, "loss": 0.0037, "step": 122400 }, { "epoch": 1.0010222022324897, "grad_norm": 0.03130807355046272, "learning_rate": 5.859454634805946e-06, "loss": 0.0007, "step": 122410 }, { "epoch": 1.001103978411089, "grad_norm": 0.06685163080692291, "learning_rate": 5.858751615975547e-06, "loss": 0.0011, "step": 122420 }, { "epoch": 1.001185754589688, "grad_norm": 0.1126769483089447, "learning_rate": 5.858048579651678e-06, "loss": 0.0014, "step": 122430 }, { "epoch": 1.0012675307682872, "grad_norm": 0.030687425285577774, "learning_rate": 5.857345525848663e-06, "loss": 0.0014, "step": 122440 }, { "epoch": 1.0013493069468864, "grad_norm": 0.001359337824396789, "learning_rate": 5.856642454580822e-06, "loss": 0.0009, "step": 122450 }, { "epoch": 1.0014310831254856, "grad_norm": 0.04388917237520218, "learning_rate": 5.85593936586248e-06, "loss": 0.0016, "step": 122460 }, { "epoch": 1.0015128593040847, "grad_norm": 0.04674634709954262, "learning_rate": 5.855236259707955e-06, "loss": 0.001, "step": 122470 }, { "epoch": 1.001594635482684, "grad_norm": 0.05652175843715668, "learning_rate": 5.854533136131574e-06, "loss": 0.001, "step": 122480 }, { "epoch": 1.001676411661283, "grad_norm": 0.013830563053488731, "learning_rate": 5.853829995147656e-06, "loss": 0.0014, "step": 122490 }, { "epoch": 1.0017581878398822, "grad_norm": 0.07181493937969208, "learning_rate": 5.853126836770529e-06, "loss": 0.0011, "step": 122500 }, { "epoch": 1.0018399640184814, "grad_norm": 0.011612712405622005, "learning_rate": 5.852423661014513e-06, "loss": 0.0012, "step": 122510 }, { "epoch": 1.0019217401970806, "grad_norm": 0.01040930487215519, "learning_rate": 5.851720467893937e-06, "loss": 0.0009, "step": 122520 }, { "epoch": 1.0020035163756797, "grad_norm": 0.03904595598578453, "learning_rate": 5.85101725742312e-06, "loss": 0.0011, "step": 122530 }, { "epoch": 1.002085292554279, "grad_norm": 0.048935480415821075, "learning_rate": 5.850314029616393e-06, "loss": 0.0006, "step": 122540 }, { "epoch": 1.002167068732878, "grad_norm": 0.0669260025024414, "learning_rate": 5.849610784488075e-06, "loss": 0.0012, "step": 122550 }, { "epoch": 1.0022488449114773, "grad_norm": 0.0025125949177891016, "learning_rate": 5.848907522052496e-06, "loss": 0.0016, "step": 122560 }, { "epoch": 1.0023306210900764, "grad_norm": 0.030420534312725067, "learning_rate": 5.848204242323979e-06, "loss": 0.0015, "step": 122570 }, { "epoch": 1.0024123972686756, "grad_norm": 0.046960361301898956, "learning_rate": 5.847500945316854e-06, "loss": 0.0008, "step": 122580 }, { "epoch": 1.0024941734472748, "grad_norm": 0.008876780048012733, "learning_rate": 5.846797631045444e-06, "loss": 0.0011, "step": 122590 }, { "epoch": 1.002575949625874, "grad_norm": 0.054715368896722794, "learning_rate": 5.846094299524082e-06, "loss": 0.0125, "step": 122600 }, { "epoch": 1.002657725804473, "grad_norm": 0.03282884880900383, "learning_rate": 5.845390950767088e-06, "loss": 0.0007, "step": 122610 }, { "epoch": 1.0027395019830723, "grad_norm": 0.026642203330993652, "learning_rate": 5.844687584788794e-06, "loss": 0.0012, "step": 122620 }, { "epoch": 1.0028212781616714, "grad_norm": 0.06117421016097069, "learning_rate": 5.843984201603526e-06, "loss": 0.0014, "step": 122630 }, { "epoch": 1.0029030543402706, "grad_norm": 0.08907849341630936, "learning_rate": 5.843280801225616e-06, "loss": 0.0008, "step": 122640 }, { "epoch": 1.0029848305188698, "grad_norm": 0.04792633280158043, "learning_rate": 5.8425773836693885e-06, "loss": 0.0014, "step": 122650 }, { "epoch": 1.003066606697469, "grad_norm": 0.013125953264534473, "learning_rate": 5.841873948949177e-06, "loss": 0.0008, "step": 122660 }, { "epoch": 1.0031483828760681, "grad_norm": 0.00547027587890625, "learning_rate": 5.841170497079308e-06, "loss": 0.0006, "step": 122670 }, { "epoch": 1.0032301590546673, "grad_norm": 0.05546191707253456, "learning_rate": 5.8404670280741125e-06, "loss": 0.0011, "step": 122680 }, { "epoch": 1.0033119352332664, "grad_norm": 0.02717270329594612, "learning_rate": 5.839763541947919e-06, "loss": 0.0014, "step": 122690 }, { "epoch": 1.0033937114118656, "grad_norm": 0.0986189991235733, "learning_rate": 5.839060038715061e-06, "loss": 0.0024, "step": 122700 }, { "epoch": 1.0034754875904648, "grad_norm": 0.0450422540307045, "learning_rate": 5.838356518389867e-06, "loss": 0.0006, "step": 122710 }, { "epoch": 1.0035572637690642, "grad_norm": 0.06170764937996864, "learning_rate": 5.837652980986671e-06, "loss": 0.0014, "step": 122720 }, { "epoch": 1.0036390399476633, "grad_norm": 0.021152015775442123, "learning_rate": 5.8369494265198016e-06, "loss": 0.0009, "step": 122730 }, { "epoch": 1.0037208161262625, "grad_norm": 0.034272562712430954, "learning_rate": 5.8362458550035916e-06, "loss": 0.0015, "step": 122740 }, { "epoch": 1.0038025923048617, "grad_norm": 0.08019464462995529, "learning_rate": 5.835542266452374e-06, "loss": 0.0013, "step": 122750 }, { "epoch": 1.0038843684834609, "grad_norm": 0.024214711040258408, "learning_rate": 5.834838660880482e-06, "loss": 0.0021, "step": 122760 }, { "epoch": 1.00396614466206, "grad_norm": 0.015517804771661758, "learning_rate": 5.834135038302247e-06, "loss": 0.0014, "step": 122770 }, { "epoch": 1.0040479208406592, "grad_norm": 0.024344054982066154, "learning_rate": 5.833431398732005e-06, "loss": 0.0008, "step": 122780 }, { "epoch": 1.0041296970192584, "grad_norm": 0.02945040725171566, "learning_rate": 5.832727742184086e-06, "loss": 0.0008, "step": 122790 }, { "epoch": 1.0042114731978575, "grad_norm": 0.0024779534433037043, "learning_rate": 5.832024068672827e-06, "loss": 0.0011, "step": 122800 }, { "epoch": 1.0042932493764567, "grad_norm": 0.04596865177154541, "learning_rate": 5.831320378212561e-06, "loss": 0.0012, "step": 122810 }, { "epoch": 1.0043750255550559, "grad_norm": 0.04482979699969292, "learning_rate": 5.830616670817623e-06, "loss": 0.0012, "step": 122820 }, { "epoch": 1.004456801733655, "grad_norm": 0.023521406576037407, "learning_rate": 5.829912946502348e-06, "loss": 0.0006, "step": 122830 }, { "epoch": 1.0045385779122542, "grad_norm": 0.007910707034170628, "learning_rate": 5.829209205281075e-06, "loss": 0.0011, "step": 122840 }, { "epoch": 1.0046203540908534, "grad_norm": 0.03927317634224892, "learning_rate": 5.828505447168134e-06, "loss": 0.0017, "step": 122850 }, { "epoch": 1.0047021302694525, "grad_norm": 0.014682290144264698, "learning_rate": 5.827801672177862e-06, "loss": 0.0009, "step": 122860 }, { "epoch": 1.0047839064480517, "grad_norm": 0.01649435982108116, "learning_rate": 5.827097880324598e-06, "loss": 0.0008, "step": 122870 }, { "epoch": 1.0048656826266509, "grad_norm": 0.024670850485563278, "learning_rate": 5.8263940716226785e-06, "loss": 0.0007, "step": 122880 }, { "epoch": 1.00494745880525, "grad_norm": 0.053342945873737335, "learning_rate": 5.825690246086439e-06, "loss": 0.0013, "step": 122890 }, { "epoch": 1.0050292349838492, "grad_norm": 0.028567766770720482, "learning_rate": 5.8249864037302195e-06, "loss": 0.0011, "step": 122900 }, { "epoch": 1.0051110111624484, "grad_norm": 0.022719314321875572, "learning_rate": 5.824282544568357e-06, "loss": 0.0011, "step": 122910 }, { "epoch": 1.0051927873410476, "grad_norm": 0.0029751798138022423, "learning_rate": 5.823578668615186e-06, "loss": 0.0032, "step": 122920 }, { "epoch": 1.0052745635196467, "grad_norm": 0.013013404794037342, "learning_rate": 5.8228747758850514e-06, "loss": 0.0009, "step": 122930 }, { "epoch": 1.005356339698246, "grad_norm": 0.030893510207533836, "learning_rate": 5.822170866392286e-06, "loss": 0.0007, "step": 122940 }, { "epoch": 1.005438115876845, "grad_norm": 0.16660641133785248, "learning_rate": 5.8214669401512335e-06, "loss": 0.0029, "step": 122950 }, { "epoch": 1.0055198920554442, "grad_norm": 0.10783253610134125, "learning_rate": 5.820762997176232e-06, "loss": 0.0008, "step": 122960 }, { "epoch": 1.0056016682340434, "grad_norm": 0.006674263160675764, "learning_rate": 5.82005903748162e-06, "loss": 0.0021, "step": 122970 }, { "epoch": 1.0056834444126426, "grad_norm": 0.017376597970724106, "learning_rate": 5.819355061081739e-06, "loss": 0.0013, "step": 122980 }, { "epoch": 1.0057652205912417, "grad_norm": 0.12341649830341339, "learning_rate": 5.818651067990929e-06, "loss": 0.0016, "step": 122990 }, { "epoch": 1.005846996769841, "grad_norm": 0.01879141293466091, "learning_rate": 5.817947058223532e-06, "loss": 0.0014, "step": 123000 }, { "epoch": 1.00592877294844, "grad_norm": 0.027886392548680305, "learning_rate": 5.817243031793889e-06, "loss": 0.0009, "step": 123010 }, { "epoch": 1.0060105491270392, "grad_norm": 0.055224813520908356, "learning_rate": 5.8165389887163405e-06, "loss": 0.0015, "step": 123020 }, { "epoch": 1.0060923253056384, "grad_norm": 0.061123382300138474, "learning_rate": 5.81583492900523e-06, "loss": 0.0017, "step": 123030 }, { "epoch": 1.0061741014842376, "grad_norm": 0.06321214884519577, "learning_rate": 5.8151308526748965e-06, "loss": 0.0017, "step": 123040 }, { "epoch": 1.0062558776628368, "grad_norm": 0.042639750987291336, "learning_rate": 5.814426759739687e-06, "loss": 0.0012, "step": 123050 }, { "epoch": 1.006337653841436, "grad_norm": 0.020828820765018463, "learning_rate": 5.813722650213941e-06, "loss": 0.0011, "step": 123060 }, { "epoch": 1.006419430020035, "grad_norm": 0.06511426717042923, "learning_rate": 5.813018524112004e-06, "loss": 0.0017, "step": 123070 }, { "epoch": 1.0065012061986343, "grad_norm": 0.13893146812915802, "learning_rate": 5.81231438144822e-06, "loss": 0.0022, "step": 123080 }, { "epoch": 1.0065829823772334, "grad_norm": 0.04278821870684624, "learning_rate": 5.811610222236931e-06, "loss": 0.0019, "step": 123090 }, { "epoch": 1.0066647585558326, "grad_norm": 0.03906092420220375, "learning_rate": 5.810906046492481e-06, "loss": 0.0008, "step": 123100 }, { "epoch": 1.0067465347344318, "grad_norm": 0.061829593032598495, "learning_rate": 5.810201854229216e-06, "loss": 0.0011, "step": 123110 }, { "epoch": 1.006828310913031, "grad_norm": 0.02440304309129715, "learning_rate": 5.80949764546148e-06, "loss": 0.0018, "step": 123120 }, { "epoch": 1.00691008709163, "grad_norm": 0.035139720886945724, "learning_rate": 5.808793420203621e-06, "loss": 0.0017, "step": 123130 }, { "epoch": 1.0069918632702295, "grad_norm": 0.03312889486551285, "learning_rate": 5.808089178469982e-06, "loss": 0.0006, "step": 123140 }, { "epoch": 1.0070736394488287, "grad_norm": 0.06879404932260513, "learning_rate": 5.80738492027491e-06, "loss": 0.001, "step": 123150 }, { "epoch": 1.0071554156274278, "grad_norm": 0.01541243214160204, "learning_rate": 5.806680645632749e-06, "loss": 0.0018, "step": 123160 }, { "epoch": 1.007237191806027, "grad_norm": 0.09773366898298264, "learning_rate": 5.805976354557849e-06, "loss": 0.0008, "step": 123170 }, { "epoch": 1.0073189679846262, "grad_norm": 0.03603294864296913, "learning_rate": 5.805272047064554e-06, "loss": 0.0008, "step": 123180 }, { "epoch": 1.0074007441632253, "grad_norm": 0.019595636054873466, "learning_rate": 5.804567723167215e-06, "loss": 0.001, "step": 123190 }, { "epoch": 1.0074825203418245, "grad_norm": 0.02145562134683132, "learning_rate": 5.803863382880177e-06, "loss": 0.0011, "step": 123200 }, { "epoch": 1.0075642965204237, "grad_norm": 0.2027330994606018, "learning_rate": 5.803159026217788e-06, "loss": 0.0022, "step": 123210 }, { "epoch": 1.0076460726990228, "grad_norm": 0.020251993089914322, "learning_rate": 5.802454653194396e-06, "loss": 0.0013, "step": 123220 }, { "epoch": 1.007727848877622, "grad_norm": 0.05146496370434761, "learning_rate": 5.801750263824352e-06, "loss": 0.002, "step": 123230 }, { "epoch": 1.0078096250562212, "grad_norm": 0.04964342713356018, "learning_rate": 5.801045858122002e-06, "loss": 0.0008, "step": 123240 }, { "epoch": 1.0078914012348203, "grad_norm": 0.03353013098239899, "learning_rate": 5.8003414361016975e-06, "loss": 0.0011, "step": 123250 }, { "epoch": 1.0079731774134195, "grad_norm": 0.04366566240787506, "learning_rate": 5.799636997777788e-06, "loss": 0.0011, "step": 123260 }, { "epoch": 1.0080549535920187, "grad_norm": 0.019122684374451637, "learning_rate": 5.798932543164622e-06, "loss": 0.0012, "step": 123270 }, { "epoch": 1.0081367297706179, "grad_norm": 0.18412956595420837, "learning_rate": 5.79822807227655e-06, "loss": 0.0028, "step": 123280 }, { "epoch": 1.008218505949217, "grad_norm": 0.0392635315656662, "learning_rate": 5.797523585127924e-06, "loss": 0.0012, "step": 123290 }, { "epoch": 1.0083002821278162, "grad_norm": 0.03724817559123039, "learning_rate": 5.7968190817330945e-06, "loss": 0.0018, "step": 123300 }, { "epoch": 1.0083820583064154, "grad_norm": 0.15196947753429413, "learning_rate": 5.796114562106413e-06, "loss": 0.0012, "step": 123310 }, { "epoch": 1.0084638344850145, "grad_norm": 0.05358791723847389, "learning_rate": 5.795410026262231e-06, "loss": 0.0018, "step": 123320 }, { "epoch": 1.0085456106636137, "grad_norm": 0.020342601463198662, "learning_rate": 5.7947054742149e-06, "loss": 0.0014, "step": 123330 }, { "epoch": 1.0086273868422129, "grad_norm": 0.06777537614107132, "learning_rate": 5.794000905978771e-06, "loss": 0.0012, "step": 123340 }, { "epoch": 1.008709163020812, "grad_norm": 0.033161461353302, "learning_rate": 5.7932963215682005e-06, "loss": 0.0013, "step": 123350 }, { "epoch": 1.0087909391994112, "grad_norm": 0.06705757975578308, "learning_rate": 5.792591720997537e-06, "loss": 0.0016, "step": 123360 }, { "epoch": 1.0088727153780104, "grad_norm": 0.08563007414340973, "learning_rate": 5.791887104281136e-06, "loss": 0.001, "step": 123370 }, { "epoch": 1.0089544915566095, "grad_norm": 0.0369582325220108, "learning_rate": 5.791182471433353e-06, "loss": 0.0009, "step": 123380 }, { "epoch": 1.0090362677352087, "grad_norm": 0.06088869273662567, "learning_rate": 5.790477822468539e-06, "loss": 0.0008, "step": 123390 }, { "epoch": 1.0091180439138079, "grad_norm": 0.02185758762061596, "learning_rate": 5.789773157401049e-06, "loss": 0.0011, "step": 123400 }, { "epoch": 1.009199820092407, "grad_norm": 0.017317995429039, "learning_rate": 5.789068476245238e-06, "loss": 0.0008, "step": 123410 }, { "epoch": 1.0092815962710062, "grad_norm": 0.1584082543849945, "learning_rate": 5.788363779015459e-06, "loss": 0.0016, "step": 123420 }, { "epoch": 1.0093633724496054, "grad_norm": 0.04669629782438278, "learning_rate": 5.787659065726071e-06, "loss": 0.0018, "step": 123430 }, { "epoch": 1.0094451486282046, "grad_norm": 0.001632491359487176, "learning_rate": 5.7869543363914285e-06, "loss": 0.0017, "step": 123440 }, { "epoch": 1.0095269248068037, "grad_norm": 0.05295023322105408, "learning_rate": 5.786249591025885e-06, "loss": 0.0013, "step": 123450 }, { "epoch": 1.009608700985403, "grad_norm": 0.05913177877664566, "learning_rate": 5.785544829643797e-06, "loss": 0.0008, "step": 123460 }, { "epoch": 1.009690477164002, "grad_norm": 0.004313082434237003, "learning_rate": 5.784840052259524e-06, "loss": 0.0011, "step": 123470 }, { "epoch": 1.0097722533426012, "grad_norm": 0.01724051497876644, "learning_rate": 5.78413525888742e-06, "loss": 0.001, "step": 123480 }, { "epoch": 1.0098540295212004, "grad_norm": 0.15800537168979645, "learning_rate": 5.783430449541845e-06, "loss": 0.0012, "step": 123490 }, { "epoch": 1.0099358056997996, "grad_norm": 0.05901914834976196, "learning_rate": 5.782725624237154e-06, "loss": 0.0016, "step": 123500 }, { "epoch": 1.0100175818783987, "grad_norm": 0.030751511454582214, "learning_rate": 5.782020782987707e-06, "loss": 0.0006, "step": 123510 }, { "epoch": 1.010099358056998, "grad_norm": 0.07751365005970001, "learning_rate": 5.78131592580786e-06, "loss": 0.0014, "step": 123520 }, { "epoch": 1.010181134235597, "grad_norm": 0.12760843336582184, "learning_rate": 5.780611052711972e-06, "loss": 0.0014, "step": 123530 }, { "epoch": 1.0102629104141962, "grad_norm": 0.014917737804353237, "learning_rate": 5.779906163714403e-06, "loss": 0.001, "step": 123540 }, { "epoch": 1.0103446865927954, "grad_norm": 0.11748120188713074, "learning_rate": 5.779201258829511e-06, "loss": 0.0011, "step": 123550 }, { "epoch": 1.0104264627713946, "grad_norm": 0.014786464162170887, "learning_rate": 5.778496338071656e-06, "loss": 0.0015, "step": 123560 }, { "epoch": 1.010508238949994, "grad_norm": 0.02411659248173237, "learning_rate": 5.777791401455199e-06, "loss": 0.0009, "step": 123570 }, { "epoch": 1.0105900151285931, "grad_norm": 0.05854354798793793, "learning_rate": 5.777086448994498e-06, "loss": 0.0005, "step": 123580 }, { "epoch": 1.0106717913071923, "grad_norm": 0.008141263388097286, "learning_rate": 5.776381480703914e-06, "loss": 0.0014, "step": 123590 }, { "epoch": 1.0107535674857915, "grad_norm": 0.01743919961154461, "learning_rate": 5.775676496597809e-06, "loss": 0.0006, "step": 123600 }, { "epoch": 1.0108353436643907, "grad_norm": 0.01493933517485857, "learning_rate": 5.7749714966905425e-06, "loss": 0.0013, "step": 123610 }, { "epoch": 1.0109171198429898, "grad_norm": 0.013346445746719837, "learning_rate": 5.774266480996478e-06, "loss": 0.0023, "step": 123620 }, { "epoch": 1.010998896021589, "grad_norm": 0.020474178716540337, "learning_rate": 5.773561449529975e-06, "loss": 0.0011, "step": 123630 }, { "epoch": 1.0110806722001882, "grad_norm": 0.0028326152823865414, "learning_rate": 5.7728564023053955e-06, "loss": 0.0015, "step": 123640 }, { "epoch": 1.0111624483787873, "grad_norm": 0.01305505819618702, "learning_rate": 5.772151339337104e-06, "loss": 0.0014, "step": 123650 }, { "epoch": 1.0112442245573865, "grad_norm": 0.0349353663623333, "learning_rate": 5.771446260639462e-06, "loss": 0.0012, "step": 123660 }, { "epoch": 1.0113260007359857, "grad_norm": 0.0105005307123065, "learning_rate": 5.770741166226832e-06, "loss": 0.0008, "step": 123670 }, { "epoch": 1.0114077769145848, "grad_norm": 0.08771423995494843, "learning_rate": 5.77003605611358e-06, "loss": 0.0014, "step": 123680 }, { "epoch": 1.011489553093184, "grad_norm": 0.029011553153395653, "learning_rate": 5.7693309303140656e-06, "loss": 0.001, "step": 123690 }, { "epoch": 1.0115713292717832, "grad_norm": 0.12440857291221619, "learning_rate": 5.768625788842655e-06, "loss": 0.0013, "step": 123700 }, { "epoch": 1.0116531054503823, "grad_norm": 0.002852312522009015, "learning_rate": 5.767920631713713e-06, "loss": 0.0013, "step": 123710 }, { "epoch": 1.0117348816289815, "grad_norm": 0.035870496183633804, "learning_rate": 5.767215458941602e-06, "loss": 0.0015, "step": 123720 }, { "epoch": 1.0118166578075807, "grad_norm": 0.053409453481435776, "learning_rate": 5.766510270540691e-06, "loss": 0.0005, "step": 123730 }, { "epoch": 1.0118984339861798, "grad_norm": 0.029599206522107124, "learning_rate": 5.76580506652534e-06, "loss": 0.0011, "step": 123740 }, { "epoch": 1.011980210164779, "grad_norm": 0.030882710590958595, "learning_rate": 5.765099846909921e-06, "loss": 0.0018, "step": 123750 }, { "epoch": 1.0120619863433782, "grad_norm": 0.01025256048887968, "learning_rate": 5.764394611708792e-06, "loss": 0.0008, "step": 123760 }, { "epoch": 1.0121437625219774, "grad_norm": 0.05523933097720146, "learning_rate": 5.763689360936326e-06, "loss": 0.0014, "step": 123770 }, { "epoch": 1.0122255387005765, "grad_norm": 0.022627295926213264, "learning_rate": 5.762984094606886e-06, "loss": 0.0008, "step": 123780 }, { "epoch": 1.0123073148791757, "grad_norm": 0.044475194066762924, "learning_rate": 5.7622788127348406e-06, "loss": 0.0011, "step": 123790 }, { "epoch": 1.0123890910577749, "grad_norm": 0.10502253472805023, "learning_rate": 5.761573515334555e-06, "loss": 0.0016, "step": 123800 }, { "epoch": 1.012470867236374, "grad_norm": 0.004522641655057669, "learning_rate": 5.7608682024204e-06, "loss": 0.0013, "step": 123810 }, { "epoch": 1.0125526434149732, "grad_norm": 0.04033064469695091, "learning_rate": 5.760162874006738e-06, "loss": 0.001, "step": 123820 }, { "epoch": 1.0126344195935724, "grad_norm": 0.14081576466560364, "learning_rate": 5.759457530107944e-06, "loss": 0.0013, "step": 123830 }, { "epoch": 1.0127161957721715, "grad_norm": 0.01683662086725235, "learning_rate": 5.7587521707383795e-06, "loss": 0.0013, "step": 123840 }, { "epoch": 1.0127979719507707, "grad_norm": 0.09907377511262894, "learning_rate": 5.758046795912417e-06, "loss": 0.0009, "step": 123850 }, { "epoch": 1.0128797481293699, "grad_norm": 0.0460868701338768, "learning_rate": 5.757341405644428e-06, "loss": 0.0014, "step": 123860 }, { "epoch": 1.012961524307969, "grad_norm": 0.016413850709795952, "learning_rate": 5.756635999948777e-06, "loss": 0.0018, "step": 123870 }, { "epoch": 1.0130433004865682, "grad_norm": 0.032543592154979706, "learning_rate": 5.755930578839835e-06, "loss": 0.0011, "step": 123880 }, { "epoch": 1.0131250766651674, "grad_norm": 0.0030923280864953995, "learning_rate": 5.755225142331974e-06, "loss": 0.0011, "step": 123890 }, { "epoch": 1.0132068528437665, "grad_norm": 0.026920046657323837, "learning_rate": 5.754519690439562e-06, "loss": 0.0024, "step": 123900 }, { "epoch": 1.0132886290223657, "grad_norm": 0.14379242062568665, "learning_rate": 5.75381422317697e-06, "loss": 0.0012, "step": 123910 }, { "epoch": 1.0133704052009649, "grad_norm": 0.0051137846894562244, "learning_rate": 5.753108740558572e-06, "loss": 0.0008, "step": 123920 }, { "epoch": 1.013452181379564, "grad_norm": 0.0009699783404357731, "learning_rate": 5.752403242598735e-06, "loss": 0.0008, "step": 123930 }, { "epoch": 1.0135339575581632, "grad_norm": 0.05649811774492264, "learning_rate": 5.751697729311832e-06, "loss": 0.0013, "step": 123940 }, { "epoch": 1.0136157337367624, "grad_norm": 0.0935535579919815, "learning_rate": 5.750992200712236e-06, "loss": 0.0015, "step": 123950 }, { "epoch": 1.0136975099153616, "grad_norm": 0.010484801605343819, "learning_rate": 5.750286656814318e-06, "loss": 0.0006, "step": 123960 }, { "epoch": 1.0137792860939607, "grad_norm": 0.06034578010439873, "learning_rate": 5.749581097632452e-06, "loss": 0.001, "step": 123970 }, { "epoch": 1.01386106227256, "grad_norm": 0.19931933283805847, "learning_rate": 5.748875523181009e-06, "loss": 0.0026, "step": 123980 }, { "epoch": 1.013942838451159, "grad_norm": 0.030099518597126007, "learning_rate": 5.748169933474363e-06, "loss": 0.0008, "step": 123990 }, { "epoch": 1.0140246146297585, "grad_norm": 0.0067597986198961735, "learning_rate": 5.7474643285268865e-06, "loss": 0.0011, "step": 124000 }, { "epoch": 1.0141063908083576, "grad_norm": 0.03200351446866989, "learning_rate": 5.7467587083529556e-06, "loss": 0.0008, "step": 124010 }, { "epoch": 1.0141881669869568, "grad_norm": 0.1743510514497757, "learning_rate": 5.7460530729669406e-06, "loss": 0.0019, "step": 124020 }, { "epoch": 1.014269943165556, "grad_norm": 0.10264082252979279, "learning_rate": 5.745347422383221e-06, "loss": 0.001, "step": 124030 }, { "epoch": 1.0143517193441551, "grad_norm": 0.067496158182621, "learning_rate": 5.744641756616166e-06, "loss": 0.0009, "step": 124040 }, { "epoch": 1.0144334955227543, "grad_norm": 0.017682217061519623, "learning_rate": 5.7439360756801555e-06, "loss": 0.0013, "step": 124050 }, { "epoch": 1.0145152717013535, "grad_norm": 0.034935660660266876, "learning_rate": 5.743230379589559e-06, "loss": 0.001, "step": 124060 }, { "epoch": 1.0145970478799526, "grad_norm": 0.04378359764814377, "learning_rate": 5.742524668358759e-06, "loss": 0.0016, "step": 124070 }, { "epoch": 1.0146788240585518, "grad_norm": 0.0015583960339426994, "learning_rate": 5.741818942002125e-06, "loss": 0.0005, "step": 124080 }, { "epoch": 1.014760600237151, "grad_norm": 0.023205460980534554, "learning_rate": 5.741113200534039e-06, "loss": 0.0008, "step": 124090 }, { "epoch": 1.0148423764157501, "grad_norm": 0.02861558087170124, "learning_rate": 5.740407443968874e-06, "loss": 0.0007, "step": 124100 }, { "epoch": 1.0149241525943493, "grad_norm": 0.018538348376750946, "learning_rate": 5.739701672321007e-06, "loss": 0.0008, "step": 124110 }, { "epoch": 1.0150059287729485, "grad_norm": 0.026810208335518837, "learning_rate": 5.738995885604814e-06, "loss": 0.001, "step": 124120 }, { "epoch": 1.0150877049515477, "grad_norm": 0.05731307342648506, "learning_rate": 5.738290083834677e-06, "loss": 0.0017, "step": 124130 }, { "epoch": 1.0151694811301468, "grad_norm": 0.005054228939116001, "learning_rate": 5.737584267024969e-06, "loss": 0.0007, "step": 124140 }, { "epoch": 1.015251257308746, "grad_norm": 0.03543688729405403, "learning_rate": 5.736878435190071e-06, "loss": 0.0008, "step": 124150 }, { "epoch": 1.0153330334873452, "grad_norm": 0.047896601259708405, "learning_rate": 5.73617258834436e-06, "loss": 0.0011, "step": 124160 }, { "epoch": 1.0154148096659443, "grad_norm": 0.013473578728735447, "learning_rate": 5.735466726502215e-06, "loss": 0.0016, "step": 124170 }, { "epoch": 1.0154965858445435, "grad_norm": 0.017258545383810997, "learning_rate": 5.734760849678015e-06, "loss": 0.0012, "step": 124180 }, { "epoch": 1.0155783620231427, "grad_norm": 0.03428949788212776, "learning_rate": 5.7340549578861395e-06, "loss": 0.0014, "step": 124190 }, { "epoch": 1.0156601382017418, "grad_norm": 0.05065057426691055, "learning_rate": 5.733349051140967e-06, "loss": 0.0032, "step": 124200 }, { "epoch": 1.015741914380341, "grad_norm": 0.020032459869980812, "learning_rate": 5.732643129456879e-06, "loss": 0.0013, "step": 124210 }, { "epoch": 1.0158236905589402, "grad_norm": 0.008495158515870571, "learning_rate": 5.731937192848255e-06, "loss": 0.0007, "step": 124220 }, { "epoch": 1.0159054667375393, "grad_norm": 0.02844536490738392, "learning_rate": 5.731231241329474e-06, "loss": 0.0011, "step": 124230 }, { "epoch": 1.0159872429161385, "grad_norm": 0.002102242549881339, "learning_rate": 5.73052527491492e-06, "loss": 0.001, "step": 124240 }, { "epoch": 1.0160690190947377, "grad_norm": 0.05527645722031593, "learning_rate": 5.729819293618971e-06, "loss": 0.0018, "step": 124250 }, { "epoch": 1.0161507952733368, "grad_norm": 0.0352231003344059, "learning_rate": 5.7291132974560105e-06, "loss": 0.001, "step": 124260 }, { "epoch": 1.016232571451936, "grad_norm": 0.051676616072654724, "learning_rate": 5.7284072864404195e-06, "loss": 0.0008, "step": 124270 }, { "epoch": 1.0163143476305352, "grad_norm": 0.0038601302076131105, "learning_rate": 5.72770126058658e-06, "loss": 0.0011, "step": 124280 }, { "epoch": 1.0163961238091344, "grad_norm": 0.028206877410411835, "learning_rate": 5.726995219908874e-06, "loss": 0.0013, "step": 124290 }, { "epoch": 1.0164778999877335, "grad_norm": 0.018916338682174683, "learning_rate": 5.726289164421686e-06, "loss": 0.0006, "step": 124300 }, { "epoch": 1.0165596761663327, "grad_norm": 0.02001240663230419, "learning_rate": 5.725583094139395e-06, "loss": 0.0011, "step": 124310 }, { "epoch": 1.0166414523449319, "grad_norm": 0.0209850762039423, "learning_rate": 5.724877009076388e-06, "loss": 0.001, "step": 124320 }, { "epoch": 1.016723228523531, "grad_norm": 0.02516740933060646, "learning_rate": 5.724170909247045e-06, "loss": 0.0011, "step": 124330 }, { "epoch": 1.0168050047021302, "grad_norm": 0.019312093034386635, "learning_rate": 5.7234647946657555e-06, "loss": 0.0009, "step": 124340 }, { "epoch": 1.0168867808807294, "grad_norm": 0.04081517457962036, "learning_rate": 5.722758665346898e-06, "loss": 0.0009, "step": 124350 }, { "epoch": 1.0169685570593285, "grad_norm": 0.025496583431959152, "learning_rate": 5.72205252130486e-06, "loss": 0.0007, "step": 124360 }, { "epoch": 1.0170503332379277, "grad_norm": 0.0398341603577137, "learning_rate": 5.721346362554023e-06, "loss": 0.0009, "step": 124370 }, { "epoch": 1.0171321094165269, "grad_norm": 0.005067347548902035, "learning_rate": 5.720640189108776e-06, "loss": 0.0005, "step": 124380 }, { "epoch": 1.017213885595126, "grad_norm": 0.05906152352690697, "learning_rate": 5.7199340009835016e-06, "loss": 0.0012, "step": 124390 }, { "epoch": 1.0172956617737252, "grad_norm": 0.12749086320400238, "learning_rate": 5.719227798192587e-06, "loss": 0.0013, "step": 124400 }, { "epoch": 1.0173774379523244, "grad_norm": 0.015227795578539371, "learning_rate": 5.718521580750418e-06, "loss": 0.0011, "step": 124410 }, { "epoch": 1.0174592141309238, "grad_norm": 0.007289292756468058, "learning_rate": 5.71781534867138e-06, "loss": 0.0014, "step": 124420 }, { "epoch": 1.017540990309523, "grad_norm": 0.10084835439920425, "learning_rate": 5.71710910196986e-06, "loss": 0.0013, "step": 124430 }, { "epoch": 1.017622766488122, "grad_norm": 0.018565837293863297, "learning_rate": 5.7164028406602446e-06, "loss": 0.0008, "step": 124440 }, { "epoch": 1.0177045426667213, "grad_norm": 0.007102593779563904, "learning_rate": 5.7156965647569205e-06, "loss": 0.0011, "step": 124450 }, { "epoch": 1.0177863188453204, "grad_norm": 0.015773935243487358, "learning_rate": 5.7149902742742765e-06, "loss": 0.0013, "step": 124460 }, { "epoch": 1.0178680950239196, "grad_norm": 0.03260080888867378, "learning_rate": 5.714283969226699e-06, "loss": 0.0007, "step": 124470 }, { "epoch": 1.0179498712025188, "grad_norm": 0.05349788814783096, "learning_rate": 5.713577649628577e-06, "loss": 0.0033, "step": 124480 }, { "epoch": 1.018031647381118, "grad_norm": 0.04628996551036835, "learning_rate": 5.712871315494297e-06, "loss": 0.0012, "step": 124490 }, { "epoch": 1.0181134235597171, "grad_norm": 0.0942297875881195, "learning_rate": 5.7121649668382495e-06, "loss": 0.0011, "step": 124500 }, { "epoch": 1.0181951997383163, "grad_norm": 0.07203572988510132, "learning_rate": 5.711458603674822e-06, "loss": 0.0012, "step": 124510 }, { "epoch": 1.0182769759169155, "grad_norm": 0.061972856521606445, "learning_rate": 5.710752226018406e-06, "loss": 0.0022, "step": 124520 }, { "epoch": 1.0183587520955146, "grad_norm": 0.022544734179973602, "learning_rate": 5.7100458338833885e-06, "loss": 0.0009, "step": 124530 }, { "epoch": 1.0184405282741138, "grad_norm": 0.02215774729847908, "learning_rate": 5.7093394272841614e-06, "loss": 0.0007, "step": 124540 }, { "epoch": 1.018522304452713, "grad_norm": 0.025250619277358055, "learning_rate": 5.708633006235111e-06, "loss": 0.0083, "step": 124550 }, { "epoch": 1.0186040806313121, "grad_norm": 0.07332638651132584, "learning_rate": 5.707926570750633e-06, "loss": 0.0016, "step": 124560 }, { "epoch": 1.0186858568099113, "grad_norm": 0.031185196712613106, "learning_rate": 5.707220120845114e-06, "loss": 0.0004, "step": 124570 }, { "epoch": 1.0187676329885105, "grad_norm": 0.10188726335763931, "learning_rate": 5.706513656532946e-06, "loss": 0.0013, "step": 124580 }, { "epoch": 1.0188494091671096, "grad_norm": 0.05006551742553711, "learning_rate": 5.705807177828522e-06, "loss": 0.0017, "step": 124590 }, { "epoch": 1.0189311853457088, "grad_norm": 0.03240600973367691, "learning_rate": 5.705100684746231e-06, "loss": 0.0018, "step": 124600 }, { "epoch": 1.019012961524308, "grad_norm": 0.030996810644865036, "learning_rate": 5.704394177300465e-06, "loss": 0.001, "step": 124610 }, { "epoch": 1.0190947377029071, "grad_norm": 0.029582874849438667, "learning_rate": 5.7036876555056184e-06, "loss": 0.001, "step": 124620 }, { "epoch": 1.0191765138815063, "grad_norm": 0.04015965759754181, "learning_rate": 5.702981119376082e-06, "loss": 0.001, "step": 124630 }, { "epoch": 1.0192582900601055, "grad_norm": 0.0227054413408041, "learning_rate": 5.702274568926248e-06, "loss": 0.0008, "step": 124640 }, { "epoch": 1.0193400662387047, "grad_norm": 0.06380504369735718, "learning_rate": 5.701568004170512e-06, "loss": 0.0011, "step": 124650 }, { "epoch": 1.0194218424173038, "grad_norm": 0.029362710192799568, "learning_rate": 5.7008614251232645e-06, "loss": 0.0009, "step": 124660 }, { "epoch": 1.019503618595903, "grad_norm": 0.04888751730322838, "learning_rate": 5.7001548317989e-06, "loss": 0.0008, "step": 124670 }, { "epoch": 1.0195853947745022, "grad_norm": 0.06801245361566544, "learning_rate": 5.699448224211812e-06, "loss": 0.0025, "step": 124680 }, { "epoch": 1.0196671709531013, "grad_norm": 0.03592488169670105, "learning_rate": 5.698741602376395e-06, "loss": 0.0014, "step": 124690 }, { "epoch": 1.0197489471317005, "grad_norm": 0.054305098950862885, "learning_rate": 5.698034966307045e-06, "loss": 0.0013, "step": 124700 }, { "epoch": 1.0198307233102997, "grad_norm": 0.03731228783726692, "learning_rate": 5.697328316018154e-06, "loss": 0.0024, "step": 124710 }, { "epoch": 1.0199124994888988, "grad_norm": 0.028616704046726227, "learning_rate": 5.696621651524119e-06, "loss": 0.0015, "step": 124720 }, { "epoch": 1.019994275667498, "grad_norm": 0.12551327049732208, "learning_rate": 5.695914972839334e-06, "loss": 0.0014, "step": 124730 }, { "epoch": 1.0200760518460972, "grad_norm": 0.04173295944929123, "learning_rate": 5.695208279978197e-06, "loss": 0.001, "step": 124740 }, { "epoch": 1.0201578280246963, "grad_norm": 0.019421804696321487, "learning_rate": 5.6945015729551004e-06, "loss": 0.0021, "step": 124750 }, { "epoch": 1.0202396042032955, "grad_norm": 0.029035087674856186, "learning_rate": 5.693794851784444e-06, "loss": 0.0006, "step": 124760 }, { "epoch": 1.0203213803818947, "grad_norm": 0.13862425088882446, "learning_rate": 5.69308811648062e-06, "loss": 0.0011, "step": 124770 }, { "epoch": 1.0204031565604939, "grad_norm": 0.008488851599395275, "learning_rate": 5.692381367058031e-06, "loss": 0.0007, "step": 124780 }, { "epoch": 1.020484932739093, "grad_norm": 0.005697760730981827, "learning_rate": 5.691674603531068e-06, "loss": 0.0013, "step": 124790 }, { "epoch": 1.0205667089176922, "grad_norm": 0.05774443596601486, "learning_rate": 5.690967825914132e-06, "loss": 0.0006, "step": 124800 }, { "epoch": 1.0206484850962914, "grad_norm": 0.0409109890460968, "learning_rate": 5.690261034221619e-06, "loss": 0.0011, "step": 124810 }, { "epoch": 1.0207302612748905, "grad_norm": 0.04185699298977852, "learning_rate": 5.68955422846793e-06, "loss": 0.0018, "step": 124820 }, { "epoch": 1.0208120374534897, "grad_norm": 0.023932091891765594, "learning_rate": 5.688847408667459e-06, "loss": 0.0009, "step": 124830 }, { "epoch": 1.0208938136320889, "grad_norm": 0.06015309691429138, "learning_rate": 5.688140574834606e-06, "loss": 0.0012, "step": 124840 }, { "epoch": 1.0209755898106883, "grad_norm": 0.035007841885089874, "learning_rate": 5.687433726983771e-06, "loss": 0.0013, "step": 124850 }, { "epoch": 1.0210573659892874, "grad_norm": 0.01957426406443119, "learning_rate": 5.6867268651293526e-06, "loss": 0.001, "step": 124860 }, { "epoch": 1.0211391421678866, "grad_norm": 0.05642261728644371, "learning_rate": 5.686019989285748e-06, "loss": 0.0008, "step": 124870 }, { "epoch": 1.0212209183464858, "grad_norm": 0.03484335169196129, "learning_rate": 5.68531309946736e-06, "loss": 0.0009, "step": 124880 }, { "epoch": 1.021302694525085, "grad_norm": 0.05260748043656349, "learning_rate": 5.684606195688588e-06, "loss": 0.0012, "step": 124890 }, { "epoch": 1.021384470703684, "grad_norm": 0.033816706389188766, "learning_rate": 5.68389927796383e-06, "loss": 0.0026, "step": 124900 }, { "epoch": 1.0214662468822833, "grad_norm": 0.019426662474870682, "learning_rate": 5.683192346307488e-06, "loss": 0.0008, "step": 124910 }, { "epoch": 1.0215480230608824, "grad_norm": 0.10439766198396683, "learning_rate": 5.682485400733962e-06, "loss": 0.0011, "step": 124920 }, { "epoch": 1.0216297992394816, "grad_norm": 0.06284686177968979, "learning_rate": 5.6817784412576534e-06, "loss": 0.0026, "step": 124930 }, { "epoch": 1.0217115754180808, "grad_norm": 0.0068252249620854855, "learning_rate": 5.6810714678929655e-06, "loss": 0.0006, "step": 124940 }, { "epoch": 1.02179335159668, "grad_norm": 0.006873533129692078, "learning_rate": 5.680364480654298e-06, "loss": 0.0026, "step": 124950 }, { "epoch": 1.0218751277752791, "grad_norm": 0.06596728414297104, "learning_rate": 5.679657479556053e-06, "loss": 0.0008, "step": 124960 }, { "epoch": 1.0219569039538783, "grad_norm": 0.038833457976579666, "learning_rate": 5.678950464612631e-06, "loss": 0.0006, "step": 124970 }, { "epoch": 1.0220386801324775, "grad_norm": 0.05378013849258423, "learning_rate": 5.6782434358384375e-06, "loss": 0.0018, "step": 124980 }, { "epoch": 1.0221204563110766, "grad_norm": 0.03958739712834358, "learning_rate": 5.677536393247874e-06, "loss": 0.0008, "step": 124990 }, { "epoch": 1.0222022324896758, "grad_norm": 0.04257408156991005, "learning_rate": 5.6768293368553425e-06, "loss": 0.0009, "step": 125000 }, { "epoch": 1.022284008668275, "grad_norm": 0.050287388265132904, "learning_rate": 5.676122266675248e-06, "loss": 0.001, "step": 125010 }, { "epoch": 1.0223657848468741, "grad_norm": 0.006253495346754789, "learning_rate": 5.675415182721994e-06, "loss": 0.0017, "step": 125020 }, { "epoch": 1.0224475610254733, "grad_norm": 0.01256078016012907, "learning_rate": 5.674708085009981e-06, "loss": 0.0023, "step": 125030 }, { "epoch": 1.0225293372040725, "grad_norm": 0.04774273931980133, "learning_rate": 5.6740009735536185e-06, "loss": 0.0009, "step": 125040 }, { "epoch": 1.0226111133826716, "grad_norm": 0.06543926149606705, "learning_rate": 5.6732938483673075e-06, "loss": 0.0014, "step": 125050 }, { "epoch": 1.0226928895612708, "grad_norm": 0.002584409900009632, "learning_rate": 5.672586709465453e-06, "loss": 0.0011, "step": 125060 }, { "epoch": 1.02277466573987, "grad_norm": 0.1290348768234253, "learning_rate": 5.6718795568624615e-06, "loss": 0.0013, "step": 125070 }, { "epoch": 1.0228564419184691, "grad_norm": 0.1381874829530716, "learning_rate": 5.6711723905727374e-06, "loss": 0.001, "step": 125080 }, { "epoch": 1.0229382180970683, "grad_norm": 0.13112924993038177, "learning_rate": 5.670465210610685e-06, "loss": 0.0014, "step": 125090 }, { "epoch": 1.0230199942756675, "grad_norm": 0.052616264671087265, "learning_rate": 5.669758016990711e-06, "loss": 0.0006, "step": 125100 }, { "epoch": 1.0231017704542666, "grad_norm": 0.005173565354198217, "learning_rate": 5.669050809727224e-06, "loss": 0.0008, "step": 125110 }, { "epoch": 1.0231835466328658, "grad_norm": 0.11920955032110214, "learning_rate": 5.668343588834626e-06, "loss": 0.0023, "step": 125120 }, { "epoch": 1.023265322811465, "grad_norm": 0.05153743550181389, "learning_rate": 5.667636354327328e-06, "loss": 0.0009, "step": 125130 }, { "epoch": 1.0233470989900642, "grad_norm": 0.03147711977362633, "learning_rate": 5.666929106219734e-06, "loss": 0.0012, "step": 125140 }, { "epoch": 1.0234288751686633, "grad_norm": 0.030087795108556747, "learning_rate": 5.666221844526251e-06, "loss": 0.0009, "step": 125150 }, { "epoch": 1.0235106513472625, "grad_norm": 0.014370533637702465, "learning_rate": 5.665514569261288e-06, "loss": 0.0012, "step": 125160 }, { "epoch": 1.0235924275258617, "grad_norm": 0.10270120203495026, "learning_rate": 5.664807280439251e-06, "loss": 0.001, "step": 125170 }, { "epoch": 1.0236742037044608, "grad_norm": 0.046069592237472534, "learning_rate": 5.664099978074551e-06, "loss": 0.0025, "step": 125180 }, { "epoch": 1.02375597988306, "grad_norm": 0.06629961729049683, "learning_rate": 5.663392662181595e-06, "loss": 0.0014, "step": 125190 }, { "epoch": 1.0238377560616592, "grad_norm": 0.0008248239173553884, "learning_rate": 5.662685332774791e-06, "loss": 0.0009, "step": 125200 }, { "epoch": 1.0239195322402583, "grad_norm": 0.12579132616519928, "learning_rate": 5.661977989868547e-06, "loss": 0.0026, "step": 125210 }, { "epoch": 1.0240013084188575, "grad_norm": 0.03547243773937225, "learning_rate": 5.661270633477273e-06, "loss": 0.001, "step": 125220 }, { "epoch": 1.0240830845974567, "grad_norm": 0.02749304659664631, "learning_rate": 5.6605632636153795e-06, "loss": 0.0012, "step": 125230 }, { "epoch": 1.0241648607760558, "grad_norm": 0.026135019958019257, "learning_rate": 5.659855880297276e-06, "loss": 0.0011, "step": 125240 }, { "epoch": 1.024246636954655, "grad_norm": 0.01348991971462965, "learning_rate": 5.659148483537371e-06, "loss": 0.0011, "step": 125250 }, { "epoch": 1.0243284131332542, "grad_norm": 0.06975825875997543, "learning_rate": 5.658441073350076e-06, "loss": 0.001, "step": 125260 }, { "epoch": 1.0244101893118533, "grad_norm": 0.009841199964284897, "learning_rate": 5.6577336497498e-06, "loss": 0.0015, "step": 125270 }, { "epoch": 1.0244919654904527, "grad_norm": 0.16369062662124634, "learning_rate": 5.6570262127509566e-06, "loss": 0.0013, "step": 125280 }, { "epoch": 1.024573741669052, "grad_norm": 0.02679325081408024, "learning_rate": 5.656318762367954e-06, "loss": 0.0014, "step": 125290 }, { "epoch": 1.024655517847651, "grad_norm": 0.29195624589920044, "learning_rate": 5.655611298615206e-06, "loss": 0.0006, "step": 125300 }, { "epoch": 1.0247372940262502, "grad_norm": 0.03772886097431183, "learning_rate": 5.654903821507123e-06, "loss": 0.0013, "step": 125310 }, { "epoch": 1.0248190702048494, "grad_norm": 0.002172197215259075, "learning_rate": 5.654196331058116e-06, "loss": 0.0012, "step": 125320 }, { "epoch": 1.0249008463834486, "grad_norm": 0.00181483943015337, "learning_rate": 5.653488827282597e-06, "loss": 0.001, "step": 125330 }, { "epoch": 1.0249826225620478, "grad_norm": 0.026751404628157616, "learning_rate": 5.65278131019498e-06, "loss": 0.001, "step": 125340 }, { "epoch": 1.025064398740647, "grad_norm": 0.047404393553733826, "learning_rate": 5.652073779809677e-06, "loss": 0.0008, "step": 125350 }, { "epoch": 1.025146174919246, "grad_norm": 0.10432659834623337, "learning_rate": 5.651366236141103e-06, "loss": 0.0012, "step": 125360 }, { "epoch": 1.0252279510978453, "grad_norm": 0.1083826795220375, "learning_rate": 5.650658679203668e-06, "loss": 0.0011, "step": 125370 }, { "epoch": 1.0253097272764444, "grad_norm": 0.05342972278594971, "learning_rate": 5.649951109011786e-06, "loss": 0.0017, "step": 125380 }, { "epoch": 1.0253915034550436, "grad_norm": 0.03385861590504646, "learning_rate": 5.649243525579873e-06, "loss": 0.0009, "step": 125390 }, { "epoch": 1.0254732796336428, "grad_norm": 0.11766131967306137, "learning_rate": 5.648535928922341e-06, "loss": 0.0009, "step": 125400 }, { "epoch": 1.025555055812242, "grad_norm": 0.015841389074921608, "learning_rate": 5.647828319053603e-06, "loss": 0.0014, "step": 125410 }, { "epoch": 1.025636831990841, "grad_norm": 0.02873433753848076, "learning_rate": 5.647120695988079e-06, "loss": 0.0016, "step": 125420 }, { "epoch": 1.0257186081694403, "grad_norm": 0.03470964729785919, "learning_rate": 5.646413059740179e-06, "loss": 0.0018, "step": 125430 }, { "epoch": 1.0258003843480394, "grad_norm": 0.0739554911851883, "learning_rate": 5.6457054103243195e-06, "loss": 0.0015, "step": 125440 }, { "epoch": 1.0258821605266386, "grad_norm": 0.12340732663869858, "learning_rate": 5.644997747754915e-06, "loss": 0.0009, "step": 125450 }, { "epoch": 1.0259639367052378, "grad_norm": 0.09037754684686661, "learning_rate": 5.644290072046383e-06, "loss": 0.0015, "step": 125460 }, { "epoch": 1.026045712883837, "grad_norm": 0.05557527765631676, "learning_rate": 5.643582383213139e-06, "loss": 0.0012, "step": 125470 }, { "epoch": 1.0261274890624361, "grad_norm": 0.05836515873670578, "learning_rate": 5.642874681269598e-06, "loss": 0.002, "step": 125480 }, { "epoch": 1.0262092652410353, "grad_norm": 0.04799019917845726, "learning_rate": 5.6421669662301784e-06, "loss": 0.0012, "step": 125490 }, { "epoch": 1.0262910414196345, "grad_norm": 0.1572624295949936, "learning_rate": 5.641459238109295e-06, "loss": 0.0007, "step": 125500 }, { "epoch": 1.0263728175982336, "grad_norm": 0.03839457780122757, "learning_rate": 5.640751496921364e-06, "loss": 0.0009, "step": 125510 }, { "epoch": 1.0264545937768328, "grad_norm": 0.04857579618692398, "learning_rate": 5.640043742680805e-06, "loss": 0.0006, "step": 125520 }, { "epoch": 1.026536369955432, "grad_norm": 0.04247669875621796, "learning_rate": 5.639335975402035e-06, "loss": 0.0009, "step": 125530 }, { "epoch": 1.0266181461340311, "grad_norm": 0.10487481206655502, "learning_rate": 5.638628195099472e-06, "loss": 0.0015, "step": 125540 }, { "epoch": 1.0266999223126303, "grad_norm": 0.018646329641342163, "learning_rate": 5.637920401787534e-06, "loss": 0.0027, "step": 125550 }, { "epoch": 1.0267816984912295, "grad_norm": 0.005943621508777142, "learning_rate": 5.637212595480638e-06, "loss": 0.0015, "step": 125560 }, { "epoch": 1.0268634746698286, "grad_norm": 0.04555297642946243, "learning_rate": 5.636504776193204e-06, "loss": 0.0008, "step": 125570 }, { "epoch": 1.0269452508484278, "grad_norm": 0.05403684824705124, "learning_rate": 5.635796943939648e-06, "loss": 0.0009, "step": 125580 }, { "epoch": 1.027027027027027, "grad_norm": 0.03855002671480179, "learning_rate": 5.635089098734394e-06, "loss": 0.001, "step": 125590 }, { "epoch": 1.0271088032056261, "grad_norm": 0.027782881632447243, "learning_rate": 5.634381240591859e-06, "loss": 0.0012, "step": 125600 }, { "epoch": 1.0271905793842253, "grad_norm": 0.04353969916701317, "learning_rate": 5.633673369526461e-06, "loss": 0.0032, "step": 125610 }, { "epoch": 1.0272723555628245, "grad_norm": 0.045216821134090424, "learning_rate": 5.632965485552623e-06, "loss": 0.0011, "step": 125620 }, { "epoch": 1.0273541317414236, "grad_norm": 0.008538107387721539, "learning_rate": 5.632257588684762e-06, "loss": 0.0016, "step": 125630 }, { "epoch": 1.0274359079200228, "grad_norm": 0.05735768750309944, "learning_rate": 5.631549678937301e-06, "loss": 0.0013, "step": 125640 }, { "epoch": 1.027517684098622, "grad_norm": 0.05939050763845444, "learning_rate": 5.630841756324657e-06, "loss": 0.0011, "step": 125650 }, { "epoch": 1.0275994602772212, "grad_norm": 0.03126923739910126, "learning_rate": 5.630133820861258e-06, "loss": 0.0013, "step": 125660 }, { "epoch": 1.0276812364558203, "grad_norm": 0.08067057281732559, "learning_rate": 5.629425872561518e-06, "loss": 0.0014, "step": 125670 }, { "epoch": 1.0277630126344195, "grad_norm": 0.17030516266822815, "learning_rate": 5.628717911439863e-06, "loss": 0.0008, "step": 125680 }, { "epoch": 1.0278447888130187, "grad_norm": 0.059771012514829636, "learning_rate": 5.628009937510711e-06, "loss": 0.0012, "step": 125690 }, { "epoch": 1.0279265649916178, "grad_norm": 0.016189614310860634, "learning_rate": 5.6273019507884885e-06, "loss": 0.0017, "step": 125700 }, { "epoch": 1.0280083411702172, "grad_norm": 0.04566016048192978, "learning_rate": 5.6265939512876145e-06, "loss": 0.0008, "step": 125710 }, { "epoch": 1.0280901173488164, "grad_norm": 0.04413656145334244, "learning_rate": 5.6258859390225105e-06, "loss": 0.0017, "step": 125720 }, { "epoch": 1.0281718935274156, "grad_norm": 0.038800615817308426, "learning_rate": 5.625177914007604e-06, "loss": 0.0014, "step": 125730 }, { "epoch": 1.0282536697060147, "grad_norm": 0.027312198653817177, "learning_rate": 5.624469876257315e-06, "loss": 0.0012, "step": 125740 }, { "epoch": 1.028335445884614, "grad_norm": 0.08098268508911133, "learning_rate": 5.623761825786066e-06, "loss": 0.0009, "step": 125750 }, { "epoch": 1.028417222063213, "grad_norm": 0.07550717890262604, "learning_rate": 5.623053762608281e-06, "loss": 0.0014, "step": 125760 }, { "epoch": 1.0284989982418122, "grad_norm": 0.08542031049728394, "learning_rate": 5.622345686738385e-06, "loss": 0.001, "step": 125770 }, { "epoch": 1.0285807744204114, "grad_norm": 0.07564002275466919, "learning_rate": 5.621637598190801e-06, "loss": 0.0029, "step": 125780 }, { "epoch": 1.0286625505990106, "grad_norm": 0.02707793191075325, "learning_rate": 5.6209294969799545e-06, "loss": 0.0007, "step": 125790 }, { "epoch": 1.0287443267776097, "grad_norm": 0.06590748578310013, "learning_rate": 5.62022138312027e-06, "loss": 0.0009, "step": 125800 }, { "epoch": 1.028826102956209, "grad_norm": 0.015175536274909973, "learning_rate": 5.619513256626172e-06, "loss": 0.0008, "step": 125810 }, { "epoch": 1.028907879134808, "grad_norm": 0.03899654000997543, "learning_rate": 5.618805117512083e-06, "loss": 0.001, "step": 125820 }, { "epoch": 1.0289896553134072, "grad_norm": 0.06317692250013351, "learning_rate": 5.618096965792432e-06, "loss": 0.0013, "step": 125830 }, { "epoch": 1.0290714314920064, "grad_norm": 0.010756570845842361, "learning_rate": 5.617388801481643e-06, "loss": 0.001, "step": 125840 }, { "epoch": 1.0291532076706056, "grad_norm": 0.10700781643390656, "learning_rate": 5.6166806245941444e-06, "loss": 0.0014, "step": 125850 }, { "epoch": 1.0292349838492048, "grad_norm": 0.044919565320014954, "learning_rate": 5.6159724351443574e-06, "loss": 0.0017, "step": 125860 }, { "epoch": 1.029316760027804, "grad_norm": 0.04132057726383209, "learning_rate": 5.615264233146713e-06, "loss": 0.001, "step": 125870 }, { "epoch": 1.029398536206403, "grad_norm": 0.011838627979159355, "learning_rate": 5.614556018615635e-06, "loss": 0.0009, "step": 125880 }, { "epoch": 1.0294803123850023, "grad_norm": 0.02605975605547428, "learning_rate": 5.6138477915655535e-06, "loss": 0.0004, "step": 125890 }, { "epoch": 1.0295620885636014, "grad_norm": 0.04459800198674202, "learning_rate": 5.613139552010891e-06, "loss": 0.0007, "step": 125900 }, { "epoch": 1.0296438647422006, "grad_norm": 0.06790979206562042, "learning_rate": 5.612431299966078e-06, "loss": 0.0006, "step": 125910 }, { "epoch": 1.0297256409207998, "grad_norm": 0.061114951968193054, "learning_rate": 5.611723035445543e-06, "loss": 0.0008, "step": 125920 }, { "epoch": 1.029807417099399, "grad_norm": 0.09250668436288834, "learning_rate": 5.611014758463712e-06, "loss": 0.0016, "step": 125930 }, { "epoch": 1.029889193277998, "grad_norm": 0.0529368557035923, "learning_rate": 5.610306469035012e-06, "loss": 0.0019, "step": 125940 }, { "epoch": 1.0299709694565973, "grad_norm": 0.05620140582323074, "learning_rate": 5.609598167173875e-06, "loss": 0.0011, "step": 125950 }, { "epoch": 1.0300527456351964, "grad_norm": 0.036129146814346313, "learning_rate": 5.608889852894727e-06, "loss": 0.0017, "step": 125960 }, { "epoch": 1.0301345218137956, "grad_norm": 0.006232827436178923, "learning_rate": 5.608181526211998e-06, "loss": 0.0009, "step": 125970 }, { "epoch": 1.0302162979923948, "grad_norm": 0.009561237879097462, "learning_rate": 5.6074731871401165e-06, "loss": 0.0008, "step": 125980 }, { "epoch": 1.030298074170994, "grad_norm": 0.02189827151596546, "learning_rate": 5.606764835693515e-06, "loss": 0.0014, "step": 125990 }, { "epoch": 1.0303798503495931, "grad_norm": 0.01370086520910263, "learning_rate": 5.606056471886616e-06, "loss": 0.0008, "step": 126000 }, { "epoch": 1.0304616265281923, "grad_norm": 0.1343926191329956, "learning_rate": 5.605348095733858e-06, "loss": 0.0014, "step": 126010 }, { "epoch": 1.0305434027067915, "grad_norm": 0.02941616252064705, "learning_rate": 5.604639707249665e-06, "loss": 0.0005, "step": 126020 }, { "epoch": 1.0306251788853906, "grad_norm": 0.02151387184858322, "learning_rate": 5.603931306448471e-06, "loss": 0.0012, "step": 126030 }, { "epoch": 1.0307069550639898, "grad_norm": 0.1391439139842987, "learning_rate": 5.603222893344704e-06, "loss": 0.0009, "step": 126040 }, { "epoch": 1.030788731242589, "grad_norm": 0.0356450229883194, "learning_rate": 5.602514467952798e-06, "loss": 0.001, "step": 126050 }, { "epoch": 1.0308705074211881, "grad_norm": 0.08516588062047958, "learning_rate": 5.601806030287181e-06, "loss": 0.0013, "step": 126060 }, { "epoch": 1.0309522835997873, "grad_norm": 0.014352484606206417, "learning_rate": 5.601097580362287e-06, "loss": 0.0014, "step": 126070 }, { "epoch": 1.0310340597783865, "grad_norm": 0.05689918249845505, "learning_rate": 5.600389118192545e-06, "loss": 0.006, "step": 126080 }, { "epoch": 1.0311158359569856, "grad_norm": 0.01999714784324169, "learning_rate": 5.599680643792391e-06, "loss": 0.0008, "step": 126090 }, { "epoch": 1.0311976121355848, "grad_norm": 0.03469323366880417, "learning_rate": 5.598972157176255e-06, "loss": 0.0024, "step": 126100 }, { "epoch": 1.031279388314184, "grad_norm": 0.02328501082956791, "learning_rate": 5.598263658358568e-06, "loss": 0.0008, "step": 126110 }, { "epoch": 1.0313611644927834, "grad_norm": 0.1327362060546875, "learning_rate": 5.597555147353764e-06, "loss": 0.0014, "step": 126120 }, { "epoch": 1.0314429406713823, "grad_norm": 0.03619789704680443, "learning_rate": 5.596846624176275e-06, "loss": 0.002, "step": 126130 }, { "epoch": 1.0315247168499817, "grad_norm": 0.023453861474990845, "learning_rate": 5.596138088840536e-06, "loss": 0.0009, "step": 126140 }, { "epoch": 1.0316064930285809, "grad_norm": 0.029491089284420013, "learning_rate": 5.59542954136098e-06, "loss": 0.0042, "step": 126150 }, { "epoch": 1.03168826920718, "grad_norm": 0.050726693123579025, "learning_rate": 5.594720981752041e-06, "loss": 0.0012, "step": 126160 }, { "epoch": 1.0317700453857792, "grad_norm": 0.01173460204154253, "learning_rate": 5.594012410028151e-06, "loss": 0.0014, "step": 126170 }, { "epoch": 1.0318518215643784, "grad_norm": 0.02491580881178379, "learning_rate": 5.593303826203746e-06, "loss": 0.0006, "step": 126180 }, { "epoch": 1.0319335977429775, "grad_norm": 0.047381818294525146, "learning_rate": 5.592595230293258e-06, "loss": 0.0013, "step": 126190 }, { "epoch": 1.0320153739215767, "grad_norm": 0.030929630622267723, "learning_rate": 5.591886622311124e-06, "loss": 0.0014, "step": 126200 }, { "epoch": 1.0320971501001759, "grad_norm": 0.06253742426633835, "learning_rate": 5.59117800227178e-06, "loss": 0.0012, "step": 126210 }, { "epoch": 1.032178926278775, "grad_norm": 0.03958744555711746, "learning_rate": 5.59046937018966e-06, "loss": 0.0009, "step": 126220 }, { "epoch": 1.0322607024573742, "grad_norm": 0.024918923154473305, "learning_rate": 5.589760726079198e-06, "loss": 0.0012, "step": 126230 }, { "epoch": 1.0323424786359734, "grad_norm": 0.18521259725093842, "learning_rate": 5.589052069954829e-06, "loss": 0.0018, "step": 126240 }, { "epoch": 1.0324242548145726, "grad_norm": 0.09271696209907532, "learning_rate": 5.588343401830992e-06, "loss": 0.0017, "step": 126250 }, { "epoch": 1.0325060309931717, "grad_norm": 0.04269144684076309, "learning_rate": 5.5876347217221216e-06, "loss": 0.0014, "step": 126260 }, { "epoch": 1.032587807171771, "grad_norm": 0.03280429542064667, "learning_rate": 5.586926029642654e-06, "loss": 0.0006, "step": 126270 }, { "epoch": 1.03266958335037, "grad_norm": 0.022860843688249588, "learning_rate": 5.586217325607027e-06, "loss": 0.0014, "step": 126280 }, { "epoch": 1.0327513595289692, "grad_norm": 0.006207102909684181, "learning_rate": 5.585508609629676e-06, "loss": 0.0013, "step": 126290 }, { "epoch": 1.0328331357075684, "grad_norm": 0.02451750636100769, "learning_rate": 5.584799881725038e-06, "loss": 0.0013, "step": 126300 }, { "epoch": 1.0329149118861676, "grad_norm": 0.042049188166856766, "learning_rate": 5.584091141907553e-06, "loss": 0.0008, "step": 126310 }, { "epoch": 1.0329966880647667, "grad_norm": 0.04772912338376045, "learning_rate": 5.5833823901916535e-06, "loss": 0.0025, "step": 126320 }, { "epoch": 1.033078464243366, "grad_norm": 0.4637871980667114, "learning_rate": 5.5826736265917836e-06, "loss": 0.0014, "step": 126330 }, { "epoch": 1.033160240421965, "grad_norm": 0.060353655368089676, "learning_rate": 5.5819648511223765e-06, "loss": 0.0008, "step": 126340 }, { "epoch": 1.0332420166005643, "grad_norm": 0.07299168407917023, "learning_rate": 5.581256063797874e-06, "loss": 0.0013, "step": 126350 }, { "epoch": 1.0333237927791634, "grad_norm": 0.046899955719709396, "learning_rate": 5.5805472646327104e-06, "loss": 0.0008, "step": 126360 }, { "epoch": 1.0334055689577626, "grad_norm": 0.10471460968255997, "learning_rate": 5.57983845364133e-06, "loss": 0.0012, "step": 126370 }, { "epoch": 1.0334873451363618, "grad_norm": 0.0018482919549569488, "learning_rate": 5.579129630838166e-06, "loss": 0.0008, "step": 126380 }, { "epoch": 1.033569121314961, "grad_norm": 0.03832045570015907, "learning_rate": 5.578420796237663e-06, "loss": 0.0006, "step": 126390 }, { "epoch": 1.03365089749356, "grad_norm": 0.04397515952587128, "learning_rate": 5.577711949854257e-06, "loss": 0.001, "step": 126400 }, { "epoch": 1.0337326736721593, "grad_norm": 0.009920249693095684, "learning_rate": 5.577003091702391e-06, "loss": 0.0021, "step": 126410 }, { "epoch": 1.0338144498507584, "grad_norm": 0.027361014857888222, "learning_rate": 5.576294221796501e-06, "loss": 0.0007, "step": 126420 }, { "epoch": 1.0338962260293576, "grad_norm": 0.01035915408283472, "learning_rate": 5.57558534015103e-06, "loss": 0.0009, "step": 126430 }, { "epoch": 1.0339780022079568, "grad_norm": 0.0022010670509189367, "learning_rate": 5.5748764467804164e-06, "loss": 0.0007, "step": 126440 }, { "epoch": 1.034059778386556, "grad_norm": 0.02358565665781498, "learning_rate": 5.5741675416991035e-06, "loss": 0.0009, "step": 126450 }, { "epoch": 1.034141554565155, "grad_norm": 0.04212837666273117, "learning_rate": 5.573458624921532e-06, "loss": 0.0025, "step": 126460 }, { "epoch": 1.0342233307437543, "grad_norm": 0.04448120296001434, "learning_rate": 5.572749696462142e-06, "loss": 0.0014, "step": 126470 }, { "epoch": 1.0343051069223534, "grad_norm": 0.024456795305013657, "learning_rate": 5.572040756335373e-06, "loss": 0.0025, "step": 126480 }, { "epoch": 1.0343868831009526, "grad_norm": 0.0072042387910187244, "learning_rate": 5.571331804555671e-06, "loss": 0.0007, "step": 126490 }, { "epoch": 1.0344686592795518, "grad_norm": 0.10612747073173523, "learning_rate": 5.570622841137474e-06, "loss": 0.0015, "step": 126500 }, { "epoch": 1.034550435458151, "grad_norm": 0.01727825403213501, "learning_rate": 5.569913866095227e-06, "loss": 0.0005, "step": 126510 }, { "epoch": 1.0346322116367501, "grad_norm": 0.003517400473356247, "learning_rate": 5.569204879443371e-06, "loss": 0.0018, "step": 126520 }, { "epoch": 1.0347139878153493, "grad_norm": 0.03913702070713043, "learning_rate": 5.56849588119635e-06, "loss": 0.0011, "step": 126530 }, { "epoch": 1.0347957639939485, "grad_norm": 0.03398742899298668, "learning_rate": 5.567786871368603e-06, "loss": 0.0012, "step": 126540 }, { "epoch": 1.0348775401725478, "grad_norm": 0.009695368818938732, "learning_rate": 5.567077849974579e-06, "loss": 0.0008, "step": 126550 }, { "epoch": 1.034959316351147, "grad_norm": 0.18535937368869781, "learning_rate": 5.566368817028717e-06, "loss": 0.0009, "step": 126560 }, { "epoch": 1.0350410925297462, "grad_norm": 0.0028937647584825754, "learning_rate": 5.5656597725454605e-06, "loss": 0.0015, "step": 126570 }, { "epoch": 1.0351228687083454, "grad_norm": 0.06121698021888733, "learning_rate": 5.564950716539257e-06, "loss": 0.0012, "step": 126580 }, { "epoch": 1.0352046448869445, "grad_norm": 0.11941453814506531, "learning_rate": 5.564241649024548e-06, "loss": 0.0007, "step": 126590 }, { "epoch": 1.0352864210655437, "grad_norm": 0.0785020962357521, "learning_rate": 5.563532570015776e-06, "loss": 0.0011, "step": 126600 }, { "epoch": 1.0353681972441429, "grad_norm": 0.11180043965578079, "learning_rate": 5.5628234795273895e-06, "loss": 0.0022, "step": 126610 }, { "epoch": 1.035449973422742, "grad_norm": 0.04680881276726723, "learning_rate": 5.56211437757383e-06, "loss": 0.0008, "step": 126620 }, { "epoch": 1.0355317496013412, "grad_norm": 0.08864593505859375, "learning_rate": 5.561405264169544e-06, "loss": 0.0009, "step": 126630 }, { "epoch": 1.0356135257799404, "grad_norm": 0.024586355313658714, "learning_rate": 5.560696139328976e-06, "loss": 0.0008, "step": 126640 }, { "epoch": 1.0356953019585395, "grad_norm": 0.04877832531929016, "learning_rate": 5.559987003066574e-06, "loss": 0.0018, "step": 126650 }, { "epoch": 1.0357770781371387, "grad_norm": 0.0311603881418705, "learning_rate": 5.559277855396778e-06, "loss": 0.0013, "step": 126660 }, { "epoch": 1.0358588543157379, "grad_norm": 0.046809129416942596, "learning_rate": 5.5585686963340405e-06, "loss": 0.0007, "step": 126670 }, { "epoch": 1.035940630494337, "grad_norm": 0.12030664831399918, "learning_rate": 5.557859525892803e-06, "loss": 0.0026, "step": 126680 }, { "epoch": 1.0360224066729362, "grad_norm": 0.00425545871257782, "learning_rate": 5.557150344087514e-06, "loss": 0.0006, "step": 126690 }, { "epoch": 1.0361041828515354, "grad_norm": 0.0552624948322773, "learning_rate": 5.55644115093262e-06, "loss": 0.001, "step": 126700 }, { "epoch": 1.0361859590301346, "grad_norm": 0.03963613137602806, "learning_rate": 5.555731946442568e-06, "loss": 0.0019, "step": 126710 }, { "epoch": 1.0362677352087337, "grad_norm": 0.058229587972164154, "learning_rate": 5.555022730631803e-06, "loss": 0.001, "step": 126720 }, { "epoch": 1.036349511387333, "grad_norm": 0.017476331442594528, "learning_rate": 5.554313503514774e-06, "loss": 0.0008, "step": 126730 }, { "epoch": 1.036431287565932, "grad_norm": 0.038615815341472626, "learning_rate": 5.5536042651059284e-06, "loss": 0.0029, "step": 126740 }, { "epoch": 1.0365130637445312, "grad_norm": 0.03508350998163223, "learning_rate": 5.552895015419715e-06, "loss": 0.0026, "step": 126750 }, { "epoch": 1.0365948399231304, "grad_norm": 0.08925440162420273, "learning_rate": 5.552185754470581e-06, "loss": 0.0019, "step": 126760 }, { "epoch": 1.0366766161017296, "grad_norm": 0.03271359205245972, "learning_rate": 5.551476482272973e-06, "loss": 0.0008, "step": 126770 }, { "epoch": 1.0367583922803287, "grad_norm": 0.06575208902359009, "learning_rate": 5.55076719884134e-06, "loss": 0.0018, "step": 126780 }, { "epoch": 1.036840168458928, "grad_norm": 0.04285497963428497, "learning_rate": 5.550057904190134e-06, "loss": 0.001, "step": 126790 }, { "epoch": 1.036921944637527, "grad_norm": 0.02746076136827469, "learning_rate": 5.5493485983337994e-06, "loss": 0.0009, "step": 126800 }, { "epoch": 1.0370037208161262, "grad_norm": 0.02591138519346714, "learning_rate": 5.548639281286787e-06, "loss": 0.0006, "step": 126810 }, { "epoch": 1.0370854969947254, "grad_norm": 0.009063546545803547, "learning_rate": 5.547929953063549e-06, "loss": 0.0007, "step": 126820 }, { "epoch": 1.0371672731733246, "grad_norm": 0.1886235475540161, "learning_rate": 5.547220613678531e-06, "loss": 0.0014, "step": 126830 }, { "epoch": 1.0372490493519237, "grad_norm": 0.039326999336481094, "learning_rate": 5.5465112631461835e-06, "loss": 0.001, "step": 126840 }, { "epoch": 1.037330825530523, "grad_norm": 0.00508239446207881, "learning_rate": 5.545801901480958e-06, "loss": 0.0018, "step": 126850 }, { "epoch": 1.037412601709122, "grad_norm": 0.017025446519255638, "learning_rate": 5.545092528697304e-06, "loss": 0.0011, "step": 126860 }, { "epoch": 1.0374943778877213, "grad_norm": 0.10768352448940277, "learning_rate": 5.544383144809673e-06, "loss": 0.0015, "step": 126870 }, { "epoch": 1.0375761540663204, "grad_norm": 0.07857424020767212, "learning_rate": 5.543673749832514e-06, "loss": 0.0013, "step": 126880 }, { "epoch": 1.0376579302449196, "grad_norm": 0.10728384554386139, "learning_rate": 5.542964343780279e-06, "loss": 0.0014, "step": 126890 }, { "epoch": 1.0377397064235188, "grad_norm": 0.01352258026599884, "learning_rate": 5.542254926667418e-06, "loss": 0.0014, "step": 126900 }, { "epoch": 1.037821482602118, "grad_norm": 0.036706969141960144, "learning_rate": 5.541545498508386e-06, "loss": 0.0011, "step": 126910 }, { "epoch": 1.037903258780717, "grad_norm": 0.12133897840976715, "learning_rate": 5.540836059317629e-06, "loss": 0.0012, "step": 126920 }, { "epoch": 1.0379850349593163, "grad_norm": 0.05242133513092995, "learning_rate": 5.540126609109604e-06, "loss": 0.0015, "step": 126930 }, { "epoch": 1.0380668111379154, "grad_norm": 0.028410211205482483, "learning_rate": 5.53941714789876e-06, "loss": 0.0015, "step": 126940 }, { "epoch": 1.0381485873165146, "grad_norm": 0.03753333166241646, "learning_rate": 5.538707675699551e-06, "loss": 0.0017, "step": 126950 }, { "epoch": 1.0382303634951138, "grad_norm": 0.08011870086193085, "learning_rate": 5.537998192526427e-06, "loss": 0.0013, "step": 126960 }, { "epoch": 1.038312139673713, "grad_norm": 0.00939388107508421, "learning_rate": 5.537288698393845e-06, "loss": 0.0009, "step": 126970 }, { "epoch": 1.0383939158523123, "grad_norm": 0.1404193937778473, "learning_rate": 5.536579193316252e-06, "loss": 0.0008, "step": 126980 }, { "epoch": 1.0384756920309115, "grad_norm": 0.04473713040351868, "learning_rate": 5.535869677308108e-06, "loss": 0.0021, "step": 126990 }, { "epoch": 1.0385574682095107, "grad_norm": 0.06286406517028809, "learning_rate": 5.535160150383862e-06, "loss": 0.0018, "step": 127000 }, { "epoch": 1.0386392443881098, "grad_norm": 0.028638599440455437, "learning_rate": 5.534450612557969e-06, "loss": 0.0039, "step": 127010 }, { "epoch": 1.038721020566709, "grad_norm": 0.09487476944923401, "learning_rate": 5.533741063844882e-06, "loss": 0.0017, "step": 127020 }, { "epoch": 1.0388027967453082, "grad_norm": 0.05142538994550705, "learning_rate": 5.533031504259057e-06, "loss": 0.0017, "step": 127030 }, { "epoch": 1.0388845729239073, "grad_norm": 0.07385127246379852, "learning_rate": 5.532321933814945e-06, "loss": 0.0014, "step": 127040 }, { "epoch": 1.0389663491025065, "grad_norm": 0.07489754259586334, "learning_rate": 5.531612352527002e-06, "loss": 0.0011, "step": 127050 }, { "epoch": 1.0390481252811057, "grad_norm": 0.09064259380102158, "learning_rate": 5.530902760409687e-06, "loss": 0.0011, "step": 127060 }, { "epoch": 1.0391299014597049, "grad_norm": 0.7632246613502502, "learning_rate": 5.530193157477448e-06, "loss": 0.0014, "step": 127070 }, { "epoch": 1.039211677638304, "grad_norm": 0.027901405468583107, "learning_rate": 5.529483543744744e-06, "loss": 0.0012, "step": 127080 }, { "epoch": 1.0392934538169032, "grad_norm": 0.07317134737968445, "learning_rate": 5.52877391922603e-06, "loss": 0.0016, "step": 127090 }, { "epoch": 1.0393752299955024, "grad_norm": 0.022999877110123634, "learning_rate": 5.528064283935761e-06, "loss": 0.0012, "step": 127100 }, { "epoch": 1.0394570061741015, "grad_norm": 0.06847783923149109, "learning_rate": 5.527354637888395e-06, "loss": 0.0008, "step": 127110 }, { "epoch": 1.0395387823527007, "grad_norm": 0.0531894750893116, "learning_rate": 5.526644981098386e-06, "loss": 0.0008, "step": 127120 }, { "epoch": 1.0396205585312999, "grad_norm": 0.07377315312623978, "learning_rate": 5.52593531358019e-06, "loss": 0.001, "step": 127130 }, { "epoch": 1.039702334709899, "grad_norm": 0.0911051332950592, "learning_rate": 5.5252256353482635e-06, "loss": 0.0015, "step": 127140 }, { "epoch": 1.0397841108884982, "grad_norm": 0.10538147389888763, "learning_rate": 5.524515946417064e-06, "loss": 0.0012, "step": 127150 }, { "epoch": 1.0398658870670974, "grad_norm": 0.06713191419839859, "learning_rate": 5.5238062468010504e-06, "loss": 0.0007, "step": 127160 }, { "epoch": 1.0399476632456965, "grad_norm": 0.035490572452545166, "learning_rate": 5.523096536514675e-06, "loss": 0.0007, "step": 127170 }, { "epoch": 1.0400294394242957, "grad_norm": 0.005440519656985998, "learning_rate": 5.5223868155724e-06, "loss": 0.0015, "step": 127180 }, { "epoch": 1.0401112156028949, "grad_norm": 0.05672214925289154, "learning_rate": 5.5216770839886806e-06, "loss": 0.0007, "step": 127190 }, { "epoch": 1.040192991781494, "grad_norm": 0.054591961205005646, "learning_rate": 5.5209673417779745e-06, "loss": 0.0027, "step": 127200 }, { "epoch": 1.0402747679600932, "grad_norm": 0.023875795304775238, "learning_rate": 5.5202575889547385e-06, "loss": 0.0008, "step": 127210 }, { "epoch": 1.0403565441386924, "grad_norm": 0.027013208717107773, "learning_rate": 5.519547825533435e-06, "loss": 0.0018, "step": 127220 }, { "epoch": 1.0404383203172916, "grad_norm": 0.033230703324079514, "learning_rate": 5.518838051528518e-06, "loss": 0.0006, "step": 127230 }, { "epoch": 1.0405200964958907, "grad_norm": 0.013966680504381657, "learning_rate": 5.5181282669544515e-06, "loss": 0.0006, "step": 127240 }, { "epoch": 1.04060187267449, "grad_norm": 0.0319879986345768, "learning_rate": 5.517418471825687e-06, "loss": 0.0006, "step": 127250 }, { "epoch": 1.040683648853089, "grad_norm": 0.05980683118104935, "learning_rate": 5.51670866615669e-06, "loss": 0.002, "step": 127260 }, { "epoch": 1.0407654250316882, "grad_norm": 0.028256362304091454, "learning_rate": 5.515998849961917e-06, "loss": 0.0009, "step": 127270 }, { "epoch": 1.0408472012102874, "grad_norm": 0.06196453049778938, "learning_rate": 5.5152890232558286e-06, "loss": 0.0011, "step": 127280 }, { "epoch": 1.0409289773888866, "grad_norm": 0.03138440102338791, "learning_rate": 5.514579186052882e-06, "loss": 0.0016, "step": 127290 }, { "epoch": 1.0410107535674857, "grad_norm": 0.06137345731258392, "learning_rate": 5.513869338367542e-06, "loss": 0.0016, "step": 127300 }, { "epoch": 1.041092529746085, "grad_norm": 0.016853995621204376, "learning_rate": 5.513159480214264e-06, "loss": 0.0011, "step": 127310 }, { "epoch": 1.041174305924684, "grad_norm": 0.20556312799453735, "learning_rate": 5.512449611607511e-06, "loss": 0.0025, "step": 127320 }, { "epoch": 1.0412560821032832, "grad_norm": 0.06684251129627228, "learning_rate": 5.511739732561742e-06, "loss": 0.0006, "step": 127330 }, { "epoch": 1.0413378582818824, "grad_norm": 0.1251193881034851, "learning_rate": 5.511029843091419e-06, "loss": 0.0025, "step": 127340 }, { "epoch": 1.0414196344604816, "grad_norm": 0.045765869319438934, "learning_rate": 5.510319943211003e-06, "loss": 0.0007, "step": 127350 }, { "epoch": 1.0415014106390807, "grad_norm": 0.01120842806994915, "learning_rate": 5.5096100329349555e-06, "loss": 0.0006, "step": 127360 }, { "epoch": 1.04158318681768, "grad_norm": 0.015357732772827148, "learning_rate": 5.508900112277736e-06, "loss": 0.0006, "step": 127370 }, { "epoch": 1.041664962996279, "grad_norm": 0.09261112660169601, "learning_rate": 5.508190181253808e-06, "loss": 0.0015, "step": 127380 }, { "epoch": 1.0417467391748783, "grad_norm": 0.11336296796798706, "learning_rate": 5.507480239877633e-06, "loss": 0.0012, "step": 127390 }, { "epoch": 1.0418285153534774, "grad_norm": 0.01255512423813343, "learning_rate": 5.506770288163673e-06, "loss": 0.001, "step": 127400 }, { "epoch": 1.0419102915320768, "grad_norm": 0.031343426555395126, "learning_rate": 5.506060326126391e-06, "loss": 0.0017, "step": 127410 }, { "epoch": 1.041992067710676, "grad_norm": 0.07674676924943924, "learning_rate": 5.505350353780249e-06, "loss": 0.0013, "step": 127420 }, { "epoch": 1.0420738438892752, "grad_norm": 0.03188522532582283, "learning_rate": 5.504640371139708e-06, "loss": 0.0014, "step": 127430 }, { "epoch": 1.0421556200678743, "grad_norm": 0.09875867515802383, "learning_rate": 5.503930378219233e-06, "loss": 0.0026, "step": 127440 }, { "epoch": 1.0422373962464735, "grad_norm": 0.014695878140628338, "learning_rate": 5.503220375033287e-06, "loss": 0.0011, "step": 127450 }, { "epoch": 1.0423191724250727, "grad_norm": 0.2695150077342987, "learning_rate": 5.502510361596332e-06, "loss": 0.001, "step": 127460 }, { "epoch": 1.0424009486036718, "grad_norm": 0.06676603108644485, "learning_rate": 5.501800337922835e-06, "loss": 0.0009, "step": 127470 }, { "epoch": 1.042482724782271, "grad_norm": 0.0387437529861927, "learning_rate": 5.501090304027256e-06, "loss": 0.0012, "step": 127480 }, { "epoch": 1.0425645009608702, "grad_norm": 0.07765825092792511, "learning_rate": 5.500380259924058e-06, "loss": 0.0017, "step": 127490 }, { "epoch": 1.0426462771394693, "grad_norm": 0.04993286728858948, "learning_rate": 5.499670205627709e-06, "loss": 0.0019, "step": 127500 }, { "epoch": 1.0427280533180685, "grad_norm": 0.021965501829981804, "learning_rate": 5.498960141152671e-06, "loss": 0.0009, "step": 127510 }, { "epoch": 1.0428098294966677, "grad_norm": 0.08250173926353455, "learning_rate": 5.498250066513412e-06, "loss": 0.0012, "step": 127520 }, { "epoch": 1.0428916056752668, "grad_norm": 0.05767123028635979, "learning_rate": 5.497539981724392e-06, "loss": 0.0015, "step": 127530 }, { "epoch": 1.042973381853866, "grad_norm": 0.019229866564273834, "learning_rate": 5.496829886800079e-06, "loss": 0.0006, "step": 127540 }, { "epoch": 1.0430551580324652, "grad_norm": 0.0703572928905487, "learning_rate": 5.496119781754936e-06, "loss": 0.0013, "step": 127550 }, { "epoch": 1.0431369342110643, "grad_norm": 0.04368409886956215, "learning_rate": 5.495409666603431e-06, "loss": 0.0019, "step": 127560 }, { "epoch": 1.0432187103896635, "grad_norm": 0.009624435566365719, "learning_rate": 5.494699541360028e-06, "loss": 0.0006, "step": 127570 }, { "epoch": 1.0433004865682627, "grad_norm": 0.02994164638221264, "learning_rate": 5.493989406039193e-06, "loss": 0.0016, "step": 127580 }, { "epoch": 1.0433822627468619, "grad_norm": 0.002365678548812866, "learning_rate": 5.493279260655393e-06, "loss": 0.0004, "step": 127590 }, { "epoch": 1.043464038925461, "grad_norm": 0.08285147696733475, "learning_rate": 5.492569105223093e-06, "loss": 0.0017, "step": 127600 }, { "epoch": 1.0435458151040602, "grad_norm": 0.00892947893589735, "learning_rate": 5.49185893975676e-06, "loss": 0.0008, "step": 127610 }, { "epoch": 1.0436275912826594, "grad_norm": 0.07045504450798035, "learning_rate": 5.491148764270861e-06, "loss": 0.0018, "step": 127620 }, { "epoch": 1.0437093674612585, "grad_norm": 0.011877980083227158, "learning_rate": 5.490438578779861e-06, "loss": 0.002, "step": 127630 }, { "epoch": 1.0437911436398577, "grad_norm": 0.06209500879049301, "learning_rate": 5.48972838329823e-06, "loss": 0.0012, "step": 127640 }, { "epoch": 1.0438729198184569, "grad_norm": 0.01209715474396944, "learning_rate": 5.489018177840434e-06, "loss": 0.0008, "step": 127650 }, { "epoch": 1.043954695997056, "grad_norm": 0.019386054947972298, "learning_rate": 5.488307962420939e-06, "loss": 0.0009, "step": 127660 }, { "epoch": 1.0440364721756552, "grad_norm": 0.031216291710734367, "learning_rate": 5.487597737054215e-06, "loss": 0.0018, "step": 127670 }, { "epoch": 1.0441182483542544, "grad_norm": 0.020789330825209618, "learning_rate": 5.486887501754728e-06, "loss": 0.0009, "step": 127680 }, { "epoch": 1.0442000245328535, "grad_norm": 0.041880201548337936, "learning_rate": 5.486177256536947e-06, "loss": 0.0009, "step": 127690 }, { "epoch": 1.0442818007114527, "grad_norm": 0.06130986288189888, "learning_rate": 5.485467001415339e-06, "loss": 0.0033, "step": 127700 }, { "epoch": 1.0443635768900519, "grad_norm": 0.02187723107635975, "learning_rate": 5.484756736404375e-06, "loss": 0.0009, "step": 127710 }, { "epoch": 1.044445353068651, "grad_norm": 0.006842315662652254, "learning_rate": 5.484046461518523e-06, "loss": 0.0013, "step": 127720 }, { "epoch": 1.0445271292472502, "grad_norm": 0.04048996791243553, "learning_rate": 5.483336176772249e-06, "loss": 0.0015, "step": 127730 }, { "epoch": 1.0446089054258494, "grad_norm": 0.0622466616332531, "learning_rate": 5.482625882180026e-06, "loss": 0.0008, "step": 127740 }, { "epoch": 1.0446906816044486, "grad_norm": 0.06255175918340683, "learning_rate": 5.481915577756319e-06, "loss": 0.0012, "step": 127750 }, { "epoch": 1.0447724577830477, "grad_norm": 0.03730649873614311, "learning_rate": 5.481205263515602e-06, "loss": 0.0012, "step": 127760 }, { "epoch": 1.044854233961647, "grad_norm": 0.054921455681324005, "learning_rate": 5.480494939472343e-06, "loss": 0.0008, "step": 127770 }, { "epoch": 1.044936010140246, "grad_norm": 0.018139854073524475, "learning_rate": 5.479784605641011e-06, "loss": 0.0004, "step": 127780 }, { "epoch": 1.0450177863188452, "grad_norm": 0.03798608481884003, "learning_rate": 5.479074262036076e-06, "loss": 0.0016, "step": 127790 }, { "epoch": 1.0450995624974444, "grad_norm": 0.04072647541761398, "learning_rate": 5.4783639086720096e-06, "loss": 0.0012, "step": 127800 }, { "epoch": 1.0451813386760436, "grad_norm": 0.05145133659243584, "learning_rate": 5.477653545563281e-06, "loss": 0.0013, "step": 127810 }, { "epoch": 1.0452631148546427, "grad_norm": 0.05179368332028389, "learning_rate": 5.476943172724362e-06, "loss": 0.0008, "step": 127820 }, { "epoch": 1.045344891033242, "grad_norm": 0.0014452537288889289, "learning_rate": 5.4762327901697224e-06, "loss": 0.0007, "step": 127830 }, { "epoch": 1.0454266672118413, "grad_norm": 0.020337508991360664, "learning_rate": 5.4755223979138335e-06, "loss": 0.0015, "step": 127840 }, { "epoch": 1.0455084433904405, "grad_norm": 0.03533384948968887, "learning_rate": 5.474811995971167e-06, "loss": 0.0011, "step": 127850 }, { "epoch": 1.0455902195690396, "grad_norm": 0.01646002195775509, "learning_rate": 5.474101584356195e-06, "loss": 0.0008, "step": 127860 }, { "epoch": 1.0456719957476388, "grad_norm": 0.04049032926559448, "learning_rate": 5.473391163083387e-06, "loss": 0.0012, "step": 127870 }, { "epoch": 1.045753771926238, "grad_norm": 0.02008451707661152, "learning_rate": 5.472680732167217e-06, "loss": 0.0013, "step": 127880 }, { "epoch": 1.0458355481048371, "grad_norm": 0.041944269090890884, "learning_rate": 5.471970291622157e-06, "loss": 0.0011, "step": 127890 }, { "epoch": 1.0459173242834363, "grad_norm": 0.06084967032074928, "learning_rate": 5.471259841462678e-06, "loss": 0.0014, "step": 127900 }, { "epoch": 1.0459991004620355, "grad_norm": 0.10906115174293518, "learning_rate": 5.470549381703253e-06, "loss": 0.0012, "step": 127910 }, { "epoch": 1.0460808766406346, "grad_norm": 0.05709236487746239, "learning_rate": 5.469838912358354e-06, "loss": 0.0011, "step": 127920 }, { "epoch": 1.0461626528192338, "grad_norm": 0.06621095538139343, "learning_rate": 5.469128433442454e-06, "loss": 0.0012, "step": 127930 }, { "epoch": 1.046244428997833, "grad_norm": 0.04563339799642563, "learning_rate": 5.468417944970028e-06, "loss": 0.0007, "step": 127940 }, { "epoch": 1.0463262051764322, "grad_norm": 0.049461379647254944, "learning_rate": 5.467707446955548e-06, "loss": 0.0017, "step": 127950 }, { "epoch": 1.0464079813550313, "grad_norm": 0.023780522868037224, "learning_rate": 5.466996939413486e-06, "loss": 0.0017, "step": 127960 }, { "epoch": 1.0464897575336305, "grad_norm": 0.04837073013186455, "learning_rate": 5.466286422358316e-06, "loss": 0.0024, "step": 127970 }, { "epoch": 1.0465715337122297, "grad_norm": 0.05245114117860794, "learning_rate": 5.465575895804514e-06, "loss": 0.0008, "step": 127980 }, { "epoch": 1.0466533098908288, "grad_norm": 0.01907220482826233, "learning_rate": 5.4648653597665515e-06, "loss": 0.0005, "step": 127990 }, { "epoch": 1.046735086069428, "grad_norm": 0.11381439119577408, "learning_rate": 5.464154814258904e-06, "loss": 0.0008, "step": 128000 }, { "epoch": 1.0468168622480272, "grad_norm": 0.06097230315208435, "learning_rate": 5.463444259296047e-06, "loss": 0.0014, "step": 128010 }, { "epoch": 1.0468986384266263, "grad_norm": 0.009440528228878975, "learning_rate": 5.462733694892452e-06, "loss": 0.0014, "step": 128020 }, { "epoch": 1.0469804146052255, "grad_norm": 0.02085624635219574, "learning_rate": 5.4620231210625965e-06, "loss": 0.0005, "step": 128030 }, { "epoch": 1.0470621907838247, "grad_norm": 0.02438785508275032, "learning_rate": 5.461312537820954e-06, "loss": 0.0013, "step": 128040 }, { "epoch": 1.0471439669624238, "grad_norm": 0.0231104027479887, "learning_rate": 5.460601945182e-06, "loss": 0.0008, "step": 128050 }, { "epoch": 1.047225743141023, "grad_norm": 0.13430586457252502, "learning_rate": 5.459891343160211e-06, "loss": 0.0017, "step": 128060 }, { "epoch": 1.0473075193196222, "grad_norm": 0.05025046318769455, "learning_rate": 5.45918073177006e-06, "loss": 0.001, "step": 128070 }, { "epoch": 1.0473892954982214, "grad_norm": 0.08955772966146469, "learning_rate": 5.458470111026026e-06, "loss": 0.0011, "step": 128080 }, { "epoch": 1.0474710716768205, "grad_norm": 0.032185062766075134, "learning_rate": 5.457759480942582e-06, "loss": 0.0008, "step": 128090 }, { "epoch": 1.0475528478554197, "grad_norm": 0.041174810379743576, "learning_rate": 5.457048841534206e-06, "loss": 0.0012, "step": 128100 }, { "epoch": 1.0476346240340189, "grad_norm": 0.03155495971441269, "learning_rate": 5.456338192815371e-06, "loss": 0.0012, "step": 128110 }, { "epoch": 1.047716400212618, "grad_norm": 0.008408510126173496, "learning_rate": 5.45562753480056e-06, "loss": 0.0011, "step": 128120 }, { "epoch": 1.0477981763912172, "grad_norm": 0.03642726689577103, "learning_rate": 5.454916867504244e-06, "loss": 0.0012, "step": 128130 }, { "epoch": 1.0478799525698164, "grad_norm": 0.0393354557454586, "learning_rate": 5.454206190940902e-06, "loss": 0.0009, "step": 128140 }, { "epoch": 1.0479617287484155, "grad_norm": 0.13725057244300842, "learning_rate": 5.453495505125009e-06, "loss": 0.0014, "step": 128150 }, { "epoch": 1.0480435049270147, "grad_norm": 0.03513619303703308, "learning_rate": 5.452784810071046e-06, "loss": 0.0012, "step": 128160 }, { "epoch": 1.0481252811056139, "grad_norm": 0.06886965781450272, "learning_rate": 5.452074105793486e-06, "loss": 0.0014, "step": 128170 }, { "epoch": 1.048207057284213, "grad_norm": 0.019989322870969772, "learning_rate": 5.4513633923068125e-06, "loss": 0.0014, "step": 128180 }, { "epoch": 1.0482888334628122, "grad_norm": 0.06203628331422806, "learning_rate": 5.450652669625497e-06, "loss": 0.0019, "step": 128190 }, { "epoch": 1.0483706096414114, "grad_norm": 0.03259649500250816, "learning_rate": 5.449941937764021e-06, "loss": 0.0012, "step": 128200 }, { "epoch": 1.0484523858200105, "grad_norm": 0.039466604590415955, "learning_rate": 5.449231196736863e-06, "loss": 0.0013, "step": 128210 }, { "epoch": 1.0485341619986097, "grad_norm": 0.055510636419057846, "learning_rate": 5.4485204465585e-06, "loss": 0.0015, "step": 128220 }, { "epoch": 1.0486159381772089, "grad_norm": 0.07478934526443481, "learning_rate": 5.447809687243409e-06, "loss": 0.0011, "step": 128230 }, { "epoch": 1.048697714355808, "grad_norm": 0.0653301477432251, "learning_rate": 5.447098918806073e-06, "loss": 0.0007, "step": 128240 }, { "epoch": 1.0487794905344072, "grad_norm": 0.005743286106735468, "learning_rate": 5.446388141260968e-06, "loss": 0.0007, "step": 128250 }, { "epoch": 1.0488612667130064, "grad_norm": 0.05505799502134323, "learning_rate": 5.4456773546225735e-06, "loss": 0.0008, "step": 128260 }, { "epoch": 1.0489430428916058, "grad_norm": 0.008112430572509766, "learning_rate": 5.444966558905368e-06, "loss": 0.0013, "step": 128270 }, { "epoch": 1.049024819070205, "grad_norm": 0.012463979423046112, "learning_rate": 5.444255754123834e-06, "loss": 0.0006, "step": 128280 }, { "epoch": 1.0491065952488041, "grad_norm": 0.11841584742069244, "learning_rate": 5.443544940292448e-06, "loss": 0.0011, "step": 128290 }, { "epoch": 1.0491883714274033, "grad_norm": 0.03643227741122246, "learning_rate": 5.442834117425691e-06, "loss": 0.0022, "step": 128300 }, { "epoch": 1.0492701476060025, "grad_norm": 0.02863503247499466, "learning_rate": 5.442123285538043e-06, "loss": 0.0013, "step": 128310 }, { "epoch": 1.0493519237846016, "grad_norm": 0.09879722446203232, "learning_rate": 5.4414124446439855e-06, "loss": 0.0017, "step": 128320 }, { "epoch": 1.0494336999632008, "grad_norm": 0.0032743392512202263, "learning_rate": 5.440701594757997e-06, "loss": 0.0005, "step": 128330 }, { "epoch": 1.0495154761418, "grad_norm": 0.06034841015934944, "learning_rate": 5.439990735894558e-06, "loss": 0.0007, "step": 128340 }, { "epoch": 1.0495972523203991, "grad_norm": 0.048539821058511734, "learning_rate": 5.4392798680681506e-06, "loss": 0.0011, "step": 128350 }, { "epoch": 1.0496790284989983, "grad_norm": 0.03134800121188164, "learning_rate": 5.438568991293255e-06, "loss": 0.0005, "step": 128360 }, { "epoch": 1.0497608046775975, "grad_norm": 0.053267836570739746, "learning_rate": 5.437858105584354e-06, "loss": 0.0006, "step": 128370 }, { "epoch": 1.0498425808561966, "grad_norm": 0.14396028220653534, "learning_rate": 5.437147210955927e-06, "loss": 0.0015, "step": 128380 }, { "epoch": 1.0499243570347958, "grad_norm": 0.09068275988101959, "learning_rate": 5.436436307422453e-06, "loss": 0.0014, "step": 128390 }, { "epoch": 1.050006133213395, "grad_norm": 0.08292985707521439, "learning_rate": 5.4357253949984205e-06, "loss": 0.0012, "step": 128400 }, { "epoch": 1.0500879093919941, "grad_norm": 0.035395074635744095, "learning_rate": 5.435014473698305e-06, "loss": 0.001, "step": 128410 }, { "epoch": 1.0501696855705933, "grad_norm": 0.12439356744289398, "learning_rate": 5.4343035435365925e-06, "loss": 0.0022, "step": 128420 }, { "epoch": 1.0502514617491925, "grad_norm": 0.07710568606853485, "learning_rate": 5.433592604527763e-06, "loss": 0.0009, "step": 128430 }, { "epoch": 1.0503332379277917, "grad_norm": 0.14629049599170685, "learning_rate": 5.4328816566863005e-06, "loss": 0.001, "step": 128440 }, { "epoch": 1.0504150141063908, "grad_norm": 0.04795287176966667, "learning_rate": 5.432170700026687e-06, "loss": 0.0017, "step": 128450 }, { "epoch": 1.05049679028499, "grad_norm": 0.07007929682731628, "learning_rate": 5.431459734563404e-06, "loss": 0.0014, "step": 128460 }, { "epoch": 1.0505785664635892, "grad_norm": 0.03957251459360123, "learning_rate": 5.430748760310936e-06, "loss": 0.0011, "step": 128470 }, { "epoch": 1.0506603426421883, "grad_norm": 0.05653301626443863, "learning_rate": 5.430037777283765e-06, "loss": 0.0006, "step": 128480 }, { "epoch": 1.0507421188207875, "grad_norm": 0.10555149614810944, "learning_rate": 5.429326785496376e-06, "loss": 0.0014, "step": 128490 }, { "epoch": 1.0508238949993867, "grad_norm": 0.11752988398075104, "learning_rate": 5.4286157849632515e-06, "loss": 0.0011, "step": 128500 }, { "epoch": 1.0509056711779858, "grad_norm": 0.1150747612118721, "learning_rate": 5.4279047756988736e-06, "loss": 0.0011, "step": 128510 }, { "epoch": 1.050987447356585, "grad_norm": 0.034403178840875626, "learning_rate": 5.4271937577177296e-06, "loss": 0.0012, "step": 128520 }, { "epoch": 1.0510692235351842, "grad_norm": 0.019794393330812454, "learning_rate": 5.4264827310342995e-06, "loss": 0.0012, "step": 128530 }, { "epoch": 1.0511509997137833, "grad_norm": 0.012273562140762806, "learning_rate": 5.42577169566307e-06, "loss": 0.0014, "step": 128540 }, { "epoch": 1.0512327758923825, "grad_norm": 0.02903379127383232, "learning_rate": 5.425060651618526e-06, "loss": 0.0015, "step": 128550 }, { "epoch": 1.0513145520709817, "grad_norm": 0.012456665746867657, "learning_rate": 5.424349598915151e-06, "loss": 0.0003, "step": 128560 }, { "epoch": 1.0513963282495808, "grad_norm": 0.030904525890946388, "learning_rate": 5.423638537567429e-06, "loss": 0.0016, "step": 128570 }, { "epoch": 1.05147810442818, "grad_norm": 0.09039399772882462, "learning_rate": 5.422927467589845e-06, "loss": 0.0009, "step": 128580 }, { "epoch": 1.0515598806067792, "grad_norm": 0.046543680131435394, "learning_rate": 5.422216388996886e-06, "loss": 0.0011, "step": 128590 }, { "epoch": 1.0516416567853784, "grad_norm": 0.02001097798347473, "learning_rate": 5.421505301803035e-06, "loss": 0.0011, "step": 128600 }, { "epoch": 1.0517234329639775, "grad_norm": 0.01892237178981304, "learning_rate": 5.4207942060227795e-06, "loss": 0.0013, "step": 128610 }, { "epoch": 1.0518052091425767, "grad_norm": 0.031968794763088226, "learning_rate": 5.420083101670602e-06, "loss": 0.0019, "step": 128620 }, { "epoch": 1.0518869853211759, "grad_norm": 0.1956598460674286, "learning_rate": 5.419371988760993e-06, "loss": 0.0011, "step": 128630 }, { "epoch": 1.051968761499775, "grad_norm": 0.07046399265527725, "learning_rate": 5.4186608673084326e-06, "loss": 0.0007, "step": 128640 }, { "epoch": 1.0520505376783742, "grad_norm": 0.04099932685494423, "learning_rate": 5.417949737327413e-06, "loss": 0.0009, "step": 128650 }, { "epoch": 1.0521323138569734, "grad_norm": 0.01438309345394373, "learning_rate": 5.417238598832415e-06, "loss": 0.0011, "step": 128660 }, { "epoch": 1.0522140900355725, "grad_norm": 0.03660688176751137, "learning_rate": 5.416527451837929e-06, "loss": 0.0011, "step": 128670 }, { "epoch": 1.0522958662141717, "grad_norm": 0.07825833559036255, "learning_rate": 5.41581629635844e-06, "loss": 0.0007, "step": 128680 }, { "epoch": 1.0523776423927709, "grad_norm": 0.02971065044403076, "learning_rate": 5.4151051324084355e-06, "loss": 0.0016, "step": 128690 }, { "epoch": 1.0524594185713703, "grad_norm": 0.019781315699219704, "learning_rate": 5.414393960002402e-06, "loss": 0.0017, "step": 128700 }, { "epoch": 1.0525411947499694, "grad_norm": 0.07091251760721207, "learning_rate": 5.4136827791548264e-06, "loss": 0.0014, "step": 128710 }, { "epoch": 1.0526229709285686, "grad_norm": 0.03494321554899216, "learning_rate": 5.412971589880196e-06, "loss": 0.0022, "step": 128720 }, { "epoch": 1.0527047471071678, "grad_norm": 0.033801719546318054, "learning_rate": 5.412260392192998e-06, "loss": 0.0011, "step": 128730 }, { "epoch": 1.052786523285767, "grad_norm": 0.0837957113981247, "learning_rate": 5.411549186107723e-06, "loss": 0.0011, "step": 128740 }, { "epoch": 1.052868299464366, "grad_norm": 0.012048466131091118, "learning_rate": 5.410837971638855e-06, "loss": 0.0008, "step": 128750 }, { "epoch": 1.0529500756429653, "grad_norm": 0.06890928000211716, "learning_rate": 5.410126748800883e-06, "loss": 0.0013, "step": 128760 }, { "epoch": 1.0530318518215644, "grad_norm": 0.0724404901266098, "learning_rate": 5.409415517608297e-06, "loss": 0.0017, "step": 128770 }, { "epoch": 1.0531136280001636, "grad_norm": 0.012556458823382854, "learning_rate": 5.4087042780755825e-06, "loss": 0.0007, "step": 128780 }, { "epoch": 1.0531954041787628, "grad_norm": 0.05012540519237518, "learning_rate": 5.407993030217231e-06, "loss": 0.0009, "step": 128790 }, { "epoch": 1.053277180357362, "grad_norm": 0.06734326481819153, "learning_rate": 5.40728177404773e-06, "loss": 0.0008, "step": 128800 }, { "epoch": 1.0533589565359611, "grad_norm": 0.0880030170083046, "learning_rate": 5.406570509581569e-06, "loss": 0.0011, "step": 128810 }, { "epoch": 1.0534407327145603, "grad_norm": 0.032302238047122955, "learning_rate": 5.405859236833234e-06, "loss": 0.0007, "step": 128820 }, { "epoch": 1.0535225088931595, "grad_norm": 0.01129445992410183, "learning_rate": 5.405147955817218e-06, "loss": 0.0014, "step": 128830 }, { "epoch": 1.0536042850717586, "grad_norm": 0.0691634938120842, "learning_rate": 5.4044366665480076e-06, "loss": 0.0012, "step": 128840 }, { "epoch": 1.0536860612503578, "grad_norm": 0.0705733522772789, "learning_rate": 5.403725369040095e-06, "loss": 0.0007, "step": 128850 }, { "epoch": 1.053767837428957, "grad_norm": 0.03596048802137375, "learning_rate": 5.403014063307967e-06, "loss": 0.0012, "step": 128860 }, { "epoch": 1.0538496136075561, "grad_norm": 0.08334583789110184, "learning_rate": 5.402302749366116e-06, "loss": 0.0014, "step": 128870 }, { "epoch": 1.0539313897861553, "grad_norm": 0.08146495372056961, "learning_rate": 5.4015914272290305e-06, "loss": 0.0005, "step": 128880 }, { "epoch": 1.0540131659647545, "grad_norm": 0.03338612616062164, "learning_rate": 5.400880096911202e-06, "loss": 0.0013, "step": 128890 }, { "epoch": 1.0540949421433536, "grad_norm": 0.0566227063536644, "learning_rate": 5.400168758427119e-06, "loss": 0.0013, "step": 128900 }, { "epoch": 1.0541767183219528, "grad_norm": 0.04066217318177223, "learning_rate": 5.399457411791273e-06, "loss": 0.0009, "step": 128910 }, { "epoch": 1.054258494500552, "grad_norm": 0.039671700447797775, "learning_rate": 5.398746057018156e-06, "loss": 0.0015, "step": 128920 }, { "epoch": 1.0543402706791511, "grad_norm": 0.09567975997924805, "learning_rate": 5.398034694122257e-06, "loss": 0.0008, "step": 128930 }, { "epoch": 1.0544220468577503, "grad_norm": 0.20782984793186188, "learning_rate": 5.397323323118068e-06, "loss": 0.0019, "step": 128940 }, { "epoch": 1.0545038230363495, "grad_norm": 0.020416971296072006, "learning_rate": 5.39661194402008e-06, "loss": 0.0012, "step": 128950 }, { "epoch": 1.0545855992149487, "grad_norm": 0.009925802238285542, "learning_rate": 5.395900556842784e-06, "loss": 0.001, "step": 128960 }, { "epoch": 1.0546673753935478, "grad_norm": 0.013927974738180637, "learning_rate": 5.395189161600671e-06, "loss": 0.0005, "step": 128970 }, { "epoch": 1.054749151572147, "grad_norm": 0.03449508175253868, "learning_rate": 5.394477758308235e-06, "loss": 0.0013, "step": 128980 }, { "epoch": 1.0548309277507462, "grad_norm": 0.024836478754878044, "learning_rate": 5.393766346979966e-06, "loss": 0.0008, "step": 128990 }, { "epoch": 1.0549127039293453, "grad_norm": 0.053553029894828796, "learning_rate": 5.393054927630357e-06, "loss": 0.0009, "step": 129000 }, { "epoch": 1.0549944801079445, "grad_norm": 0.01749570295214653, "learning_rate": 5.392343500273899e-06, "loss": 0.0013, "step": 129010 }, { "epoch": 1.0550762562865437, "grad_norm": 0.3347640931606293, "learning_rate": 5.391632064925084e-06, "loss": 0.0009, "step": 129020 }, { "epoch": 1.0551580324651428, "grad_norm": 0.10517161339521408, "learning_rate": 5.390920621598406e-06, "loss": 0.0016, "step": 129030 }, { "epoch": 1.055239808643742, "grad_norm": 0.00878996029496193, "learning_rate": 5.390209170308358e-06, "loss": 0.0012, "step": 129040 }, { "epoch": 1.0553215848223412, "grad_norm": 0.07840252667665482, "learning_rate": 5.389497711069433e-06, "loss": 0.0011, "step": 129050 }, { "epoch": 1.0554033610009403, "grad_norm": 0.014914807863533497, "learning_rate": 5.38878624389612e-06, "loss": 0.0007, "step": 129060 }, { "epoch": 1.0554851371795395, "grad_norm": 0.060480255633592606, "learning_rate": 5.388074768802918e-06, "loss": 0.0013, "step": 129070 }, { "epoch": 1.0555669133581387, "grad_norm": 0.024022402241826057, "learning_rate": 5.387363285804315e-06, "loss": 0.0009, "step": 129080 }, { "epoch": 1.0556486895367378, "grad_norm": 0.053956177085638046, "learning_rate": 5.386651794914809e-06, "loss": 0.0017, "step": 129090 }, { "epoch": 1.055730465715337, "grad_norm": 0.028390884399414062, "learning_rate": 5.3859402961488915e-06, "loss": 0.0012, "step": 129100 }, { "epoch": 1.0558122418939364, "grad_norm": 0.001878201961517334, "learning_rate": 5.385228789521056e-06, "loss": 0.0006, "step": 129110 }, { "epoch": 1.0558940180725354, "grad_norm": 0.07278245687484741, "learning_rate": 5.384517275045797e-06, "loss": 0.0008, "step": 129120 }, { "epoch": 1.0559757942511347, "grad_norm": 0.04907058924436569, "learning_rate": 5.383805752737609e-06, "loss": 0.0008, "step": 129130 }, { "epoch": 1.056057570429734, "grad_norm": 0.04436507448554039, "learning_rate": 5.383094222610985e-06, "loss": 0.0014, "step": 129140 }, { "epoch": 1.056139346608333, "grad_norm": 0.10454650968313217, "learning_rate": 5.3823826846804205e-06, "loss": 0.001, "step": 129150 }, { "epoch": 1.0562211227869323, "grad_norm": 0.03167722746729851, "learning_rate": 5.381671138960411e-06, "loss": 0.0008, "step": 129160 }, { "epoch": 1.0563028989655314, "grad_norm": 0.014734635129570961, "learning_rate": 5.380959585465448e-06, "loss": 0.0008, "step": 129170 }, { "epoch": 1.0563846751441306, "grad_norm": 0.05910569801926613, "learning_rate": 5.38024802421003e-06, "loss": 0.0016, "step": 129180 }, { "epoch": 1.0564664513227298, "grad_norm": 0.054120711982250214, "learning_rate": 5.37953645520865e-06, "loss": 0.0012, "step": 129190 }, { "epoch": 1.056548227501329, "grad_norm": 0.01698000729084015, "learning_rate": 5.3788248784758046e-06, "loss": 0.0012, "step": 129200 }, { "epoch": 1.056630003679928, "grad_norm": 0.03862190619111061, "learning_rate": 5.378113294025988e-06, "loss": 0.001, "step": 129210 }, { "epoch": 1.0567117798585273, "grad_norm": 0.07927753031253815, "learning_rate": 5.377401701873697e-06, "loss": 0.0011, "step": 129220 }, { "epoch": 1.0567935560371264, "grad_norm": 0.044472455978393555, "learning_rate": 5.376690102033427e-06, "loss": 0.0008, "step": 129230 }, { "epoch": 1.0568753322157256, "grad_norm": 0.09963918477296829, "learning_rate": 5.375978494519672e-06, "loss": 0.0012, "step": 129240 }, { "epoch": 1.0569571083943248, "grad_norm": 0.15489405393600464, "learning_rate": 5.37526687934693e-06, "loss": 0.0011, "step": 129250 }, { "epoch": 1.057038884572924, "grad_norm": 0.02123716101050377, "learning_rate": 5.374555256529695e-06, "loss": 0.0018, "step": 129260 }, { "epoch": 1.0571206607515231, "grad_norm": 0.007946252822875977, "learning_rate": 5.373843626082468e-06, "loss": 0.0008, "step": 129270 }, { "epoch": 1.0572024369301223, "grad_norm": 0.06105845421552658, "learning_rate": 5.373131988019741e-06, "loss": 0.0007, "step": 129280 }, { "epoch": 1.0572842131087214, "grad_norm": 0.056235894560813904, "learning_rate": 5.372420342356013e-06, "loss": 0.0009, "step": 129290 }, { "epoch": 1.0573659892873206, "grad_norm": 0.03685712814331055, "learning_rate": 5.371708689105778e-06, "loss": 0.0007, "step": 129300 }, { "epoch": 1.0574477654659198, "grad_norm": 0.12246774137020111, "learning_rate": 5.370997028283536e-06, "loss": 0.0017, "step": 129310 }, { "epoch": 1.057529541644519, "grad_norm": 0.04919906333088875, "learning_rate": 5.370285359903782e-06, "loss": 0.0007, "step": 129320 }, { "epoch": 1.0576113178231181, "grad_norm": 0.004948937334120274, "learning_rate": 5.3695736839810155e-06, "loss": 0.0008, "step": 129330 }, { "epoch": 1.0576930940017173, "grad_norm": 0.024028023704886436, "learning_rate": 5.368862000529733e-06, "loss": 0.0008, "step": 129340 }, { "epoch": 1.0577748701803165, "grad_norm": 0.04436246678233147, "learning_rate": 5.368150309564432e-06, "loss": 0.0006, "step": 129350 }, { "epoch": 1.0578566463589156, "grad_norm": 0.04058745130896568, "learning_rate": 5.367438611099608e-06, "loss": 0.0024, "step": 129360 }, { "epoch": 1.0579384225375148, "grad_norm": 0.025451291352510452, "learning_rate": 5.366726905149762e-06, "loss": 0.0009, "step": 129370 }, { "epoch": 1.058020198716114, "grad_norm": 0.025891708210110664, "learning_rate": 5.366015191729389e-06, "loss": 0.0014, "step": 129380 }, { "epoch": 1.0581019748947131, "grad_norm": 0.051700521260499954, "learning_rate": 5.365303470852992e-06, "loss": 0.0008, "step": 129390 }, { "epoch": 1.0581837510733123, "grad_norm": 0.03825349360704422, "learning_rate": 5.364591742535065e-06, "loss": 0.0008, "step": 129400 }, { "epoch": 1.0582655272519115, "grad_norm": 0.048259440809488297, "learning_rate": 5.363880006790107e-06, "loss": 0.0014, "step": 129410 }, { "epoch": 1.0583473034305106, "grad_norm": 0.002357825869694352, "learning_rate": 5.363168263632619e-06, "loss": 0.0011, "step": 129420 }, { "epoch": 1.0584290796091098, "grad_norm": 0.027084341272711754, "learning_rate": 5.362456513077098e-06, "loss": 0.001, "step": 129430 }, { "epoch": 1.058510855787709, "grad_norm": 0.017822623252868652, "learning_rate": 5.361744755138042e-06, "loss": 0.0008, "step": 129440 }, { "epoch": 1.0585926319663082, "grad_norm": 0.03422866389155388, "learning_rate": 5.361032989829952e-06, "loss": 0.001, "step": 129450 }, { "epoch": 1.0586744081449073, "grad_norm": 0.017171351239085197, "learning_rate": 5.360321217167327e-06, "loss": 0.0015, "step": 129460 }, { "epoch": 1.0587561843235065, "grad_norm": 0.033358752727508545, "learning_rate": 5.359609437164667e-06, "loss": 0.0014, "step": 129470 }, { "epoch": 1.0588379605021057, "grad_norm": 0.030268048867583275, "learning_rate": 5.358897649836468e-06, "loss": 0.0019, "step": 129480 }, { "epoch": 1.0589197366807048, "grad_norm": 0.07567392289638519, "learning_rate": 5.358185855197233e-06, "loss": 0.001, "step": 129490 }, { "epoch": 1.059001512859304, "grad_norm": 0.10094473510980606, "learning_rate": 5.357474053261461e-06, "loss": 0.0016, "step": 129500 }, { "epoch": 1.0590832890379032, "grad_norm": 0.015910714864730835, "learning_rate": 5.356762244043652e-06, "loss": 0.0008, "step": 129510 }, { "epoch": 1.0591650652165023, "grad_norm": 0.014747239649295807, "learning_rate": 5.356050427558307e-06, "loss": 0.0018, "step": 129520 }, { "epoch": 1.0592468413951015, "grad_norm": 0.08191727101802826, "learning_rate": 5.355338603819925e-06, "loss": 0.0009, "step": 129530 }, { "epoch": 1.059328617573701, "grad_norm": 0.01763160340487957, "learning_rate": 5.354626772843006e-06, "loss": 0.0013, "step": 129540 }, { "epoch": 1.0594103937523, "grad_norm": 0.09092094749212265, "learning_rate": 5.353914934642052e-06, "loss": 0.0018, "step": 129550 }, { "epoch": 1.0594921699308992, "grad_norm": 0.04300757125020027, "learning_rate": 5.353203089231561e-06, "loss": 0.001, "step": 129560 }, { "epoch": 1.0595739461094984, "grad_norm": 0.06073417887091637, "learning_rate": 5.352491236626039e-06, "loss": 0.001, "step": 129570 }, { "epoch": 1.0596557222880976, "grad_norm": 0.02607007697224617, "learning_rate": 5.351779376839983e-06, "loss": 0.0018, "step": 129580 }, { "epoch": 1.0597374984666967, "grad_norm": 0.004333387594670057, "learning_rate": 5.351067509887895e-06, "loss": 0.0006, "step": 129590 }, { "epoch": 1.059819274645296, "grad_norm": 0.024192268028855324, "learning_rate": 5.350355635784277e-06, "loss": 0.0012, "step": 129600 }, { "epoch": 1.059901050823895, "grad_norm": 0.026530755683779716, "learning_rate": 5.349643754543629e-06, "loss": 0.001, "step": 129610 }, { "epoch": 1.0599828270024942, "grad_norm": 0.036530595272779465, "learning_rate": 5.3489318661804535e-06, "loss": 0.001, "step": 129620 }, { "epoch": 1.0600646031810934, "grad_norm": 0.23580417037010193, "learning_rate": 5.348219970709253e-06, "loss": 0.0026, "step": 129630 }, { "epoch": 1.0601463793596926, "grad_norm": 0.03528326749801636, "learning_rate": 5.347508068144528e-06, "loss": 0.0007, "step": 129640 }, { "epoch": 1.0602281555382917, "grad_norm": 0.037409160286188126, "learning_rate": 5.346796158500782e-06, "loss": 0.0018, "step": 129650 }, { "epoch": 1.060309931716891, "grad_norm": 0.036608919501304626, "learning_rate": 5.346084241792515e-06, "loss": 0.001, "step": 129660 }, { "epoch": 1.06039170789549, "grad_norm": 0.02970326505601406, "learning_rate": 5.345372318034232e-06, "loss": 0.001, "step": 129670 }, { "epoch": 1.0604734840740893, "grad_norm": 0.08902831375598907, "learning_rate": 5.344660387240433e-06, "loss": 0.001, "step": 129680 }, { "epoch": 1.0605552602526884, "grad_norm": 0.09501226991415024, "learning_rate": 5.3439484494256236e-06, "loss": 0.0012, "step": 129690 }, { "epoch": 1.0606370364312876, "grad_norm": 0.05401322618126869, "learning_rate": 5.343236504604303e-06, "loss": 0.0012, "step": 129700 }, { "epoch": 1.0607188126098868, "grad_norm": 0.046608634293079376, "learning_rate": 5.342524552790977e-06, "loss": 0.0007, "step": 129710 }, { "epoch": 1.060800588788486, "grad_norm": 0.04017502814531326, "learning_rate": 5.3418125940001465e-06, "loss": 0.001, "step": 129720 }, { "epoch": 1.060882364967085, "grad_norm": 0.009419684298336506, "learning_rate": 5.341100628246316e-06, "loss": 0.0006, "step": 129730 }, { "epoch": 1.0609641411456843, "grad_norm": 0.05964246392250061, "learning_rate": 5.340388655543988e-06, "loss": 0.0013, "step": 129740 }, { "epoch": 1.0610459173242834, "grad_norm": 0.17656873166561127, "learning_rate": 5.339676675907667e-06, "loss": 0.0007, "step": 129750 }, { "epoch": 1.0611276935028826, "grad_norm": 0.004583589732646942, "learning_rate": 5.338964689351857e-06, "loss": 0.0004, "step": 129760 }, { "epoch": 1.0612094696814818, "grad_norm": 0.041457973420619965, "learning_rate": 5.3382526958910586e-06, "loss": 0.001, "step": 129770 }, { "epoch": 1.061291245860081, "grad_norm": 0.010557372123003006, "learning_rate": 5.337540695539778e-06, "loss": 0.0007, "step": 129780 }, { "epoch": 1.0613730220386801, "grad_norm": 0.08473990857601166, "learning_rate": 5.336828688312521e-06, "loss": 0.0008, "step": 129790 }, { "epoch": 1.0614547982172793, "grad_norm": 0.10610596090555191, "learning_rate": 5.336116674223788e-06, "loss": 0.0011, "step": 129800 }, { "epoch": 1.0615365743958785, "grad_norm": 0.05042937397956848, "learning_rate": 5.335404653288086e-06, "loss": 0.0013, "step": 129810 }, { "epoch": 1.0616183505744776, "grad_norm": 0.01708436757326126, "learning_rate": 5.334692625519918e-06, "loss": 0.0011, "step": 129820 }, { "epoch": 1.0617001267530768, "grad_norm": 0.05301178991794586, "learning_rate": 5.3339805909337905e-06, "loss": 0.0012, "step": 129830 }, { "epoch": 1.061781902931676, "grad_norm": 0.020135581493377686, "learning_rate": 5.333268549544205e-06, "loss": 0.0005, "step": 129840 }, { "epoch": 1.0618636791102751, "grad_norm": 0.13963575661182404, "learning_rate": 5.33255650136567e-06, "loss": 0.0013, "step": 129850 }, { "epoch": 1.0619454552888743, "grad_norm": 0.0340576246380806, "learning_rate": 5.331844446412687e-06, "loss": 0.002, "step": 129860 }, { "epoch": 1.0620272314674735, "grad_norm": 0.06251034885644913, "learning_rate": 5.331132384699765e-06, "loss": 0.0007, "step": 129870 }, { "epoch": 1.0621090076460726, "grad_norm": 0.006845737341791391, "learning_rate": 5.3304203162414055e-06, "loss": 0.0008, "step": 129880 }, { "epoch": 1.0621907838246718, "grad_norm": 0.15784133970737457, "learning_rate": 5.329708241052116e-06, "loss": 0.0007, "step": 129890 }, { "epoch": 1.062272560003271, "grad_norm": 0.030202573165297508, "learning_rate": 5.3289961591464015e-06, "loss": 0.0008, "step": 129900 }, { "epoch": 1.0623543361818701, "grad_norm": 0.2628765404224396, "learning_rate": 5.328284070538768e-06, "loss": 0.0041, "step": 129910 }, { "epoch": 1.0624361123604693, "grad_norm": 0.033348847180604935, "learning_rate": 5.32757197524372e-06, "loss": 0.001, "step": 129920 }, { "epoch": 1.0625178885390685, "grad_norm": 0.1332375854253769, "learning_rate": 5.326859873275767e-06, "loss": 0.001, "step": 129930 }, { "epoch": 1.0625996647176676, "grad_norm": 0.028780721127986908, "learning_rate": 5.326147764649412e-06, "loss": 0.0009, "step": 129940 }, { "epoch": 1.0626814408962668, "grad_norm": 0.1016438901424408, "learning_rate": 5.3254356493791615e-06, "loss": 0.0014, "step": 129950 }, { "epoch": 1.062763217074866, "grad_norm": 0.06667085736989975, "learning_rate": 5.324723527479522e-06, "loss": 0.0011, "step": 129960 }, { "epoch": 1.0628449932534654, "grad_norm": 0.1047830656170845, "learning_rate": 5.324011398965e-06, "loss": 0.0015, "step": 129970 }, { "epoch": 1.0629267694320643, "grad_norm": 0.02291044220328331, "learning_rate": 5.3232992638501025e-06, "loss": 0.0018, "step": 129980 }, { "epoch": 1.0630085456106637, "grad_norm": 0.02169753797352314, "learning_rate": 5.322587122149337e-06, "loss": 0.0006, "step": 129990 }, { "epoch": 1.0630903217892629, "grad_norm": 0.03694050759077072, "learning_rate": 5.321874973877211e-06, "loss": 0.0012, "step": 130000 }, { "epoch": 1.063172097967862, "grad_norm": 0.0617285780608654, "learning_rate": 5.321162819048228e-06, "loss": 0.0007, "step": 130010 }, { "epoch": 1.0632538741464612, "grad_norm": 0.209077849984169, "learning_rate": 5.320450657676898e-06, "loss": 0.0017, "step": 130020 }, { "epoch": 1.0633356503250604, "grad_norm": 0.14610518515110016, "learning_rate": 5.319738489777726e-06, "loss": 0.0012, "step": 130030 }, { "epoch": 1.0634174265036596, "grad_norm": 0.0743148997426033, "learning_rate": 5.319026315365223e-06, "loss": 0.0018, "step": 130040 }, { "epoch": 1.0634992026822587, "grad_norm": 0.048927415162324905, "learning_rate": 5.318314134453894e-06, "loss": 0.0013, "step": 130050 }, { "epoch": 1.063580978860858, "grad_norm": 0.048427823930978775, "learning_rate": 5.317601947058247e-06, "loss": 0.0005, "step": 130060 }, { "epoch": 1.063662755039457, "grad_norm": 0.09644138067960739, "learning_rate": 5.316889753192791e-06, "loss": 0.0009, "step": 130070 }, { "epoch": 1.0637445312180562, "grad_norm": 0.12371408939361572, "learning_rate": 5.316177552872033e-06, "loss": 0.0012, "step": 130080 }, { "epoch": 1.0638263073966554, "grad_norm": 0.061957161873579025, "learning_rate": 5.31546534611048e-06, "loss": 0.0008, "step": 130090 }, { "epoch": 1.0639080835752546, "grad_norm": 0.03893687576055527, "learning_rate": 5.3147531329226435e-06, "loss": 0.001, "step": 130100 }, { "epoch": 1.0639898597538537, "grad_norm": 0.046807751059532166, "learning_rate": 5.314040913323028e-06, "loss": 0.0009, "step": 130110 }, { "epoch": 1.064071635932453, "grad_norm": 0.022278372198343277, "learning_rate": 5.313328687326145e-06, "loss": 0.0005, "step": 130120 }, { "epoch": 1.064153412111052, "grad_norm": 0.06192731484770775, "learning_rate": 5.312616454946501e-06, "loss": 0.0011, "step": 130130 }, { "epoch": 1.0642351882896512, "grad_norm": 0.09203307330608368, "learning_rate": 5.311904216198607e-06, "loss": 0.0012, "step": 130140 }, { "epoch": 1.0643169644682504, "grad_norm": 0.04485524073243141, "learning_rate": 5.31119197109697e-06, "loss": 0.0013, "step": 130150 }, { "epoch": 1.0643987406468496, "grad_norm": 0.035311732441186905, "learning_rate": 5.310479719656099e-06, "loss": 0.0014, "step": 130160 }, { "epoch": 1.0644805168254488, "grad_norm": 0.0808248519897461, "learning_rate": 5.3097674618905035e-06, "loss": 0.0019, "step": 130170 }, { "epoch": 1.064562293004048, "grad_norm": 0.03259054571390152, "learning_rate": 5.309055197814695e-06, "loss": 0.0014, "step": 130180 }, { "epoch": 1.064644069182647, "grad_norm": 0.15730640292167664, "learning_rate": 5.308342927443179e-06, "loss": 0.0028, "step": 130190 }, { "epoch": 1.0647258453612463, "grad_norm": 0.09091485291719437, "learning_rate": 5.307630650790467e-06, "loss": 0.0009, "step": 130200 }, { "epoch": 1.0648076215398454, "grad_norm": 0.029606333002448082, "learning_rate": 5.3069183678710686e-06, "loss": 0.0004, "step": 130210 }, { "epoch": 1.0648893977184446, "grad_norm": 0.03842705488204956, "learning_rate": 5.306206078699495e-06, "loss": 0.001, "step": 130220 }, { "epoch": 1.0649711738970438, "grad_norm": 0.05943305417895317, "learning_rate": 5.305493783290253e-06, "loss": 0.0006, "step": 130230 }, { "epoch": 1.065052950075643, "grad_norm": 0.07366017997264862, "learning_rate": 5.304781481657855e-06, "loss": 0.0011, "step": 130240 }, { "epoch": 1.065134726254242, "grad_norm": 0.002759842202067375, "learning_rate": 5.304069173816811e-06, "loss": 0.0006, "step": 130250 }, { "epoch": 1.0652165024328413, "grad_norm": 0.003755429992452264, "learning_rate": 5.303356859781631e-06, "loss": 0.0008, "step": 130260 }, { "epoch": 1.0652982786114404, "grad_norm": 0.07282018661499023, "learning_rate": 5.302644539566823e-06, "loss": 0.0015, "step": 130270 }, { "epoch": 1.0653800547900396, "grad_norm": 0.019367359578609467, "learning_rate": 5.301932213186902e-06, "loss": 0.0009, "step": 130280 }, { "epoch": 1.0654618309686388, "grad_norm": 0.0680919885635376, "learning_rate": 5.3012198806563754e-06, "loss": 0.0012, "step": 130290 }, { "epoch": 1.065543607147238, "grad_norm": 0.08380156755447388, "learning_rate": 5.300507541989756e-06, "loss": 0.0007, "step": 130300 }, { "epoch": 1.0656253833258371, "grad_norm": 0.09500474482774734, "learning_rate": 5.299795197201553e-06, "loss": 0.0007, "step": 130310 }, { "epoch": 1.0657071595044363, "grad_norm": 0.08900672942399979, "learning_rate": 5.299082846306278e-06, "loss": 0.001, "step": 130320 }, { "epoch": 1.0657889356830355, "grad_norm": 0.11639133840799332, "learning_rate": 5.298370489318442e-06, "loss": 0.0017, "step": 130330 }, { "epoch": 1.0658707118616346, "grad_norm": 0.020718201994895935, "learning_rate": 5.297658126252557e-06, "loss": 0.0006, "step": 130340 }, { "epoch": 1.0659524880402338, "grad_norm": 0.01800147444009781, "learning_rate": 5.296945757123134e-06, "loss": 0.0004, "step": 130350 }, { "epoch": 1.066034264218833, "grad_norm": 0.034983113408088684, "learning_rate": 5.296233381944684e-06, "loss": 0.0015, "step": 130360 }, { "epoch": 1.0661160403974321, "grad_norm": 0.231215700507164, "learning_rate": 5.295521000731722e-06, "loss": 0.002, "step": 130370 }, { "epoch": 1.0661978165760313, "grad_norm": 0.06134551763534546, "learning_rate": 5.2948086134987545e-06, "loss": 0.001, "step": 130380 }, { "epoch": 1.0662795927546305, "grad_norm": 0.0029947166331112385, "learning_rate": 5.294096220260295e-06, "loss": 0.0021, "step": 130390 }, { "epoch": 1.0663613689332299, "grad_norm": 0.10048434883356094, "learning_rate": 5.293383821030859e-06, "loss": 0.0034, "step": 130400 }, { "epoch": 1.066443145111829, "grad_norm": 0.03972404822707176, "learning_rate": 5.292671415824954e-06, "loss": 0.0007, "step": 130410 }, { "epoch": 1.0665249212904282, "grad_norm": 0.07212960720062256, "learning_rate": 5.2919590046570945e-06, "loss": 0.001, "step": 130420 }, { "epoch": 1.0666066974690274, "grad_norm": 0.01994055137038231, "learning_rate": 5.2912465875417936e-06, "loss": 0.0017, "step": 130430 }, { "epoch": 1.0666884736476265, "grad_norm": 0.0538751594722271, "learning_rate": 5.290534164493564e-06, "loss": 0.0012, "step": 130440 }, { "epoch": 1.0667702498262257, "grad_norm": 0.07132013142108917, "learning_rate": 5.289821735526914e-06, "loss": 0.0012, "step": 130450 }, { "epoch": 1.0668520260048249, "grad_norm": 0.0032345277722924948, "learning_rate": 5.2891093006563635e-06, "loss": 0.0014, "step": 130460 }, { "epoch": 1.066933802183424, "grad_norm": 0.023358825594186783, "learning_rate": 5.288396859896419e-06, "loss": 0.0009, "step": 130470 }, { "epoch": 1.0670155783620232, "grad_norm": 0.04285562410950661, "learning_rate": 5.287684413261597e-06, "loss": 0.0008, "step": 130480 }, { "epoch": 1.0670973545406224, "grad_norm": 0.04546497389674187, "learning_rate": 5.286971960766409e-06, "loss": 0.0009, "step": 130490 }, { "epoch": 1.0671791307192215, "grad_norm": 0.003824672196060419, "learning_rate": 5.28625950242537e-06, "loss": 0.0007, "step": 130500 }, { "epoch": 1.0672609068978207, "grad_norm": 0.007444888819009066, "learning_rate": 5.28554703825299e-06, "loss": 0.0009, "step": 130510 }, { "epoch": 1.0673426830764199, "grad_norm": 0.017676446586847305, "learning_rate": 5.284834568263788e-06, "loss": 0.0007, "step": 130520 }, { "epoch": 1.067424459255019, "grad_norm": 0.031529899686574936, "learning_rate": 5.284122092472272e-06, "loss": 0.0008, "step": 130530 }, { "epoch": 1.0675062354336182, "grad_norm": 0.05335712060332298, "learning_rate": 5.283409610892959e-06, "loss": 0.0012, "step": 130540 }, { "epoch": 1.0675880116122174, "grad_norm": 0.0010049615520983934, "learning_rate": 5.282697123540361e-06, "loss": 0.0012, "step": 130550 }, { "epoch": 1.0676697877908166, "grad_norm": 0.02508990839123726, "learning_rate": 5.281984630428994e-06, "loss": 0.0011, "step": 130560 }, { "epoch": 1.0677515639694157, "grad_norm": 0.04529884085059166, "learning_rate": 5.28127213157337e-06, "loss": 0.0017, "step": 130570 }, { "epoch": 1.067833340148015, "grad_norm": 0.03139002248644829, "learning_rate": 5.280559626988004e-06, "loss": 0.0007, "step": 130580 }, { "epoch": 1.067915116326614, "grad_norm": 0.03829631581902504, "learning_rate": 5.27984711668741e-06, "loss": 0.0042, "step": 130590 }, { "epoch": 1.0679968925052132, "grad_norm": 0.04587161913514137, "learning_rate": 5.2791346006861045e-06, "loss": 0.0011, "step": 130600 }, { "epoch": 1.0680786686838124, "grad_norm": 0.04050422087311745, "learning_rate": 5.278422078998599e-06, "loss": 0.0008, "step": 130610 }, { "epoch": 1.0681604448624116, "grad_norm": 0.08182619512081146, "learning_rate": 5.277709551639411e-06, "loss": 0.0007, "step": 130620 }, { "epoch": 1.0682422210410107, "grad_norm": 0.02881530486047268, "learning_rate": 5.276997018623052e-06, "loss": 0.001, "step": 130630 }, { "epoch": 1.06832399721961, "grad_norm": 0.0349307619035244, "learning_rate": 5.276284479964038e-06, "loss": 0.0008, "step": 130640 }, { "epoch": 1.068405773398209, "grad_norm": 0.09318644553422928, "learning_rate": 5.2755719356768855e-06, "loss": 0.0007, "step": 130650 }, { "epoch": 1.0684875495768082, "grad_norm": 0.05237395688891411, "learning_rate": 5.274859385776109e-06, "loss": 0.0015, "step": 130660 }, { "epoch": 1.0685693257554074, "grad_norm": 0.033266834914684296, "learning_rate": 5.274146830276222e-06, "loss": 0.0009, "step": 130670 }, { "epoch": 1.0686511019340066, "grad_norm": 0.02219597063958645, "learning_rate": 5.273434269191743e-06, "loss": 0.0015, "step": 130680 }, { "epoch": 1.0687328781126058, "grad_norm": 0.005863804370164871, "learning_rate": 5.272721702537185e-06, "loss": 0.001, "step": 130690 }, { "epoch": 1.068814654291205, "grad_norm": 0.024863867089152336, "learning_rate": 5.2720091303270635e-06, "loss": 0.0011, "step": 130700 }, { "epoch": 1.068896430469804, "grad_norm": 0.038275085389614105, "learning_rate": 5.271296552575895e-06, "loss": 0.0011, "step": 130710 }, { "epoch": 1.0689782066484033, "grad_norm": 0.08557571470737457, "learning_rate": 5.270583969298196e-06, "loss": 0.0014, "step": 130720 }, { "epoch": 1.0690599828270024, "grad_norm": 0.022471532225608826, "learning_rate": 5.2698713805084824e-06, "loss": 0.0008, "step": 130730 }, { "epoch": 1.0691417590056016, "grad_norm": 0.016146494075655937, "learning_rate": 5.269158786221269e-06, "loss": 0.0007, "step": 130740 }, { "epoch": 1.0692235351842008, "grad_norm": 0.001648151082918048, "learning_rate": 5.26844618645107e-06, "loss": 0.0011, "step": 130750 }, { "epoch": 1.0693053113628, "grad_norm": 0.0024016080424189568, "learning_rate": 5.267733581212406e-06, "loss": 0.0011, "step": 130760 }, { "epoch": 1.069387087541399, "grad_norm": 0.05327535420656204, "learning_rate": 5.267020970519791e-06, "loss": 0.0015, "step": 130770 }, { "epoch": 1.0694688637199983, "grad_norm": 0.01621931418776512, "learning_rate": 5.266308354387742e-06, "loss": 0.0006, "step": 130780 }, { "epoch": 1.0695506398985974, "grad_norm": 0.03908991441130638, "learning_rate": 5.265595732830776e-06, "loss": 0.0012, "step": 130790 }, { "epoch": 1.0696324160771966, "grad_norm": 0.19076471030712128, "learning_rate": 5.264883105863409e-06, "loss": 0.0034, "step": 130800 }, { "epoch": 1.069714192255796, "grad_norm": 0.13255369663238525, "learning_rate": 5.264170473500156e-06, "loss": 0.0012, "step": 130810 }, { "epoch": 1.069795968434395, "grad_norm": 0.0619816817343235, "learning_rate": 5.2634578357555374e-06, "loss": 0.0011, "step": 130820 }, { "epoch": 1.0698777446129943, "grad_norm": 0.019603349268436432, "learning_rate": 5.262745192644066e-06, "loss": 0.0007, "step": 130830 }, { "epoch": 1.0699595207915935, "grad_norm": 0.003973593004047871, "learning_rate": 5.262032544180265e-06, "loss": 0.0006, "step": 130840 }, { "epoch": 1.0700412969701927, "grad_norm": 0.08844365179538727, "learning_rate": 5.2613198903786465e-06, "loss": 0.001, "step": 130850 }, { "epoch": 1.0701230731487918, "grad_norm": 0.024466339498758316, "learning_rate": 5.260607231253729e-06, "loss": 0.0011, "step": 130860 }, { "epoch": 1.070204849327391, "grad_norm": 0.021009331569075584, "learning_rate": 5.259894566820031e-06, "loss": 0.0009, "step": 130870 }, { "epoch": 1.0702866255059902, "grad_norm": 0.03372981771826744, "learning_rate": 5.259181897092069e-06, "loss": 0.001, "step": 130880 }, { "epoch": 1.0703684016845894, "grad_norm": 0.021062973886728287, "learning_rate": 5.258469222084361e-06, "loss": 0.0016, "step": 130890 }, { "epoch": 1.0704501778631885, "grad_norm": 0.055328864604234695, "learning_rate": 5.257756541811425e-06, "loss": 0.0007, "step": 130900 }, { "epoch": 1.0705319540417877, "grad_norm": 0.029816532507538795, "learning_rate": 5.25704385628778e-06, "loss": 0.0013, "step": 130910 }, { "epoch": 1.0706137302203869, "grad_norm": 0.16081546247005463, "learning_rate": 5.2563311655279415e-06, "loss": 0.0019, "step": 130920 }, { "epoch": 1.070695506398986, "grad_norm": 0.027256201952695847, "learning_rate": 5.255618469546429e-06, "loss": 0.0022, "step": 130930 }, { "epoch": 1.0707772825775852, "grad_norm": 0.013478598557412624, "learning_rate": 5.25490576835776e-06, "loss": 0.0005, "step": 130940 }, { "epoch": 1.0708590587561844, "grad_norm": 0.02113822102546692, "learning_rate": 5.2541930619764525e-06, "loss": 0.0007, "step": 130950 }, { "epoch": 1.0709408349347835, "grad_norm": 0.07097151130437851, "learning_rate": 5.253480350417027e-06, "loss": 0.0026, "step": 130960 }, { "epoch": 1.0710226111133827, "grad_norm": 0.03567548841238022, "learning_rate": 5.252767633694002e-06, "loss": 0.0012, "step": 130970 }, { "epoch": 1.0711043872919819, "grad_norm": 0.019314493983983994, "learning_rate": 5.2520549118218935e-06, "loss": 0.0018, "step": 130980 }, { "epoch": 1.071186163470581, "grad_norm": 0.011491012759506702, "learning_rate": 5.25134218481522e-06, "loss": 0.0018, "step": 130990 }, { "epoch": 1.0712679396491802, "grad_norm": 0.06900867819786072, "learning_rate": 5.250629452688504e-06, "loss": 0.0011, "step": 131000 }, { "epoch": 1.0713497158277794, "grad_norm": 0.025614408776164055, "learning_rate": 5.249916715456261e-06, "loss": 0.0005, "step": 131010 }, { "epoch": 1.0714314920063785, "grad_norm": 0.04399077966809273, "learning_rate": 5.2492039731330125e-06, "loss": 0.0013, "step": 131020 }, { "epoch": 1.0715132681849777, "grad_norm": 0.02557327412068844, "learning_rate": 5.248491225733277e-06, "loss": 0.0006, "step": 131030 }, { "epoch": 1.0715950443635769, "grad_norm": 0.03303716331720352, "learning_rate": 5.247778473271572e-06, "loss": 0.001, "step": 131040 }, { "epoch": 1.071676820542176, "grad_norm": 0.02935847081243992, "learning_rate": 5.247065715762418e-06, "loss": 0.001, "step": 131050 }, { "epoch": 1.0717585967207752, "grad_norm": 0.010597116313874722, "learning_rate": 5.246352953220335e-06, "loss": 0.0011, "step": 131060 }, { "epoch": 1.0718403728993744, "grad_norm": 0.020889002829790115, "learning_rate": 5.245640185659842e-06, "loss": 0.0006, "step": 131070 }, { "epoch": 1.0719221490779736, "grad_norm": 0.02323862351477146, "learning_rate": 5.244927413095459e-06, "loss": 0.0011, "step": 131080 }, { "epoch": 1.0720039252565727, "grad_norm": 0.12159141898155212, "learning_rate": 5.244214635541706e-06, "loss": 0.0015, "step": 131090 }, { "epoch": 1.072085701435172, "grad_norm": 0.0998557060956955, "learning_rate": 5.2435018530131024e-06, "loss": 0.0013, "step": 131100 }, { "epoch": 1.072167477613771, "grad_norm": 0.03624821454286575, "learning_rate": 5.2427890655241665e-06, "loss": 0.0011, "step": 131110 }, { "epoch": 1.0722492537923702, "grad_norm": 0.02007262408733368, "learning_rate": 5.24207627308942e-06, "loss": 0.0008, "step": 131120 }, { "epoch": 1.0723310299709694, "grad_norm": 0.03616117313504219, "learning_rate": 5.241363475723385e-06, "loss": 0.0021, "step": 131130 }, { "epoch": 1.0724128061495686, "grad_norm": 0.036335986107587814, "learning_rate": 5.240650673440578e-06, "loss": 0.0012, "step": 131140 }, { "epoch": 1.0724945823281677, "grad_norm": 0.05629923939704895, "learning_rate": 5.239937866255522e-06, "loss": 0.0016, "step": 131150 }, { "epoch": 1.072576358506767, "grad_norm": 0.05312114581465721, "learning_rate": 5.239225054182737e-06, "loss": 0.0015, "step": 131160 }, { "epoch": 1.072658134685366, "grad_norm": 0.06336396932601929, "learning_rate": 5.2385122372367425e-06, "loss": 0.0028, "step": 131170 }, { "epoch": 1.0727399108639653, "grad_norm": 0.10763503611087799, "learning_rate": 5.23779941543206e-06, "loss": 0.0007, "step": 131180 }, { "epoch": 1.0728216870425644, "grad_norm": 0.01639317162334919, "learning_rate": 5.23708658878321e-06, "loss": 0.0007, "step": 131190 }, { "epoch": 1.0729034632211636, "grad_norm": 0.06798285990953445, "learning_rate": 5.236373757304713e-06, "loss": 0.0006, "step": 131200 }, { "epoch": 1.0729852393997628, "grad_norm": 0.025316767394542694, "learning_rate": 5.235660921011092e-06, "loss": 0.001, "step": 131210 }, { "epoch": 1.073067015578362, "grad_norm": 0.03324589878320694, "learning_rate": 5.234948079916866e-06, "loss": 0.0013, "step": 131220 }, { "epoch": 1.073148791756961, "grad_norm": 0.14147771894931793, "learning_rate": 5.234235234036556e-06, "loss": 0.0013, "step": 131230 }, { "epoch": 1.0732305679355605, "grad_norm": 0.011541408486664295, "learning_rate": 5.233522383384685e-06, "loss": 0.001, "step": 131240 }, { "epoch": 1.0733123441141594, "grad_norm": 0.033502399921417236, "learning_rate": 5.2328095279757706e-06, "loss": 0.0012, "step": 131250 }, { "epoch": 1.0733941202927588, "grad_norm": 0.03248061612248421, "learning_rate": 5.232096667824339e-06, "loss": 0.0008, "step": 131260 }, { "epoch": 1.073475896471358, "grad_norm": 0.06909770518541336, "learning_rate": 5.23138380294491e-06, "loss": 0.0012, "step": 131270 }, { "epoch": 1.0735576726499572, "grad_norm": 0.13317684829235077, "learning_rate": 5.230670933352004e-06, "loss": 0.0015, "step": 131280 }, { "epoch": 1.0736394488285563, "grad_norm": 0.08955871313810349, "learning_rate": 5.229958059060144e-06, "loss": 0.0012, "step": 131290 }, { "epoch": 1.0737212250071555, "grad_norm": 0.16682910919189453, "learning_rate": 5.229245180083851e-06, "loss": 0.0008, "step": 131300 }, { "epoch": 1.0738030011857547, "grad_norm": 0.07555627822875977, "learning_rate": 5.228532296437647e-06, "loss": 0.0016, "step": 131310 }, { "epoch": 1.0738847773643538, "grad_norm": 0.06750831007957458, "learning_rate": 5.227819408136054e-06, "loss": 0.0008, "step": 131320 }, { "epoch": 1.073966553542953, "grad_norm": 0.07777290046215057, "learning_rate": 5.2271065151935965e-06, "loss": 0.001, "step": 131330 }, { "epoch": 1.0740483297215522, "grad_norm": 0.06389448791742325, "learning_rate": 5.226393617624794e-06, "loss": 0.001, "step": 131340 }, { "epoch": 1.0741301059001513, "grad_norm": 0.05060570687055588, "learning_rate": 5.225680715444168e-06, "loss": 0.0011, "step": 131350 }, { "epoch": 1.0742118820787505, "grad_norm": 0.12533821165561676, "learning_rate": 5.224967808666244e-06, "loss": 0.0008, "step": 131360 }, { "epoch": 1.0742936582573497, "grad_norm": 0.0016796790296211839, "learning_rate": 5.224254897305542e-06, "loss": 0.0014, "step": 131370 }, { "epoch": 1.0743754344359489, "grad_norm": 0.06054924800992012, "learning_rate": 5.2235419813765855e-06, "loss": 0.0017, "step": 131380 }, { "epoch": 1.074457210614548, "grad_norm": 0.1134696826338768, "learning_rate": 5.222829060893898e-06, "loss": 0.001, "step": 131390 }, { "epoch": 1.0745389867931472, "grad_norm": 0.04509704187512398, "learning_rate": 5.222116135872001e-06, "loss": 0.0013, "step": 131400 }, { "epoch": 1.0746207629717464, "grad_norm": 0.022484049201011658, "learning_rate": 5.221403206325417e-06, "loss": 0.0011, "step": 131410 }, { "epoch": 1.0747025391503455, "grad_norm": 0.04528186470270157, "learning_rate": 5.2206902722686705e-06, "loss": 0.0018, "step": 131420 }, { "epoch": 1.0747843153289447, "grad_norm": 0.016538409516215324, "learning_rate": 5.219977333716283e-06, "loss": 0.0014, "step": 131430 }, { "epoch": 1.0748660915075439, "grad_norm": 0.03708714619278908, "learning_rate": 5.219264390682779e-06, "loss": 0.0009, "step": 131440 }, { "epoch": 1.074947867686143, "grad_norm": 0.09721828997135162, "learning_rate": 5.218551443182681e-06, "loss": 0.0013, "step": 131450 }, { "epoch": 1.0750296438647422, "grad_norm": 0.14160175621509552, "learning_rate": 5.2178384912305135e-06, "loss": 0.0013, "step": 131460 }, { "epoch": 1.0751114200433414, "grad_norm": 0.01636987179517746, "learning_rate": 5.217125534840798e-06, "loss": 0.0014, "step": 131470 }, { "epoch": 1.0751931962219405, "grad_norm": 0.059389229863882065, "learning_rate": 5.216412574028058e-06, "loss": 0.0009, "step": 131480 }, { "epoch": 1.0752749724005397, "grad_norm": 0.04212546348571777, "learning_rate": 5.215699608806819e-06, "loss": 0.0014, "step": 131490 }, { "epoch": 1.0753567485791389, "grad_norm": 0.08905646950006485, "learning_rate": 5.214986639191603e-06, "loss": 0.0016, "step": 131500 }, { "epoch": 1.075438524757738, "grad_norm": 0.03473948314785957, "learning_rate": 5.214273665196934e-06, "loss": 0.002, "step": 131510 }, { "epoch": 1.0755203009363372, "grad_norm": 0.027768975123763084, "learning_rate": 5.213560686837338e-06, "loss": 0.0013, "step": 131520 }, { "epoch": 1.0756020771149364, "grad_norm": 0.012312177568674088, "learning_rate": 5.212847704127336e-06, "loss": 0.0009, "step": 131530 }, { "epoch": 1.0756838532935356, "grad_norm": 0.01708291471004486, "learning_rate": 5.212134717081453e-06, "loss": 0.0009, "step": 131540 }, { "epoch": 1.0757656294721347, "grad_norm": 0.02543995901942253, "learning_rate": 5.2114217257142145e-06, "loss": 0.0007, "step": 131550 }, { "epoch": 1.075847405650734, "grad_norm": 0.050626955926418304, "learning_rate": 5.210708730040142e-06, "loss": 0.0007, "step": 131560 }, { "epoch": 1.075929181829333, "grad_norm": 0.04939523711800575, "learning_rate": 5.209995730073763e-06, "loss": 0.0008, "step": 131570 }, { "epoch": 1.0760109580079322, "grad_norm": 0.049035049974918365, "learning_rate": 5.2092827258295995e-06, "loss": 0.001, "step": 131580 }, { "epoch": 1.0760927341865314, "grad_norm": 0.11751936376094818, "learning_rate": 5.208569717322177e-06, "loss": 0.0011, "step": 131590 }, { "epoch": 1.0761745103651306, "grad_norm": 0.032302163541316986, "learning_rate": 5.20785670456602e-06, "loss": 0.0007, "step": 131600 }, { "epoch": 1.0762562865437297, "grad_norm": 0.013004683889448643, "learning_rate": 5.207143687575653e-06, "loss": 0.0016, "step": 131610 }, { "epoch": 1.076338062722329, "grad_norm": 0.0537099651992321, "learning_rate": 5.2064306663656e-06, "loss": 0.0008, "step": 131620 }, { "epoch": 1.076419838900928, "grad_norm": 0.03654894977807999, "learning_rate": 5.2057176409503875e-06, "loss": 0.0013, "step": 131630 }, { "epoch": 1.0765016150795272, "grad_norm": 0.10019431263208389, "learning_rate": 5.205004611344538e-06, "loss": 0.0008, "step": 131640 }, { "epoch": 1.0765833912581264, "grad_norm": 0.06312689185142517, "learning_rate": 5.204291577562579e-06, "loss": 0.0018, "step": 131650 }, { "epoch": 1.0766651674367256, "grad_norm": 0.0931088849902153, "learning_rate": 5.203578539619034e-06, "loss": 0.0009, "step": 131660 }, { "epoch": 1.076746943615325, "grad_norm": 0.04276008903980255, "learning_rate": 5.20286549752843e-06, "loss": 0.0012, "step": 131670 }, { "epoch": 1.076828719793924, "grad_norm": 0.05531881004571915, "learning_rate": 5.202152451305289e-06, "loss": 0.0013, "step": 131680 }, { "epoch": 1.0769104959725233, "grad_norm": 0.05577598139643669, "learning_rate": 5.20143940096414e-06, "loss": 0.0014, "step": 131690 }, { "epoch": 1.0769922721511225, "grad_norm": 0.026619989424943924, "learning_rate": 5.200726346519505e-06, "loss": 0.0006, "step": 131700 }, { "epoch": 1.0770740483297216, "grad_norm": 0.009830199182033539, "learning_rate": 5.200013287985912e-06, "loss": 0.0014, "step": 131710 }, { "epoch": 1.0771558245083208, "grad_norm": 0.03081437200307846, "learning_rate": 5.199300225377886e-06, "loss": 0.0013, "step": 131720 }, { "epoch": 1.07723760068692, "grad_norm": 0.03633912652730942, "learning_rate": 5.198587158709952e-06, "loss": 0.001, "step": 131730 }, { "epoch": 1.0773193768655192, "grad_norm": 0.04952608793973923, "learning_rate": 5.197874087996636e-06, "loss": 0.0007, "step": 131740 }, { "epoch": 1.0774011530441183, "grad_norm": 0.018774978816509247, "learning_rate": 5.197161013252465e-06, "loss": 0.0004, "step": 131750 }, { "epoch": 1.0774829292227175, "grad_norm": 0.06816008687019348, "learning_rate": 5.196447934491963e-06, "loss": 0.0016, "step": 131760 }, { "epoch": 1.0775647054013167, "grad_norm": 0.05722521245479584, "learning_rate": 5.195734851729657e-06, "loss": 0.0007, "step": 131770 }, { "epoch": 1.0776464815799158, "grad_norm": 0.016519417986273766, "learning_rate": 5.195021764980073e-06, "loss": 0.0016, "step": 131780 }, { "epoch": 1.077728257758515, "grad_norm": 0.10211040824651718, "learning_rate": 5.194308674257737e-06, "loss": 0.001, "step": 131790 }, { "epoch": 1.0778100339371142, "grad_norm": 0.0574214905500412, "learning_rate": 5.193595579577175e-06, "loss": 0.0017, "step": 131800 }, { "epoch": 1.0778918101157133, "grad_norm": 0.029532765969634056, "learning_rate": 5.192882480952915e-06, "loss": 0.0008, "step": 131810 }, { "epoch": 1.0779735862943125, "grad_norm": 0.07560554891824722, "learning_rate": 5.192169378399481e-06, "loss": 0.0016, "step": 131820 }, { "epoch": 1.0780553624729117, "grad_norm": 0.04188672825694084, "learning_rate": 5.191456271931401e-06, "loss": 0.0009, "step": 131830 }, { "epoch": 1.0781371386515108, "grad_norm": 0.04589293152093887, "learning_rate": 5.190743161563201e-06, "loss": 0.0016, "step": 131840 }, { "epoch": 1.07821891483011, "grad_norm": 0.01374059822410345, "learning_rate": 5.190030047309408e-06, "loss": 0.0006, "step": 131850 }, { "epoch": 1.0783006910087092, "grad_norm": 0.014879081398248672, "learning_rate": 5.189316929184547e-06, "loss": 0.0009, "step": 131860 }, { "epoch": 1.0783824671873083, "grad_norm": 0.07375336438417435, "learning_rate": 5.188603807203149e-06, "loss": 0.002, "step": 131870 }, { "epoch": 1.0784642433659075, "grad_norm": 0.05749551206827164, "learning_rate": 5.187890681379736e-06, "loss": 0.0008, "step": 131880 }, { "epoch": 1.0785460195445067, "grad_norm": 0.051413122564554214, "learning_rate": 5.1871775517288385e-06, "loss": 0.0016, "step": 131890 }, { "epoch": 1.0786277957231059, "grad_norm": 0.049356859177351, "learning_rate": 5.18646441826498e-06, "loss": 0.0012, "step": 131900 }, { "epoch": 1.078709571901705, "grad_norm": 0.26023945212364197, "learning_rate": 5.185751281002691e-06, "loss": 0.0015, "step": 131910 }, { "epoch": 1.0787913480803042, "grad_norm": 0.040325287729501724, "learning_rate": 5.1850381399564974e-06, "loss": 0.001, "step": 131920 }, { "epoch": 1.0788731242589034, "grad_norm": 0.06296360492706299, "learning_rate": 5.184324995140928e-06, "loss": 0.0014, "step": 131930 }, { "epoch": 1.0789549004375025, "grad_norm": 0.07092812657356262, "learning_rate": 5.183611846570506e-06, "loss": 0.0011, "step": 131940 }, { "epoch": 1.0790366766161017, "grad_norm": 0.03727753832936287, "learning_rate": 5.1828986942597635e-06, "loss": 0.0014, "step": 131950 }, { "epoch": 1.0791184527947009, "grad_norm": 0.03779008612036705, "learning_rate": 5.1821855382232245e-06, "loss": 0.0009, "step": 131960 }, { "epoch": 1.0792002289733, "grad_norm": 0.15508919954299927, "learning_rate": 5.181472378475418e-06, "loss": 0.002, "step": 131970 }, { "epoch": 1.0792820051518992, "grad_norm": 0.1059015765786171, "learning_rate": 5.180759215030871e-06, "loss": 0.0012, "step": 131980 }, { "epoch": 1.0793637813304984, "grad_norm": 0.045965831726789474, "learning_rate": 5.180046047904115e-06, "loss": 0.0007, "step": 131990 }, { "epoch": 1.0794455575090975, "grad_norm": 0.13250862061977386, "learning_rate": 5.1793328771096715e-06, "loss": 0.0013, "step": 132000 }, { "epoch": 1.0795273336876967, "grad_norm": 0.011856970377266407, "learning_rate": 5.178619702662074e-06, "loss": 0.0009, "step": 132010 }, { "epoch": 1.0796091098662959, "grad_norm": 0.0271294042468071, "learning_rate": 5.177906524575846e-06, "loss": 0.0008, "step": 132020 }, { "epoch": 1.079690886044895, "grad_norm": 0.022192297503352165, "learning_rate": 5.1771933428655185e-06, "loss": 0.0009, "step": 132030 }, { "epoch": 1.0797726622234942, "grad_norm": 0.07253627479076385, "learning_rate": 5.176480157545617e-06, "loss": 0.0012, "step": 132040 }, { "epoch": 1.0798544384020934, "grad_norm": 0.0512554794549942, "learning_rate": 5.1757669686306746e-06, "loss": 0.0011, "step": 132050 }, { "epoch": 1.0799362145806926, "grad_norm": 0.05974428355693817, "learning_rate": 5.1750537761352126e-06, "loss": 0.0015, "step": 132060 }, { "epoch": 1.0800179907592917, "grad_norm": 0.010039126500487328, "learning_rate": 5.1743405800737655e-06, "loss": 0.0013, "step": 132070 }, { "epoch": 1.080099766937891, "grad_norm": 0.037193674594163895, "learning_rate": 5.173627380460858e-06, "loss": 0.0008, "step": 132080 }, { "epoch": 1.08018154311649, "grad_norm": 0.03793151304125786, "learning_rate": 5.172914177311019e-06, "loss": 0.0011, "step": 132090 }, { "epoch": 1.0802633192950895, "grad_norm": 0.005475566256791353, "learning_rate": 5.172200970638777e-06, "loss": 0.0007, "step": 132100 }, { "epoch": 1.0803450954736884, "grad_norm": 0.08159515261650085, "learning_rate": 5.171487760458664e-06, "loss": 0.0008, "step": 132110 }, { "epoch": 1.0804268716522878, "grad_norm": 0.04142070189118385, "learning_rate": 5.170774546785204e-06, "loss": 0.0007, "step": 132120 }, { "epoch": 1.080508647830887, "grad_norm": 0.0346568264067173, "learning_rate": 5.170061329632928e-06, "loss": 0.0006, "step": 132130 }, { "epoch": 1.0805904240094861, "grad_norm": 0.022282499819993973, "learning_rate": 5.169348109016364e-06, "loss": 0.0009, "step": 132140 }, { "epoch": 1.0806722001880853, "grad_norm": 0.03240371495485306, "learning_rate": 5.168634884950042e-06, "loss": 0.0005, "step": 132150 }, { "epoch": 1.0807539763666845, "grad_norm": 0.018488187342882156, "learning_rate": 5.167921657448491e-06, "loss": 0.0009, "step": 132160 }, { "epoch": 1.0808357525452836, "grad_norm": 0.0471164733171463, "learning_rate": 5.167208426526239e-06, "loss": 0.001, "step": 132170 }, { "epoch": 1.0809175287238828, "grad_norm": 0.058753497898578644, "learning_rate": 5.166495192197814e-06, "loss": 0.0006, "step": 132180 }, { "epoch": 1.080999304902482, "grad_norm": 0.018175259232521057, "learning_rate": 5.165781954477749e-06, "loss": 0.001, "step": 132190 }, { "epoch": 1.0810810810810811, "grad_norm": 0.006685633212327957, "learning_rate": 5.165068713380568e-06, "loss": 0.0005, "step": 132200 }, { "epoch": 1.0811628572596803, "grad_norm": 0.025971921160817146, "learning_rate": 5.164355468920805e-06, "loss": 0.0013, "step": 132210 }, { "epoch": 1.0812446334382795, "grad_norm": 0.015432456508278847, "learning_rate": 5.163642221112986e-06, "loss": 0.0013, "step": 132220 }, { "epoch": 1.0813264096168786, "grad_norm": 0.03772445395588875, "learning_rate": 5.162928969971645e-06, "loss": 0.0011, "step": 132230 }, { "epoch": 1.0814081857954778, "grad_norm": 0.02100151963531971, "learning_rate": 5.162215715511306e-06, "loss": 0.0014, "step": 132240 }, { "epoch": 1.081489961974077, "grad_norm": 0.046241968870162964, "learning_rate": 5.1615024577465015e-06, "loss": 0.0014, "step": 132250 }, { "epoch": 1.0815717381526762, "grad_norm": 0.008058430626988411, "learning_rate": 5.16078919669176e-06, "loss": 0.001, "step": 132260 }, { "epoch": 1.0816535143312753, "grad_norm": 0.008479541167616844, "learning_rate": 5.160075932361612e-06, "loss": 0.001, "step": 132270 }, { "epoch": 1.0817352905098745, "grad_norm": 0.05575933679938316, "learning_rate": 5.1593626647705876e-06, "loss": 0.0009, "step": 132280 }, { "epoch": 1.0818170666884737, "grad_norm": 0.0023167901672422886, "learning_rate": 5.1586493939332185e-06, "loss": 0.002, "step": 132290 }, { "epoch": 1.0818988428670728, "grad_norm": 0.033177368342876434, "learning_rate": 5.157936119864029e-06, "loss": 0.0012, "step": 132300 }, { "epoch": 1.081980619045672, "grad_norm": 0.0879700630903244, "learning_rate": 5.157222842577554e-06, "loss": 0.0012, "step": 132310 }, { "epoch": 1.0820623952242712, "grad_norm": 0.04619225114583969, "learning_rate": 5.1565095620883195e-06, "loss": 0.0013, "step": 132320 }, { "epoch": 1.0821441714028703, "grad_norm": 0.04054795950651169, "learning_rate": 5.15579627841086e-06, "loss": 0.0011, "step": 132330 }, { "epoch": 1.0822259475814695, "grad_norm": 0.05397479608654976, "learning_rate": 5.155082991559703e-06, "loss": 0.0016, "step": 132340 }, { "epoch": 1.0823077237600687, "grad_norm": 0.05318654701113701, "learning_rate": 5.1543697015493824e-06, "loss": 0.0008, "step": 132350 }, { "epoch": 1.0823894999386678, "grad_norm": 0.03804710879921913, "learning_rate": 5.1536564083944215e-06, "loss": 0.0009, "step": 132360 }, { "epoch": 1.082471276117267, "grad_norm": 0.057598963379859924, "learning_rate": 5.152943112109356e-06, "loss": 0.0005, "step": 132370 }, { "epoch": 1.0825530522958662, "grad_norm": 0.08479294180870056, "learning_rate": 5.1522298127087156e-06, "loss": 0.001, "step": 132380 }, { "epoch": 1.0826348284744653, "grad_norm": 0.0010761278681457043, "learning_rate": 5.15151651020703e-06, "loss": 0.0012, "step": 132390 }, { "epoch": 1.0827166046530645, "grad_norm": 0.016814634203910828, "learning_rate": 5.150803204618828e-06, "loss": 0.0016, "step": 132400 }, { "epoch": 1.0827983808316637, "grad_norm": 0.1403568536043167, "learning_rate": 5.150089895958645e-06, "loss": 0.0028, "step": 132410 }, { "epoch": 1.0828801570102629, "grad_norm": 0.018380645662546158, "learning_rate": 5.149376584241007e-06, "loss": 0.0014, "step": 132420 }, { "epoch": 1.082961933188862, "grad_norm": 0.01389298401772976, "learning_rate": 5.148663269480448e-06, "loss": 0.0008, "step": 132430 }, { "epoch": 1.0830437093674612, "grad_norm": 0.023364711552858353, "learning_rate": 5.147949951691496e-06, "loss": 0.0009, "step": 132440 }, { "epoch": 1.0831254855460604, "grad_norm": 0.025543836876749992, "learning_rate": 5.147236630888685e-06, "loss": 0.0009, "step": 132450 }, { "epoch": 1.0832072617246595, "grad_norm": 0.047567639499902725, "learning_rate": 5.146523307086542e-06, "loss": 0.0011, "step": 132460 }, { "epoch": 1.0832890379032587, "grad_norm": 0.057431865483522415, "learning_rate": 5.145809980299602e-06, "loss": 0.0009, "step": 132470 }, { "epoch": 1.0833708140818579, "grad_norm": 0.03378208726644516, "learning_rate": 5.145096650542391e-06, "loss": 0.0007, "step": 132480 }, { "epoch": 1.083452590260457, "grad_norm": 0.010593515820801258, "learning_rate": 5.144383317829447e-06, "loss": 0.0008, "step": 132490 }, { "epoch": 1.0835343664390562, "grad_norm": 0.012704716064035892, "learning_rate": 5.143669982175295e-06, "loss": 0.0013, "step": 132500 }, { "epoch": 1.0836161426176554, "grad_norm": 0.04341232776641846, "learning_rate": 5.14295664359447e-06, "loss": 0.0016, "step": 132510 }, { "epoch": 1.0836979187962545, "grad_norm": 0.004391430411487818, "learning_rate": 5.1422433021015e-06, "loss": 0.0005, "step": 132520 }, { "epoch": 1.083779694974854, "grad_norm": 0.04871582239866257, "learning_rate": 5.141529957710922e-06, "loss": 0.0006, "step": 132530 }, { "epoch": 1.0838614711534529, "grad_norm": 0.07889119535684586, "learning_rate": 5.140816610437261e-06, "loss": 0.0016, "step": 132540 }, { "epoch": 1.0839432473320523, "grad_norm": 0.001404796727001667, "learning_rate": 5.140103260295053e-06, "loss": 0.0016, "step": 132550 }, { "epoch": 1.0840250235106514, "grad_norm": 0.04351629689335823, "learning_rate": 5.139389907298825e-06, "loss": 0.0013, "step": 132560 }, { "epoch": 1.0841067996892506, "grad_norm": 0.006663523614406586, "learning_rate": 5.138676551463113e-06, "loss": 0.0017, "step": 132570 }, { "epoch": 1.0841885758678498, "grad_norm": 0.0323718786239624, "learning_rate": 5.137963192802447e-06, "loss": 0.0016, "step": 132580 }, { "epoch": 1.084270352046449, "grad_norm": 0.10396485775709152, "learning_rate": 5.137249831331359e-06, "loss": 0.0014, "step": 132590 }, { "epoch": 1.0843521282250481, "grad_norm": 0.043593186885118484, "learning_rate": 5.136536467064379e-06, "loss": 0.0006, "step": 132600 }, { "epoch": 1.0844339044036473, "grad_norm": 0.03186643868684769, "learning_rate": 5.1358231000160415e-06, "loss": 0.0015, "step": 132610 }, { "epoch": 1.0845156805822465, "grad_norm": 0.05284825712442398, "learning_rate": 5.135109730200875e-06, "loss": 0.0013, "step": 132620 }, { "epoch": 1.0845974567608456, "grad_norm": 0.05588029325008392, "learning_rate": 5.134396357633417e-06, "loss": 0.0011, "step": 132630 }, { "epoch": 1.0846792329394448, "grad_norm": 0.06404789537191391, "learning_rate": 5.133682982328192e-06, "loss": 0.0012, "step": 132640 }, { "epoch": 1.084761009118044, "grad_norm": 0.04021332040429115, "learning_rate": 5.1329696042997405e-06, "loss": 0.0008, "step": 132650 }, { "epoch": 1.0848427852966431, "grad_norm": 0.06667903810739517, "learning_rate": 5.132256223562586e-06, "loss": 0.0017, "step": 132660 }, { "epoch": 1.0849245614752423, "grad_norm": 0.09308571368455887, "learning_rate": 5.131542840131266e-06, "loss": 0.0012, "step": 132670 }, { "epoch": 1.0850063376538415, "grad_norm": 0.0036977967247366905, "learning_rate": 5.130829454020312e-06, "loss": 0.0014, "step": 132680 }, { "epoch": 1.0850881138324406, "grad_norm": 0.02540682815015316, "learning_rate": 5.130116065244255e-06, "loss": 0.0013, "step": 132690 }, { "epoch": 1.0851698900110398, "grad_norm": 0.03375915437936783, "learning_rate": 5.129402673817628e-06, "loss": 0.0012, "step": 132700 }, { "epoch": 1.085251666189639, "grad_norm": 0.09554458409547806, "learning_rate": 5.128689279754965e-06, "loss": 0.0011, "step": 132710 }, { "epoch": 1.0853334423682381, "grad_norm": 0.06798645853996277, "learning_rate": 5.127975883070794e-06, "loss": 0.001, "step": 132720 }, { "epoch": 1.0854152185468373, "grad_norm": 0.005233862437307835, "learning_rate": 5.127262483779653e-06, "loss": 0.0012, "step": 132730 }, { "epoch": 1.0854969947254365, "grad_norm": 0.24732838571071625, "learning_rate": 5.126549081896068e-06, "loss": 0.0019, "step": 132740 }, { "epoch": 1.0855787709040357, "grad_norm": 0.07936298847198486, "learning_rate": 5.1258356774345786e-06, "loss": 0.0015, "step": 132750 }, { "epoch": 1.0856605470826348, "grad_norm": 0.4257761240005493, "learning_rate": 5.125122270409712e-06, "loss": 0.0005, "step": 132760 }, { "epoch": 1.085742323261234, "grad_norm": 0.028273677453398705, "learning_rate": 5.124408860836006e-06, "loss": 0.0016, "step": 132770 }, { "epoch": 1.0858240994398332, "grad_norm": 0.04265979304909706, "learning_rate": 5.1236954487279886e-06, "loss": 0.0009, "step": 132780 }, { "epoch": 1.0859058756184323, "grad_norm": 0.04889027401804924, "learning_rate": 5.1229820341001944e-06, "loss": 0.0016, "step": 132790 }, { "epoch": 1.0859876517970315, "grad_norm": 0.06490562856197357, "learning_rate": 5.122268616967156e-06, "loss": 0.0024, "step": 132800 }, { "epoch": 1.0860694279756307, "grad_norm": 0.045245636254549026, "learning_rate": 5.121555197343408e-06, "loss": 0.0019, "step": 132810 }, { "epoch": 1.0861512041542298, "grad_norm": 0.06389249861240387, "learning_rate": 5.120841775243479e-06, "loss": 0.0017, "step": 132820 }, { "epoch": 1.086232980332829, "grad_norm": 0.057925641536712646, "learning_rate": 5.120128350681907e-06, "loss": 0.0013, "step": 132830 }, { "epoch": 1.0863147565114282, "grad_norm": 0.026656152680516243, "learning_rate": 5.119414923673222e-06, "loss": 0.0007, "step": 132840 }, { "epoch": 1.0863965326900273, "grad_norm": 0.06941518187522888, "learning_rate": 5.1187014942319595e-06, "loss": 0.001, "step": 132850 }, { "epoch": 1.0864783088686265, "grad_norm": 0.040096722543239594, "learning_rate": 5.117988062372649e-06, "loss": 0.0008, "step": 132860 }, { "epoch": 1.0865600850472257, "grad_norm": 0.00788032729178667, "learning_rate": 5.117274628109826e-06, "loss": 0.0017, "step": 132870 }, { "epoch": 1.0866418612258248, "grad_norm": 0.0712255984544754, "learning_rate": 5.116561191458025e-06, "loss": 0.0009, "step": 132880 }, { "epoch": 1.086723637404424, "grad_norm": 0.06654654443264008, "learning_rate": 5.115847752431777e-06, "loss": 0.0009, "step": 132890 }, { "epoch": 1.0868054135830232, "grad_norm": 0.04453769698739052, "learning_rate": 5.115134311045616e-06, "loss": 0.0011, "step": 132900 }, { "epoch": 1.0868871897616224, "grad_norm": 0.02982785552740097, "learning_rate": 5.114420867314075e-06, "loss": 0.0019, "step": 132910 }, { "epoch": 1.0869689659402215, "grad_norm": 0.02392123080790043, "learning_rate": 5.1137074212516895e-06, "loss": 0.0013, "step": 132920 }, { "epoch": 1.0870507421188207, "grad_norm": 0.00501983193680644, "learning_rate": 5.112993972872989e-06, "loss": 0.0015, "step": 132930 }, { "epoch": 1.0871325182974199, "grad_norm": 0.026350542902946472, "learning_rate": 5.112280522192511e-06, "loss": 0.001, "step": 132940 }, { "epoch": 1.087214294476019, "grad_norm": 0.018967438489198685, "learning_rate": 5.1115670692247875e-06, "loss": 0.0016, "step": 132950 }, { "epoch": 1.0872960706546184, "grad_norm": 0.07229896634817123, "learning_rate": 5.110853613984352e-06, "loss": 0.0013, "step": 132960 }, { "epoch": 1.0873778468332176, "grad_norm": 0.030079606920480728, "learning_rate": 5.110140156485738e-06, "loss": 0.0011, "step": 132970 }, { "epoch": 1.0874596230118168, "grad_norm": 0.0575433224439621, "learning_rate": 5.109426696743479e-06, "loss": 0.0017, "step": 132980 }, { "epoch": 1.087541399190416, "grad_norm": 0.028714211657643318, "learning_rate": 5.108713234772109e-06, "loss": 0.0007, "step": 132990 }, { "epoch": 1.087623175369015, "grad_norm": 0.011880265548825264, "learning_rate": 5.107999770586164e-06, "loss": 0.0013, "step": 133000 }, { "epoch": 1.0877049515476143, "grad_norm": 0.037305861711502075, "learning_rate": 5.1072863042001756e-06, "loss": 0.0017, "step": 133010 }, { "epoch": 1.0877867277262134, "grad_norm": 0.058193959295749664, "learning_rate": 5.106572835628677e-06, "loss": 0.0009, "step": 133020 }, { "epoch": 1.0878685039048126, "grad_norm": 0.02500336244702339, "learning_rate": 5.105859364886202e-06, "loss": 0.0011, "step": 133030 }, { "epoch": 1.0879502800834118, "grad_norm": 0.03243072330951691, "learning_rate": 5.105145891987287e-06, "loss": 0.0008, "step": 133040 }, { "epoch": 1.088032056262011, "grad_norm": 0.0026776890736073256, "learning_rate": 5.104432416946464e-06, "loss": 0.001, "step": 133050 }, { "epoch": 1.08811383244061, "grad_norm": 0.07251238077878952, "learning_rate": 5.103718939778269e-06, "loss": 0.0011, "step": 133060 }, { "epoch": 1.0881956086192093, "grad_norm": 0.0606120340526104, "learning_rate": 5.103005460497235e-06, "loss": 0.0007, "step": 133070 }, { "epoch": 1.0882773847978084, "grad_norm": 0.0885557234287262, "learning_rate": 5.102291979117896e-06, "loss": 0.0014, "step": 133080 }, { "epoch": 1.0883591609764076, "grad_norm": 0.05148822441697121, "learning_rate": 5.1015784956547844e-06, "loss": 0.001, "step": 133090 }, { "epoch": 1.0884409371550068, "grad_norm": 0.025451872497797012, "learning_rate": 5.100865010122437e-06, "loss": 0.0007, "step": 133100 }, { "epoch": 1.088522713333606, "grad_norm": 0.061860520392656326, "learning_rate": 5.100151522535388e-06, "loss": 0.0014, "step": 133110 }, { "epoch": 1.0886044895122051, "grad_norm": 0.028920408338308334, "learning_rate": 5.0994380329081715e-06, "loss": 0.0014, "step": 133120 }, { "epoch": 1.0886862656908043, "grad_norm": 0.012224473059177399, "learning_rate": 5.098724541255321e-06, "loss": 0.0016, "step": 133130 }, { "epoch": 1.0887680418694035, "grad_norm": 0.06278063356876373, "learning_rate": 5.098011047591371e-06, "loss": 0.002, "step": 133140 }, { "epoch": 1.0888498180480026, "grad_norm": 0.034030478447675705, "learning_rate": 5.097297551930857e-06, "loss": 0.0013, "step": 133150 }, { "epoch": 1.0889315942266018, "grad_norm": 0.03084690496325493, "learning_rate": 5.096584054288312e-06, "loss": 0.0014, "step": 133160 }, { "epoch": 1.089013370405201, "grad_norm": 0.04505618289113045, "learning_rate": 5.0958705546782704e-06, "loss": 0.0008, "step": 133170 }, { "epoch": 1.0890951465838001, "grad_norm": 0.048379383981227875, "learning_rate": 5.095157053115269e-06, "loss": 0.0008, "step": 133180 }, { "epoch": 1.0891769227623993, "grad_norm": 0.028899654746055603, "learning_rate": 5.094443549613841e-06, "loss": 0.0011, "step": 133190 }, { "epoch": 1.0892586989409985, "grad_norm": 0.04365309700369835, "learning_rate": 5.093730044188522e-06, "loss": 0.002, "step": 133200 }, { "epoch": 1.0893404751195976, "grad_norm": 0.015130547806620598, "learning_rate": 5.093016536853844e-06, "loss": 0.0011, "step": 133210 }, { "epoch": 1.0894222512981968, "grad_norm": 0.032062970101833344, "learning_rate": 5.092303027624344e-06, "loss": 0.0008, "step": 133220 }, { "epoch": 1.089504027476796, "grad_norm": 0.012013107538223267, "learning_rate": 5.091589516514557e-06, "loss": 0.0007, "step": 133230 }, { "epoch": 1.0895858036553951, "grad_norm": 0.023159680888056755, "learning_rate": 5.090876003539017e-06, "loss": 0.0018, "step": 133240 }, { "epoch": 1.0896675798339943, "grad_norm": 0.0053790234960615635, "learning_rate": 5.090162488712259e-06, "loss": 0.0015, "step": 133250 }, { "epoch": 1.0897493560125935, "grad_norm": 0.05026838928461075, "learning_rate": 5.089448972048818e-06, "loss": 0.0016, "step": 133260 }, { "epoch": 1.0898311321911927, "grad_norm": 0.011101284995675087, "learning_rate": 5.088735453563227e-06, "loss": 0.0005, "step": 133270 }, { "epoch": 1.0899129083697918, "grad_norm": 0.03721359744668007, "learning_rate": 5.088021933270026e-06, "loss": 0.0005, "step": 133280 }, { "epoch": 1.089994684548391, "grad_norm": 0.028299609199166298, "learning_rate": 5.087308411183742e-06, "loss": 0.0008, "step": 133290 }, { "epoch": 1.0900764607269902, "grad_norm": 0.006588207557797432, "learning_rate": 5.086594887318918e-06, "loss": 0.001, "step": 133300 }, { "epoch": 1.0901582369055893, "grad_norm": 0.020520415157079697, "learning_rate": 5.085881361690086e-06, "loss": 0.0007, "step": 133310 }, { "epoch": 1.0902400130841885, "grad_norm": 0.03675883635878563, "learning_rate": 5.08516783431178e-06, "loss": 0.0017, "step": 133320 }, { "epoch": 1.0903217892627877, "grad_norm": 0.08511420339345932, "learning_rate": 5.084454305198535e-06, "loss": 0.0008, "step": 133330 }, { "epoch": 1.0904035654413868, "grad_norm": 0.07465647161006927, "learning_rate": 5.083740774364889e-06, "loss": 0.0007, "step": 133340 }, { "epoch": 1.090485341619986, "grad_norm": 0.02674107812345028, "learning_rate": 5.083027241825374e-06, "loss": 0.0005, "step": 133350 }, { "epoch": 1.0905671177985852, "grad_norm": 0.059075817465782166, "learning_rate": 5.082313707594528e-06, "loss": 0.001, "step": 133360 }, { "epoch": 1.0906488939771846, "grad_norm": 0.03269098326563835, "learning_rate": 5.081600171686883e-06, "loss": 0.0014, "step": 133370 }, { "epoch": 1.0907306701557835, "grad_norm": 0.14097397029399872, "learning_rate": 5.080886634116978e-06, "loss": 0.0012, "step": 133380 }, { "epoch": 1.090812446334383, "grad_norm": 0.045375991612672806, "learning_rate": 5.080173094899344e-06, "loss": 0.001, "step": 133390 }, { "epoch": 1.090894222512982, "grad_norm": 0.052845485508441925, "learning_rate": 5.079459554048521e-06, "loss": 0.0017, "step": 133400 }, { "epoch": 1.0909759986915812, "grad_norm": 0.005065103527158499, "learning_rate": 5.078746011579041e-06, "loss": 0.001, "step": 133410 }, { "epoch": 1.0910577748701804, "grad_norm": 0.04754846915602684, "learning_rate": 5.078032467505443e-06, "loss": 0.0008, "step": 133420 }, { "epoch": 1.0911395510487796, "grad_norm": 0.007727849297225475, "learning_rate": 5.077318921842258e-06, "loss": 0.0014, "step": 133430 }, { "epoch": 1.0912213272273787, "grad_norm": 0.04055342078208923, "learning_rate": 5.076605374604025e-06, "loss": 0.001, "step": 133440 }, { "epoch": 1.091303103405978, "grad_norm": 0.07507243752479553, "learning_rate": 5.075891825805277e-06, "loss": 0.0012, "step": 133450 }, { "epoch": 1.091384879584577, "grad_norm": 0.011060060933232307, "learning_rate": 5.0751782754605515e-06, "loss": 0.0008, "step": 133460 }, { "epoch": 1.0914666557631763, "grad_norm": 0.005138674285262823, "learning_rate": 5.074464723584384e-06, "loss": 0.0009, "step": 133470 }, { "epoch": 1.0915484319417754, "grad_norm": 0.019890403375029564, "learning_rate": 5.073751170191309e-06, "loss": 0.0012, "step": 133480 }, { "epoch": 1.0916302081203746, "grad_norm": 0.03611396998167038, "learning_rate": 5.073037615295863e-06, "loss": 0.0013, "step": 133490 }, { "epoch": 1.0917119842989738, "grad_norm": 0.008649426512420177, "learning_rate": 5.072324058912581e-06, "loss": 0.0013, "step": 133500 }, { "epoch": 1.091793760477573, "grad_norm": 0.10904084146022797, "learning_rate": 5.071610501055999e-06, "loss": 0.001, "step": 133510 }, { "epoch": 1.091875536656172, "grad_norm": 0.010615170933306217, "learning_rate": 5.070896941740653e-06, "loss": 0.0017, "step": 133520 }, { "epoch": 1.0919573128347713, "grad_norm": 0.053645435720682144, "learning_rate": 5.070183380981079e-06, "loss": 0.0019, "step": 133530 }, { "epoch": 1.0920390890133704, "grad_norm": 0.02716514654457569, "learning_rate": 5.069469818791812e-06, "loss": 0.0009, "step": 133540 }, { "epoch": 1.0921208651919696, "grad_norm": 0.04595799744129181, "learning_rate": 5.068756255187389e-06, "loss": 0.0009, "step": 133550 }, { "epoch": 1.0922026413705688, "grad_norm": 0.07395405322313309, "learning_rate": 5.068042690182345e-06, "loss": 0.001, "step": 133560 }, { "epoch": 1.092284417549168, "grad_norm": 0.041642408818006516, "learning_rate": 5.067329123791215e-06, "loss": 0.0012, "step": 133570 }, { "epoch": 1.092366193727767, "grad_norm": 0.060673121362924576, "learning_rate": 5.066615556028538e-06, "loss": 0.0005, "step": 133580 }, { "epoch": 1.0924479699063663, "grad_norm": 0.06781741231679916, "learning_rate": 5.0659019869088455e-06, "loss": 0.0009, "step": 133590 }, { "epoch": 1.0925297460849654, "grad_norm": 0.021539567038416862, "learning_rate": 5.065188416446678e-06, "loss": 0.0011, "step": 133600 }, { "epoch": 1.0926115222635646, "grad_norm": 0.07345053553581238, "learning_rate": 5.064474844656569e-06, "loss": 0.0018, "step": 133610 }, { "epoch": 1.0926932984421638, "grad_norm": 0.050296708941459656, "learning_rate": 5.063761271553055e-06, "loss": 0.0008, "step": 133620 }, { "epoch": 1.092775074620763, "grad_norm": 0.06256013363599777, "learning_rate": 5.063047697150671e-06, "loss": 0.0016, "step": 133630 }, { "epoch": 1.0928568507993621, "grad_norm": 0.040449973195791245, "learning_rate": 5.062334121463956e-06, "loss": 0.0008, "step": 133640 }, { "epoch": 1.0929386269779613, "grad_norm": 0.029675738885998726, "learning_rate": 5.061620544507443e-06, "loss": 0.001, "step": 133650 }, { "epoch": 1.0930204031565605, "grad_norm": 0.028388746082782745, "learning_rate": 5.06090696629567e-06, "loss": 0.0012, "step": 133660 }, { "epoch": 1.0931021793351596, "grad_norm": 0.03628013655543327, "learning_rate": 5.060193386843172e-06, "loss": 0.0008, "step": 133670 }, { "epoch": 1.0931839555137588, "grad_norm": 0.046245839446783066, "learning_rate": 5.059479806164486e-06, "loss": 0.0009, "step": 133680 }, { "epoch": 1.093265731692358, "grad_norm": 0.003677567932754755, "learning_rate": 5.058766224274149e-06, "loss": 0.0014, "step": 133690 }, { "epoch": 1.0933475078709571, "grad_norm": 0.0979563444852829, "learning_rate": 5.058052641186695e-06, "loss": 0.0016, "step": 133700 }, { "epoch": 1.0934292840495563, "grad_norm": 0.06388670206069946, "learning_rate": 5.057339056916662e-06, "loss": 0.0007, "step": 133710 }, { "epoch": 1.0935110602281555, "grad_norm": 0.004147195257246494, "learning_rate": 5.056625471478584e-06, "loss": 0.0011, "step": 133720 }, { "epoch": 1.0935928364067546, "grad_norm": 0.01876499503850937, "learning_rate": 5.055911884887002e-06, "loss": 0.0015, "step": 133730 }, { "epoch": 1.0936746125853538, "grad_norm": 0.03788401186466217, "learning_rate": 5.055198297156448e-06, "loss": 0.0008, "step": 133740 }, { "epoch": 1.093756388763953, "grad_norm": 0.017454488202929497, "learning_rate": 5.05448470830146e-06, "loss": 0.0006, "step": 133750 }, { "epoch": 1.0938381649425521, "grad_norm": 0.047388751059770584, "learning_rate": 5.053771118336575e-06, "loss": 0.0012, "step": 133760 }, { "epoch": 1.0939199411211513, "grad_norm": 0.04876568540930748, "learning_rate": 5.053057527276327e-06, "loss": 0.0013, "step": 133770 }, { "epoch": 1.0940017172997505, "grad_norm": 0.055420223623514175, "learning_rate": 5.052343935135256e-06, "loss": 0.0012, "step": 133780 }, { "epoch": 1.0940834934783497, "grad_norm": 0.0751187652349472, "learning_rate": 5.051630341927896e-06, "loss": 0.0011, "step": 133790 }, { "epoch": 1.094165269656949, "grad_norm": 0.04945101588964462, "learning_rate": 5.0509167476687826e-06, "loss": 0.001, "step": 133800 }, { "epoch": 1.094247045835548, "grad_norm": 0.0734889954328537, "learning_rate": 5.050203152372454e-06, "loss": 0.0013, "step": 133810 }, { "epoch": 1.0943288220141474, "grad_norm": 0.27996885776519775, "learning_rate": 5.049489556053447e-06, "loss": 0.0039, "step": 133820 }, { "epoch": 1.0944105981927466, "grad_norm": 0.18113455176353455, "learning_rate": 5.048775958726296e-06, "loss": 0.0023, "step": 133830 }, { "epoch": 1.0944923743713457, "grad_norm": 0.032265327870845795, "learning_rate": 5.048062360405541e-06, "loss": 0.0016, "step": 133840 }, { "epoch": 1.094574150549945, "grad_norm": 0.1036435067653656, "learning_rate": 5.0473487611057174e-06, "loss": 0.0008, "step": 133850 }, { "epoch": 1.094655926728544, "grad_norm": 0.030709270387887955, "learning_rate": 5.04663516084136e-06, "loss": 0.0017, "step": 133860 }, { "epoch": 1.0947377029071432, "grad_norm": 0.015368764288723469, "learning_rate": 5.045921559627005e-06, "loss": 0.0007, "step": 133870 }, { "epoch": 1.0948194790857424, "grad_norm": 0.007628113962709904, "learning_rate": 5.045207957477192e-06, "loss": 0.0015, "step": 133880 }, { "epoch": 1.0949012552643416, "grad_norm": 0.04645293578505516, "learning_rate": 5.044494354406455e-06, "loss": 0.0009, "step": 133890 }, { "epoch": 1.0949830314429407, "grad_norm": 0.03889406472444534, "learning_rate": 5.043780750429334e-06, "loss": 0.0012, "step": 133900 }, { "epoch": 1.09506480762154, "grad_norm": 0.03673429414629936, "learning_rate": 5.043067145560362e-06, "loss": 0.0007, "step": 133910 }, { "epoch": 1.095146583800139, "grad_norm": 0.02084151655435562, "learning_rate": 5.042353539814078e-06, "loss": 0.0008, "step": 133920 }, { "epoch": 1.0952283599787382, "grad_norm": 0.039550844579935074, "learning_rate": 5.041639933205017e-06, "loss": 0.001, "step": 133930 }, { "epoch": 1.0953101361573374, "grad_norm": 0.0010642345296218991, "learning_rate": 5.040926325747717e-06, "loss": 0.0015, "step": 133940 }, { "epoch": 1.0953919123359366, "grad_norm": 0.11427228152751923, "learning_rate": 5.040212717456714e-06, "loss": 0.0014, "step": 133950 }, { "epoch": 1.0954736885145357, "grad_norm": 0.02925535850226879, "learning_rate": 5.039499108346546e-06, "loss": 0.0015, "step": 133960 }, { "epoch": 1.095555464693135, "grad_norm": 0.14590348303318024, "learning_rate": 5.038785498431749e-06, "loss": 0.0018, "step": 133970 }, { "epoch": 1.095637240871734, "grad_norm": 0.045274216681718826, "learning_rate": 5.0380718877268604e-06, "loss": 0.0008, "step": 133980 }, { "epoch": 1.0957190170503333, "grad_norm": 0.0944746881723404, "learning_rate": 5.0373582762464155e-06, "loss": 0.0009, "step": 133990 }, { "epoch": 1.0958007932289324, "grad_norm": 0.03284798935055733, "learning_rate": 5.036644664004953e-06, "loss": 0.0009, "step": 134000 }, { "epoch": 1.0958825694075316, "grad_norm": 0.041632164269685745, "learning_rate": 5.035931051017008e-06, "loss": 0.001, "step": 134010 }, { "epoch": 1.0959643455861308, "grad_norm": 0.05345657840371132, "learning_rate": 5.035217437297119e-06, "loss": 0.0011, "step": 134020 }, { "epoch": 1.09604612176473, "grad_norm": 0.03127175569534302, "learning_rate": 5.034503822859822e-06, "loss": 0.0011, "step": 134030 }, { "epoch": 1.096127897943329, "grad_norm": 0.15079708397388458, "learning_rate": 5.033790207719654e-06, "loss": 0.0007, "step": 134040 }, { "epoch": 1.0962096741219283, "grad_norm": 0.042390283197164536, "learning_rate": 5.033076591891152e-06, "loss": 0.0012, "step": 134050 }, { "epoch": 1.0962914503005274, "grad_norm": 0.06360849738121033, "learning_rate": 5.032362975388853e-06, "loss": 0.0013, "step": 134060 }, { "epoch": 1.0963732264791266, "grad_norm": 0.036761414259672165, "learning_rate": 5.031649358227293e-06, "loss": 0.0033, "step": 134070 }, { "epoch": 1.0964550026577258, "grad_norm": 0.01613871566951275, "learning_rate": 5.030935740421011e-06, "loss": 0.0008, "step": 134080 }, { "epoch": 1.096536778836325, "grad_norm": 0.10971596091985703, "learning_rate": 5.030222121984543e-06, "loss": 0.0009, "step": 134090 }, { "epoch": 1.0966185550149241, "grad_norm": 0.019654374569654465, "learning_rate": 5.029508502932425e-06, "loss": 0.0016, "step": 134100 }, { "epoch": 1.0967003311935233, "grad_norm": 0.014417015016078949, "learning_rate": 5.028794883279194e-06, "loss": 0.0014, "step": 134110 }, { "epoch": 1.0967821073721225, "grad_norm": 0.05346161499619484, "learning_rate": 5.02808126303939e-06, "loss": 0.0011, "step": 134120 }, { "epoch": 1.0968638835507216, "grad_norm": 0.030764542520046234, "learning_rate": 5.027367642227547e-06, "loss": 0.0013, "step": 134130 }, { "epoch": 1.0969456597293208, "grad_norm": 0.028923189267516136, "learning_rate": 5.026654020858203e-06, "loss": 0.0006, "step": 134140 }, { "epoch": 1.09702743590792, "grad_norm": 0.0015914945397526026, "learning_rate": 5.025940398945894e-06, "loss": 0.0012, "step": 134150 }, { "epoch": 1.0971092120865191, "grad_norm": 0.00206560967490077, "learning_rate": 5.025226776505159e-06, "loss": 0.0016, "step": 134160 }, { "epoch": 1.0971909882651183, "grad_norm": 0.019431594759225845, "learning_rate": 5.024513153550533e-06, "loss": 0.0007, "step": 134170 }, { "epoch": 1.0972727644437175, "grad_norm": 0.041599616408348083, "learning_rate": 5.023799530096555e-06, "loss": 0.0004, "step": 134180 }, { "epoch": 1.0973545406223166, "grad_norm": 0.017244918271899223, "learning_rate": 5.0230859061577605e-06, "loss": 0.001, "step": 134190 }, { "epoch": 1.0974363168009158, "grad_norm": 0.04333552345633507, "learning_rate": 5.022372281748688e-06, "loss": 0.0008, "step": 134200 }, { "epoch": 1.097518092979515, "grad_norm": 0.08066840469837189, "learning_rate": 5.021658656883874e-06, "loss": 0.0011, "step": 134210 }, { "epoch": 1.0975998691581141, "grad_norm": 0.02040991559624672, "learning_rate": 5.020945031577856e-06, "loss": 0.0013, "step": 134220 }, { "epoch": 1.0976816453367135, "grad_norm": 0.03849377855658531, "learning_rate": 5.02023140584517e-06, "loss": 0.0019, "step": 134230 }, { "epoch": 1.0977634215153125, "grad_norm": 0.07422548532485962, "learning_rate": 5.019517779700353e-06, "loss": 0.0015, "step": 134240 }, { "epoch": 1.0978451976939119, "grad_norm": 0.03264692425727844, "learning_rate": 5.018804153157943e-06, "loss": 0.0009, "step": 134250 }, { "epoch": 1.097926973872511, "grad_norm": 0.02230861596763134, "learning_rate": 5.018090526232478e-06, "loss": 0.0019, "step": 134260 }, { "epoch": 1.0980087500511102, "grad_norm": 0.0912240743637085, "learning_rate": 5.017376898938495e-06, "loss": 0.0014, "step": 134270 }, { "epoch": 1.0980905262297094, "grad_norm": 0.03354101628065109, "learning_rate": 5.016663271290531e-06, "loss": 0.0014, "step": 134280 }, { "epoch": 1.0981723024083085, "grad_norm": 0.010578123852610588, "learning_rate": 5.0159496433031195e-06, "loss": 0.0015, "step": 134290 }, { "epoch": 1.0982540785869077, "grad_norm": 0.0018498888239264488, "learning_rate": 5.015236014990803e-06, "loss": 0.001, "step": 134300 }, { "epoch": 1.0983358547655069, "grad_norm": 0.03514302894473076, "learning_rate": 5.014522386368117e-06, "loss": 0.0015, "step": 134310 }, { "epoch": 1.098417630944106, "grad_norm": 0.01465944666415453, "learning_rate": 5.013808757449596e-06, "loss": 0.0013, "step": 134320 }, { "epoch": 1.0984994071227052, "grad_norm": 0.24547725915908813, "learning_rate": 5.013095128249781e-06, "loss": 0.0009, "step": 134330 }, { "epoch": 1.0985811833013044, "grad_norm": 0.05879750847816467, "learning_rate": 5.012381498783207e-06, "loss": 0.0012, "step": 134340 }, { "epoch": 1.0986629594799036, "grad_norm": 0.12905359268188477, "learning_rate": 5.011667869064413e-06, "loss": 0.0028, "step": 134350 }, { "epoch": 1.0987447356585027, "grad_norm": 0.03904755413532257, "learning_rate": 5.010954239107934e-06, "loss": 0.0007, "step": 134360 }, { "epoch": 1.098826511837102, "grad_norm": 0.06786077469587326, "learning_rate": 5.010240608928307e-06, "loss": 0.0011, "step": 134370 }, { "epoch": 1.098908288015701, "grad_norm": 0.05559404566884041, "learning_rate": 5.009526978540071e-06, "loss": 0.0012, "step": 134380 }, { "epoch": 1.0989900641943002, "grad_norm": 0.07676620036363602, "learning_rate": 5.008813347957765e-06, "loss": 0.001, "step": 134390 }, { "epoch": 1.0990718403728994, "grad_norm": 0.06132730096578598, "learning_rate": 5.008099717195922e-06, "loss": 0.0012, "step": 134400 }, { "epoch": 1.0991536165514986, "grad_norm": 0.026292452588677406, "learning_rate": 5.007386086269082e-06, "loss": 0.0011, "step": 134410 }, { "epoch": 1.0992353927300977, "grad_norm": 0.022015925496816635, "learning_rate": 5.00667245519178e-06, "loss": 0.0013, "step": 134420 }, { "epoch": 1.099317168908697, "grad_norm": 0.2786199748516083, "learning_rate": 5.005958823978556e-06, "loss": 0.0017, "step": 134430 }, { "epoch": 1.099398945087296, "grad_norm": 0.06923581659793854, "learning_rate": 5.005245192643945e-06, "loss": 0.001, "step": 134440 }, { "epoch": 1.0994807212658952, "grad_norm": 0.04052572697401047, "learning_rate": 5.004531561202487e-06, "loss": 0.0008, "step": 134450 }, { "epoch": 1.0995624974444944, "grad_norm": 0.027262823656201363, "learning_rate": 5.003817929668716e-06, "loss": 0.0005, "step": 134460 }, { "epoch": 1.0996442736230936, "grad_norm": 0.08116646856069565, "learning_rate": 5.003104298057171e-06, "loss": 0.0029, "step": 134470 }, { "epoch": 1.0997260498016928, "grad_norm": 0.011582346633076668, "learning_rate": 5.002390666382388e-06, "loss": 0.001, "step": 134480 }, { "epoch": 1.099807825980292, "grad_norm": 0.036495551466941833, "learning_rate": 5.001677034658906e-06, "loss": 0.0008, "step": 134490 }, { "epoch": 1.099889602158891, "grad_norm": 0.04483636096119881, "learning_rate": 5.000963402901261e-06, "loss": 0.0013, "step": 134500 }, { "epoch": 1.0999713783374903, "grad_norm": 0.06112236902117729, "learning_rate": 5.0002497711239915e-06, "loss": 0.0015, "step": 134510 }, { "epoch": 1.1000531545160894, "grad_norm": 0.05539526790380478, "learning_rate": 4.999536139341634e-06, "loss": 0.001, "step": 134520 }, { "epoch": 1.1001349306946886, "grad_norm": 0.07073988020420074, "learning_rate": 4.998822507568725e-06, "loss": 0.0009, "step": 134530 }, { "epoch": 1.1002167068732878, "grad_norm": 0.033197127282619476, "learning_rate": 4.998108875819803e-06, "loss": 0.0005, "step": 134540 }, { "epoch": 1.100298483051887, "grad_norm": 0.0017124749720096588, "learning_rate": 4.9973952441094035e-06, "loss": 0.0007, "step": 134550 }, { "epoch": 1.100380259230486, "grad_norm": 0.04884253442287445, "learning_rate": 4.9966816124520655e-06, "loss": 0.0006, "step": 134560 }, { "epoch": 1.1004620354090853, "grad_norm": 0.023656222969293594, "learning_rate": 4.9959679808623266e-06, "loss": 0.002, "step": 134570 }, { "epoch": 1.1005438115876844, "grad_norm": 0.02430424466729164, "learning_rate": 4.995254349354724e-06, "loss": 0.0011, "step": 134580 }, { "epoch": 1.1006255877662836, "grad_norm": 0.01048364583402872, "learning_rate": 4.994540717943792e-06, "loss": 0.0011, "step": 134590 }, { "epoch": 1.1007073639448828, "grad_norm": 0.02361707016825676, "learning_rate": 4.993827086644071e-06, "loss": 0.0014, "step": 134600 }, { "epoch": 1.100789140123482, "grad_norm": 0.014675411395728588, "learning_rate": 4.993113455470098e-06, "loss": 0.0006, "step": 134610 }, { "epoch": 1.1008709163020811, "grad_norm": 0.0011926018632948399, "learning_rate": 4.992399824436408e-06, "loss": 0.0012, "step": 134620 }, { "epoch": 1.1009526924806803, "grad_norm": 0.03904151916503906, "learning_rate": 4.99168619355754e-06, "loss": 0.001, "step": 134630 }, { "epoch": 1.1010344686592795, "grad_norm": 0.037484146654605865, "learning_rate": 4.990972562848032e-06, "loss": 0.0005, "step": 134640 }, { "epoch": 1.1011162448378786, "grad_norm": 0.04792105406522751, "learning_rate": 4.9902589323224205e-06, "loss": 0.0004, "step": 134650 }, { "epoch": 1.101198021016478, "grad_norm": 0.037229374051094055, "learning_rate": 4.989545301995242e-06, "loss": 0.0022, "step": 134660 }, { "epoch": 1.101279797195077, "grad_norm": 0.002453576773405075, "learning_rate": 4.988831671881032e-06, "loss": 0.002, "step": 134670 }, { "epoch": 1.1013615733736764, "grad_norm": 0.06900518387556076, "learning_rate": 4.988118041994332e-06, "loss": 0.0012, "step": 134680 }, { "epoch": 1.1014433495522755, "grad_norm": 0.031185146421194077, "learning_rate": 4.9874044123496774e-06, "loss": 0.0008, "step": 134690 }, { "epoch": 1.1015251257308747, "grad_norm": 0.02477804385125637, "learning_rate": 4.986690782961605e-06, "loss": 0.0014, "step": 134700 }, { "epoch": 1.1016069019094739, "grad_norm": 0.043245915323495865, "learning_rate": 4.985977153844651e-06, "loss": 0.0014, "step": 134710 }, { "epoch": 1.101688678088073, "grad_norm": 0.09061280637979507, "learning_rate": 4.985263525013355e-06, "loss": 0.0008, "step": 134720 }, { "epoch": 1.1017704542666722, "grad_norm": 0.06435033679008484, "learning_rate": 4.984549896482253e-06, "loss": 0.0009, "step": 134730 }, { "epoch": 1.1018522304452714, "grad_norm": 0.019150089472532272, "learning_rate": 4.983836268265882e-06, "loss": 0.0012, "step": 134740 }, { "epoch": 1.1019340066238705, "grad_norm": 0.004395820666104555, "learning_rate": 4.9831226403787785e-06, "loss": 0.0005, "step": 134750 }, { "epoch": 1.1020157828024697, "grad_norm": 0.02159304730594158, "learning_rate": 4.982409012835482e-06, "loss": 0.0009, "step": 134760 }, { "epoch": 1.1020975589810689, "grad_norm": 0.07914764434099197, "learning_rate": 4.981695385650527e-06, "loss": 0.0015, "step": 134770 }, { "epoch": 1.102179335159668, "grad_norm": 0.0441010408103466, "learning_rate": 4.980981758838454e-06, "loss": 0.0004, "step": 134780 }, { "epoch": 1.1022611113382672, "grad_norm": 0.019765883684158325, "learning_rate": 4.980268132413794e-06, "loss": 0.0014, "step": 134790 }, { "epoch": 1.1023428875168664, "grad_norm": 0.037056565284729004, "learning_rate": 4.979554506391091e-06, "loss": 0.0011, "step": 134800 }, { "epoch": 1.1024246636954655, "grad_norm": 0.061651285737752914, "learning_rate": 4.978840880784879e-06, "loss": 0.0014, "step": 134810 }, { "epoch": 1.1025064398740647, "grad_norm": 0.05855026841163635, "learning_rate": 4.978127255609695e-06, "loss": 0.0012, "step": 134820 }, { "epoch": 1.1025882160526639, "grad_norm": 0.08988485485315323, "learning_rate": 4.977413630880076e-06, "loss": 0.0013, "step": 134830 }, { "epoch": 1.102669992231263, "grad_norm": 0.04821433126926422, "learning_rate": 4.976700006610561e-06, "loss": 0.0014, "step": 134840 }, { "epoch": 1.1027517684098622, "grad_norm": 0.0040064542554318905, "learning_rate": 4.975986382815686e-06, "loss": 0.0008, "step": 134850 }, { "epoch": 1.1028335445884614, "grad_norm": 0.10507041215896606, "learning_rate": 4.975272759509988e-06, "loss": 0.0013, "step": 134860 }, { "epoch": 1.1029153207670606, "grad_norm": 0.03368814289569855, "learning_rate": 4.974559136708002e-06, "loss": 0.0008, "step": 134870 }, { "epoch": 1.1029970969456597, "grad_norm": 0.03440874442458153, "learning_rate": 4.973845514424269e-06, "loss": 0.0015, "step": 134880 }, { "epoch": 1.103078873124259, "grad_norm": 0.007701188791543245, "learning_rate": 4.973131892673323e-06, "loss": 0.0009, "step": 134890 }, { "epoch": 1.103160649302858, "grad_norm": 0.039018768817186356, "learning_rate": 4.972418271469703e-06, "loss": 0.0007, "step": 134900 }, { "epoch": 1.1032424254814572, "grad_norm": 0.07674477249383926, "learning_rate": 4.971704650827944e-06, "loss": 0.0016, "step": 134910 }, { "epoch": 1.1033242016600564, "grad_norm": 0.050534602254629135, "learning_rate": 4.970991030762586e-06, "loss": 0.0017, "step": 134920 }, { "epoch": 1.1034059778386556, "grad_norm": 0.06223548576235771, "learning_rate": 4.970277411288163e-06, "loss": 0.001, "step": 134930 }, { "epoch": 1.1034877540172547, "grad_norm": 0.07901497185230255, "learning_rate": 4.969563792419213e-06, "loss": 0.0016, "step": 134940 }, { "epoch": 1.103569530195854, "grad_norm": 0.018244443461298943, "learning_rate": 4.968850174170274e-06, "loss": 0.0018, "step": 134950 }, { "epoch": 1.103651306374453, "grad_norm": 0.019414935261011124, "learning_rate": 4.968136556555882e-06, "loss": 0.0009, "step": 134960 }, { "epoch": 1.1037330825530522, "grad_norm": 0.030972877517342567, "learning_rate": 4.9674229395905755e-06, "loss": 0.0014, "step": 134970 }, { "epoch": 1.1038148587316514, "grad_norm": 0.10370049625635147, "learning_rate": 4.966709323288889e-06, "loss": 0.0016, "step": 134980 }, { "epoch": 1.1038966349102506, "grad_norm": 0.006576004438102245, "learning_rate": 4.965995707665361e-06, "loss": 0.0013, "step": 134990 }, { "epoch": 1.1039784110888498, "grad_norm": 0.06568785011768341, "learning_rate": 4.965282092734528e-06, "loss": 0.0014, "step": 135000 }, { "epoch": 1.104060187267449, "grad_norm": 0.02593020163476467, "learning_rate": 4.964568478510927e-06, "loss": 0.0025, "step": 135010 }, { "epoch": 1.104141963446048, "grad_norm": 0.17838865518569946, "learning_rate": 4.963854865009095e-06, "loss": 0.0013, "step": 135020 }, { "epoch": 1.1042237396246473, "grad_norm": 0.054204680025577545, "learning_rate": 4.963141252243567e-06, "loss": 0.0008, "step": 135030 }, { "epoch": 1.1043055158032464, "grad_norm": 0.0025347552727907896, "learning_rate": 4.962427640228883e-06, "loss": 0.0019, "step": 135040 }, { "epoch": 1.1043872919818456, "grad_norm": 0.0006526661454699934, "learning_rate": 4.961714028979579e-06, "loss": 0.0017, "step": 135050 }, { "epoch": 1.1044690681604448, "grad_norm": 0.03643130883574486, "learning_rate": 4.9610004185101926e-06, "loss": 0.0009, "step": 135060 }, { "epoch": 1.104550844339044, "grad_norm": 0.029582751914858818, "learning_rate": 4.960286808835257e-06, "loss": 0.0013, "step": 135070 }, { "epoch": 1.104632620517643, "grad_norm": 0.010507329367101192, "learning_rate": 4.959573199969312e-06, "loss": 0.0005, "step": 135080 }, { "epoch": 1.1047143966962425, "grad_norm": 0.034475501626729965, "learning_rate": 4.958859591926894e-06, "loss": 0.0018, "step": 135090 }, { "epoch": 1.1047961728748414, "grad_norm": 0.10048993676900864, "learning_rate": 4.95814598472254e-06, "loss": 0.001, "step": 135100 }, { "epoch": 1.1048779490534408, "grad_norm": 0.029268881306052208, "learning_rate": 4.957432378370786e-06, "loss": 0.0011, "step": 135110 }, { "epoch": 1.10495972523204, "grad_norm": 0.014347531832754612, "learning_rate": 4.956718772886169e-06, "loss": 0.0007, "step": 135120 }, { "epoch": 1.1050415014106392, "grad_norm": 0.009507916867733002, "learning_rate": 4.956005168283226e-06, "loss": 0.0008, "step": 135130 }, { "epoch": 1.1051232775892383, "grad_norm": 0.01653928868472576, "learning_rate": 4.955291564576494e-06, "loss": 0.0012, "step": 135140 }, { "epoch": 1.1052050537678375, "grad_norm": 0.010624311864376068, "learning_rate": 4.954577961780506e-06, "loss": 0.0038, "step": 135150 }, { "epoch": 1.1052868299464367, "grad_norm": 0.0012983541237190366, "learning_rate": 4.953864359909805e-06, "loss": 0.0011, "step": 135160 }, { "epoch": 1.1053686061250358, "grad_norm": 0.00455318670719862, "learning_rate": 4.953150758978923e-06, "loss": 0.001, "step": 135170 }, { "epoch": 1.105450382303635, "grad_norm": 0.0020894398912787437, "learning_rate": 4.9524371590024e-06, "loss": 0.0015, "step": 135180 }, { "epoch": 1.1055321584822342, "grad_norm": 0.07936244457960129, "learning_rate": 4.951723559994769e-06, "loss": 0.001, "step": 135190 }, { "epoch": 1.1056139346608334, "grad_norm": 0.003572692396119237, "learning_rate": 4.951009961970569e-06, "loss": 0.0009, "step": 135200 }, { "epoch": 1.1056957108394325, "grad_norm": 0.0400802381336689, "learning_rate": 4.950296364944336e-06, "loss": 0.001, "step": 135210 }, { "epoch": 1.1057774870180317, "grad_norm": 0.09149773418903351, "learning_rate": 4.9495827689306065e-06, "loss": 0.0016, "step": 135220 }, { "epoch": 1.1058592631966309, "grad_norm": 0.038135964423418045, "learning_rate": 4.948869173943916e-06, "loss": 0.0012, "step": 135230 }, { "epoch": 1.10594103937523, "grad_norm": 0.01114257425069809, "learning_rate": 4.948155579998803e-06, "loss": 0.0012, "step": 135240 }, { "epoch": 1.1060228155538292, "grad_norm": 0.04101026430726051, "learning_rate": 4.947441987109804e-06, "loss": 0.0008, "step": 135250 }, { "epoch": 1.1061045917324284, "grad_norm": 0.04039061814546585, "learning_rate": 4.946728395291452e-06, "loss": 0.0015, "step": 135260 }, { "epoch": 1.1061863679110275, "grad_norm": 0.009597684256732464, "learning_rate": 4.946014804558286e-06, "loss": 0.0014, "step": 135270 }, { "epoch": 1.1062681440896267, "grad_norm": 0.02978740818798542, "learning_rate": 4.945301214924843e-06, "loss": 0.0008, "step": 135280 }, { "epoch": 1.1063499202682259, "grad_norm": 0.013252493925392628, "learning_rate": 4.94458762640566e-06, "loss": 0.0008, "step": 135290 }, { "epoch": 1.106431696446825, "grad_norm": 0.023887446150183678, "learning_rate": 4.943874039015271e-06, "loss": 0.0006, "step": 135300 }, { "epoch": 1.1065134726254242, "grad_norm": 0.04216210916638374, "learning_rate": 4.943160452768213e-06, "loss": 0.0017, "step": 135310 }, { "epoch": 1.1065952488040234, "grad_norm": 0.05897687375545502, "learning_rate": 4.942446867679024e-06, "loss": 0.0012, "step": 135320 }, { "epoch": 1.1066770249826225, "grad_norm": 0.020152878016233444, "learning_rate": 4.941733283762238e-06, "loss": 0.0023, "step": 135330 }, { "epoch": 1.1067588011612217, "grad_norm": 0.03979015722870827, "learning_rate": 4.941019701032393e-06, "loss": 0.0007, "step": 135340 }, { "epoch": 1.1068405773398209, "grad_norm": 0.032829441130161285, "learning_rate": 4.940306119504024e-06, "loss": 0.0008, "step": 135350 }, { "epoch": 1.10692235351842, "grad_norm": 0.012039300985634327, "learning_rate": 4.9395925391916686e-06, "loss": 0.0009, "step": 135360 }, { "epoch": 1.1070041296970192, "grad_norm": 0.03603936359286308, "learning_rate": 4.938878960109863e-06, "loss": 0.0009, "step": 135370 }, { "epoch": 1.1070859058756184, "grad_norm": 0.05157586559653282, "learning_rate": 4.9381653822731414e-06, "loss": 0.0007, "step": 135380 }, { "epoch": 1.1071676820542176, "grad_norm": 0.06612031161785126, "learning_rate": 4.93745180569604e-06, "loss": 0.0012, "step": 135390 }, { "epoch": 1.1072494582328167, "grad_norm": 0.047166142612695694, "learning_rate": 4.9367382303930975e-06, "loss": 0.0012, "step": 135400 }, { "epoch": 1.107331234411416, "grad_norm": 0.02775202877819538, "learning_rate": 4.936024656378849e-06, "loss": 0.0008, "step": 135410 }, { "epoch": 1.107413010590015, "grad_norm": 0.0034094329457730055, "learning_rate": 4.935311083667831e-06, "loss": 0.0012, "step": 135420 }, { "epoch": 1.1074947867686142, "grad_norm": 0.047135449945926666, "learning_rate": 4.934597512274577e-06, "loss": 0.0009, "step": 135430 }, { "epoch": 1.1075765629472134, "grad_norm": 0.045287758111953735, "learning_rate": 4.933883942213627e-06, "loss": 0.0017, "step": 135440 }, { "epoch": 1.1076583391258126, "grad_norm": 0.01490104105323553, "learning_rate": 4.933170373499513e-06, "loss": 0.0014, "step": 135450 }, { "epoch": 1.1077401153044117, "grad_norm": 0.014226374216377735, "learning_rate": 4.932456806146774e-06, "loss": 0.0012, "step": 135460 }, { "epoch": 1.107821891483011, "grad_norm": 0.011019682511687279, "learning_rate": 4.9317432401699435e-06, "loss": 0.0009, "step": 135470 }, { "epoch": 1.10790366766161, "grad_norm": 0.005180124659091234, "learning_rate": 4.931029675583559e-06, "loss": 0.0004, "step": 135480 }, { "epoch": 1.1079854438402092, "grad_norm": 0.054773058742284775, "learning_rate": 4.930316112402157e-06, "loss": 0.0007, "step": 135490 }, { "epoch": 1.1080672200188084, "grad_norm": 0.011733065359294415, "learning_rate": 4.9296025506402726e-06, "loss": 0.0011, "step": 135500 }, { "epoch": 1.1081489961974076, "grad_norm": 0.05223818123340607, "learning_rate": 4.928888990312438e-06, "loss": 0.0005, "step": 135510 }, { "epoch": 1.108230772376007, "grad_norm": 0.0325329415500164, "learning_rate": 4.9281754314331955e-06, "loss": 0.001, "step": 135520 }, { "epoch": 1.1083125485546061, "grad_norm": 0.03452645242214203, "learning_rate": 4.927461874017078e-06, "loss": 0.0016, "step": 135530 }, { "epoch": 1.1083943247332053, "grad_norm": 0.037320468574762344, "learning_rate": 4.926748318078621e-06, "loss": 0.001, "step": 135540 }, { "epoch": 1.1084761009118045, "grad_norm": 0.012756261974573135, "learning_rate": 4.926034763632359e-06, "loss": 0.0016, "step": 135550 }, { "epoch": 1.1085578770904037, "grad_norm": 0.1346917599439621, "learning_rate": 4.92532121069283e-06, "loss": 0.001, "step": 135560 }, { "epoch": 1.1086396532690028, "grad_norm": 0.047012537717819214, "learning_rate": 4.924607659274569e-06, "loss": 0.0017, "step": 135570 }, { "epoch": 1.108721429447602, "grad_norm": 0.007210188079625368, "learning_rate": 4.923894109392111e-06, "loss": 0.0016, "step": 135580 }, { "epoch": 1.1088032056262012, "grad_norm": 0.14465728402137756, "learning_rate": 4.923180561059991e-06, "loss": 0.0019, "step": 135590 }, { "epoch": 1.1088849818048003, "grad_norm": 0.014961212873458862, "learning_rate": 4.922467014292746e-06, "loss": 0.0007, "step": 135600 }, { "epoch": 1.1089667579833995, "grad_norm": 0.052575234323740005, "learning_rate": 4.9217534691049115e-06, "loss": 0.0007, "step": 135610 }, { "epoch": 1.1090485341619987, "grad_norm": 0.025142930448055267, "learning_rate": 4.9210399255110225e-06, "loss": 0.0009, "step": 135620 }, { "epoch": 1.1091303103405978, "grad_norm": 0.10212451964616776, "learning_rate": 4.920326383525612e-06, "loss": 0.0017, "step": 135630 }, { "epoch": 1.109212086519197, "grad_norm": 0.018038872629404068, "learning_rate": 4.91961284316322e-06, "loss": 0.0005, "step": 135640 }, { "epoch": 1.1092938626977962, "grad_norm": 0.013887641951441765, "learning_rate": 4.91889930443838e-06, "loss": 0.0005, "step": 135650 }, { "epoch": 1.1093756388763953, "grad_norm": 0.03759930655360222, "learning_rate": 4.918185767365626e-06, "loss": 0.0009, "step": 135660 }, { "epoch": 1.1094574150549945, "grad_norm": 0.040127936750650406, "learning_rate": 4.917472231959494e-06, "loss": 0.001, "step": 135670 }, { "epoch": 1.1095391912335937, "grad_norm": 0.02376837097108364, "learning_rate": 4.916758698234521e-06, "loss": 0.0003, "step": 135680 }, { "epoch": 1.1096209674121928, "grad_norm": 0.04108111932873726, "learning_rate": 4.916045166205241e-06, "loss": 0.0008, "step": 135690 }, { "epoch": 1.109702743590792, "grad_norm": 0.008013823069632053, "learning_rate": 4.9153316358861894e-06, "loss": 0.0024, "step": 135700 }, { "epoch": 1.1097845197693912, "grad_norm": 0.12459045648574829, "learning_rate": 4.9146181072918995e-06, "loss": 0.0011, "step": 135710 }, { "epoch": 1.1098662959479904, "grad_norm": 0.03307488560676575, "learning_rate": 4.91390458043691e-06, "loss": 0.0008, "step": 135720 }, { "epoch": 1.1099480721265895, "grad_norm": 0.0044076754711568356, "learning_rate": 4.913191055335754e-06, "loss": 0.0007, "step": 135730 }, { "epoch": 1.1100298483051887, "grad_norm": 0.09102211892604828, "learning_rate": 4.912477532002966e-06, "loss": 0.0016, "step": 135740 }, { "epoch": 1.1101116244837879, "grad_norm": 0.0036384048871695995, "learning_rate": 4.911764010453082e-06, "loss": 0.001, "step": 135750 }, { "epoch": 1.110193400662387, "grad_norm": 0.06503565609455109, "learning_rate": 4.911050490700635e-06, "loss": 0.001, "step": 135760 }, { "epoch": 1.1102751768409862, "grad_norm": 0.08374123275279999, "learning_rate": 4.910336972760164e-06, "loss": 0.0009, "step": 135770 }, { "epoch": 1.1103569530195854, "grad_norm": 0.10138941556215286, "learning_rate": 4.909623456646202e-06, "loss": 0.0018, "step": 135780 }, { "epoch": 1.1104387291981845, "grad_norm": 0.07101955264806747, "learning_rate": 4.908909942373284e-06, "loss": 0.0008, "step": 135790 }, { "epoch": 1.1105205053767837, "grad_norm": 0.025867437943816185, "learning_rate": 4.908196429955942e-06, "loss": 0.0014, "step": 135800 }, { "epoch": 1.1106022815553829, "grad_norm": 0.07018342614173889, "learning_rate": 4.907482919408715e-06, "loss": 0.0015, "step": 135810 }, { "epoch": 1.110684057733982, "grad_norm": 0.01872711442410946, "learning_rate": 4.906769410746137e-06, "loss": 0.0011, "step": 135820 }, { "epoch": 1.1107658339125812, "grad_norm": 0.04807053506374359, "learning_rate": 4.906055903982741e-06, "loss": 0.0011, "step": 135830 }, { "epoch": 1.1108476100911804, "grad_norm": 0.03282363340258598, "learning_rate": 4.905342399133062e-06, "loss": 0.0008, "step": 135840 }, { "epoch": 1.1109293862697796, "grad_norm": 0.006178369279950857, "learning_rate": 4.904628896211636e-06, "loss": 0.0007, "step": 135850 }, { "epoch": 1.1110111624483787, "grad_norm": 0.04509333521127701, "learning_rate": 4.9039153952329965e-06, "loss": 0.0012, "step": 135860 }, { "epoch": 1.1110929386269779, "grad_norm": 0.10699921101331711, "learning_rate": 4.9032018962116795e-06, "loss": 0.0013, "step": 135870 }, { "epoch": 1.111174714805577, "grad_norm": 0.030861403793096542, "learning_rate": 4.902488399162216e-06, "loss": 0.001, "step": 135880 }, { "epoch": 1.1112564909841762, "grad_norm": 0.10019614547491074, "learning_rate": 4.901774904099145e-06, "loss": 0.0012, "step": 135890 }, { "epoch": 1.1113382671627754, "grad_norm": 0.043546032160520554, "learning_rate": 4.9010614110369995e-06, "loss": 0.0015, "step": 135900 }, { "epoch": 1.1114200433413746, "grad_norm": 0.07055814564228058, "learning_rate": 4.900347919990314e-06, "loss": 0.0021, "step": 135910 }, { "epoch": 1.1115018195199737, "grad_norm": 0.04223985597491264, "learning_rate": 4.899634430973619e-06, "loss": 0.0011, "step": 135920 }, { "epoch": 1.111583595698573, "grad_norm": 0.10442014038562775, "learning_rate": 4.898920944001455e-06, "loss": 0.0011, "step": 135930 }, { "epoch": 1.111665371877172, "grad_norm": 0.02805938385426998, "learning_rate": 4.898207459088353e-06, "loss": 0.0015, "step": 135940 }, { "epoch": 1.1117471480557715, "grad_norm": 0.0934477150440216, "learning_rate": 4.897493976248848e-06, "loss": 0.0017, "step": 135950 }, { "epoch": 1.1118289242343706, "grad_norm": 0.01194579154253006, "learning_rate": 4.896780495497473e-06, "loss": 0.0005, "step": 135960 }, { "epoch": 1.1119107004129698, "grad_norm": 0.048488955944776535, "learning_rate": 4.8960670168487645e-06, "loss": 0.0007, "step": 135970 }, { "epoch": 1.111992476591569, "grad_norm": 0.026069756597280502, "learning_rate": 4.8953535403172555e-06, "loss": 0.0008, "step": 135980 }, { "epoch": 1.1120742527701681, "grad_norm": 0.027349205687642097, "learning_rate": 4.8946400659174795e-06, "loss": 0.0027, "step": 135990 }, { "epoch": 1.1121560289487673, "grad_norm": 0.16480515897274017, "learning_rate": 4.893926593663969e-06, "loss": 0.0014, "step": 136000 }, { "epoch": 1.1122378051273665, "grad_norm": 0.06549626588821411, "learning_rate": 4.893213123571261e-06, "loss": 0.0018, "step": 136010 }, { "epoch": 1.1123195813059656, "grad_norm": 0.023714842274785042, "learning_rate": 4.89249965565389e-06, "loss": 0.0008, "step": 136020 }, { "epoch": 1.1124013574845648, "grad_norm": 0.22502805292606354, "learning_rate": 4.89178618992639e-06, "loss": 0.0016, "step": 136030 }, { "epoch": 1.112483133663164, "grad_norm": 0.01967492699623108, "learning_rate": 4.8910727264032885e-06, "loss": 0.0007, "step": 136040 }, { "epoch": 1.1125649098417631, "grad_norm": 0.02654198184609413, "learning_rate": 4.890359265099126e-06, "loss": 0.0007, "step": 136050 }, { "epoch": 1.1126466860203623, "grad_norm": 0.012425416149199009, "learning_rate": 4.889645806028437e-06, "loss": 0.0011, "step": 136060 }, { "epoch": 1.1127284621989615, "grad_norm": 0.24594904482364655, "learning_rate": 4.88893234920575e-06, "loss": 0.0014, "step": 136070 }, { "epoch": 1.1128102383775607, "grad_norm": 0.03700936958193779, "learning_rate": 4.888218894645601e-06, "loss": 0.0009, "step": 136080 }, { "epoch": 1.1128920145561598, "grad_norm": 0.023949500173330307, "learning_rate": 4.887505442362525e-06, "loss": 0.0011, "step": 136090 }, { "epoch": 1.112973790734759, "grad_norm": 0.1573318988084793, "learning_rate": 4.886791992371055e-06, "loss": 0.001, "step": 136100 }, { "epoch": 1.1130555669133582, "grad_norm": 0.15730063617229462, "learning_rate": 4.886078544685724e-06, "loss": 0.0006, "step": 136110 }, { "epoch": 1.1131373430919573, "grad_norm": 0.002955118427053094, "learning_rate": 4.885365099321063e-06, "loss": 0.0006, "step": 136120 }, { "epoch": 1.1132191192705565, "grad_norm": 0.05036328732967377, "learning_rate": 4.8846516562916105e-06, "loss": 0.0017, "step": 136130 }, { "epoch": 1.1133008954491557, "grad_norm": 0.04150623455643654, "learning_rate": 4.8839382156118985e-06, "loss": 0.0011, "step": 136140 }, { "epoch": 1.1133826716277548, "grad_norm": 0.07468457520008087, "learning_rate": 4.8832247772964595e-06, "loss": 0.0012, "step": 136150 }, { "epoch": 1.113464447806354, "grad_norm": 0.09095177799463272, "learning_rate": 4.882511341359824e-06, "loss": 0.0007, "step": 136160 }, { "epoch": 1.1135462239849532, "grad_norm": 0.04031023010611534, "learning_rate": 4.881797907816529e-06, "loss": 0.0011, "step": 136170 }, { "epoch": 1.1136280001635523, "grad_norm": 0.002815553918480873, "learning_rate": 4.881084476681107e-06, "loss": 0.0011, "step": 136180 }, { "epoch": 1.1137097763421515, "grad_norm": 0.02566906064748764, "learning_rate": 4.880371047968091e-06, "loss": 0.0006, "step": 136190 }, { "epoch": 1.1137915525207507, "grad_norm": 0.02635842002928257, "learning_rate": 4.879657621692014e-06, "loss": 0.0008, "step": 136200 }, { "epoch": 1.1138733286993499, "grad_norm": 0.041027627885341644, "learning_rate": 4.878944197867409e-06, "loss": 0.0016, "step": 136210 }, { "epoch": 1.113955104877949, "grad_norm": 0.06743039190769196, "learning_rate": 4.87823077650881e-06, "loss": 0.0014, "step": 136220 }, { "epoch": 1.1140368810565482, "grad_norm": 0.04735531285405159, "learning_rate": 4.877517357630749e-06, "loss": 0.0007, "step": 136230 }, { "epoch": 1.1141186572351474, "grad_norm": 0.041559115052223206, "learning_rate": 4.876803941247756e-06, "loss": 0.001, "step": 136240 }, { "epoch": 1.1142004334137465, "grad_norm": 0.05121779814362526, "learning_rate": 4.876090527374369e-06, "loss": 0.0015, "step": 136250 }, { "epoch": 1.1142822095923457, "grad_norm": 0.045949555933475494, "learning_rate": 4.875377116025119e-06, "loss": 0.0008, "step": 136260 }, { "epoch": 1.1143639857709449, "grad_norm": 0.0029156978707760572, "learning_rate": 4.87466370721454e-06, "loss": 0.001, "step": 136270 }, { "epoch": 1.114445761949544, "grad_norm": 0.11862189322710037, "learning_rate": 4.873950300957159e-06, "loss": 0.0016, "step": 136280 }, { "epoch": 1.1145275381281432, "grad_norm": 0.0409090518951416, "learning_rate": 4.8732368972675156e-06, "loss": 0.0006, "step": 136290 }, { "epoch": 1.1146093143067424, "grad_norm": 0.05751889571547508, "learning_rate": 4.872523496160139e-06, "loss": 0.0017, "step": 136300 }, { "epoch": 1.1146910904853415, "grad_norm": 0.030716150999069214, "learning_rate": 4.871810097649563e-06, "loss": 0.0005, "step": 136310 }, { "epoch": 1.1147728666639407, "grad_norm": 0.02323489636182785, "learning_rate": 4.871096701750318e-06, "loss": 0.0009, "step": 136320 }, { "epoch": 1.1148546428425399, "grad_norm": 0.06856345385313034, "learning_rate": 4.87038330847694e-06, "loss": 0.0011, "step": 136330 }, { "epoch": 1.114936419021139, "grad_norm": 0.11700908094644547, "learning_rate": 4.869669917843958e-06, "loss": 0.0011, "step": 136340 }, { "epoch": 1.1150181951997382, "grad_norm": 0.015608600340783596, "learning_rate": 4.868956529865907e-06, "loss": 0.0015, "step": 136350 }, { "epoch": 1.1150999713783376, "grad_norm": 0.15607047080993652, "learning_rate": 4.8682431445573155e-06, "loss": 0.001, "step": 136360 }, { "epoch": 1.1151817475569366, "grad_norm": 0.2300175130367279, "learning_rate": 4.86752976193272e-06, "loss": 0.0023, "step": 136370 }, { "epoch": 1.115263523735536, "grad_norm": 0.022551316767930984, "learning_rate": 4.866816382006651e-06, "loss": 0.0009, "step": 136380 }, { "epoch": 1.1153452999141351, "grad_norm": 0.02557010017335415, "learning_rate": 4.866103004793642e-06, "loss": 0.0009, "step": 136390 }, { "epoch": 1.1154270760927343, "grad_norm": 0.023793742060661316, "learning_rate": 4.86538963030822e-06, "loss": 0.0011, "step": 136400 }, { "epoch": 1.1155088522713335, "grad_norm": 0.0031675014179199934, "learning_rate": 4.864676258564923e-06, "loss": 0.0015, "step": 136410 }, { "epoch": 1.1155906284499326, "grad_norm": 0.003036022884771228, "learning_rate": 4.863962889578281e-06, "loss": 0.0005, "step": 136420 }, { "epoch": 1.1156724046285318, "grad_norm": 0.02742324396967888, "learning_rate": 4.8632495233628245e-06, "loss": 0.0006, "step": 136430 }, { "epoch": 1.115754180807131, "grad_norm": 0.04504280164837837, "learning_rate": 4.8625361599330864e-06, "loss": 0.0011, "step": 136440 }, { "epoch": 1.1158359569857301, "grad_norm": 0.018119681626558304, "learning_rate": 4.861822799303599e-06, "loss": 0.0011, "step": 136450 }, { "epoch": 1.1159177331643293, "grad_norm": 0.055840715765953064, "learning_rate": 4.861109441488893e-06, "loss": 0.001, "step": 136460 }, { "epoch": 1.1159995093429285, "grad_norm": 0.06177007034420967, "learning_rate": 4.8603960865035014e-06, "loss": 0.0011, "step": 136470 }, { "epoch": 1.1160812855215276, "grad_norm": 0.026820017024874687, "learning_rate": 4.859682734361952e-06, "loss": 0.0003, "step": 136480 }, { "epoch": 1.1161630617001268, "grad_norm": 0.028460588306188583, "learning_rate": 4.858969385078782e-06, "loss": 0.0009, "step": 136490 }, { "epoch": 1.116244837878726, "grad_norm": 0.045050524175167084, "learning_rate": 4.8582560386685205e-06, "loss": 0.0007, "step": 136500 }, { "epoch": 1.1163266140573251, "grad_norm": 0.007784539833664894, "learning_rate": 4.8575426951457e-06, "loss": 0.0017, "step": 136510 }, { "epoch": 1.1164083902359243, "grad_norm": 0.049572691321372986, "learning_rate": 4.856829354524847e-06, "loss": 0.002, "step": 136520 }, { "epoch": 1.1164901664145235, "grad_norm": 0.011959649622440338, "learning_rate": 4.8561160168204976e-06, "loss": 0.0004, "step": 136530 }, { "epoch": 1.1165719425931226, "grad_norm": 0.03831411153078079, "learning_rate": 4.8554026820471835e-06, "loss": 0.0013, "step": 136540 }, { "epoch": 1.1166537187717218, "grad_norm": 0.03533397242426872, "learning_rate": 4.854689350219434e-06, "loss": 0.0015, "step": 136550 }, { "epoch": 1.116735494950321, "grad_norm": 0.0241289883852005, "learning_rate": 4.8539760213517785e-06, "loss": 0.0012, "step": 136560 }, { "epoch": 1.1168172711289202, "grad_norm": 0.007181236520409584, "learning_rate": 4.853262695458752e-06, "loss": 0.001, "step": 136570 }, { "epoch": 1.1168990473075193, "grad_norm": 0.04949287325143814, "learning_rate": 4.852549372554883e-06, "loss": 0.0009, "step": 136580 }, { "epoch": 1.1169808234861185, "grad_norm": 0.009460153058171272, "learning_rate": 4.851836052654704e-06, "loss": 0.001, "step": 136590 }, { "epoch": 1.1170625996647177, "grad_norm": 0.10054687410593033, "learning_rate": 4.851122735772742e-06, "loss": 0.0011, "step": 136600 }, { "epoch": 1.1171443758433168, "grad_norm": 0.002240339992567897, "learning_rate": 4.850409421923534e-06, "loss": 0.0014, "step": 136610 }, { "epoch": 1.117226152021916, "grad_norm": 0.048734139651060104, "learning_rate": 4.849696111121607e-06, "loss": 0.0008, "step": 136620 }, { "epoch": 1.1173079282005152, "grad_norm": 0.05652366951107979, "learning_rate": 4.848982803381494e-06, "loss": 0.0009, "step": 136630 }, { "epoch": 1.1173897043791143, "grad_norm": 0.04852806776762009, "learning_rate": 4.848269498717721e-06, "loss": 0.0024, "step": 136640 }, { "epoch": 1.1174714805577135, "grad_norm": 0.05268046259880066, "learning_rate": 4.8475561971448225e-06, "loss": 0.0008, "step": 136650 }, { "epoch": 1.1175532567363127, "grad_norm": 0.03852059692144394, "learning_rate": 4.846842898677329e-06, "loss": 0.0015, "step": 136660 }, { "epoch": 1.1176350329149118, "grad_norm": 0.0074134403839707375, "learning_rate": 4.84612960332977e-06, "loss": 0.0013, "step": 136670 }, { "epoch": 1.117716809093511, "grad_norm": 0.023111650720238686, "learning_rate": 4.845416311116675e-06, "loss": 0.0012, "step": 136680 }, { "epoch": 1.1177985852721102, "grad_norm": 0.03698723763227463, "learning_rate": 4.844703022052577e-06, "loss": 0.001, "step": 136690 }, { "epoch": 1.1178803614507093, "grad_norm": 0.012018651701509953, "learning_rate": 4.843989736152004e-06, "loss": 0.0029, "step": 136700 }, { "epoch": 1.1179621376293085, "grad_norm": 0.03235533460974693, "learning_rate": 4.843276453429487e-06, "loss": 0.0008, "step": 136710 }, { "epoch": 1.1180439138079077, "grad_norm": 0.0032326423097401857, "learning_rate": 4.842563173899553e-06, "loss": 0.0015, "step": 136720 }, { "epoch": 1.1181256899865069, "grad_norm": 0.014232484623789787, "learning_rate": 4.841849897576739e-06, "loss": 0.0006, "step": 136730 }, { "epoch": 1.118207466165106, "grad_norm": 0.01571502909064293, "learning_rate": 4.84113662447557e-06, "loss": 0.0009, "step": 136740 }, { "epoch": 1.1182892423437052, "grad_norm": 0.0036675806622952223, "learning_rate": 4.840423354610579e-06, "loss": 0.0008, "step": 136750 }, { "epoch": 1.1183710185223044, "grad_norm": 0.049473244696855545, "learning_rate": 4.839710087996291e-06, "loss": 0.0007, "step": 136760 }, { "epoch": 1.1184527947009035, "grad_norm": 0.04093256965279579, "learning_rate": 4.83899682464724e-06, "loss": 0.0007, "step": 136770 }, { "epoch": 1.1185345708795027, "grad_norm": 0.02598774991929531, "learning_rate": 4.838283564577954e-06, "loss": 0.0021, "step": 136780 }, { "epoch": 1.118616347058102, "grad_norm": 0.012131981551647186, "learning_rate": 4.837570307802964e-06, "loss": 0.0007, "step": 136790 }, { "epoch": 1.118698123236701, "grad_norm": 0.04141242802143097, "learning_rate": 4.836857054336799e-06, "loss": 0.0013, "step": 136800 }, { "epoch": 1.1187798994153004, "grad_norm": 0.01818547211587429, "learning_rate": 4.836143804193988e-06, "loss": 0.0014, "step": 136810 }, { "epoch": 1.1188616755938996, "grad_norm": 0.053957097232341766, "learning_rate": 4.835430557389061e-06, "loss": 0.0014, "step": 136820 }, { "epoch": 1.1189434517724988, "grad_norm": 0.0728502869606018, "learning_rate": 4.8347173139365475e-06, "loss": 0.0011, "step": 136830 }, { "epoch": 1.119025227951098, "grad_norm": 0.03359472006559372, "learning_rate": 4.834004073850974e-06, "loss": 0.0015, "step": 136840 }, { "epoch": 1.119107004129697, "grad_norm": 0.061708103865385056, "learning_rate": 4.833290837146876e-06, "loss": 0.001, "step": 136850 }, { "epoch": 1.1191887803082963, "grad_norm": 0.015393265523016453, "learning_rate": 4.832577603838778e-06, "loss": 0.0013, "step": 136860 }, { "epoch": 1.1192705564868954, "grad_norm": 0.06306769698858261, "learning_rate": 4.831864373941212e-06, "loss": 0.0013, "step": 136870 }, { "epoch": 1.1193523326654946, "grad_norm": 0.014358208514750004, "learning_rate": 4.831151147468701e-06, "loss": 0.0017, "step": 136880 }, { "epoch": 1.1194341088440938, "grad_norm": 0.027841584756970406, "learning_rate": 4.830437924435782e-06, "loss": 0.0016, "step": 136890 }, { "epoch": 1.119515885022693, "grad_norm": 0.03480496630072594, "learning_rate": 4.829724704856979e-06, "loss": 0.0007, "step": 136900 }, { "epoch": 1.1195976612012921, "grad_norm": 0.0373680554330349, "learning_rate": 4.829011488746823e-06, "loss": 0.0006, "step": 136910 }, { "epoch": 1.1196794373798913, "grad_norm": 0.04286956414580345, "learning_rate": 4.828298276119841e-06, "loss": 0.0023, "step": 136920 }, { "epoch": 1.1197612135584905, "grad_norm": 0.05923735722899437, "learning_rate": 4.8275850669905635e-06, "loss": 0.002, "step": 136930 }, { "epoch": 1.1198429897370896, "grad_norm": 0.054139651358127594, "learning_rate": 4.8268718613735184e-06, "loss": 0.0008, "step": 136940 }, { "epoch": 1.1199247659156888, "grad_norm": 0.04645667225122452, "learning_rate": 4.826158659283234e-06, "loss": 0.0013, "step": 136950 }, { "epoch": 1.120006542094288, "grad_norm": 0.026374245062470436, "learning_rate": 4.825445460734237e-06, "loss": 0.0009, "step": 136960 }, { "epoch": 1.1200883182728871, "grad_norm": 0.02919354848563671, "learning_rate": 4.824732265741059e-06, "loss": 0.001, "step": 136970 }, { "epoch": 1.1201700944514863, "grad_norm": 0.07622544467449188, "learning_rate": 4.824019074318229e-06, "loss": 0.0008, "step": 136980 }, { "epoch": 1.1202518706300855, "grad_norm": 0.0177936814725399, "learning_rate": 4.823305886480273e-06, "loss": 0.0006, "step": 136990 }, { "epoch": 1.1203336468086846, "grad_norm": 0.0482453927397728, "learning_rate": 4.822592702241718e-06, "loss": 0.0013, "step": 137000 }, { "epoch": 1.1204154229872838, "grad_norm": 0.07423016428947449, "learning_rate": 4.821879521617095e-06, "loss": 0.0019, "step": 137010 }, { "epoch": 1.120497199165883, "grad_norm": 0.01975930482149124, "learning_rate": 4.8211663446209315e-06, "loss": 0.0027, "step": 137020 }, { "epoch": 1.1205789753444821, "grad_norm": 0.006629259791225195, "learning_rate": 4.820453171267754e-06, "loss": 0.0006, "step": 137030 }, { "epoch": 1.1206607515230813, "grad_norm": 0.010474220849573612, "learning_rate": 4.8197400015720915e-06, "loss": 0.0008, "step": 137040 }, { "epoch": 1.1207425277016805, "grad_norm": 0.04669814929366112, "learning_rate": 4.8190268355484724e-06, "loss": 0.0007, "step": 137050 }, { "epoch": 1.1208243038802796, "grad_norm": 0.05777113139629364, "learning_rate": 4.818313673211423e-06, "loss": 0.0034, "step": 137060 }, { "epoch": 1.1209060800588788, "grad_norm": 0.352373331785202, "learning_rate": 4.817600514575473e-06, "loss": 0.0011, "step": 137070 }, { "epoch": 1.120987856237478, "grad_norm": 0.013693487271666527, "learning_rate": 4.816887359655145e-06, "loss": 0.0008, "step": 137080 }, { "epoch": 1.1210696324160772, "grad_norm": 0.05991850048303604, "learning_rate": 4.816174208464973e-06, "loss": 0.0013, "step": 137090 }, { "epoch": 1.1211514085946763, "grad_norm": 0.10297630727291107, "learning_rate": 4.815461061019483e-06, "loss": 0.0009, "step": 137100 }, { "epoch": 1.1212331847732755, "grad_norm": 0.05529215931892395, "learning_rate": 4.8147479173332015e-06, "loss": 0.0011, "step": 137110 }, { "epoch": 1.1213149609518747, "grad_norm": 0.15468144416809082, "learning_rate": 4.814034777420651e-06, "loss": 0.002, "step": 137120 }, { "epoch": 1.1213967371304738, "grad_norm": 0.05381578952074051, "learning_rate": 4.813321641296368e-06, "loss": 0.0011, "step": 137130 }, { "epoch": 1.121478513309073, "grad_norm": 0.0020698399748653173, "learning_rate": 4.8126085089748725e-06, "loss": 0.0011, "step": 137140 }, { "epoch": 1.1215602894876722, "grad_norm": 0.03270953521132469, "learning_rate": 4.811895380470696e-06, "loss": 0.0012, "step": 137150 }, { "epoch": 1.1216420656662713, "grad_norm": 0.03193613141775131, "learning_rate": 4.81118225579836e-06, "loss": 0.0007, "step": 137160 }, { "epoch": 1.1217238418448705, "grad_norm": 0.14571920037269592, "learning_rate": 4.810469134972398e-06, "loss": 0.0012, "step": 137170 }, { "epoch": 1.1218056180234697, "grad_norm": 0.039377350360155106, "learning_rate": 4.809756018007333e-06, "loss": 0.0011, "step": 137180 }, { "epoch": 1.1218873942020688, "grad_norm": 0.08036232739686966, "learning_rate": 4.809042904917693e-06, "loss": 0.0021, "step": 137190 }, { "epoch": 1.121969170380668, "grad_norm": 0.020663395524024963, "learning_rate": 4.808329795718004e-06, "loss": 0.0015, "step": 137200 }, { "epoch": 1.1220509465592672, "grad_norm": 0.019485849887132645, "learning_rate": 4.80761669042279e-06, "loss": 0.0013, "step": 137210 }, { "epoch": 1.1221327227378666, "grad_norm": 0.035980306565761566, "learning_rate": 4.806903589046583e-06, "loss": 0.0009, "step": 137220 }, { "epoch": 1.1222144989164655, "grad_norm": 0.00919624138623476, "learning_rate": 4.806190491603909e-06, "loss": 0.0008, "step": 137230 }, { "epoch": 1.122296275095065, "grad_norm": 0.0477604940533638, "learning_rate": 4.8054773981092905e-06, "loss": 0.0009, "step": 137240 }, { "epoch": 1.122378051273664, "grad_norm": 0.02976735308766365, "learning_rate": 4.804764308577254e-06, "loss": 0.0008, "step": 137250 }, { "epoch": 1.1224598274522632, "grad_norm": 0.03765161335468292, "learning_rate": 4.804051223022327e-06, "loss": 0.0011, "step": 137260 }, { "epoch": 1.1225416036308624, "grad_norm": 0.09727159887552261, "learning_rate": 4.803338141459038e-06, "loss": 0.0011, "step": 137270 }, { "epoch": 1.1226233798094616, "grad_norm": 0.04075855761766434, "learning_rate": 4.802625063901911e-06, "loss": 0.0011, "step": 137280 }, { "epoch": 1.1227051559880608, "grad_norm": 0.07576247304677963, "learning_rate": 4.8019119903654695e-06, "loss": 0.0007, "step": 137290 }, { "epoch": 1.12278693216666, "grad_norm": 0.04481576383113861, "learning_rate": 4.801198920864243e-06, "loss": 0.0005, "step": 137300 }, { "epoch": 1.122868708345259, "grad_norm": 0.15565234422683716, "learning_rate": 4.800485855412757e-06, "loss": 0.0012, "step": 137310 }, { "epoch": 1.1229504845238583, "grad_norm": 0.06624281406402588, "learning_rate": 4.799772794025536e-06, "loss": 0.001, "step": 137320 }, { "epoch": 1.1230322607024574, "grad_norm": 0.10094378888607025, "learning_rate": 4.799059736717104e-06, "loss": 0.0018, "step": 137330 }, { "epoch": 1.1231140368810566, "grad_norm": 0.04715736210346222, "learning_rate": 4.79834668350199e-06, "loss": 0.0012, "step": 137340 }, { "epoch": 1.1231958130596558, "grad_norm": 0.007577735930681229, "learning_rate": 4.797633634394719e-06, "loss": 0.0006, "step": 137350 }, { "epoch": 1.123277589238255, "grad_norm": 0.06322149932384491, "learning_rate": 4.796920589409815e-06, "loss": 0.0011, "step": 137360 }, { "epoch": 1.123359365416854, "grad_norm": 0.03338778018951416, "learning_rate": 4.796207548561801e-06, "loss": 0.0016, "step": 137370 }, { "epoch": 1.1234411415954533, "grad_norm": 0.02741417847573757, "learning_rate": 4.795494511865207e-06, "loss": 0.0012, "step": 137380 }, { "epoch": 1.1235229177740524, "grad_norm": 0.12452919781208038, "learning_rate": 4.7947814793345565e-06, "loss": 0.0011, "step": 137390 }, { "epoch": 1.1236046939526516, "grad_norm": 0.06472546607255936, "learning_rate": 4.7940684509843735e-06, "loss": 0.0008, "step": 137400 }, { "epoch": 1.1236864701312508, "grad_norm": 0.16249287128448486, "learning_rate": 4.793355426829183e-06, "loss": 0.0012, "step": 137410 }, { "epoch": 1.12376824630985, "grad_norm": 0.015278929844498634, "learning_rate": 4.792642406883511e-06, "loss": 0.0009, "step": 137420 }, { "epoch": 1.1238500224884491, "grad_norm": 0.041253041476011276, "learning_rate": 4.791929391161882e-06, "loss": 0.0011, "step": 137430 }, { "epoch": 1.1239317986670483, "grad_norm": 0.05092981830239296, "learning_rate": 4.79121637967882e-06, "loss": 0.0021, "step": 137440 }, { "epoch": 1.1240135748456475, "grad_norm": 0.027341581881046295, "learning_rate": 4.790503372448849e-06, "loss": 0.0008, "step": 137450 }, { "epoch": 1.1240953510242466, "grad_norm": 0.09786365181207657, "learning_rate": 4.7897903694864955e-06, "loss": 0.0012, "step": 137460 }, { "epoch": 1.1241771272028458, "grad_norm": 0.033325448632240295, "learning_rate": 4.789077370806285e-06, "loss": 0.0006, "step": 137470 }, { "epoch": 1.124258903381445, "grad_norm": 0.031241819262504578, "learning_rate": 4.788364376422738e-06, "loss": 0.0038, "step": 137480 }, { "epoch": 1.1243406795600441, "grad_norm": 0.04045473039150238, "learning_rate": 4.787651386350379e-06, "loss": 0.0011, "step": 137490 }, { "epoch": 1.1244224557386433, "grad_norm": 0.07958273589611053, "learning_rate": 4.7869384006037365e-06, "loss": 0.0013, "step": 137500 }, { "epoch": 1.1245042319172425, "grad_norm": 0.11127951741218567, "learning_rate": 4.7862254191973316e-06, "loss": 0.002, "step": 137510 }, { "epoch": 1.1245860080958416, "grad_norm": 0.0315072201192379, "learning_rate": 4.785512442145688e-06, "loss": 0.0007, "step": 137520 }, { "epoch": 1.1246677842744408, "grad_norm": 0.016278311610221863, "learning_rate": 4.78479946946333e-06, "loss": 0.0009, "step": 137530 }, { "epoch": 1.12474956045304, "grad_norm": 0.041771870106458664, "learning_rate": 4.784086501164783e-06, "loss": 0.001, "step": 137540 }, { "epoch": 1.1248313366316391, "grad_norm": 0.007622809149324894, "learning_rate": 4.783373537264569e-06, "loss": 0.0012, "step": 137550 }, { "epoch": 1.1249131128102383, "grad_norm": 0.024915339425206184, "learning_rate": 4.7826605777772124e-06, "loss": 0.0004, "step": 137560 }, { "epoch": 1.1249948889888375, "grad_norm": 0.00919226836413145, "learning_rate": 4.781947622717234e-06, "loss": 0.0012, "step": 137570 }, { "epoch": 1.1250766651674367, "grad_norm": 0.03428823873400688, "learning_rate": 4.781234672099162e-06, "loss": 0.0013, "step": 137580 }, { "epoch": 1.1251584413460358, "grad_norm": 0.04467890039086342, "learning_rate": 4.780521725937518e-06, "loss": 0.0011, "step": 137590 }, { "epoch": 1.125240217524635, "grad_norm": 0.04151879623532295, "learning_rate": 4.779808784246825e-06, "loss": 0.0009, "step": 137600 }, { "epoch": 1.1253219937032342, "grad_norm": 0.07948166877031326, "learning_rate": 4.779095847041603e-06, "loss": 0.001, "step": 137610 }, { "epoch": 1.1254037698818333, "grad_norm": 0.07792278379201889, "learning_rate": 4.778382914336381e-06, "loss": 0.0019, "step": 137620 }, { "epoch": 1.1254855460604327, "grad_norm": 0.013561297208070755, "learning_rate": 4.777669986145679e-06, "loss": 0.0012, "step": 137630 }, { "epoch": 1.1255673222390317, "grad_norm": 0.08575382083654404, "learning_rate": 4.77695706248402e-06, "loss": 0.0008, "step": 137640 }, { "epoch": 1.125649098417631, "grad_norm": 0.08437510579824448, "learning_rate": 4.776244143365926e-06, "loss": 0.0009, "step": 137650 }, { "epoch": 1.12573087459623, "grad_norm": 0.05134178698062897, "learning_rate": 4.775531228805921e-06, "loss": 0.0006, "step": 137660 }, { "epoch": 1.1258126507748294, "grad_norm": 0.03544304519891739, "learning_rate": 4.774818318818528e-06, "loss": 0.0009, "step": 137670 }, { "epoch": 1.1258944269534286, "grad_norm": 0.07447367906570435, "learning_rate": 4.774105413418269e-06, "loss": 0.0011, "step": 137680 }, { "epoch": 1.1259762031320277, "grad_norm": 0.054459214210510254, "learning_rate": 4.773392512619664e-06, "loss": 0.0011, "step": 137690 }, { "epoch": 1.126057979310627, "grad_norm": 0.03188907355070114, "learning_rate": 4.77267961643724e-06, "loss": 0.0015, "step": 137700 }, { "epoch": 1.126139755489226, "grad_norm": 0.039595987647771835, "learning_rate": 4.771966724885517e-06, "loss": 0.001, "step": 137710 }, { "epoch": 1.1262215316678252, "grad_norm": 0.02236207015812397, "learning_rate": 4.771253837979017e-06, "loss": 0.0008, "step": 137720 }, { "epoch": 1.1263033078464244, "grad_norm": 0.01760527305305004, "learning_rate": 4.77054095573226e-06, "loss": 0.0005, "step": 137730 }, { "epoch": 1.1263850840250236, "grad_norm": 0.013224280439317226, "learning_rate": 4.7698280781597725e-06, "loss": 0.0016, "step": 137740 }, { "epoch": 1.1264668602036227, "grad_norm": 0.07039929926395416, "learning_rate": 4.769115205276075e-06, "loss": 0.0007, "step": 137750 }, { "epoch": 1.126548636382222, "grad_norm": 0.01518474705517292, "learning_rate": 4.768402337095687e-06, "loss": 0.001, "step": 137760 }, { "epoch": 1.126630412560821, "grad_norm": 0.19792410731315613, "learning_rate": 4.767689473633131e-06, "loss": 0.0009, "step": 137770 }, { "epoch": 1.1267121887394203, "grad_norm": 0.05028757452964783, "learning_rate": 4.766976614902931e-06, "loss": 0.0013, "step": 137780 }, { "epoch": 1.1267939649180194, "grad_norm": 0.026951095089316368, "learning_rate": 4.766263760919606e-06, "loss": 0.0007, "step": 137790 }, { "epoch": 1.1268757410966186, "grad_norm": 0.05052937939763069, "learning_rate": 4.765550911697678e-06, "loss": 0.0009, "step": 137800 }, { "epoch": 1.1269575172752178, "grad_norm": 0.04740246757864952, "learning_rate": 4.764838067251667e-06, "loss": 0.0014, "step": 137810 }, { "epoch": 1.127039293453817, "grad_norm": 0.10910426080226898, "learning_rate": 4.764125227596098e-06, "loss": 0.0015, "step": 137820 }, { "epoch": 1.127121069632416, "grad_norm": 0.08752579987049103, "learning_rate": 4.76341239274549e-06, "loss": 0.0007, "step": 137830 }, { "epoch": 1.1272028458110153, "grad_norm": 0.0245607141405344, "learning_rate": 4.762699562714364e-06, "loss": 0.0012, "step": 137840 }, { "epoch": 1.1272846219896144, "grad_norm": 0.012654416263103485, "learning_rate": 4.761986737517237e-06, "loss": 0.0008, "step": 137850 }, { "epoch": 1.1273663981682136, "grad_norm": 0.04059265926480293, "learning_rate": 4.761273917168637e-06, "loss": 0.0006, "step": 137860 }, { "epoch": 1.1274481743468128, "grad_norm": 0.046388816088438034, "learning_rate": 4.760561101683081e-06, "loss": 0.0005, "step": 137870 }, { "epoch": 1.127529950525412, "grad_norm": 0.05369880050420761, "learning_rate": 4.75984829107509e-06, "loss": 0.0012, "step": 137880 }, { "epoch": 1.127611726704011, "grad_norm": 0.02695014327764511, "learning_rate": 4.759135485359184e-06, "loss": 0.0009, "step": 137890 }, { "epoch": 1.1276935028826103, "grad_norm": 0.05291590467095375, "learning_rate": 4.758422684549884e-06, "loss": 0.0008, "step": 137900 }, { "epoch": 1.1277752790612094, "grad_norm": 0.23183101415634155, "learning_rate": 4.757709888661711e-06, "loss": 0.0006, "step": 137910 }, { "epoch": 1.1278570552398086, "grad_norm": 0.010232022032141685, "learning_rate": 4.756997097709185e-06, "loss": 0.0003, "step": 137920 }, { "epoch": 1.1279388314184078, "grad_norm": 0.01871107704937458, "learning_rate": 4.756284311706823e-06, "loss": 0.0022, "step": 137930 }, { "epoch": 1.128020607597007, "grad_norm": 0.02872200310230255, "learning_rate": 4.755571530669149e-06, "loss": 0.001, "step": 137940 }, { "epoch": 1.1281023837756061, "grad_norm": 0.05995392054319382, "learning_rate": 4.754858754610684e-06, "loss": 0.0007, "step": 137950 }, { "epoch": 1.1281841599542053, "grad_norm": 0.09628403931856155, "learning_rate": 4.754145983545943e-06, "loss": 0.0011, "step": 137960 }, { "epoch": 1.1282659361328045, "grad_norm": 0.23843534290790558, "learning_rate": 4.753433217489447e-06, "loss": 0.0027, "step": 137970 }, { "epoch": 1.1283477123114036, "grad_norm": 0.26947134733200073, "learning_rate": 4.752720456455718e-06, "loss": 0.0008, "step": 137980 }, { "epoch": 1.1284294884900028, "grad_norm": 0.022000335156917572, "learning_rate": 4.752007700459275e-06, "loss": 0.001, "step": 137990 }, { "epoch": 1.128511264668602, "grad_norm": 0.06045999377965927, "learning_rate": 4.751294949514637e-06, "loss": 0.0009, "step": 138000 }, { "epoch": 1.1285930408472011, "grad_norm": 0.0657300129532814, "learning_rate": 4.7505822036363214e-06, "loss": 0.001, "step": 138010 }, { "epoch": 1.1286748170258003, "grad_norm": 0.01952868327498436, "learning_rate": 4.74986946283885e-06, "loss": 0.0011, "step": 138020 }, { "epoch": 1.1287565932043995, "grad_norm": 0.06534289568662643, "learning_rate": 4.749156727136742e-06, "loss": 0.0011, "step": 138030 }, { "epoch": 1.1288383693829986, "grad_norm": 0.017477894201874733, "learning_rate": 4.748443996544514e-06, "loss": 0.0009, "step": 138040 }, { "epoch": 1.1289201455615978, "grad_norm": 0.03344443067908287, "learning_rate": 4.747731271076685e-06, "loss": 0.0016, "step": 138050 }, { "epoch": 1.1290019217401972, "grad_norm": 0.09175077825784683, "learning_rate": 4.747018550747777e-06, "loss": 0.0013, "step": 138060 }, { "epoch": 1.1290836979187961, "grad_norm": 0.09821821749210358, "learning_rate": 4.746305835572308e-06, "loss": 0.0013, "step": 138070 }, { "epoch": 1.1291654740973955, "grad_norm": 0.03380034491419792, "learning_rate": 4.745593125564794e-06, "loss": 0.0006, "step": 138080 }, { "epoch": 1.1292472502759945, "grad_norm": 0.0148538276553154, "learning_rate": 4.744880420739753e-06, "loss": 0.0014, "step": 138090 }, { "epoch": 1.1293290264545939, "grad_norm": 0.020216714590787888, "learning_rate": 4.744167721111706e-06, "loss": 0.0013, "step": 138100 }, { "epoch": 1.129410802633193, "grad_norm": 0.06799419224262238, "learning_rate": 4.743455026695172e-06, "loss": 0.0009, "step": 138110 }, { "epoch": 1.1294925788117922, "grad_norm": 0.052206821739673615, "learning_rate": 4.742742337504667e-06, "loss": 0.0008, "step": 138120 }, { "epoch": 1.1295743549903914, "grad_norm": 0.027654115110635757, "learning_rate": 4.7420296535547096e-06, "loss": 0.0006, "step": 138130 }, { "epoch": 1.1296561311689906, "grad_norm": 0.05309893563389778, "learning_rate": 4.741316974859818e-06, "loss": 0.0015, "step": 138140 }, { "epoch": 1.1297379073475897, "grad_norm": 0.02040322683751583, "learning_rate": 4.7406043014345095e-06, "loss": 0.0006, "step": 138150 }, { "epoch": 1.129819683526189, "grad_norm": 0.4187977910041809, "learning_rate": 4.7398916332933035e-06, "loss": 0.001, "step": 138160 }, { "epoch": 1.129901459704788, "grad_norm": 0.03738197311758995, "learning_rate": 4.739178970450713e-06, "loss": 0.0008, "step": 138170 }, { "epoch": 1.1299832358833872, "grad_norm": 0.05685047060251236, "learning_rate": 4.738466312921261e-06, "loss": 0.0008, "step": 138180 }, { "epoch": 1.1300650120619864, "grad_norm": 0.021309519186615944, "learning_rate": 4.737753660719465e-06, "loss": 0.0008, "step": 138190 }, { "epoch": 1.1301467882405856, "grad_norm": 0.004705615807324648, "learning_rate": 4.7370410138598385e-06, "loss": 0.0011, "step": 138200 }, { "epoch": 1.1302285644191847, "grad_norm": 0.0030388350132852793, "learning_rate": 4.736328372356898e-06, "loss": 0.0008, "step": 138210 }, { "epoch": 1.130310340597784, "grad_norm": 0.009536270052194595, "learning_rate": 4.735615736225165e-06, "loss": 0.0015, "step": 138220 }, { "epoch": 1.130392116776383, "grad_norm": 0.08986956626176834, "learning_rate": 4.734903105479155e-06, "loss": 0.0015, "step": 138230 }, { "epoch": 1.1304738929549822, "grad_norm": 0.021120846271514893, "learning_rate": 4.734190480133383e-06, "loss": 0.0009, "step": 138240 }, { "epoch": 1.1305556691335814, "grad_norm": 0.021599553525447845, "learning_rate": 4.7334778602023666e-06, "loss": 0.0009, "step": 138250 }, { "epoch": 1.1306374453121806, "grad_norm": 0.21948377788066864, "learning_rate": 4.732765245700624e-06, "loss": 0.0015, "step": 138260 }, { "epoch": 1.1307192214907797, "grad_norm": 0.06652483344078064, "learning_rate": 4.73205263664267e-06, "loss": 0.0011, "step": 138270 }, { "epoch": 1.130800997669379, "grad_norm": 0.0072839041240513325, "learning_rate": 4.731340033043022e-06, "loss": 0.0016, "step": 138280 }, { "epoch": 1.130882773847978, "grad_norm": 0.019628731533885002, "learning_rate": 4.730627434916194e-06, "loss": 0.0007, "step": 138290 }, { "epoch": 1.1309645500265773, "grad_norm": 0.0841074138879776, "learning_rate": 4.729914842276706e-06, "loss": 0.0009, "step": 138300 }, { "epoch": 1.1310463262051764, "grad_norm": 0.07885850220918655, "learning_rate": 4.729202255139073e-06, "loss": 0.0008, "step": 138310 }, { "epoch": 1.1311281023837756, "grad_norm": 0.12170058488845825, "learning_rate": 4.72848967351781e-06, "loss": 0.0017, "step": 138320 }, { "epoch": 1.1312098785623748, "grad_norm": 0.4172205328941345, "learning_rate": 4.72777709742743e-06, "loss": 0.0015, "step": 138330 }, { "epoch": 1.131291654740974, "grad_norm": 0.015349596738815308, "learning_rate": 4.727064526882454e-06, "loss": 0.0006, "step": 138340 }, { "epoch": 1.131373430919573, "grad_norm": 0.05414993315935135, "learning_rate": 4.726351961897396e-06, "loss": 0.0013, "step": 138350 }, { "epoch": 1.1314552070981723, "grad_norm": 0.002600097330287099, "learning_rate": 4.72563940248677e-06, "loss": 0.0008, "step": 138360 }, { "epoch": 1.1315369832767714, "grad_norm": 0.026029959321022034, "learning_rate": 4.724926848665092e-06, "loss": 0.0015, "step": 138370 }, { "epoch": 1.1316187594553706, "grad_norm": 0.03674613684415817, "learning_rate": 4.724214300446879e-06, "loss": 0.0017, "step": 138380 }, { "epoch": 1.1317005356339698, "grad_norm": 0.022700292989611626, "learning_rate": 4.723501757846644e-06, "loss": 0.0015, "step": 138390 }, { "epoch": 1.131782311812569, "grad_norm": 0.021329287439584732, "learning_rate": 4.722789220878904e-06, "loss": 0.0011, "step": 138400 }, { "epoch": 1.131864087991168, "grad_norm": 0.027881328016519547, "learning_rate": 4.722076689558171e-06, "loss": 0.0009, "step": 138410 }, { "epoch": 1.1319458641697673, "grad_norm": 0.024805132299661636, "learning_rate": 4.721364163898963e-06, "loss": 0.0008, "step": 138420 }, { "epoch": 1.1320276403483664, "grad_norm": 0.01333271898329258, "learning_rate": 4.720651643915795e-06, "loss": 0.001, "step": 138430 }, { "epoch": 1.1321094165269656, "grad_norm": 0.07716427743434906, "learning_rate": 4.719939129623178e-06, "loss": 0.0012, "step": 138440 }, { "epoch": 1.1321911927055648, "grad_norm": 0.03754661604762077, "learning_rate": 4.719226621035627e-06, "loss": 0.0011, "step": 138450 }, { "epoch": 1.132272968884164, "grad_norm": 0.15883803367614746, "learning_rate": 4.71851411816766e-06, "loss": 0.0013, "step": 138460 }, { "epoch": 1.1323547450627631, "grad_norm": 0.024230962619185448, "learning_rate": 4.71780162103379e-06, "loss": 0.0018, "step": 138470 }, { "epoch": 1.1324365212413623, "grad_norm": 0.012871627695858479, "learning_rate": 4.717089129648529e-06, "loss": 0.0013, "step": 138480 }, { "epoch": 1.1325182974199617, "grad_norm": 0.03579854592680931, "learning_rate": 4.7163766440263925e-06, "loss": 0.0013, "step": 138490 }, { "epoch": 1.1326000735985606, "grad_norm": 0.032968200743198395, "learning_rate": 4.715664164181895e-06, "loss": 0.0006, "step": 138500 }, { "epoch": 1.13268184977716, "grad_norm": 0.08983245491981506, "learning_rate": 4.71495169012955e-06, "loss": 0.0008, "step": 138510 }, { "epoch": 1.132763625955759, "grad_norm": 0.02858736179769039, "learning_rate": 4.71423922188387e-06, "loss": 0.0008, "step": 138520 }, { "epoch": 1.1328454021343584, "grad_norm": 0.01667075790464878, "learning_rate": 4.713526759459367e-06, "loss": 0.0011, "step": 138530 }, { "epoch": 1.1329271783129575, "grad_norm": 0.04626523330807686, "learning_rate": 4.712814302870561e-06, "loss": 0.0009, "step": 138540 }, { "epoch": 1.1330089544915567, "grad_norm": 0.04530984163284302, "learning_rate": 4.71210185213196e-06, "loss": 0.0007, "step": 138550 }, { "epoch": 1.1330907306701559, "grad_norm": 0.0506821945309639, "learning_rate": 4.711389407258078e-06, "loss": 0.0016, "step": 138560 }, { "epoch": 1.133172506848755, "grad_norm": 0.040468838065862656, "learning_rate": 4.7106769682634255e-06, "loss": 0.001, "step": 138570 }, { "epoch": 1.1332542830273542, "grad_norm": 0.0374792255461216, "learning_rate": 4.7099645351625215e-06, "loss": 0.0011, "step": 138580 }, { "epoch": 1.1333360592059534, "grad_norm": 0.008111875504255295, "learning_rate": 4.709252107969875e-06, "loss": 0.0007, "step": 138590 }, { "epoch": 1.1334178353845525, "grad_norm": 0.035058703273534775, "learning_rate": 4.708539686699999e-06, "loss": 0.0013, "step": 138600 }, { "epoch": 1.1334996115631517, "grad_norm": 0.08589926362037659, "learning_rate": 4.707827271367408e-06, "loss": 0.0011, "step": 138610 }, { "epoch": 1.1335813877417509, "grad_norm": 0.09147339314222336, "learning_rate": 4.707114861986611e-06, "loss": 0.0009, "step": 138620 }, { "epoch": 1.13366316392035, "grad_norm": 0.12606000900268555, "learning_rate": 4.706402458572123e-06, "loss": 0.0021, "step": 138630 }, { "epoch": 1.1337449400989492, "grad_norm": 0.005692746490240097, "learning_rate": 4.705690061138456e-06, "loss": 0.0008, "step": 138640 }, { "epoch": 1.1338267162775484, "grad_norm": 0.008747627027332783, "learning_rate": 4.704977669700122e-06, "loss": 0.0005, "step": 138650 }, { "epoch": 1.1339084924561476, "grad_norm": 0.03331458568572998, "learning_rate": 4.704265284271631e-06, "loss": 0.0016, "step": 138660 }, { "epoch": 1.1339902686347467, "grad_norm": 0.08787956088781357, "learning_rate": 4.703552904867498e-06, "loss": 0.001, "step": 138670 }, { "epoch": 1.134072044813346, "grad_norm": 0.059773512184619904, "learning_rate": 4.702840531502233e-06, "loss": 0.0017, "step": 138680 }, { "epoch": 1.134153820991945, "grad_norm": 0.060513947159051895, "learning_rate": 4.702128164190348e-06, "loss": 0.0007, "step": 138690 }, { "epoch": 1.1342355971705442, "grad_norm": 0.06329646706581116, "learning_rate": 4.7014158029463526e-06, "loss": 0.0008, "step": 138700 }, { "epoch": 1.1343173733491434, "grad_norm": 0.0681016817688942, "learning_rate": 4.700703447784762e-06, "loss": 0.0009, "step": 138710 }, { "epoch": 1.1343991495277426, "grad_norm": 0.020072754472494125, "learning_rate": 4.699991098720085e-06, "loss": 0.0008, "step": 138720 }, { "epoch": 1.1344809257063417, "grad_norm": 0.000654058821965009, "learning_rate": 4.6992787557668335e-06, "loss": 0.0005, "step": 138730 }, { "epoch": 1.134562701884941, "grad_norm": 0.006289386190474033, "learning_rate": 4.698566418939517e-06, "loss": 0.0012, "step": 138740 }, { "epoch": 1.13464447806354, "grad_norm": 0.03199278563261032, "learning_rate": 4.6978540882526494e-06, "loss": 0.0011, "step": 138750 }, { "epoch": 1.1347262542421392, "grad_norm": 0.06689640134572983, "learning_rate": 4.697141763720738e-06, "loss": 0.0012, "step": 138760 }, { "epoch": 1.1348080304207384, "grad_norm": 0.02702151983976364, "learning_rate": 4.696429445358296e-06, "loss": 0.0006, "step": 138770 }, { "epoch": 1.1348898065993376, "grad_norm": 0.04169989377260208, "learning_rate": 4.695717133179833e-06, "loss": 0.0008, "step": 138780 }, { "epoch": 1.1349715827779367, "grad_norm": 0.03992251306772232, "learning_rate": 4.695004827199859e-06, "loss": 0.0013, "step": 138790 }, { "epoch": 1.135053358956536, "grad_norm": 0.177342489361763, "learning_rate": 4.6942925274328844e-06, "loss": 0.001, "step": 138800 }, { "epoch": 1.135135135135135, "grad_norm": 0.07681587338447571, "learning_rate": 4.69358023389342e-06, "loss": 0.0022, "step": 138810 }, { "epoch": 1.1352169113137343, "grad_norm": 0.06669142097234726, "learning_rate": 4.692867946595973e-06, "loss": 0.0009, "step": 138820 }, { "epoch": 1.1352986874923334, "grad_norm": 0.11173271387815475, "learning_rate": 4.692155665555058e-06, "loss": 0.001, "step": 138830 }, { "epoch": 1.1353804636709326, "grad_norm": 0.023110492154955864, "learning_rate": 4.691443390785182e-06, "loss": 0.0009, "step": 138840 }, { "epoch": 1.1354622398495318, "grad_norm": 0.09586425870656967, "learning_rate": 4.690731122300856e-06, "loss": 0.0012, "step": 138850 }, { "epoch": 1.135544016028131, "grad_norm": 0.11343854665756226, "learning_rate": 4.690018860116587e-06, "loss": 0.0008, "step": 138860 }, { "epoch": 1.13562579220673, "grad_norm": 0.05793411657214165, "learning_rate": 4.6893066042468855e-06, "loss": 0.0007, "step": 138870 }, { "epoch": 1.1357075683853293, "grad_norm": 0.07650227844715118, "learning_rate": 4.688594354706262e-06, "loss": 0.0009, "step": 138880 }, { "epoch": 1.1357893445639284, "grad_norm": 0.07605564594268799, "learning_rate": 4.687882111509224e-06, "loss": 0.0015, "step": 138890 }, { "epoch": 1.1358711207425276, "grad_norm": 0.04840363562107086, "learning_rate": 4.687169874670281e-06, "loss": 0.0013, "step": 138900 }, { "epoch": 1.1359528969211268, "grad_norm": 0.033504847437143326, "learning_rate": 4.686457644203942e-06, "loss": 0.001, "step": 138910 }, { "epoch": 1.1360346730997262, "grad_norm": 0.05245300009846687, "learning_rate": 4.685745420124716e-06, "loss": 0.0026, "step": 138920 }, { "epoch": 1.1361164492783251, "grad_norm": 0.037370193749666214, "learning_rate": 4.685033202447111e-06, "loss": 0.0009, "step": 138930 }, { "epoch": 1.1361982254569245, "grad_norm": 0.07637111842632294, "learning_rate": 4.684320991185635e-06, "loss": 0.0016, "step": 138940 }, { "epoch": 1.1362800016355235, "grad_norm": 0.0661662369966507, "learning_rate": 4.683608786354798e-06, "loss": 0.0014, "step": 138950 }, { "epoch": 1.1363617778141228, "grad_norm": 0.13816851377487183, "learning_rate": 4.682896587969107e-06, "loss": 0.0014, "step": 138960 }, { "epoch": 1.136443553992722, "grad_norm": 0.0051660435274243355, "learning_rate": 4.68218439604307e-06, "loss": 0.0009, "step": 138970 }, { "epoch": 1.1365253301713212, "grad_norm": 0.023853594437241554, "learning_rate": 4.681472210591195e-06, "loss": 0.001, "step": 138980 }, { "epoch": 1.1366071063499203, "grad_norm": 0.04503502696752548, "learning_rate": 4.680760031627991e-06, "loss": 0.001, "step": 138990 }, { "epoch": 1.1366888825285195, "grad_norm": 0.04804138466715813, "learning_rate": 4.680047859167964e-06, "loss": 0.0009, "step": 139000 }, { "epoch": 1.1367706587071187, "grad_norm": 0.13849857449531555, "learning_rate": 4.6793356932256225e-06, "loss": 0.0027, "step": 139010 }, { "epoch": 1.1368524348857179, "grad_norm": 0.011865803971886635, "learning_rate": 4.678623533815473e-06, "loss": 0.0007, "step": 139020 }, { "epoch": 1.136934211064317, "grad_norm": 0.06585393846035004, "learning_rate": 4.6779113809520234e-06, "loss": 0.0009, "step": 139030 }, { "epoch": 1.1370159872429162, "grad_norm": 0.0028759322594851255, "learning_rate": 4.677199234649782e-06, "loss": 0.0018, "step": 139040 }, { "epoch": 1.1370977634215154, "grad_norm": 0.040120456367731094, "learning_rate": 4.676487094923254e-06, "loss": 0.0012, "step": 139050 }, { "epoch": 1.1371795396001145, "grad_norm": 0.0814889594912529, "learning_rate": 4.675774961786944e-06, "loss": 0.0018, "step": 139060 }, { "epoch": 1.1372613157787137, "grad_norm": 0.06429443508386612, "learning_rate": 4.675062835255365e-06, "loss": 0.0006, "step": 139070 }, { "epoch": 1.1373430919573129, "grad_norm": 0.012708895839750767, "learning_rate": 4.674350715343019e-06, "loss": 0.0013, "step": 139080 }, { "epoch": 1.137424868135912, "grad_norm": 0.0013276997487992048, "learning_rate": 4.673638602064414e-06, "loss": 0.0004, "step": 139090 }, { "epoch": 1.1375066443145112, "grad_norm": 0.04036218300461769, "learning_rate": 4.672926495434055e-06, "loss": 0.0014, "step": 139100 }, { "epoch": 1.1375884204931104, "grad_norm": 0.17036810517311096, "learning_rate": 4.672214395466451e-06, "loss": 0.0019, "step": 139110 }, { "epoch": 1.1376701966717095, "grad_norm": 0.0305235143750906, "learning_rate": 4.671502302176104e-06, "loss": 0.0012, "step": 139120 }, { "epoch": 1.1377519728503087, "grad_norm": 0.007280838210135698, "learning_rate": 4.670790215577524e-06, "loss": 0.0012, "step": 139130 }, { "epoch": 1.1378337490289079, "grad_norm": 0.03813037648797035, "learning_rate": 4.670078135685214e-06, "loss": 0.001, "step": 139140 }, { "epoch": 1.137915525207507, "grad_norm": 0.07520449906587601, "learning_rate": 4.66936606251368e-06, "loss": 0.0009, "step": 139150 }, { "epoch": 1.1379973013861062, "grad_norm": 0.0024567695800215006, "learning_rate": 4.66865399607743e-06, "loss": 0.0007, "step": 139160 }, { "epoch": 1.1380790775647054, "grad_norm": 0.07111042737960815, "learning_rate": 4.667941936390966e-06, "loss": 0.0008, "step": 139170 }, { "epoch": 1.1381608537433046, "grad_norm": 0.054693058133125305, "learning_rate": 4.667229883468794e-06, "loss": 0.0017, "step": 139180 }, { "epoch": 1.1382426299219037, "grad_norm": 0.07644667476415634, "learning_rate": 4.66651783732542e-06, "loss": 0.001, "step": 139190 }, { "epoch": 1.138324406100503, "grad_norm": 0.023590877652168274, "learning_rate": 4.66580579797535e-06, "loss": 0.0009, "step": 139200 }, { "epoch": 1.138406182279102, "grad_norm": 0.055449508130550385, "learning_rate": 4.665093765433087e-06, "loss": 0.0006, "step": 139210 }, { "epoch": 1.1384879584577012, "grad_norm": 0.12848715484142303, "learning_rate": 4.664381739713135e-06, "loss": 0.0011, "step": 139220 }, { "epoch": 1.1385697346363004, "grad_norm": 0.25488317012786865, "learning_rate": 4.663669720830002e-06, "loss": 0.001, "step": 139230 }, { "epoch": 1.1386515108148996, "grad_norm": 0.05152330547571182, "learning_rate": 4.662957708798189e-06, "loss": 0.0007, "step": 139240 }, { "epoch": 1.1387332869934987, "grad_norm": 0.004230705555528402, "learning_rate": 4.662245703632202e-06, "loss": 0.0008, "step": 139250 }, { "epoch": 1.138815063172098, "grad_norm": 0.08731815218925476, "learning_rate": 4.661533705346543e-06, "loss": 0.0012, "step": 139260 }, { "epoch": 1.138896839350697, "grad_norm": 0.03060084395110607, "learning_rate": 4.660821713955719e-06, "loss": 0.0006, "step": 139270 }, { "epoch": 1.1389786155292962, "grad_norm": 0.01716078817844391, "learning_rate": 4.660109729474232e-06, "loss": 0.0015, "step": 139280 }, { "epoch": 1.1390603917078954, "grad_norm": 0.0021858145482838154, "learning_rate": 4.659397751916587e-06, "loss": 0.0011, "step": 139290 }, { "epoch": 1.1391421678864946, "grad_norm": 0.10455527156591415, "learning_rate": 4.658685781297283e-06, "loss": 0.0014, "step": 139300 }, { "epoch": 1.1392239440650938, "grad_norm": 0.03746316581964493, "learning_rate": 4.657973817630831e-06, "loss": 0.0006, "step": 139310 }, { "epoch": 1.139305720243693, "grad_norm": 0.029012126848101616, "learning_rate": 4.657261860931729e-06, "loss": 0.0021, "step": 139320 }, { "epoch": 1.139387496422292, "grad_norm": 0.08502171188592911, "learning_rate": 4.656549911214482e-06, "loss": 0.0014, "step": 139330 }, { "epoch": 1.1394692726008913, "grad_norm": 0.02668088488280773, "learning_rate": 4.6558379684935916e-06, "loss": 0.0006, "step": 139340 }, { "epoch": 1.1395510487794906, "grad_norm": 0.035195257514715195, "learning_rate": 4.6551260327835615e-06, "loss": 0.0009, "step": 139350 }, { "epoch": 1.1396328249580896, "grad_norm": 0.044115595519542694, "learning_rate": 4.6544141040988955e-06, "loss": 0.0041, "step": 139360 }, { "epoch": 1.139714601136689, "grad_norm": 0.03359666466712952, "learning_rate": 4.653702182454095e-06, "loss": 0.001, "step": 139370 }, { "epoch": 1.139796377315288, "grad_norm": 0.014090440236032009, "learning_rate": 4.652990267863662e-06, "loss": 0.0009, "step": 139380 }, { "epoch": 1.1398781534938873, "grad_norm": 0.0198743287473917, "learning_rate": 4.652278360342099e-06, "loss": 0.0005, "step": 139390 }, { "epoch": 1.1399599296724865, "grad_norm": 0.03669895976781845, "learning_rate": 4.651566459903909e-06, "loss": 0.0011, "step": 139400 }, { "epoch": 1.1400417058510857, "grad_norm": 0.005705952178686857, "learning_rate": 4.650854566563593e-06, "loss": 0.0006, "step": 139410 }, { "epoch": 1.1401234820296848, "grad_norm": 0.017393752932548523, "learning_rate": 4.650142680335652e-06, "loss": 0.0012, "step": 139420 }, { "epoch": 1.140205258208284, "grad_norm": 0.03344256058335304, "learning_rate": 4.649430801234591e-06, "loss": 0.0009, "step": 139430 }, { "epoch": 1.1402870343868832, "grad_norm": 0.029219642281532288, "learning_rate": 4.648718929274909e-06, "loss": 0.0012, "step": 139440 }, { "epoch": 1.1403688105654823, "grad_norm": 0.0028832172974944115, "learning_rate": 4.648007064471108e-06, "loss": 0.0006, "step": 139450 }, { "epoch": 1.1404505867440815, "grad_norm": 0.022088756784796715, "learning_rate": 4.6472952068376895e-06, "loss": 0.0003, "step": 139460 }, { "epoch": 1.1405323629226807, "grad_norm": 0.06355391442775726, "learning_rate": 4.646583356389154e-06, "loss": 0.001, "step": 139470 }, { "epoch": 1.1406141391012798, "grad_norm": 0.060800790786743164, "learning_rate": 4.645871513140003e-06, "loss": 0.0014, "step": 139480 }, { "epoch": 1.140695915279879, "grad_norm": 0.005739135202020407, "learning_rate": 4.6451596771047375e-06, "loss": 0.0006, "step": 139490 }, { "epoch": 1.1407776914584782, "grad_norm": 0.046320315450429916, "learning_rate": 4.6444478482978565e-06, "loss": 0.0008, "step": 139500 }, { "epoch": 1.1408594676370774, "grad_norm": 0.10756330192089081, "learning_rate": 4.643736026733863e-06, "loss": 0.0012, "step": 139510 }, { "epoch": 1.1409412438156765, "grad_norm": 0.04874255880713463, "learning_rate": 4.643024212427256e-06, "loss": 0.0019, "step": 139520 }, { "epoch": 1.1410230199942757, "grad_norm": 0.021334698423743248, "learning_rate": 4.642312405392536e-06, "loss": 0.0009, "step": 139530 }, { "epoch": 1.1411047961728749, "grad_norm": 0.14140808582305908, "learning_rate": 4.641600605644201e-06, "loss": 0.001, "step": 139540 }, { "epoch": 1.141186572351474, "grad_norm": 0.1087183803319931, "learning_rate": 4.640888813196755e-06, "loss": 0.0011, "step": 139550 }, { "epoch": 1.1412683485300732, "grad_norm": 0.037575613707304, "learning_rate": 4.6401770280646965e-06, "loss": 0.0006, "step": 139560 }, { "epoch": 1.1413501247086724, "grad_norm": 0.019386712461709976, "learning_rate": 4.639465250262523e-06, "loss": 0.0034, "step": 139570 }, { "epoch": 1.1414319008872715, "grad_norm": 0.2201562523841858, "learning_rate": 4.638753479804735e-06, "loss": 0.002, "step": 139580 }, { "epoch": 1.1415136770658707, "grad_norm": 0.057238154113292694, "learning_rate": 4.638041716705834e-06, "loss": 0.0015, "step": 139590 }, { "epoch": 1.1415954532444699, "grad_norm": 0.011368588544428349, "learning_rate": 4.637329960980317e-06, "loss": 0.0012, "step": 139600 }, { "epoch": 1.141677229423069, "grad_norm": 0.01470206305384636, "learning_rate": 4.636618212642683e-06, "loss": 0.0024, "step": 139610 }, { "epoch": 1.1417590056016682, "grad_norm": 0.06300125271081924, "learning_rate": 4.635906471707431e-06, "loss": 0.0016, "step": 139620 }, { "epoch": 1.1418407817802674, "grad_norm": 0.039674341678619385, "learning_rate": 4.6351947381890614e-06, "loss": 0.0007, "step": 139630 }, { "epoch": 1.1419225579588665, "grad_norm": 0.02548358403146267, "learning_rate": 4.634483012102071e-06, "loss": 0.0007, "step": 139640 }, { "epoch": 1.1420043341374657, "grad_norm": 0.056693580001592636, "learning_rate": 4.633771293460958e-06, "loss": 0.0009, "step": 139650 }, { "epoch": 1.1420861103160649, "grad_norm": 0.32779911160469055, "learning_rate": 4.63305958228022e-06, "loss": 0.0009, "step": 139660 }, { "epoch": 1.142167886494664, "grad_norm": 0.023593906313180923, "learning_rate": 4.632347878574359e-06, "loss": 0.0008, "step": 139670 }, { "epoch": 1.1422496626732632, "grad_norm": 0.018124591559171677, "learning_rate": 4.631636182357871e-06, "loss": 0.0013, "step": 139680 }, { "epoch": 1.1423314388518624, "grad_norm": 0.0942128449678421, "learning_rate": 4.6309244936452526e-06, "loss": 0.0024, "step": 139690 }, { "epoch": 1.1424132150304616, "grad_norm": 0.02390427701175213, "learning_rate": 4.630212812451001e-06, "loss": 0.0012, "step": 139700 }, { "epoch": 1.1424949912090607, "grad_norm": 0.017232254147529602, "learning_rate": 4.629501138789616e-06, "loss": 0.0015, "step": 139710 }, { "epoch": 1.14257676738766, "grad_norm": 0.04887975752353668, "learning_rate": 4.628789472675595e-06, "loss": 0.0007, "step": 139720 }, { "epoch": 1.142658543566259, "grad_norm": 0.011781114153563976, "learning_rate": 4.628077814123433e-06, "loss": 0.0024, "step": 139730 }, { "epoch": 1.1427403197448582, "grad_norm": 0.06315381079912186, "learning_rate": 4.627366163147628e-06, "loss": 0.0012, "step": 139740 }, { "epoch": 1.1428220959234574, "grad_norm": 0.11788981407880783, "learning_rate": 4.626654519762678e-06, "loss": 0.0007, "step": 139750 }, { "epoch": 1.1429038721020568, "grad_norm": 0.00420344527810812, "learning_rate": 4.625942883983079e-06, "loss": 0.001, "step": 139760 }, { "epoch": 1.1429856482806557, "grad_norm": 0.005324841011315584, "learning_rate": 4.625231255823327e-06, "loss": 0.0005, "step": 139770 }, { "epoch": 1.1430674244592551, "grad_norm": 0.04485626146197319, "learning_rate": 4.624519635297917e-06, "loss": 0.0012, "step": 139780 }, { "epoch": 1.143149200637854, "grad_norm": 0.03727913275361061, "learning_rate": 4.623808022421349e-06, "loss": 0.0008, "step": 139790 }, { "epoch": 1.1432309768164535, "grad_norm": 0.003128533251583576, "learning_rate": 4.623096417208118e-06, "loss": 0.0006, "step": 139800 }, { "epoch": 1.1433127529950524, "grad_norm": 0.009125038050115108, "learning_rate": 4.62238481967272e-06, "loss": 0.0008, "step": 139810 }, { "epoch": 1.1433945291736518, "grad_norm": 0.03880813717842102, "learning_rate": 4.621673229829648e-06, "loss": 0.0009, "step": 139820 }, { "epoch": 1.143476305352251, "grad_norm": 0.07402637600898743, "learning_rate": 4.620961647693401e-06, "loss": 0.0014, "step": 139830 }, { "epoch": 1.1435580815308501, "grad_norm": 0.007827688939869404, "learning_rate": 4.620250073278474e-06, "loss": 0.0016, "step": 139840 }, { "epoch": 1.1436398577094493, "grad_norm": 0.1380838304758072, "learning_rate": 4.619538506599361e-06, "loss": 0.0014, "step": 139850 }, { "epoch": 1.1437216338880485, "grad_norm": 0.08810818195343018, "learning_rate": 4.618826947670556e-06, "loss": 0.0006, "step": 139860 }, { "epoch": 1.1438034100666477, "grad_norm": 0.0028475725557655096, "learning_rate": 4.618115396506559e-06, "loss": 0.001, "step": 139870 }, { "epoch": 1.1438851862452468, "grad_norm": 0.01789916679263115, "learning_rate": 4.6174038531218604e-06, "loss": 0.0009, "step": 139880 }, { "epoch": 1.143966962423846, "grad_norm": 0.06474604457616806, "learning_rate": 4.616692317530957e-06, "loss": 0.0015, "step": 139890 }, { "epoch": 1.1440487386024452, "grad_norm": 0.011511631309986115, "learning_rate": 4.61598078974834e-06, "loss": 0.0009, "step": 139900 }, { "epoch": 1.1441305147810443, "grad_norm": 0.06153576821088791, "learning_rate": 4.615269269788509e-06, "loss": 0.0014, "step": 139910 }, { "epoch": 1.1442122909596435, "grad_norm": 0.036536261439323425, "learning_rate": 4.614557757665956e-06, "loss": 0.0011, "step": 139920 }, { "epoch": 1.1442940671382427, "grad_norm": 0.0035806610248982906, "learning_rate": 4.613846253395174e-06, "loss": 0.0006, "step": 139930 }, { "epoch": 1.1443758433168418, "grad_norm": 0.03170756623148918, "learning_rate": 4.613134756990656e-06, "loss": 0.0013, "step": 139940 }, { "epoch": 1.144457619495441, "grad_norm": 0.04933341220021248, "learning_rate": 4.6124232684669e-06, "loss": 0.0015, "step": 139950 }, { "epoch": 1.1445393956740402, "grad_norm": 0.10312261432409286, "learning_rate": 4.6117117878383965e-06, "loss": 0.0021, "step": 139960 }, { "epoch": 1.1446211718526393, "grad_norm": 0.04362178593873978, "learning_rate": 4.61100031511964e-06, "loss": 0.0015, "step": 139970 }, { "epoch": 1.1447029480312385, "grad_norm": 0.006525084376335144, "learning_rate": 4.6102888503251215e-06, "loss": 0.0011, "step": 139980 }, { "epoch": 1.1447847242098377, "grad_norm": 0.12465957552194595, "learning_rate": 4.609577393469337e-06, "loss": 0.0023, "step": 139990 }, { "epoch": 1.1448665003884368, "grad_norm": 0.04740414023399353, "learning_rate": 4.608865944566778e-06, "loss": 0.0014, "step": 140000 }, { "epoch": 1.144948276567036, "grad_norm": 0.07337050139904022, "learning_rate": 4.608154503631938e-06, "loss": 0.001, "step": 140010 }, { "epoch": 1.1450300527456352, "grad_norm": 0.11442526429891586, "learning_rate": 4.607443070679308e-06, "loss": 0.0014, "step": 140020 }, { "epoch": 1.1451118289242344, "grad_norm": 0.05778681859374046, "learning_rate": 4.606731645723383e-06, "loss": 0.0011, "step": 140030 }, { "epoch": 1.1451936051028335, "grad_norm": 0.026475902646780014, "learning_rate": 4.606020228778654e-06, "loss": 0.001, "step": 140040 }, { "epoch": 1.1452753812814327, "grad_norm": 0.05506931245326996, "learning_rate": 4.605308819859614e-06, "loss": 0.0013, "step": 140050 }, { "epoch": 1.1453571574600319, "grad_norm": 0.029520513489842415, "learning_rate": 4.604597418980753e-06, "loss": 0.002, "step": 140060 }, { "epoch": 1.145438933638631, "grad_norm": 0.046884577721357346, "learning_rate": 4.603886026156564e-06, "loss": 0.0011, "step": 140070 }, { "epoch": 1.1455207098172302, "grad_norm": 0.018972434103488922, "learning_rate": 4.6031746414015386e-06, "loss": 0.0008, "step": 140080 }, { "epoch": 1.1456024859958294, "grad_norm": 0.015092946588993073, "learning_rate": 4.60246326473017e-06, "loss": 0.0007, "step": 140090 }, { "epoch": 1.1456842621744285, "grad_norm": 0.051710087805986404, "learning_rate": 4.601751896156946e-06, "loss": 0.001, "step": 140100 }, { "epoch": 1.1457660383530277, "grad_norm": 0.11247406154870987, "learning_rate": 4.601040535696359e-06, "loss": 0.0026, "step": 140110 }, { "epoch": 1.1458478145316269, "grad_norm": 0.04465637356042862, "learning_rate": 4.6003291833629014e-06, "loss": 0.0008, "step": 140120 }, { "epoch": 1.145929590710226, "grad_norm": 0.017937734723091125, "learning_rate": 4.599617839171064e-06, "loss": 0.0008, "step": 140130 }, { "epoch": 1.1460113668888252, "grad_norm": 0.09103525429964066, "learning_rate": 4.598906503135336e-06, "loss": 0.0005, "step": 140140 }, { "epoch": 1.1460931430674244, "grad_norm": 0.06514199078083038, "learning_rate": 4.598195175270207e-06, "loss": 0.0015, "step": 140150 }, { "epoch": 1.1461749192460235, "grad_norm": 0.042268719524145126, "learning_rate": 4.597483855590171e-06, "loss": 0.0007, "step": 140160 }, { "epoch": 1.1462566954246227, "grad_norm": 0.04449545592069626, "learning_rate": 4.5967725441097154e-06, "loss": 0.0019, "step": 140170 }, { "epoch": 1.1463384716032219, "grad_norm": 0.06633387506008148, "learning_rate": 4.596061240843331e-06, "loss": 0.0014, "step": 140180 }, { "epoch": 1.1464202477818213, "grad_norm": 0.05190658196806908, "learning_rate": 4.595349945805507e-06, "loss": 0.0012, "step": 140190 }, { "epoch": 1.1465020239604202, "grad_norm": 0.039964742958545685, "learning_rate": 4.594638659010734e-06, "loss": 0.0009, "step": 140200 }, { "epoch": 1.1465838001390196, "grad_norm": 0.006301830522716045, "learning_rate": 4.5939273804735016e-06, "loss": 0.0011, "step": 140210 }, { "epoch": 1.1466655763176186, "grad_norm": 0.07611013948917389, "learning_rate": 4.593216110208299e-06, "loss": 0.0012, "step": 140220 }, { "epoch": 1.146747352496218, "grad_norm": 0.010866043157875538, "learning_rate": 4.592504848229612e-06, "loss": 0.0007, "step": 140230 }, { "epoch": 1.1468291286748171, "grad_norm": 0.036757633090019226, "learning_rate": 4.591793594551935e-06, "loss": 0.0014, "step": 140240 }, { "epoch": 1.1469109048534163, "grad_norm": 0.038691841065883636, "learning_rate": 4.591082349189753e-06, "loss": 0.0008, "step": 140250 }, { "epoch": 1.1469926810320155, "grad_norm": 0.008771412074565887, "learning_rate": 4.590371112157557e-06, "loss": 0.0012, "step": 140260 }, { "epoch": 1.1470744572106146, "grad_norm": 0.015179895795881748, "learning_rate": 4.589659883469833e-06, "loss": 0.0007, "step": 140270 }, { "epoch": 1.1471562333892138, "grad_norm": 0.02115798555314541, "learning_rate": 4.588948663141071e-06, "loss": 0.0015, "step": 140280 }, { "epoch": 1.147238009567813, "grad_norm": 0.01584389992058277, "learning_rate": 4.588237451185759e-06, "loss": 0.0008, "step": 140290 }, { "epoch": 1.1473197857464121, "grad_norm": 0.038428083062171936, "learning_rate": 4.587526247618386e-06, "loss": 0.0014, "step": 140300 }, { "epoch": 1.1474015619250113, "grad_norm": 0.06749952584505081, "learning_rate": 4.586815052453437e-06, "loss": 0.0011, "step": 140310 }, { "epoch": 1.1474833381036105, "grad_norm": 0.0042142970487475395, "learning_rate": 4.586103865705403e-06, "loss": 0.0009, "step": 140320 }, { "epoch": 1.1475651142822096, "grad_norm": 0.002041608327999711, "learning_rate": 4.585392687388769e-06, "loss": 0.001, "step": 140330 }, { "epoch": 1.1476468904608088, "grad_norm": 0.041886597871780396, "learning_rate": 4.584681517518023e-06, "loss": 0.0011, "step": 140340 }, { "epoch": 1.147728666639408, "grad_norm": 0.02470238320529461, "learning_rate": 4.583970356107651e-06, "loss": 0.001, "step": 140350 }, { "epoch": 1.1478104428180071, "grad_norm": 0.008299293927848339, "learning_rate": 4.583259203172142e-06, "loss": 0.0012, "step": 140360 }, { "epoch": 1.1478922189966063, "grad_norm": 0.0452040359377861, "learning_rate": 4.582548058725982e-06, "loss": 0.0004, "step": 140370 }, { "epoch": 1.1479739951752055, "grad_norm": 0.05303094908595085, "learning_rate": 4.581836922783658e-06, "loss": 0.0008, "step": 140380 }, { "epoch": 1.1480557713538047, "grad_norm": 0.033837102353572845, "learning_rate": 4.5811257953596525e-06, "loss": 0.0021, "step": 140390 }, { "epoch": 1.1481375475324038, "grad_norm": 0.1278001368045807, "learning_rate": 4.580414676468457e-06, "loss": 0.0018, "step": 140400 }, { "epoch": 1.148219323711003, "grad_norm": 0.012935131788253784, "learning_rate": 4.579703566124556e-06, "loss": 0.0022, "step": 140410 }, { "epoch": 1.1483010998896022, "grad_norm": 0.008631311357021332, "learning_rate": 4.578992464342435e-06, "loss": 0.0017, "step": 140420 }, { "epoch": 1.1483828760682013, "grad_norm": 0.0651373341679573, "learning_rate": 4.578281371136579e-06, "loss": 0.0023, "step": 140430 }, { "epoch": 1.1484646522468005, "grad_norm": 0.1519852727651596, "learning_rate": 4.577570286521476e-06, "loss": 0.0019, "step": 140440 }, { "epoch": 1.1485464284253997, "grad_norm": 0.022684892639517784, "learning_rate": 4.576859210511608e-06, "loss": 0.0009, "step": 140450 }, { "epoch": 1.1486282046039988, "grad_norm": 0.06607722491025925, "learning_rate": 4.576148143121463e-06, "loss": 0.0007, "step": 140460 }, { "epoch": 1.148709980782598, "grad_norm": 0.07357224822044373, "learning_rate": 4.575437084365523e-06, "loss": 0.001, "step": 140470 }, { "epoch": 1.1487917569611972, "grad_norm": 0.06012256443500519, "learning_rate": 4.574726034258276e-06, "loss": 0.0012, "step": 140480 }, { "epoch": 1.1488735331397963, "grad_norm": 0.015826037153601646, "learning_rate": 4.574014992814206e-06, "loss": 0.0006, "step": 140490 }, { "epoch": 1.1489553093183955, "grad_norm": 0.051837559789419174, "learning_rate": 4.573303960047796e-06, "loss": 0.0014, "step": 140500 }, { "epoch": 1.1490370854969947, "grad_norm": 0.017442611977458, "learning_rate": 4.572592935973529e-06, "loss": 0.0007, "step": 140510 }, { "epoch": 1.1491188616755939, "grad_norm": 0.05899200588464737, "learning_rate": 4.571881920605894e-06, "loss": 0.001, "step": 140520 }, { "epoch": 1.149200637854193, "grad_norm": 0.1710338294506073, "learning_rate": 4.571170913959373e-06, "loss": 0.0023, "step": 140530 }, { "epoch": 1.1492824140327922, "grad_norm": 0.07322726398706436, "learning_rate": 4.570459916048447e-06, "loss": 0.001, "step": 140540 }, { "epoch": 1.1493641902113914, "grad_norm": 0.03603346645832062, "learning_rate": 4.569748926887602e-06, "loss": 0.0009, "step": 140550 }, { "epoch": 1.1494459663899905, "grad_norm": 0.051526665687561035, "learning_rate": 4.569037946491322e-06, "loss": 0.001, "step": 140560 }, { "epoch": 1.1495277425685897, "grad_norm": 0.03191017732024193, "learning_rate": 4.5683269748740895e-06, "loss": 0.0009, "step": 140570 }, { "epoch": 1.1496095187471889, "grad_norm": 0.02471117302775383, "learning_rate": 4.567616012050387e-06, "loss": 0.0011, "step": 140580 }, { "epoch": 1.149691294925788, "grad_norm": 0.025013284757733345, "learning_rate": 4.566905058034697e-06, "loss": 0.0013, "step": 140590 }, { "epoch": 1.1497730711043872, "grad_norm": 0.20183734595775604, "learning_rate": 4.566194112841504e-06, "loss": 0.0016, "step": 140600 }, { "epoch": 1.1498548472829864, "grad_norm": 0.007783857174217701, "learning_rate": 4.5654831764852894e-06, "loss": 0.0018, "step": 140610 }, { "epoch": 1.1499366234615858, "grad_norm": 0.26892176270484924, "learning_rate": 4.5647722489805355e-06, "loss": 0.0007, "step": 140620 }, { "epoch": 1.1500183996401847, "grad_norm": 0.050612468272447586, "learning_rate": 4.564061330341723e-06, "loss": 0.0011, "step": 140630 }, { "epoch": 1.150100175818784, "grad_norm": 0.02295774407684803, "learning_rate": 4.5633504205833376e-06, "loss": 0.0011, "step": 140640 }, { "epoch": 1.150181951997383, "grad_norm": 0.0007987874560058117, "learning_rate": 4.562639519719859e-06, "loss": 0.0008, "step": 140650 }, { "epoch": 1.1502637281759824, "grad_norm": 0.019701996818184853, "learning_rate": 4.561928627765769e-06, "loss": 0.0021, "step": 140660 }, { "epoch": 1.1503455043545816, "grad_norm": 0.020849617198109627, "learning_rate": 4.561217744735547e-06, "loss": 0.0012, "step": 140670 }, { "epoch": 1.1504272805331808, "grad_norm": 0.0823659896850586, "learning_rate": 4.5605068706436785e-06, "loss": 0.0011, "step": 140680 }, { "epoch": 1.15050905671178, "grad_norm": 0.04181796312332153, "learning_rate": 4.559796005504642e-06, "loss": 0.0006, "step": 140690 }, { "epoch": 1.1505908328903791, "grad_norm": 0.06639883667230606, "learning_rate": 4.559085149332918e-06, "loss": 0.0006, "step": 140700 }, { "epoch": 1.1506726090689783, "grad_norm": 0.03447578102350235, "learning_rate": 4.558374302142987e-06, "loss": 0.0007, "step": 140710 }, { "epoch": 1.1507543852475774, "grad_norm": 0.023579945787787437, "learning_rate": 4.557663463949331e-06, "loss": 0.0006, "step": 140720 }, { "epoch": 1.1508361614261766, "grad_norm": 0.025987399742007256, "learning_rate": 4.5569526347664315e-06, "loss": 0.0006, "step": 140730 }, { "epoch": 1.1509179376047758, "grad_norm": 0.04520947486162186, "learning_rate": 4.556241814608766e-06, "loss": 0.0005, "step": 140740 }, { "epoch": 1.150999713783375, "grad_norm": 0.0961134061217308, "learning_rate": 4.555531003490813e-06, "loss": 0.0009, "step": 140750 }, { "epoch": 1.1510814899619741, "grad_norm": 0.0905981957912445, "learning_rate": 4.554820201427057e-06, "loss": 0.0015, "step": 140760 }, { "epoch": 1.1511632661405733, "grad_norm": 0.000892170297447592, "learning_rate": 4.554109408431976e-06, "loss": 0.0013, "step": 140770 }, { "epoch": 1.1512450423191725, "grad_norm": 0.022227350622415543, "learning_rate": 4.553398624520048e-06, "loss": 0.0014, "step": 140780 }, { "epoch": 1.1513268184977716, "grad_norm": 0.017465976998209953, "learning_rate": 4.552687849705753e-06, "loss": 0.0028, "step": 140790 }, { "epoch": 1.1514085946763708, "grad_norm": 0.003812935436144471, "learning_rate": 4.5519770840035695e-06, "loss": 0.0015, "step": 140800 }, { "epoch": 1.15149037085497, "grad_norm": 0.029811294749379158, "learning_rate": 4.551266327427978e-06, "loss": 0.0012, "step": 140810 }, { "epoch": 1.1515721470335691, "grad_norm": 0.00948492530733347, "learning_rate": 4.550555579993456e-06, "loss": 0.0011, "step": 140820 }, { "epoch": 1.1516539232121683, "grad_norm": 0.03381682187318802, "learning_rate": 4.549844841714481e-06, "loss": 0.0014, "step": 140830 }, { "epoch": 1.1517356993907675, "grad_norm": 0.18057753145694733, "learning_rate": 4.549134112605535e-06, "loss": 0.0021, "step": 140840 }, { "epoch": 1.1518174755693666, "grad_norm": 0.047324731945991516, "learning_rate": 4.548423392681092e-06, "loss": 0.0008, "step": 140850 }, { "epoch": 1.1518992517479658, "grad_norm": 0.002779998816549778, "learning_rate": 4.547712681955631e-06, "loss": 0.0014, "step": 140860 }, { "epoch": 1.151981027926565, "grad_norm": 0.0067696403712034225, "learning_rate": 4.547001980443629e-06, "loss": 0.0014, "step": 140870 }, { "epoch": 1.1520628041051642, "grad_norm": 0.03715774416923523, "learning_rate": 4.546291288159568e-06, "loss": 0.0009, "step": 140880 }, { "epoch": 1.1521445802837633, "grad_norm": 0.03436104580760002, "learning_rate": 4.5455806051179205e-06, "loss": 0.0008, "step": 140890 }, { "epoch": 1.1522263564623625, "grad_norm": 0.00907862652093172, "learning_rate": 4.544869931333167e-06, "loss": 0.0007, "step": 140900 }, { "epoch": 1.1523081326409617, "grad_norm": 0.0859944075345993, "learning_rate": 4.544159266819781e-06, "loss": 0.0013, "step": 140910 }, { "epoch": 1.1523899088195608, "grad_norm": 0.012782365083694458, "learning_rate": 4.543448611592242e-06, "loss": 0.0005, "step": 140920 }, { "epoch": 1.15247168499816, "grad_norm": 0.05554009974002838, "learning_rate": 4.542737965665027e-06, "loss": 0.0016, "step": 140930 }, { "epoch": 1.1525534611767592, "grad_norm": 0.028487229719758034, "learning_rate": 4.54202732905261e-06, "loss": 0.0008, "step": 140940 }, { "epoch": 1.1526352373553583, "grad_norm": 0.08131466805934906, "learning_rate": 4.541316701769469e-06, "loss": 0.0013, "step": 140950 }, { "epoch": 1.1527170135339575, "grad_norm": 0.03627311810851097, "learning_rate": 4.540606083830079e-06, "loss": 0.0011, "step": 140960 }, { "epoch": 1.1527987897125567, "grad_norm": 0.02714117430150509, "learning_rate": 4.539895475248918e-06, "loss": 0.0009, "step": 140970 }, { "epoch": 1.1528805658911558, "grad_norm": 0.0020006487611681223, "learning_rate": 4.53918487604046e-06, "loss": 0.0008, "step": 140980 }, { "epoch": 1.152962342069755, "grad_norm": 0.02841375209391117, "learning_rate": 4.538474286219177e-06, "loss": 0.0008, "step": 140990 }, { "epoch": 1.1530441182483542, "grad_norm": 0.02203698828816414, "learning_rate": 4.537763705799552e-06, "loss": 0.0008, "step": 141000 }, { "epoch": 1.1531258944269533, "grad_norm": 0.021821944043040276, "learning_rate": 4.537053134796055e-06, "loss": 0.001, "step": 141010 }, { "epoch": 1.1532076706055525, "grad_norm": 0.07144570350646973, "learning_rate": 4.536342573223161e-06, "loss": 0.0012, "step": 141020 }, { "epoch": 1.1532894467841517, "grad_norm": 0.09428238123655319, "learning_rate": 4.5356320210953465e-06, "loss": 0.0015, "step": 141030 }, { "epoch": 1.1533712229627509, "grad_norm": 0.04113413766026497, "learning_rate": 4.534921478427085e-06, "loss": 0.0009, "step": 141040 }, { "epoch": 1.1534529991413502, "grad_norm": 0.027366163209080696, "learning_rate": 4.534210945232852e-06, "loss": 0.0014, "step": 141050 }, { "epoch": 1.1535347753199492, "grad_norm": 0.04666634649038315, "learning_rate": 4.53350042152712e-06, "loss": 0.001, "step": 141060 }, { "epoch": 1.1536165514985486, "grad_norm": 0.07920514047145844, "learning_rate": 4.532789907324363e-06, "loss": 0.0006, "step": 141070 }, { "epoch": 1.1536983276771475, "grad_norm": 0.03194601833820343, "learning_rate": 4.532079402639056e-06, "loss": 0.001, "step": 141080 }, { "epoch": 1.153780103855747, "grad_norm": 0.005575260613113642, "learning_rate": 4.531368907485671e-06, "loss": 0.0009, "step": 141090 }, { "epoch": 1.153861880034346, "grad_norm": 0.05383450165390968, "learning_rate": 4.5306584218786835e-06, "loss": 0.0008, "step": 141100 }, { "epoch": 1.1539436562129453, "grad_norm": 0.1076686680316925, "learning_rate": 4.529947945832563e-06, "loss": 0.0011, "step": 141110 }, { "epoch": 1.1540254323915444, "grad_norm": 0.033891819417476654, "learning_rate": 4.529237479361787e-06, "loss": 0.001, "step": 141120 }, { "epoch": 1.1541072085701436, "grad_norm": 0.008886277675628662, "learning_rate": 4.528527022480827e-06, "loss": 0.0007, "step": 141130 }, { "epoch": 1.1541889847487428, "grad_norm": 0.048709724098443985, "learning_rate": 4.527816575204153e-06, "loss": 0.0011, "step": 141140 }, { "epoch": 1.154270760927342, "grad_norm": 0.018461082130670547, "learning_rate": 4.52710613754624e-06, "loss": 0.0007, "step": 141150 }, { "epoch": 1.154352537105941, "grad_norm": 0.07035117596387863, "learning_rate": 4.52639570952156e-06, "loss": 0.0012, "step": 141160 }, { "epoch": 1.1544343132845403, "grad_norm": 0.027072971686720848, "learning_rate": 4.525685291144583e-06, "loss": 0.0008, "step": 141170 }, { "epoch": 1.1545160894631394, "grad_norm": 0.11819534748792648, "learning_rate": 4.524974882429784e-06, "loss": 0.0008, "step": 141180 }, { "epoch": 1.1545978656417386, "grad_norm": 0.1719820499420166, "learning_rate": 4.524264483391631e-06, "loss": 0.0011, "step": 141190 }, { "epoch": 1.1546796418203378, "grad_norm": 0.058215003460645676, "learning_rate": 4.523554094044598e-06, "loss": 0.0012, "step": 141200 }, { "epoch": 1.154761417998937, "grad_norm": 0.12643963098526, "learning_rate": 4.522843714403156e-06, "loss": 0.0014, "step": 141210 }, { "epoch": 1.1548431941775361, "grad_norm": 0.023190811276435852, "learning_rate": 4.522133344481775e-06, "loss": 0.0012, "step": 141220 }, { "epoch": 1.1549249703561353, "grad_norm": 0.02679869718849659, "learning_rate": 4.521422984294923e-06, "loss": 0.0008, "step": 141230 }, { "epoch": 1.1550067465347345, "grad_norm": 0.002988258609548211, "learning_rate": 4.520712633857077e-06, "loss": 0.0013, "step": 141240 }, { "epoch": 1.1550885227133336, "grad_norm": 0.09377537667751312, "learning_rate": 4.520002293182704e-06, "loss": 0.002, "step": 141250 }, { "epoch": 1.1551702988919328, "grad_norm": 0.058279965072870255, "learning_rate": 4.519291962286275e-06, "loss": 0.0009, "step": 141260 }, { "epoch": 1.155252075070532, "grad_norm": 0.01978064700961113, "learning_rate": 4.518581641182258e-06, "loss": 0.0008, "step": 141270 }, { "epoch": 1.1553338512491311, "grad_norm": 0.01888670213520527, "learning_rate": 4.517871329885125e-06, "loss": 0.0005, "step": 141280 }, { "epoch": 1.1554156274277303, "grad_norm": 0.030241142958402634, "learning_rate": 4.517161028409345e-06, "loss": 0.0009, "step": 141290 }, { "epoch": 1.1554974036063295, "grad_norm": 0.06551439315080643, "learning_rate": 4.516450736769388e-06, "loss": 0.0008, "step": 141300 }, { "epoch": 1.1555791797849286, "grad_norm": 0.18785758316516876, "learning_rate": 4.515740454979721e-06, "loss": 0.0008, "step": 141310 }, { "epoch": 1.1556609559635278, "grad_norm": 0.015035545453429222, "learning_rate": 4.515030183054816e-06, "loss": 0.0082, "step": 141320 }, { "epoch": 1.155742732142127, "grad_norm": 0.006436829920858145, "learning_rate": 4.514319921009139e-06, "loss": 0.0008, "step": 141330 }, { "epoch": 1.1558245083207261, "grad_norm": 0.05836730822920799, "learning_rate": 4.513609668857162e-06, "loss": 0.0007, "step": 141340 }, { "epoch": 1.1559062844993253, "grad_norm": 0.1350029706954956, "learning_rate": 4.512899426613348e-06, "loss": 0.0005, "step": 141350 }, { "epoch": 1.1559880606779245, "grad_norm": 0.01584021933376789, "learning_rate": 4.512189194292171e-06, "loss": 0.0005, "step": 141360 }, { "epoch": 1.1560698368565236, "grad_norm": 0.04497237503528595, "learning_rate": 4.511478971908096e-06, "loss": 0.002, "step": 141370 }, { "epoch": 1.1561516130351228, "grad_norm": 0.06613212823867798, "learning_rate": 4.510768759475593e-06, "loss": 0.0005, "step": 141380 }, { "epoch": 1.156233389213722, "grad_norm": 0.06722310930490494, "learning_rate": 4.510058557009126e-06, "loss": 0.0009, "step": 141390 }, { "epoch": 1.1563151653923212, "grad_norm": 0.034512922167778015, "learning_rate": 4.509348364523165e-06, "loss": 0.0007, "step": 141400 }, { "epoch": 1.1563969415709203, "grad_norm": 0.31620413064956665, "learning_rate": 4.508638182032177e-06, "loss": 0.0053, "step": 141410 }, { "epoch": 1.1564787177495195, "grad_norm": 0.032452136278152466, "learning_rate": 4.50792800955063e-06, "loss": 0.0007, "step": 141420 }, { "epoch": 1.1565604939281187, "grad_norm": 0.03634265810251236, "learning_rate": 4.507217847092986e-06, "loss": 0.0009, "step": 141430 }, { "epoch": 1.1566422701067178, "grad_norm": 0.048838306218385696, "learning_rate": 4.506507694673718e-06, "loss": 0.0008, "step": 141440 }, { "epoch": 1.156724046285317, "grad_norm": 0.03681691735982895, "learning_rate": 4.505797552307288e-06, "loss": 0.002, "step": 141450 }, { "epoch": 1.1568058224639162, "grad_norm": 0.07490213215351105, "learning_rate": 4.5050874200081645e-06, "loss": 0.0011, "step": 141460 }, { "epoch": 1.1568875986425153, "grad_norm": 0.018826792016625404, "learning_rate": 4.5043772977908126e-06, "loss": 0.0018, "step": 141470 }, { "epoch": 1.1569693748211147, "grad_norm": 0.020317258313298225, "learning_rate": 4.503667185669695e-06, "loss": 0.0008, "step": 141480 }, { "epoch": 1.1570511509997137, "grad_norm": 0.0038373307324945927, "learning_rate": 4.502957083659283e-06, "loss": 0.0008, "step": 141490 }, { "epoch": 1.157132927178313, "grad_norm": 0.023696180433034897, "learning_rate": 4.50224699177404e-06, "loss": 0.0006, "step": 141500 }, { "epoch": 1.157214703356912, "grad_norm": 0.2739715874195099, "learning_rate": 4.50153691002843e-06, "loss": 0.002, "step": 141510 }, { "epoch": 1.1572964795355114, "grad_norm": 0.1769556701183319, "learning_rate": 4.500826838436916e-06, "loss": 0.0012, "step": 141520 }, { "epoch": 1.1573782557141106, "grad_norm": 0.0016149298753589392, "learning_rate": 4.500116777013966e-06, "loss": 0.0004, "step": 141530 }, { "epoch": 1.1574600318927097, "grad_norm": 0.005315759219229221, "learning_rate": 4.499406725774045e-06, "loss": 0.0014, "step": 141540 }, { "epoch": 1.157541808071309, "grad_norm": 0.01687108539044857, "learning_rate": 4.4986966847316154e-06, "loss": 0.0011, "step": 141550 }, { "epoch": 1.157623584249908, "grad_norm": 0.10591410100460052, "learning_rate": 4.497986653901141e-06, "loss": 0.0007, "step": 141560 }, { "epoch": 1.1577053604285072, "grad_norm": 0.19867849349975586, "learning_rate": 4.497276633297087e-06, "loss": 0.0013, "step": 141570 }, { "epoch": 1.1577871366071064, "grad_norm": 0.02595028094947338, "learning_rate": 4.496566622933917e-06, "loss": 0.0018, "step": 141580 }, { "epoch": 1.1578689127857056, "grad_norm": 0.08373569697141647, "learning_rate": 4.495856622826094e-06, "loss": 0.0009, "step": 141590 }, { "epoch": 1.1579506889643048, "grad_norm": 0.05796189233660698, "learning_rate": 4.495146632988079e-06, "loss": 0.0007, "step": 141600 }, { "epoch": 1.158032465142904, "grad_norm": 0.0015071695670485497, "learning_rate": 4.49443665343434e-06, "loss": 0.0008, "step": 141610 }, { "epoch": 1.158114241321503, "grad_norm": 0.0049821739085018635, "learning_rate": 4.493726684179337e-06, "loss": 0.0005, "step": 141620 }, { "epoch": 1.1581960175001023, "grad_norm": 0.04140464961528778, "learning_rate": 4.493016725237535e-06, "loss": 0.0014, "step": 141630 }, { "epoch": 1.1582777936787014, "grad_norm": 0.13883373141288757, "learning_rate": 4.492306776623389e-06, "loss": 0.0017, "step": 141640 }, { "epoch": 1.1583595698573006, "grad_norm": 0.08494468033313751, "learning_rate": 4.4915968383513705e-06, "loss": 0.002, "step": 141650 }, { "epoch": 1.1584413460358998, "grad_norm": 0.026021042838692665, "learning_rate": 4.490886910435938e-06, "loss": 0.0007, "step": 141660 }, { "epoch": 1.158523122214499, "grad_norm": 0.02640872821211815, "learning_rate": 4.490176992891552e-06, "loss": 0.0014, "step": 141670 }, { "epoch": 1.158604898393098, "grad_norm": 0.027219008654356003, "learning_rate": 4.489467085732674e-06, "loss": 0.0015, "step": 141680 }, { "epoch": 1.1586866745716973, "grad_norm": 0.11231308430433273, "learning_rate": 4.488757188973768e-06, "loss": 0.0019, "step": 141690 }, { "epoch": 1.1587684507502964, "grad_norm": 0.008164411410689354, "learning_rate": 4.488047302629294e-06, "loss": 0.0052, "step": 141700 }, { "epoch": 1.1588502269288956, "grad_norm": 0.0022421986795961857, "learning_rate": 4.487337426713712e-06, "loss": 0.0012, "step": 141710 }, { "epoch": 1.1589320031074948, "grad_norm": 0.04983316734433174, "learning_rate": 4.4866275612414805e-06, "loss": 0.0005, "step": 141720 }, { "epoch": 1.159013779286094, "grad_norm": 0.06620906293392181, "learning_rate": 4.485917706227066e-06, "loss": 0.0005, "step": 141730 }, { "epoch": 1.1590955554646931, "grad_norm": 0.07975003868341446, "learning_rate": 4.4852078616849255e-06, "loss": 0.0013, "step": 141740 }, { "epoch": 1.1591773316432923, "grad_norm": 0.02549402043223381, "learning_rate": 4.4844980276295205e-06, "loss": 0.0011, "step": 141750 }, { "epoch": 1.1592591078218915, "grad_norm": 0.024224145337939262, "learning_rate": 4.483788204075306e-06, "loss": 0.0006, "step": 141760 }, { "epoch": 1.1593408840004906, "grad_norm": 0.014264894649386406, "learning_rate": 4.483078391036747e-06, "loss": 0.001, "step": 141770 }, { "epoch": 1.1594226601790898, "grad_norm": 0.009316548705101013, "learning_rate": 4.4823685885283025e-06, "loss": 0.0009, "step": 141780 }, { "epoch": 1.159504436357689, "grad_norm": 0.0710296481847763, "learning_rate": 4.48165879656443e-06, "loss": 0.0011, "step": 141790 }, { "epoch": 1.1595862125362881, "grad_norm": 0.078326016664505, "learning_rate": 4.480949015159589e-06, "loss": 0.0015, "step": 141800 }, { "epoch": 1.1596679887148873, "grad_norm": 0.013397902250289917, "learning_rate": 4.480239244328238e-06, "loss": 0.001, "step": 141810 }, { "epoch": 1.1597497648934865, "grad_norm": 0.028042418882250786, "learning_rate": 4.479529484084838e-06, "loss": 0.001, "step": 141820 }, { "epoch": 1.1598315410720856, "grad_norm": 0.08024908602237701, "learning_rate": 4.478819734443844e-06, "loss": 0.0011, "step": 141830 }, { "epoch": 1.1599133172506848, "grad_norm": 0.029660958796739578, "learning_rate": 4.478109995419714e-06, "loss": 0.0015, "step": 141840 }, { "epoch": 1.159995093429284, "grad_norm": 0.0443437322974205, "learning_rate": 4.477400267026909e-06, "loss": 0.0036, "step": 141850 }, { "epoch": 1.1600768696078831, "grad_norm": 0.09675449132919312, "learning_rate": 4.476690549279886e-06, "loss": 0.001, "step": 141860 }, { "epoch": 1.1601586457864823, "grad_norm": 0.14268970489501953, "learning_rate": 4.475980842193103e-06, "loss": 0.0011, "step": 141870 }, { "epoch": 1.1602404219650815, "grad_norm": 0.050472013652324677, "learning_rate": 4.475271145781013e-06, "loss": 0.0017, "step": 141880 }, { "epoch": 1.1603221981436806, "grad_norm": 0.01526754628866911, "learning_rate": 4.474561460058078e-06, "loss": 0.0007, "step": 141890 }, { "epoch": 1.1604039743222798, "grad_norm": 0.07274184376001358, "learning_rate": 4.473851785038754e-06, "loss": 0.0018, "step": 141900 }, { "epoch": 1.1604857505008792, "grad_norm": 0.01851949281990528, "learning_rate": 4.473142120737496e-06, "loss": 0.0019, "step": 141910 }, { "epoch": 1.1605675266794782, "grad_norm": 0.020594483241438866, "learning_rate": 4.472432467168761e-06, "loss": 0.0014, "step": 141920 }, { "epoch": 1.1606493028580775, "grad_norm": 0.024273931980133057, "learning_rate": 4.471722824347005e-06, "loss": 0.001, "step": 141930 }, { "epoch": 1.1607310790366765, "grad_norm": 0.1964239478111267, "learning_rate": 4.471013192286686e-06, "loss": 0.0018, "step": 141940 }, { "epoch": 1.1608128552152759, "grad_norm": 0.003835576120764017, "learning_rate": 4.470303571002258e-06, "loss": 0.0013, "step": 141950 }, { "epoch": 1.160894631393875, "grad_norm": 0.10426097363233566, "learning_rate": 4.469593960508174e-06, "loss": 0.0015, "step": 141960 }, { "epoch": 1.1609764075724742, "grad_norm": 0.03432174026966095, "learning_rate": 4.468884360818895e-06, "loss": 0.0022, "step": 141970 }, { "epoch": 1.1610581837510734, "grad_norm": 0.04290809854865074, "learning_rate": 4.468174771948873e-06, "loss": 0.001, "step": 141980 }, { "epoch": 1.1611399599296726, "grad_norm": 0.062066979706287384, "learning_rate": 4.467465193912564e-06, "loss": 0.0011, "step": 141990 }, { "epoch": 1.1612217361082717, "grad_norm": 0.12679557502269745, "learning_rate": 4.46675562672442e-06, "loss": 0.0015, "step": 142000 }, { "epoch": 1.161303512286871, "grad_norm": 0.006625326815992594, "learning_rate": 4.466046070398897e-06, "loss": 0.0025, "step": 142010 }, { "epoch": 1.16138528846547, "grad_norm": 0.04306443780660629, "learning_rate": 4.465336524950452e-06, "loss": 0.001, "step": 142020 }, { "epoch": 1.1614670646440692, "grad_norm": 0.05372539535164833, "learning_rate": 4.464626990393535e-06, "loss": 0.0011, "step": 142030 }, { "epoch": 1.1615488408226684, "grad_norm": 0.045481711626052856, "learning_rate": 4.463917466742601e-06, "loss": 0.0009, "step": 142040 }, { "epoch": 1.1616306170012676, "grad_norm": 0.055761732161045074, "learning_rate": 4.463207954012105e-06, "loss": 0.0008, "step": 142050 }, { "epoch": 1.1617123931798667, "grad_norm": 0.006903321947902441, "learning_rate": 4.462498452216499e-06, "loss": 0.0004, "step": 142060 }, { "epoch": 1.161794169358466, "grad_norm": 0.054754167795181274, "learning_rate": 4.461788961370237e-06, "loss": 0.0008, "step": 142070 }, { "epoch": 1.161875945537065, "grad_norm": 0.07950632274150848, "learning_rate": 4.461079481487768e-06, "loss": 0.0012, "step": 142080 }, { "epoch": 1.1619577217156642, "grad_norm": 0.030033890157938004, "learning_rate": 4.460370012583551e-06, "loss": 0.0015, "step": 142090 }, { "epoch": 1.1620394978942634, "grad_norm": 0.024565640836954117, "learning_rate": 4.459660554672036e-06, "loss": 0.0008, "step": 142100 }, { "epoch": 1.1621212740728626, "grad_norm": 0.0634690448641777, "learning_rate": 4.458951107767675e-06, "loss": 0.0009, "step": 142110 }, { "epoch": 1.1622030502514618, "grad_norm": 0.026076264679431915, "learning_rate": 4.4582416718849165e-06, "loss": 0.0007, "step": 142120 }, { "epoch": 1.162284826430061, "grad_norm": 0.010994336567819118, "learning_rate": 4.457532247038218e-06, "loss": 0.0005, "step": 142130 }, { "epoch": 1.16236660260866, "grad_norm": 0.00936125498265028, "learning_rate": 4.456822833242028e-06, "loss": 0.0006, "step": 142140 }, { "epoch": 1.1624483787872593, "grad_norm": 0.028585659340023994, "learning_rate": 4.456113430510799e-06, "loss": 0.0015, "step": 142150 }, { "epoch": 1.1625301549658584, "grad_norm": 0.01184313464909792, "learning_rate": 4.4554040388589805e-06, "loss": 0.0012, "step": 142160 }, { "epoch": 1.1626119311444576, "grad_norm": 0.031749919056892395, "learning_rate": 4.454694658301025e-06, "loss": 0.0011, "step": 142170 }, { "epoch": 1.1626937073230568, "grad_norm": 0.029420191422104836, "learning_rate": 4.453985288851383e-06, "loss": 0.001, "step": 142180 }, { "epoch": 1.162775483501656, "grad_norm": 0.017735866829752922, "learning_rate": 4.453275930524505e-06, "loss": 0.0009, "step": 142190 }, { "epoch": 1.162857259680255, "grad_norm": 0.03539148345589638, "learning_rate": 4.452566583334837e-06, "loss": 0.0008, "step": 142200 }, { "epoch": 1.1629390358588543, "grad_norm": 0.06840597093105316, "learning_rate": 4.451857247296836e-06, "loss": 0.0019, "step": 142210 }, { "epoch": 1.1630208120374534, "grad_norm": 0.014642613008618355, "learning_rate": 4.451147922424948e-06, "loss": 0.0008, "step": 142220 }, { "epoch": 1.1631025882160526, "grad_norm": 0.030428864061832428, "learning_rate": 4.450438608733625e-06, "loss": 0.0031, "step": 142230 }, { "epoch": 1.1631843643946518, "grad_norm": 0.017275899648666382, "learning_rate": 4.449729306237309e-06, "loss": 0.0008, "step": 142240 }, { "epoch": 1.163266140573251, "grad_norm": 0.029184486716985703, "learning_rate": 4.449020014950458e-06, "loss": 0.001, "step": 142250 }, { "epoch": 1.1633479167518501, "grad_norm": 0.012376589700579643, "learning_rate": 4.4483107348875165e-06, "loss": 0.0011, "step": 142260 }, { "epoch": 1.1634296929304493, "grad_norm": 0.002803044393658638, "learning_rate": 4.447601466062934e-06, "loss": 0.0006, "step": 142270 }, { "epoch": 1.1635114691090485, "grad_norm": 0.008008533157408237, "learning_rate": 4.446892208491157e-06, "loss": 0.0008, "step": 142280 }, { "epoch": 1.1635932452876476, "grad_norm": 0.05255942419171333, "learning_rate": 4.4461829621866375e-06, "loss": 0.001, "step": 142290 }, { "epoch": 1.1636750214662468, "grad_norm": 0.10422389209270477, "learning_rate": 4.445473727163821e-06, "loss": 0.001, "step": 142300 }, { "epoch": 1.163756797644846, "grad_norm": 0.01766619272530079, "learning_rate": 4.444764503437155e-06, "loss": 0.0017, "step": 142310 }, { "epoch": 1.1638385738234454, "grad_norm": 0.018029073253273964, "learning_rate": 4.444055291021085e-06, "loss": 0.0011, "step": 142320 }, { "epoch": 1.1639203500020443, "grad_norm": 0.06405412405729294, "learning_rate": 4.4433460899300634e-06, "loss": 0.0011, "step": 142330 }, { "epoch": 1.1640021261806437, "grad_norm": 0.09206525981426239, "learning_rate": 4.442636900178534e-06, "loss": 0.0009, "step": 142340 }, { "epoch": 1.1640839023592426, "grad_norm": 0.04072967916727066, "learning_rate": 4.441927721780946e-06, "loss": 0.0009, "step": 142350 }, { "epoch": 1.164165678537842, "grad_norm": 0.12480124086141586, "learning_rate": 4.441218554751739e-06, "loss": 0.001, "step": 142360 }, { "epoch": 1.164247454716441, "grad_norm": 0.01229859609156847, "learning_rate": 4.4405093991053675e-06, "loss": 0.0009, "step": 142370 }, { "epoch": 1.1643292308950404, "grad_norm": 0.0547766275703907, "learning_rate": 4.439800254856274e-06, "loss": 0.0009, "step": 142380 }, { "epoch": 1.1644110070736395, "grad_norm": 0.04863893985748291, "learning_rate": 4.439091122018905e-06, "loss": 0.0008, "step": 142390 }, { "epoch": 1.1644927832522387, "grad_norm": 0.030438371002674103, "learning_rate": 4.438382000607704e-06, "loss": 0.0016, "step": 142400 }, { "epoch": 1.1645745594308379, "grad_norm": 0.026169009506702423, "learning_rate": 4.43767289063712e-06, "loss": 0.0027, "step": 142410 }, { "epoch": 1.164656335609437, "grad_norm": 0.001192198134958744, "learning_rate": 4.436963792121597e-06, "loss": 0.0014, "step": 142420 }, { "epoch": 1.1647381117880362, "grad_norm": 0.009407003410160542, "learning_rate": 4.436254705075578e-06, "loss": 0.0005, "step": 142430 }, { "epoch": 1.1648198879666354, "grad_norm": 0.007439786568284035, "learning_rate": 4.4355456295135065e-06, "loss": 0.001, "step": 142440 }, { "epoch": 1.1649016641452345, "grad_norm": 0.02706313319504261, "learning_rate": 4.434836565449832e-06, "loss": 0.0016, "step": 142450 }, { "epoch": 1.1649834403238337, "grad_norm": 0.056268155574798584, "learning_rate": 4.434127512898996e-06, "loss": 0.0015, "step": 142460 }, { "epoch": 1.1650652165024329, "grad_norm": 0.05556348338723183, "learning_rate": 4.433418471875444e-06, "loss": 0.0015, "step": 142470 }, { "epoch": 1.165146992681032, "grad_norm": 0.008822835050523281, "learning_rate": 4.432709442393614e-06, "loss": 0.0008, "step": 142480 }, { "epoch": 1.1652287688596312, "grad_norm": 0.16564543545246124, "learning_rate": 4.432000424467957e-06, "loss": 0.0011, "step": 142490 }, { "epoch": 1.1653105450382304, "grad_norm": 0.10394035279750824, "learning_rate": 4.431291418112912e-06, "loss": 0.001, "step": 142500 }, { "epoch": 1.1653923212168296, "grad_norm": 0.010286585427820683, "learning_rate": 4.430582423342924e-06, "loss": 0.0011, "step": 142510 }, { "epoch": 1.1654740973954287, "grad_norm": 0.034886375069618225, "learning_rate": 4.429873440172434e-06, "loss": 0.0008, "step": 142520 }, { "epoch": 1.165555873574028, "grad_norm": 0.06976106762886047, "learning_rate": 4.4291644686158854e-06, "loss": 0.0009, "step": 142530 }, { "epoch": 1.165637649752627, "grad_norm": 0.0574662983417511, "learning_rate": 4.428455508687721e-06, "loss": 0.0017, "step": 142540 }, { "epoch": 1.1657194259312262, "grad_norm": 0.018315138295292854, "learning_rate": 4.4277465604023825e-06, "loss": 0.0008, "step": 142550 }, { "epoch": 1.1658012021098254, "grad_norm": 0.06226268783211708, "learning_rate": 4.42703762377431e-06, "loss": 0.0005, "step": 142560 }, { "epoch": 1.1658829782884246, "grad_norm": 0.04094124957919121, "learning_rate": 4.426328698817948e-06, "loss": 0.0016, "step": 142570 }, { "epoch": 1.1659647544670237, "grad_norm": 0.04797106608748436, "learning_rate": 4.425619785547739e-06, "loss": 0.0009, "step": 142580 }, { "epoch": 1.166046530645623, "grad_norm": 0.4538770318031311, "learning_rate": 4.424910883978121e-06, "loss": 0.0011, "step": 142590 }, { "epoch": 1.166128306824222, "grad_norm": 0.042359158396720886, "learning_rate": 4.424201994123532e-06, "loss": 0.0012, "step": 142600 }, { "epoch": 1.1662100830028213, "grad_norm": 0.012248099781572819, "learning_rate": 4.42349311599842e-06, "loss": 0.0013, "step": 142610 }, { "epoch": 1.1662918591814204, "grad_norm": 0.08653991669416428, "learning_rate": 4.422784249617221e-06, "loss": 0.0015, "step": 142620 }, { "epoch": 1.1663736353600196, "grad_norm": 0.02467484399676323, "learning_rate": 4.422075394994377e-06, "loss": 0.0011, "step": 142630 }, { "epoch": 1.1664554115386188, "grad_norm": 0.0955401062965393, "learning_rate": 4.421366552144325e-06, "loss": 0.0017, "step": 142640 }, { "epoch": 1.166537187717218, "grad_norm": 0.030382951721549034, "learning_rate": 4.420657721081508e-06, "loss": 0.0009, "step": 142650 }, { "epoch": 1.166618963895817, "grad_norm": 0.026397373527288437, "learning_rate": 4.419948901820364e-06, "loss": 0.0008, "step": 142660 }, { "epoch": 1.1667007400744163, "grad_norm": 0.1591206192970276, "learning_rate": 4.419240094375332e-06, "loss": 0.0013, "step": 142670 }, { "epoch": 1.1667825162530154, "grad_norm": 0.06333579868078232, "learning_rate": 4.41853129876085e-06, "loss": 0.0004, "step": 142680 }, { "epoch": 1.1668642924316146, "grad_norm": 0.12342519313097, "learning_rate": 4.417822514991359e-06, "loss": 0.001, "step": 142690 }, { "epoch": 1.1669460686102138, "grad_norm": 0.03796426206827164, "learning_rate": 4.417113743081298e-06, "loss": 0.001, "step": 142700 }, { "epoch": 1.167027844788813, "grad_norm": 0.03653932735323906, "learning_rate": 4.416404983045104e-06, "loss": 0.0011, "step": 142710 }, { "epoch": 1.167109620967412, "grad_norm": 0.008913539350032806, "learning_rate": 4.4156962348972125e-06, "loss": 0.0008, "step": 142720 }, { "epoch": 1.1671913971460113, "grad_norm": 0.05473114550113678, "learning_rate": 4.414987498652065e-06, "loss": 0.0015, "step": 142730 }, { "epoch": 1.1672731733246104, "grad_norm": 0.04677031561732292, "learning_rate": 4.414278774324098e-06, "loss": 0.0007, "step": 142740 }, { "epoch": 1.1673549495032098, "grad_norm": 0.037151042371988297, "learning_rate": 4.4135700619277485e-06, "loss": 0.0013, "step": 142750 }, { "epoch": 1.1674367256818088, "grad_norm": 0.0012946086935698986, "learning_rate": 4.412861361477453e-06, "loss": 0.0008, "step": 142760 }, { "epoch": 1.1675185018604082, "grad_norm": 0.026802441105246544, "learning_rate": 4.412152672987648e-06, "loss": 0.0015, "step": 142770 }, { "epoch": 1.1676002780390071, "grad_norm": 0.11145026981830597, "learning_rate": 4.411443996472773e-06, "loss": 0.0013, "step": 142780 }, { "epoch": 1.1676820542176065, "grad_norm": 0.05710098147392273, "learning_rate": 4.410735331947261e-06, "loss": 0.0022, "step": 142790 }, { "epoch": 1.1677638303962057, "grad_norm": 0.0671469122171402, "learning_rate": 4.410026679425548e-06, "loss": 0.0017, "step": 142800 }, { "epoch": 1.1678456065748049, "grad_norm": 0.05012806877493858, "learning_rate": 4.409318038922072e-06, "loss": 0.0009, "step": 142810 }, { "epoch": 1.167927382753404, "grad_norm": 0.03341592848300934, "learning_rate": 4.40860941045127e-06, "loss": 0.0008, "step": 142820 }, { "epoch": 1.1680091589320032, "grad_norm": 0.011777671054005623, "learning_rate": 4.407900794027573e-06, "loss": 0.0017, "step": 142830 }, { "epoch": 1.1680909351106024, "grad_norm": 0.010505489073693752, "learning_rate": 4.407192189665417e-06, "loss": 0.0005, "step": 142840 }, { "epoch": 1.1681727112892015, "grad_norm": 0.041242100298404694, "learning_rate": 4.4064835973792384e-06, "loss": 0.0015, "step": 142850 }, { "epoch": 1.1682544874678007, "grad_norm": 0.06382625550031662, "learning_rate": 4.405775017183473e-06, "loss": 0.0014, "step": 142860 }, { "epoch": 1.1683362636463999, "grad_norm": 0.0326559841632843, "learning_rate": 4.405066449092553e-06, "loss": 0.0007, "step": 142870 }, { "epoch": 1.168418039824999, "grad_norm": 0.0261341854929924, "learning_rate": 4.404357893120912e-06, "loss": 0.001, "step": 142880 }, { "epoch": 1.1684998160035982, "grad_norm": 0.010314182378351688, "learning_rate": 4.403649349282986e-06, "loss": 0.0008, "step": 142890 }, { "epoch": 1.1685815921821974, "grad_norm": 0.026682009920477867, "learning_rate": 4.402940817593207e-06, "loss": 0.0012, "step": 142900 }, { "epoch": 1.1686633683607965, "grad_norm": 0.028436031192541122, "learning_rate": 4.4022322980660085e-06, "loss": 0.0008, "step": 142910 }, { "epoch": 1.1687451445393957, "grad_norm": 0.08630110323429108, "learning_rate": 4.401523790715824e-06, "loss": 0.0009, "step": 142920 }, { "epoch": 1.1688269207179949, "grad_norm": 0.007634190376847982, "learning_rate": 4.400815295557086e-06, "loss": 0.0014, "step": 142930 }, { "epoch": 1.168908696896594, "grad_norm": 0.017377305775880814, "learning_rate": 4.4001068126042294e-06, "loss": 0.0008, "step": 142940 }, { "epoch": 1.1689904730751932, "grad_norm": 0.07367972284555435, "learning_rate": 4.399398341871684e-06, "loss": 0.0006, "step": 142950 }, { "epoch": 1.1690722492537924, "grad_norm": 0.02346125617623329, "learning_rate": 4.398689883373883e-06, "loss": 0.0016, "step": 142960 }, { "epoch": 1.1691540254323916, "grad_norm": 0.02620503678917885, "learning_rate": 4.397981437125254e-06, "loss": 0.0004, "step": 142970 }, { "epoch": 1.1692358016109907, "grad_norm": 0.019959362223744392, "learning_rate": 4.3972730031402354e-06, "loss": 0.0013, "step": 142980 }, { "epoch": 1.16931757778959, "grad_norm": 0.021383997052907944, "learning_rate": 4.396564581433256e-06, "loss": 0.0008, "step": 142990 }, { "epoch": 1.169399353968189, "grad_norm": 0.01323267538100481, "learning_rate": 4.395856172018746e-06, "loss": 0.0005, "step": 143000 }, { "epoch": 1.1694811301467882, "grad_norm": 0.06608784198760986, "learning_rate": 4.395147774911137e-06, "loss": 0.0006, "step": 143010 }, { "epoch": 1.1695629063253874, "grad_norm": 0.044471390545368195, "learning_rate": 4.394439390124859e-06, "loss": 0.0011, "step": 143020 }, { "epoch": 1.1696446825039866, "grad_norm": 0.04252762347459793, "learning_rate": 4.393731017674344e-06, "loss": 0.0006, "step": 143030 }, { "epoch": 1.1697264586825857, "grad_norm": 0.11241491883993149, "learning_rate": 4.39302265757402e-06, "loss": 0.0007, "step": 143040 }, { "epoch": 1.169808234861185, "grad_norm": 0.01163568627089262, "learning_rate": 4.392314309838317e-06, "loss": 0.0013, "step": 143050 }, { "epoch": 1.169890011039784, "grad_norm": 0.11814309656620026, "learning_rate": 4.391605974481667e-06, "loss": 0.0018, "step": 143060 }, { "epoch": 1.1699717872183832, "grad_norm": 0.0023404036182910204, "learning_rate": 4.390897651518498e-06, "loss": 0.0016, "step": 143070 }, { "epoch": 1.1700535633969824, "grad_norm": 0.03944651409983635, "learning_rate": 4.390189340963239e-06, "loss": 0.0013, "step": 143080 }, { "epoch": 1.1701353395755816, "grad_norm": 0.06693335622549057, "learning_rate": 4.3894810428303165e-06, "loss": 0.0007, "step": 143090 }, { "epoch": 1.1702171157541807, "grad_norm": 0.032136499881744385, "learning_rate": 4.388772757134163e-06, "loss": 0.0006, "step": 143100 }, { "epoch": 1.17029889193278, "grad_norm": 0.06087135151028633, "learning_rate": 4.3880644838892054e-06, "loss": 0.0015, "step": 143110 }, { "epoch": 1.170380668111379, "grad_norm": 0.041819844394922256, "learning_rate": 4.387356223109872e-06, "loss": 0.0009, "step": 143120 }, { "epoch": 1.1704624442899783, "grad_norm": 0.04829518124461174, "learning_rate": 4.38664797481059e-06, "loss": 0.0018, "step": 143130 }, { "epoch": 1.1705442204685774, "grad_norm": 0.05146033689379692, "learning_rate": 4.385939739005787e-06, "loss": 0.001, "step": 143140 }, { "epoch": 1.1706259966471766, "grad_norm": 0.019586393609642982, "learning_rate": 4.3852315157098916e-06, "loss": 0.0007, "step": 143150 }, { "epoch": 1.1707077728257758, "grad_norm": 0.013618546538054943, "learning_rate": 4.384523304937328e-06, "loss": 0.0011, "step": 143160 }, { "epoch": 1.170789549004375, "grad_norm": 0.10688100755214691, "learning_rate": 4.3838151067025255e-06, "loss": 0.0011, "step": 143170 }, { "epoch": 1.1708713251829743, "grad_norm": 0.03433835133910179, "learning_rate": 4.383106921019912e-06, "loss": 0.0013, "step": 143180 }, { "epoch": 1.1709531013615733, "grad_norm": 0.04133685305714607, "learning_rate": 4.382398747903911e-06, "loss": 0.0007, "step": 143190 }, { "epoch": 1.1710348775401727, "grad_norm": 0.010340933687984943, "learning_rate": 4.3816905873689495e-06, "loss": 0.0007, "step": 143200 }, { "epoch": 1.1711166537187716, "grad_norm": 0.18470029532909393, "learning_rate": 4.380982439429451e-06, "loss": 0.0015, "step": 143210 }, { "epoch": 1.171198429897371, "grad_norm": 0.03053056076169014, "learning_rate": 4.380274304099846e-06, "loss": 0.0011, "step": 143220 }, { "epoch": 1.1712802060759702, "grad_norm": 0.05008748173713684, "learning_rate": 4.379566181394557e-06, "loss": 0.0009, "step": 143230 }, { "epoch": 1.1713619822545693, "grad_norm": 0.022593166679143906, "learning_rate": 4.378858071328009e-06, "loss": 0.0006, "step": 143240 }, { "epoch": 1.1714437584331685, "grad_norm": 0.032781362533569336, "learning_rate": 4.378149973914626e-06, "loss": 0.0011, "step": 143250 }, { "epoch": 1.1715255346117677, "grad_norm": 0.008204770274460316, "learning_rate": 4.3774418891688344e-06, "loss": 0.0013, "step": 143260 }, { "epoch": 1.1716073107903668, "grad_norm": 0.011068586260080338, "learning_rate": 4.376733817105058e-06, "loss": 0.0007, "step": 143270 }, { "epoch": 1.171689086968966, "grad_norm": 0.09190291911363602, "learning_rate": 4.37602575773772e-06, "loss": 0.0011, "step": 143280 }, { "epoch": 1.1717708631475652, "grad_norm": 0.03172775357961655, "learning_rate": 4.375317711081242e-06, "loss": 0.0009, "step": 143290 }, { "epoch": 1.1718526393261643, "grad_norm": 0.04466534033417702, "learning_rate": 4.374609677150054e-06, "loss": 0.0011, "step": 143300 }, { "epoch": 1.1719344155047635, "grad_norm": 0.036000195890665054, "learning_rate": 4.373901655958574e-06, "loss": 0.0008, "step": 143310 }, { "epoch": 1.1720161916833627, "grad_norm": 0.07883506268262863, "learning_rate": 4.373193647521226e-06, "loss": 0.0009, "step": 143320 }, { "epoch": 1.1720979678619619, "grad_norm": 0.04181919991970062, "learning_rate": 4.372485651852431e-06, "loss": 0.0011, "step": 143330 }, { "epoch": 1.172179744040561, "grad_norm": 0.2550916075706482, "learning_rate": 4.371777668966615e-06, "loss": 0.001, "step": 143340 }, { "epoch": 1.1722615202191602, "grad_norm": 0.16022776067256927, "learning_rate": 4.371069698878199e-06, "loss": 0.0018, "step": 143350 }, { "epoch": 1.1723432963977594, "grad_norm": 0.06808019429445267, "learning_rate": 4.370361741601604e-06, "loss": 0.0012, "step": 143360 }, { "epoch": 1.1724250725763585, "grad_norm": 0.009546478278934956, "learning_rate": 4.369653797151251e-06, "loss": 0.001, "step": 143370 }, { "epoch": 1.1725068487549577, "grad_norm": 0.03561336174607277, "learning_rate": 4.368945865541564e-06, "loss": 0.0012, "step": 143380 }, { "epoch": 1.1725886249335569, "grad_norm": 0.08868483453989029, "learning_rate": 4.368237946786963e-06, "loss": 0.0021, "step": 143390 }, { "epoch": 1.172670401112156, "grad_norm": 0.04447344318032265, "learning_rate": 4.367530040901868e-06, "loss": 0.0007, "step": 143400 }, { "epoch": 1.1727521772907552, "grad_norm": 0.04480951279401779, "learning_rate": 4.366822147900698e-06, "loss": 0.0006, "step": 143410 }, { "epoch": 1.1728339534693544, "grad_norm": 0.009864197112619877, "learning_rate": 4.3661142677978786e-06, "loss": 0.0008, "step": 143420 }, { "epoch": 1.1729157296479535, "grad_norm": 0.03660503774881363, "learning_rate": 4.365406400607826e-06, "loss": 0.0018, "step": 143430 }, { "epoch": 1.1729975058265527, "grad_norm": 0.031986821442842484, "learning_rate": 4.364698546344961e-06, "loss": 0.001, "step": 143440 }, { "epoch": 1.1730792820051519, "grad_norm": 0.013261962682008743, "learning_rate": 4.3639907050237005e-06, "loss": 0.0009, "step": 143450 }, { "epoch": 1.173161058183751, "grad_norm": 0.045010726898908615, "learning_rate": 4.3632828766584685e-06, "loss": 0.0015, "step": 143460 }, { "epoch": 1.1732428343623502, "grad_norm": 0.007512787822633982, "learning_rate": 4.362575061263683e-06, "loss": 0.0028, "step": 143470 }, { "epoch": 1.1733246105409494, "grad_norm": 0.03344022482633591, "learning_rate": 4.36186725885376e-06, "loss": 0.0009, "step": 143480 }, { "epoch": 1.1734063867195486, "grad_norm": 0.04123086854815483, "learning_rate": 4.3611594694431195e-06, "loss": 0.0007, "step": 143490 }, { "epoch": 1.1734881628981477, "grad_norm": 0.028967445716261864, "learning_rate": 4.36045169304618e-06, "loss": 0.0007, "step": 143500 }, { "epoch": 1.173569939076747, "grad_norm": 0.08237316459417343, "learning_rate": 4.35974392967736e-06, "loss": 0.001, "step": 143510 }, { "epoch": 1.173651715255346, "grad_norm": 0.05634395033121109, "learning_rate": 4.3590361793510775e-06, "loss": 0.0014, "step": 143520 }, { "epoch": 1.1737334914339452, "grad_norm": 0.057047780603170395, "learning_rate": 4.358328442081745e-06, "loss": 0.0015, "step": 143530 }, { "epoch": 1.1738152676125444, "grad_norm": 0.04040195420384407, "learning_rate": 4.357620717883788e-06, "loss": 0.0005, "step": 143540 }, { "epoch": 1.1738970437911436, "grad_norm": 0.017686963081359863, "learning_rate": 4.356913006771618e-06, "loss": 0.0012, "step": 143550 }, { "epoch": 1.1739788199697427, "grad_norm": 0.0038447952829301357, "learning_rate": 4.356205308759653e-06, "loss": 0.0005, "step": 143560 }, { "epoch": 1.174060596148342, "grad_norm": 0.05153121426701546, "learning_rate": 4.355497623862307e-06, "loss": 0.0014, "step": 143570 }, { "epoch": 1.174142372326941, "grad_norm": 0.023916054517030716, "learning_rate": 4.3547899520940005e-06, "loss": 0.0011, "step": 143580 }, { "epoch": 1.1742241485055402, "grad_norm": 0.03243885189294815, "learning_rate": 4.3540822934691466e-06, "loss": 0.0011, "step": 143590 }, { "epoch": 1.1743059246841394, "grad_norm": 0.0459112711250782, "learning_rate": 4.353374648002161e-06, "loss": 0.001, "step": 143600 }, { "epoch": 1.1743877008627388, "grad_norm": 0.04862323775887489, "learning_rate": 4.352667015707459e-06, "loss": 0.0008, "step": 143610 }, { "epoch": 1.1744694770413378, "grad_norm": 0.012266276404261589, "learning_rate": 4.351959396599458e-06, "loss": 0.0011, "step": 143620 }, { "epoch": 1.1745512532199371, "grad_norm": 0.031831152737140656, "learning_rate": 4.35125179069257e-06, "loss": 0.0012, "step": 143630 }, { "epoch": 1.174633029398536, "grad_norm": 0.03676889091730118, "learning_rate": 4.35054419800121e-06, "loss": 0.0011, "step": 143640 }, { "epoch": 1.1747148055771355, "grad_norm": 0.06380821019411087, "learning_rate": 4.34983661853979e-06, "loss": 0.0013, "step": 143650 }, { "epoch": 1.1747965817557346, "grad_norm": 0.0021091382950544357, "learning_rate": 4.349129052322731e-06, "loss": 0.0007, "step": 143660 }, { "epoch": 1.1748783579343338, "grad_norm": 0.04171266406774521, "learning_rate": 4.348421499364441e-06, "loss": 0.0007, "step": 143670 }, { "epoch": 1.174960134112933, "grad_norm": 0.0055740769021213055, "learning_rate": 4.347713959679334e-06, "loss": 0.0014, "step": 143680 }, { "epoch": 1.1750419102915322, "grad_norm": 0.0011589726200327277, "learning_rate": 4.347006433281822e-06, "loss": 0.0007, "step": 143690 }, { "epoch": 1.1751236864701313, "grad_norm": 0.04963883012533188, "learning_rate": 4.346298920186322e-06, "loss": 0.0006, "step": 143700 }, { "epoch": 1.1752054626487305, "grad_norm": 0.06842716783285141, "learning_rate": 4.345591420407244e-06, "loss": 0.002, "step": 143710 }, { "epoch": 1.1752872388273297, "grad_norm": 0.0499226450920105, "learning_rate": 4.344883933959001e-06, "loss": 0.0008, "step": 143720 }, { "epoch": 1.1753690150059288, "grad_norm": 0.014335300773382187, "learning_rate": 4.344176460856004e-06, "loss": 0.001, "step": 143730 }, { "epoch": 1.175450791184528, "grad_norm": 0.12835966050624847, "learning_rate": 4.343469001112667e-06, "loss": 0.0013, "step": 143740 }, { "epoch": 1.1755325673631272, "grad_norm": 0.12658794224262238, "learning_rate": 4.3427615547433996e-06, "loss": 0.0011, "step": 143750 }, { "epoch": 1.1756143435417263, "grad_norm": 0.010270239785313606, "learning_rate": 4.342054121762614e-06, "loss": 0.0018, "step": 143760 }, { "epoch": 1.1756961197203255, "grad_norm": 0.022182632237672806, "learning_rate": 4.341346702184717e-06, "loss": 0.0006, "step": 143770 }, { "epoch": 1.1757778958989247, "grad_norm": 0.020654847845435143, "learning_rate": 4.3406392960241276e-06, "loss": 0.001, "step": 143780 }, { "epoch": 1.1758596720775238, "grad_norm": 0.01900615356862545, "learning_rate": 4.33993190329525e-06, "loss": 0.0014, "step": 143790 }, { "epoch": 1.175941448256123, "grad_norm": 0.04245157167315483, "learning_rate": 4.339224524012497e-06, "loss": 0.0013, "step": 143800 }, { "epoch": 1.1760232244347222, "grad_norm": 0.0024587090592831373, "learning_rate": 4.338517158190274e-06, "loss": 0.0008, "step": 143810 }, { "epoch": 1.1761050006133213, "grad_norm": 0.030269796028733253, "learning_rate": 4.337809805842998e-06, "loss": 0.0024, "step": 143820 }, { "epoch": 1.1761867767919205, "grad_norm": 0.014675403945147991, "learning_rate": 4.337102466985073e-06, "loss": 0.0017, "step": 143830 }, { "epoch": 1.1762685529705197, "grad_norm": 0.021929683163762093, "learning_rate": 4.3363951416309105e-06, "loss": 0.0013, "step": 143840 }, { "epoch": 1.1763503291491189, "grad_norm": 0.04192882776260376, "learning_rate": 4.335687829794917e-06, "loss": 0.0012, "step": 143850 }, { "epoch": 1.176432105327718, "grad_norm": 0.049252089112997055, "learning_rate": 4.334980531491503e-06, "loss": 0.0014, "step": 143860 }, { "epoch": 1.1765138815063172, "grad_norm": 0.09726548194885254, "learning_rate": 4.334273246735077e-06, "loss": 0.0016, "step": 143870 }, { "epoch": 1.1765956576849164, "grad_norm": 0.044471319764852524, "learning_rate": 4.333565975540046e-06, "loss": 0.0008, "step": 143880 }, { "epoch": 1.1766774338635155, "grad_norm": 0.0403597429394722, "learning_rate": 4.332858717920815e-06, "loss": 0.0008, "step": 143890 }, { "epoch": 1.1767592100421147, "grad_norm": 0.019721800461411476, "learning_rate": 4.332151473891798e-06, "loss": 0.0008, "step": 143900 }, { "epoch": 1.1768409862207139, "grad_norm": 0.036089036613702774, "learning_rate": 4.331444243467396e-06, "loss": 0.001, "step": 143910 }, { "epoch": 1.176922762399313, "grad_norm": 0.038868248462677, "learning_rate": 4.33073702666202e-06, "loss": 0.0015, "step": 143920 }, { "epoch": 1.1770045385779122, "grad_norm": 0.06329786032438278, "learning_rate": 4.330029823490072e-06, "loss": 0.0023, "step": 143930 }, { "epoch": 1.1770863147565114, "grad_norm": 0.08537645637989044, "learning_rate": 4.329322633965963e-06, "loss": 0.0012, "step": 143940 }, { "epoch": 1.1771680909351105, "grad_norm": 0.0016922068316489458, "learning_rate": 4.3286154581040964e-06, "loss": 0.0006, "step": 143950 }, { "epoch": 1.1772498671137097, "grad_norm": 0.009323411621153355, "learning_rate": 4.32790829591888e-06, "loss": 0.0009, "step": 143960 }, { "epoch": 1.1773316432923089, "grad_norm": 0.03733878210186958, "learning_rate": 4.327201147424716e-06, "loss": 0.0008, "step": 143970 }, { "epoch": 1.177413419470908, "grad_norm": 0.0074515859596431255, "learning_rate": 4.326494012636012e-06, "loss": 0.0008, "step": 143980 }, { "epoch": 1.1774951956495072, "grad_norm": 0.11470388621091843, "learning_rate": 4.325786891567174e-06, "loss": 0.0016, "step": 143990 }, { "epoch": 1.1775769718281064, "grad_norm": 0.04451572895050049, "learning_rate": 4.325079784232604e-06, "loss": 0.0011, "step": 144000 }, { "epoch": 1.1776587480067056, "grad_norm": 0.02793455868959427, "learning_rate": 4.324372690646706e-06, "loss": 0.0007, "step": 144010 }, { "epoch": 1.1777405241853047, "grad_norm": 0.045470040291547775, "learning_rate": 4.323665610823888e-06, "loss": 0.0019, "step": 144020 }, { "epoch": 1.177822300363904, "grad_norm": 0.02400153875350952, "learning_rate": 4.322958544778551e-06, "loss": 0.001, "step": 144030 }, { "epoch": 1.1779040765425033, "grad_norm": 0.009467788971960545, "learning_rate": 4.322251492525098e-06, "loss": 0.0009, "step": 144040 }, { "epoch": 1.1779858527211022, "grad_norm": 0.0694219246506691, "learning_rate": 4.32154445407793e-06, "loss": 0.001, "step": 144050 }, { "epoch": 1.1780676288997016, "grad_norm": 0.005494162905961275, "learning_rate": 4.320837429451456e-06, "loss": 0.0017, "step": 144060 }, { "epoch": 1.1781494050783006, "grad_norm": 0.001312717329710722, "learning_rate": 4.320130418660077e-06, "loss": 0.0011, "step": 144070 }, { "epoch": 1.1782311812569, "grad_norm": 0.02939072996377945, "learning_rate": 4.319423421718192e-06, "loss": 0.0008, "step": 144080 }, { "epoch": 1.1783129574354991, "grad_norm": 0.12033013999462128, "learning_rate": 4.318716438640205e-06, "loss": 0.0009, "step": 144090 }, { "epoch": 1.1783947336140983, "grad_norm": 0.03232214227318764, "learning_rate": 4.318009469440519e-06, "loss": 0.0015, "step": 144100 }, { "epoch": 1.1784765097926975, "grad_norm": 0.027531547471880913, "learning_rate": 4.317302514133534e-06, "loss": 0.0006, "step": 144110 }, { "epoch": 1.1785582859712966, "grad_norm": 0.22328664362430573, "learning_rate": 4.316595572733652e-06, "loss": 0.0019, "step": 144120 }, { "epoch": 1.1786400621498958, "grad_norm": 0.07358979433774948, "learning_rate": 4.315888645255271e-06, "loss": 0.0012, "step": 144130 }, { "epoch": 1.178721838328495, "grad_norm": 0.0647672563791275, "learning_rate": 4.315181731712798e-06, "loss": 0.0016, "step": 144140 }, { "epoch": 1.1788036145070941, "grad_norm": 0.04027240350842476, "learning_rate": 4.3144748321206295e-06, "loss": 0.0005, "step": 144150 }, { "epoch": 1.1788853906856933, "grad_norm": 0.04278557375073433, "learning_rate": 4.313767946493165e-06, "loss": 0.0011, "step": 144160 }, { "epoch": 1.1789671668642925, "grad_norm": 0.035310763865709305, "learning_rate": 4.313061074844803e-06, "loss": 0.001, "step": 144170 }, { "epoch": 1.1790489430428917, "grad_norm": 0.03578270226716995, "learning_rate": 4.312354217189948e-06, "loss": 0.0007, "step": 144180 }, { "epoch": 1.1791307192214908, "grad_norm": 0.14173178374767303, "learning_rate": 4.311647373542996e-06, "loss": 0.0018, "step": 144190 }, { "epoch": 1.17921249540009, "grad_norm": 0.02134515903890133, "learning_rate": 4.310940543918346e-06, "loss": 0.0009, "step": 144200 }, { "epoch": 1.1792942715786892, "grad_norm": 0.07130514830350876, "learning_rate": 4.310233728330397e-06, "loss": 0.0012, "step": 144210 }, { "epoch": 1.1793760477572883, "grad_norm": 0.02159912697970867, "learning_rate": 4.309526926793549e-06, "loss": 0.0014, "step": 144220 }, { "epoch": 1.1794578239358875, "grad_norm": 0.028813889250159264, "learning_rate": 4.308820139322198e-06, "loss": 0.0012, "step": 144230 }, { "epoch": 1.1795396001144867, "grad_norm": 0.03517211601138115, "learning_rate": 4.308113365930743e-06, "loss": 0.0007, "step": 144240 }, { "epoch": 1.1796213762930858, "grad_norm": 0.030922411009669304, "learning_rate": 4.307406606633579e-06, "loss": 0.0013, "step": 144250 }, { "epoch": 1.179703152471685, "grad_norm": 0.03411679342389107, "learning_rate": 4.306699861445108e-06, "loss": 0.0007, "step": 144260 }, { "epoch": 1.1797849286502842, "grad_norm": 0.056272443383932114, "learning_rate": 4.305993130379724e-06, "loss": 0.0012, "step": 144270 }, { "epoch": 1.1798667048288833, "grad_norm": 0.04268352687358856, "learning_rate": 4.305286413451824e-06, "loss": 0.0007, "step": 144280 }, { "epoch": 1.1799484810074825, "grad_norm": 0.015006831847131252, "learning_rate": 4.3045797106758016e-06, "loss": 0.0007, "step": 144290 }, { "epoch": 1.1800302571860817, "grad_norm": 0.09259788691997528, "learning_rate": 4.3038730220660594e-06, "loss": 0.0015, "step": 144300 }, { "epoch": 1.1801120333646808, "grad_norm": 0.07236935943365097, "learning_rate": 4.303166347636988e-06, "loss": 0.001, "step": 144310 }, { "epoch": 1.18019380954328, "grad_norm": 0.020690344274044037, "learning_rate": 4.302459687402985e-06, "loss": 0.0008, "step": 144320 }, { "epoch": 1.1802755857218792, "grad_norm": 0.03474351763725281, "learning_rate": 4.301753041378445e-06, "loss": 0.0006, "step": 144330 }, { "epoch": 1.1803573619004784, "grad_norm": 0.024279803037643433, "learning_rate": 4.301046409577763e-06, "loss": 0.001, "step": 144340 }, { "epoch": 1.1804391380790775, "grad_norm": 0.05295726656913757, "learning_rate": 4.300339792015334e-06, "loss": 0.0006, "step": 144350 }, { "epoch": 1.1805209142576767, "grad_norm": 0.04843529313802719, "learning_rate": 4.299633188705552e-06, "loss": 0.0007, "step": 144360 }, { "epoch": 1.1806026904362759, "grad_norm": 0.0007785911438986659, "learning_rate": 4.298926599662812e-06, "loss": 0.0008, "step": 144370 }, { "epoch": 1.180684466614875, "grad_norm": 0.08017182350158691, "learning_rate": 4.298220024901505e-06, "loss": 0.0017, "step": 144380 }, { "epoch": 1.1807662427934742, "grad_norm": 0.02640758454799652, "learning_rate": 4.2975134644360285e-06, "loss": 0.0025, "step": 144390 }, { "epoch": 1.1808480189720734, "grad_norm": 0.04811989888548851, "learning_rate": 4.296806918280773e-06, "loss": 0.0009, "step": 144400 }, { "epoch": 1.1809297951506725, "grad_norm": 0.001926005003042519, "learning_rate": 4.2961003864501325e-06, "loss": 0.0004, "step": 144410 }, { "epoch": 1.1810115713292717, "grad_norm": 0.00928007997572422, "learning_rate": 4.295393868958497e-06, "loss": 0.0008, "step": 144420 }, { "epoch": 1.1810933475078709, "grad_norm": 0.06274738907814026, "learning_rate": 4.294687365820264e-06, "loss": 0.0009, "step": 144430 }, { "epoch": 1.18117512368647, "grad_norm": 0.022031018510460854, "learning_rate": 4.2939808770498215e-06, "loss": 0.0007, "step": 144440 }, { "epoch": 1.1812568998650692, "grad_norm": 0.0737065002322197, "learning_rate": 4.293274402661563e-06, "loss": 0.0008, "step": 144450 }, { "epoch": 1.1813386760436684, "grad_norm": 0.018690437078475952, "learning_rate": 4.292567942669879e-06, "loss": 0.0017, "step": 144460 }, { "epoch": 1.1814204522222678, "grad_norm": 0.08015646785497665, "learning_rate": 4.291861497089161e-06, "loss": 0.0009, "step": 144470 }, { "epoch": 1.1815022284008667, "grad_norm": 0.04387515410780907, "learning_rate": 4.2911550659338e-06, "loss": 0.0015, "step": 144480 }, { "epoch": 1.181584004579466, "grad_norm": 0.12560957670211792, "learning_rate": 4.290448649218187e-06, "loss": 0.0018, "step": 144490 }, { "epoch": 1.181665780758065, "grad_norm": 0.006675476208329201, "learning_rate": 4.289742246956711e-06, "loss": 0.0007, "step": 144500 }, { "epoch": 1.1817475569366644, "grad_norm": 0.04150960221886635, "learning_rate": 4.2890358591637625e-06, "loss": 0.0006, "step": 144510 }, { "epoch": 1.1818293331152636, "grad_norm": 0.019324257969856262, "learning_rate": 4.288329485853732e-06, "loss": 0.0007, "step": 144520 }, { "epoch": 1.1819111092938628, "grad_norm": 0.07635456323623657, "learning_rate": 4.287623127041009e-06, "loss": 0.001, "step": 144530 }, { "epoch": 1.181992885472462, "grad_norm": 0.11093056946992874, "learning_rate": 4.2869167827399795e-06, "loss": 0.0009, "step": 144540 }, { "epoch": 1.1820746616510611, "grad_norm": 0.02918536588549614, "learning_rate": 4.2862104529650364e-06, "loss": 0.0005, "step": 144550 }, { "epoch": 1.1821564378296603, "grad_norm": 0.2435297966003418, "learning_rate": 4.2855041377305676e-06, "loss": 0.0013, "step": 144560 }, { "epoch": 1.1822382140082595, "grad_norm": 0.01454563345760107, "learning_rate": 4.28479783705096e-06, "loss": 0.0009, "step": 144570 }, { "epoch": 1.1823199901868586, "grad_norm": 0.02676597610116005, "learning_rate": 4.2840915509406e-06, "loss": 0.0014, "step": 144580 }, { "epoch": 1.1824017663654578, "grad_norm": 0.045905835926532745, "learning_rate": 4.283385279413879e-06, "loss": 0.0008, "step": 144590 }, { "epoch": 1.182483542544057, "grad_norm": 0.030943429097533226, "learning_rate": 4.282679022485181e-06, "loss": 0.0012, "step": 144600 }, { "epoch": 1.1825653187226561, "grad_norm": 0.0489872470498085, "learning_rate": 4.281972780168896e-06, "loss": 0.0008, "step": 144610 }, { "epoch": 1.1826470949012553, "grad_norm": 0.08734475821256638, "learning_rate": 4.281266552479407e-06, "loss": 0.0007, "step": 144620 }, { "epoch": 1.1827288710798545, "grad_norm": 0.09419604390859604, "learning_rate": 4.280560339431104e-06, "loss": 0.0009, "step": 144630 }, { "epoch": 1.1828106472584536, "grad_norm": 0.022840462625026703, "learning_rate": 4.279854141038371e-06, "loss": 0.0008, "step": 144640 }, { "epoch": 1.1828924234370528, "grad_norm": 0.08131757378578186, "learning_rate": 4.279147957315596e-06, "loss": 0.0019, "step": 144650 }, { "epoch": 1.182974199615652, "grad_norm": 0.05422883480787277, "learning_rate": 4.278441788277159e-06, "loss": 0.0006, "step": 144660 }, { "epoch": 1.1830559757942511, "grad_norm": 0.04108545184135437, "learning_rate": 4.2777356339374526e-06, "loss": 0.0011, "step": 144670 }, { "epoch": 1.1831377519728503, "grad_norm": 0.003478449070826173, "learning_rate": 4.277029494310858e-06, "loss": 0.0008, "step": 144680 }, { "epoch": 1.1832195281514495, "grad_norm": 0.029250994324684143, "learning_rate": 4.276323369411761e-06, "loss": 0.0011, "step": 144690 }, { "epoch": 1.1833013043300487, "grad_norm": 0.02850412018597126, "learning_rate": 4.275617259254543e-06, "loss": 0.0031, "step": 144700 }, { "epoch": 1.1833830805086478, "grad_norm": 0.019936032593250275, "learning_rate": 4.274911163853593e-06, "loss": 0.0017, "step": 144710 }, { "epoch": 1.183464856687247, "grad_norm": 0.025850798934698105, "learning_rate": 4.27420508322329e-06, "loss": 0.0009, "step": 144720 }, { "epoch": 1.1835466328658462, "grad_norm": 0.05017394572496414, "learning_rate": 4.273499017378021e-06, "loss": 0.0009, "step": 144730 }, { "epoch": 1.1836284090444453, "grad_norm": 0.03236646577715874, "learning_rate": 4.272792966332166e-06, "loss": 0.0005, "step": 144740 }, { "epoch": 1.1837101852230445, "grad_norm": 0.03744861111044884, "learning_rate": 4.272086930100109e-06, "loss": 0.0025, "step": 144750 }, { "epoch": 1.1837919614016437, "grad_norm": 0.029814306646585464, "learning_rate": 4.271380908696235e-06, "loss": 0.0018, "step": 144760 }, { "epoch": 1.1838737375802428, "grad_norm": 0.01349601149559021, "learning_rate": 4.270674902134923e-06, "loss": 0.0007, "step": 144770 }, { "epoch": 1.183955513758842, "grad_norm": 0.033764176070690155, "learning_rate": 4.269968910430554e-06, "loss": 0.0009, "step": 144780 }, { "epoch": 1.1840372899374412, "grad_norm": 0.026058891788125038, "learning_rate": 4.269262933597514e-06, "loss": 0.0006, "step": 144790 }, { "epoch": 1.1841190661160403, "grad_norm": 0.01353327464312315, "learning_rate": 4.2685569716501825e-06, "loss": 0.0009, "step": 144800 }, { "epoch": 1.1842008422946395, "grad_norm": 0.024494118988513947, "learning_rate": 4.267851024602939e-06, "loss": 0.0014, "step": 144810 }, { "epoch": 1.1842826184732387, "grad_norm": 0.07675180584192276, "learning_rate": 4.267145092470165e-06, "loss": 0.0009, "step": 144820 }, { "epoch": 1.1843643946518378, "grad_norm": 0.042752861976623535, "learning_rate": 4.266439175266242e-06, "loss": 0.0036, "step": 144830 }, { "epoch": 1.184446170830437, "grad_norm": 0.07890671491622925, "learning_rate": 4.265733273005549e-06, "loss": 0.001, "step": 144840 }, { "epoch": 1.1845279470090362, "grad_norm": 0.042572494596242905, "learning_rate": 4.265027385702467e-06, "loss": 0.0009, "step": 144850 }, { "epoch": 1.1846097231876354, "grad_norm": 0.054948002099990845, "learning_rate": 4.264321513371373e-06, "loss": 0.0013, "step": 144860 }, { "epoch": 1.1846914993662345, "grad_norm": 0.11624905467033386, "learning_rate": 4.263615656026649e-06, "loss": 0.0004, "step": 144870 }, { "epoch": 1.184773275544834, "grad_norm": 0.032093316316604614, "learning_rate": 4.2629098136826715e-06, "loss": 0.0006, "step": 144880 }, { "epoch": 1.1848550517234329, "grad_norm": 0.06708255410194397, "learning_rate": 4.262203986353821e-06, "loss": 0.0009, "step": 144890 }, { "epoch": 1.1849368279020323, "grad_norm": 0.023126564919948578, "learning_rate": 4.261498174054473e-06, "loss": 0.0015, "step": 144900 }, { "epoch": 1.1850186040806312, "grad_norm": 0.026212919503450394, "learning_rate": 4.26079237679901e-06, "loss": 0.0006, "step": 144910 }, { "epoch": 1.1851003802592306, "grad_norm": 0.07887930423021317, "learning_rate": 4.260086594601806e-06, "loss": 0.0002, "step": 144920 }, { "epoch": 1.1851821564378295, "grad_norm": 0.08212082087993622, "learning_rate": 4.259380827477241e-06, "loss": 0.0009, "step": 144930 }, { "epoch": 1.185263932616429, "grad_norm": 0.08697603642940521, "learning_rate": 4.258675075439688e-06, "loss": 0.0012, "step": 144940 }, { "epoch": 1.185345708795028, "grad_norm": 0.04898848384618759, "learning_rate": 4.257969338503528e-06, "loss": 0.001, "step": 144950 }, { "epoch": 1.1854274849736273, "grad_norm": 0.020413191989064217, "learning_rate": 4.257263616683136e-06, "loss": 0.0021, "step": 144960 }, { "epoch": 1.1855092611522264, "grad_norm": 0.049311812967061996, "learning_rate": 4.2565579099928875e-06, "loss": 0.0009, "step": 144970 }, { "epoch": 1.1855910373308256, "grad_norm": 0.03775608912110329, "learning_rate": 4.255852218447158e-06, "loss": 0.0007, "step": 144980 }, { "epoch": 1.1856728135094248, "grad_norm": 0.031664587557315826, "learning_rate": 4.255146542060325e-06, "loss": 0.0014, "step": 144990 }, { "epoch": 1.185754589688024, "grad_norm": 0.05506139621138573, "learning_rate": 4.254440880846761e-06, "loss": 0.0008, "step": 145000 }, { "epoch": 1.185836365866623, "grad_norm": 0.027065090835094452, "learning_rate": 4.253735234820844e-06, "loss": 0.0011, "step": 145010 }, { "epoch": 1.1859181420452223, "grad_norm": 0.022940687835216522, "learning_rate": 4.253029603996943e-06, "loss": 0.0012, "step": 145020 }, { "epoch": 1.1859999182238214, "grad_norm": 0.011139771901071072, "learning_rate": 4.25232398838944e-06, "loss": 0.0006, "step": 145030 }, { "epoch": 1.1860816944024206, "grad_norm": 0.1410217434167862, "learning_rate": 4.251618388012704e-06, "loss": 0.001, "step": 145040 }, { "epoch": 1.1861634705810198, "grad_norm": 0.012826929800212383, "learning_rate": 4.25091280288111e-06, "loss": 0.0009, "step": 145050 }, { "epoch": 1.186245246759619, "grad_norm": 0.02233622409403324, "learning_rate": 4.250207233009029e-06, "loss": 0.0009, "step": 145060 }, { "epoch": 1.1863270229382181, "grad_norm": 0.018027380108833313, "learning_rate": 4.249501678410839e-06, "loss": 0.0005, "step": 145070 }, { "epoch": 1.1864087991168173, "grad_norm": 0.026367468759417534, "learning_rate": 4.248796139100909e-06, "loss": 0.0008, "step": 145080 }, { "epoch": 1.1864905752954165, "grad_norm": 0.02411772310733795, "learning_rate": 4.248090615093613e-06, "loss": 0.0011, "step": 145090 }, { "epoch": 1.1865723514740156, "grad_norm": 0.006850528996437788, "learning_rate": 4.24738510640332e-06, "loss": 0.0026, "step": 145100 }, { "epoch": 1.1866541276526148, "grad_norm": 0.015525514259934425, "learning_rate": 4.246679613044406e-06, "loss": 0.0005, "step": 145110 }, { "epoch": 1.186735903831214, "grad_norm": 0.04600901901721954, "learning_rate": 4.24597413503124e-06, "loss": 0.0007, "step": 145120 }, { "epoch": 1.1868176800098131, "grad_norm": 0.1082691103219986, "learning_rate": 4.245268672378195e-06, "loss": 0.0008, "step": 145130 }, { "epoch": 1.1868994561884123, "grad_norm": 0.03828718140721321, "learning_rate": 4.244563225099637e-06, "loss": 0.0014, "step": 145140 }, { "epoch": 1.1869812323670115, "grad_norm": 0.04597242549061775, "learning_rate": 4.243857793209943e-06, "loss": 0.001, "step": 145150 }, { "epoch": 1.1870630085456106, "grad_norm": 0.03513005003333092, "learning_rate": 4.24315237672348e-06, "loss": 0.001, "step": 145160 }, { "epoch": 1.1871447847242098, "grad_norm": 0.021707000210881233, "learning_rate": 4.2424469756546185e-06, "loss": 0.0012, "step": 145170 }, { "epoch": 1.187226560902809, "grad_norm": 0.06534223258495331, "learning_rate": 4.241741590017727e-06, "loss": 0.0012, "step": 145180 }, { "epoch": 1.1873083370814081, "grad_norm": 0.10753584653139114, "learning_rate": 4.241036219827177e-06, "loss": 0.0013, "step": 145190 }, { "epoch": 1.1873901132600073, "grad_norm": 0.024542558938264847, "learning_rate": 4.2403308650973354e-06, "loss": 0.0011, "step": 145200 }, { "epoch": 1.1874718894386065, "grad_norm": 0.032966598868370056, "learning_rate": 4.239625525842573e-06, "loss": 0.0007, "step": 145210 }, { "epoch": 1.1875536656172057, "grad_norm": 0.051193997263908386, "learning_rate": 4.238920202077253e-06, "loss": 0.001, "step": 145220 }, { "epoch": 1.1876354417958048, "grad_norm": 0.04040907323360443, "learning_rate": 4.23821489381575e-06, "loss": 0.001, "step": 145230 }, { "epoch": 1.187717217974404, "grad_norm": 0.06457521766424179, "learning_rate": 4.237509601072429e-06, "loss": 0.0014, "step": 145240 }, { "epoch": 1.1877989941530032, "grad_norm": 0.010031398385763168, "learning_rate": 4.2368043238616565e-06, "loss": 0.0007, "step": 145250 }, { "epoch": 1.1878807703316023, "grad_norm": 0.010757253505289555, "learning_rate": 4.2360990621977985e-06, "loss": 0.0013, "step": 145260 }, { "epoch": 1.1879625465102015, "grad_norm": 0.017110370099544525, "learning_rate": 4.235393816095226e-06, "loss": 0.0008, "step": 145270 }, { "epoch": 1.1880443226888007, "grad_norm": 0.0019567457493394613, "learning_rate": 4.234688585568302e-06, "loss": 0.0007, "step": 145280 }, { "epoch": 1.1881260988673998, "grad_norm": 0.008995817974209785, "learning_rate": 4.2339833706313945e-06, "loss": 0.0021, "step": 145290 }, { "epoch": 1.188207875045999, "grad_norm": 0.04739195480942726, "learning_rate": 4.2332781712988666e-06, "loss": 0.0012, "step": 145300 }, { "epoch": 1.1882896512245984, "grad_norm": 0.05904148519039154, "learning_rate": 4.2325729875850875e-06, "loss": 0.0026, "step": 145310 }, { "epoch": 1.1883714274031973, "grad_norm": 0.2654015123844147, "learning_rate": 4.23186781950442e-06, "loss": 0.0021, "step": 145320 }, { "epoch": 1.1884532035817967, "grad_norm": 0.02859715186059475, "learning_rate": 4.231162667071229e-06, "loss": 0.0005, "step": 145330 }, { "epoch": 1.1885349797603957, "grad_norm": 0.0033232010900974274, "learning_rate": 4.230457530299879e-06, "loss": 0.0008, "step": 145340 }, { "epoch": 1.188616755938995, "grad_norm": 0.07139959186315536, "learning_rate": 4.229752409204736e-06, "loss": 0.0015, "step": 145350 }, { "epoch": 1.188698532117594, "grad_norm": 0.03362681344151497, "learning_rate": 4.229047303800161e-06, "loss": 0.0014, "step": 145360 }, { "epoch": 1.1887803082961934, "grad_norm": 0.001282032928429544, "learning_rate": 4.228342214100521e-06, "loss": 0.0006, "step": 145370 }, { "epoch": 1.1888620844747926, "grad_norm": 0.00551979523152113, "learning_rate": 4.227637140120173e-06, "loss": 0.001, "step": 145380 }, { "epoch": 1.1889438606533917, "grad_norm": 0.18820586800575256, "learning_rate": 4.226932081873488e-06, "loss": 0.0014, "step": 145390 }, { "epoch": 1.189025636831991, "grad_norm": 0.04776013270020485, "learning_rate": 4.226227039374825e-06, "loss": 0.001, "step": 145400 }, { "epoch": 1.18910741301059, "grad_norm": 0.03033984638750553, "learning_rate": 4.225522012638545e-06, "loss": 0.0017, "step": 145410 }, { "epoch": 1.1891891891891893, "grad_norm": 0.01775350794196129, "learning_rate": 4.224817001679011e-06, "loss": 0.0007, "step": 145420 }, { "epoch": 1.1892709653677884, "grad_norm": 0.03243183717131615, "learning_rate": 4.224112006510586e-06, "loss": 0.001, "step": 145430 }, { "epoch": 1.1893527415463876, "grad_norm": 0.07103554904460907, "learning_rate": 4.223407027147629e-06, "loss": 0.0012, "step": 145440 }, { "epoch": 1.1894345177249868, "grad_norm": 0.09927135705947876, "learning_rate": 4.222702063604503e-06, "loss": 0.0011, "step": 145450 }, { "epoch": 1.189516293903586, "grad_norm": 0.007219564635306597, "learning_rate": 4.2219971158955665e-06, "loss": 0.0036, "step": 145460 }, { "epoch": 1.189598070082185, "grad_norm": 0.053264714777469635, "learning_rate": 4.221292184035182e-06, "loss": 0.0021, "step": 145470 }, { "epoch": 1.1896798462607843, "grad_norm": 0.055940620601177216, "learning_rate": 4.220587268037709e-06, "loss": 0.0007, "step": 145480 }, { "epoch": 1.1897616224393834, "grad_norm": 0.05900635942816734, "learning_rate": 4.219882367917506e-06, "loss": 0.0008, "step": 145490 }, { "epoch": 1.1898433986179826, "grad_norm": 0.02528129331767559, "learning_rate": 4.2191774836889325e-06, "loss": 0.0008, "step": 145500 }, { "epoch": 1.1899251747965818, "grad_norm": 0.029990781098604202, "learning_rate": 4.21847261536635e-06, "loss": 0.001, "step": 145510 }, { "epoch": 1.190006950975181, "grad_norm": 0.019860338419675827, "learning_rate": 4.217767762964115e-06, "loss": 0.001, "step": 145520 }, { "epoch": 1.1900887271537801, "grad_norm": 0.0571306049823761, "learning_rate": 4.217062926496587e-06, "loss": 0.0007, "step": 145530 }, { "epoch": 1.1901705033323793, "grad_norm": 0.054115623235702515, "learning_rate": 4.216358105978122e-06, "loss": 0.0013, "step": 145540 }, { "epoch": 1.1902522795109785, "grad_norm": 0.03361983597278595, "learning_rate": 4.215653301423081e-06, "loss": 0.0006, "step": 145550 }, { "epoch": 1.1903340556895776, "grad_norm": 0.06975848227739334, "learning_rate": 4.21494851284582e-06, "loss": 0.0007, "step": 145560 }, { "epoch": 1.1904158318681768, "grad_norm": 0.007976946420967579, "learning_rate": 4.214243740260696e-06, "loss": 0.0009, "step": 145570 }, { "epoch": 1.190497608046776, "grad_norm": 0.037908557802438736, "learning_rate": 4.213538983682064e-06, "loss": 0.0018, "step": 145580 }, { "epoch": 1.1905793842253751, "grad_norm": 0.017183970659971237, "learning_rate": 4.212834243124283e-06, "loss": 0.0005, "step": 145590 }, { "epoch": 1.1906611604039743, "grad_norm": 0.10986895859241486, "learning_rate": 4.212129518601709e-06, "loss": 0.0006, "step": 145600 }, { "epoch": 1.1907429365825735, "grad_norm": 0.15384429693222046, "learning_rate": 4.211424810128697e-06, "loss": 0.0018, "step": 145610 }, { "epoch": 1.1908247127611726, "grad_norm": 0.06832067668437958, "learning_rate": 4.2107201177196e-06, "loss": 0.0008, "step": 145620 }, { "epoch": 1.1909064889397718, "grad_norm": 0.05215725302696228, "learning_rate": 4.210015441388779e-06, "loss": 0.001, "step": 145630 }, { "epoch": 1.190988265118371, "grad_norm": 0.009191390126943588, "learning_rate": 4.209310781150584e-06, "loss": 0.0006, "step": 145640 }, { "epoch": 1.1910700412969701, "grad_norm": 0.020087335258722305, "learning_rate": 4.208606137019372e-06, "loss": 0.0007, "step": 145650 }, { "epoch": 1.1911518174755693, "grad_norm": 0.04814765602350235, "learning_rate": 4.2079015090094955e-06, "loss": 0.001, "step": 145660 }, { "epoch": 1.1912335936541685, "grad_norm": 0.02140211872756481, "learning_rate": 4.2071968971353095e-06, "loss": 0.0007, "step": 145670 }, { "epoch": 1.1913153698327676, "grad_norm": 0.0563330240547657, "learning_rate": 4.206492301411168e-06, "loss": 0.0012, "step": 145680 }, { "epoch": 1.1913971460113668, "grad_norm": 0.03868209198117256, "learning_rate": 4.2057877218514226e-06, "loss": 0.001, "step": 145690 }, { "epoch": 1.191478922189966, "grad_norm": 0.040293145924806595, "learning_rate": 4.205083158470426e-06, "loss": 0.0006, "step": 145700 }, { "epoch": 1.1915606983685652, "grad_norm": 0.04116065055131912, "learning_rate": 4.2043786112825324e-06, "loss": 0.0019, "step": 145710 }, { "epoch": 1.1916424745471643, "grad_norm": 0.026002060621976852, "learning_rate": 4.2036740803020935e-06, "loss": 0.0006, "step": 145720 }, { "epoch": 1.1917242507257635, "grad_norm": 0.0762784406542778, "learning_rate": 4.202969565543461e-06, "loss": 0.0013, "step": 145730 }, { "epoch": 1.1918060269043629, "grad_norm": 0.023260317742824554, "learning_rate": 4.2022650670209835e-06, "loss": 0.0011, "step": 145740 }, { "epoch": 1.1918878030829618, "grad_norm": 0.04899406060576439, "learning_rate": 4.2015605847490176e-06, "loss": 0.0005, "step": 145750 }, { "epoch": 1.1919695792615612, "grad_norm": 0.03899479657411575, "learning_rate": 4.200856118741912e-06, "loss": 0.0008, "step": 145760 }, { "epoch": 1.1920513554401602, "grad_norm": 0.04516855627298355, "learning_rate": 4.200151669014017e-06, "loss": 0.0008, "step": 145770 }, { "epoch": 1.1921331316187596, "grad_norm": 0.10640444606542587, "learning_rate": 4.199447235579681e-06, "loss": 0.0009, "step": 145780 }, { "epoch": 1.1922149077973587, "grad_norm": 0.06307888776063919, "learning_rate": 4.198742818453256e-06, "loss": 0.0013, "step": 145790 }, { "epoch": 1.192296683975958, "grad_norm": 0.06569483131170273, "learning_rate": 4.198038417649092e-06, "loss": 0.0012, "step": 145800 }, { "epoch": 1.192378460154557, "grad_norm": 0.01797853410243988, "learning_rate": 4.1973340331815375e-06, "loss": 0.0018, "step": 145810 }, { "epoch": 1.1924602363331562, "grad_norm": 0.06240873783826828, "learning_rate": 4.1966296650649404e-06, "loss": 0.001, "step": 145820 }, { "epoch": 1.1925420125117554, "grad_norm": 0.03525245934724808, "learning_rate": 4.195925313313649e-06, "loss": 0.0009, "step": 145830 }, { "epoch": 1.1926237886903546, "grad_norm": 0.0694446712732315, "learning_rate": 4.1952209779420136e-06, "loss": 0.0032, "step": 145840 }, { "epoch": 1.1927055648689537, "grad_norm": 0.04069620743393898, "learning_rate": 4.194516658964381e-06, "loss": 0.0009, "step": 145850 }, { "epoch": 1.192787341047553, "grad_norm": 0.09984985738992691, "learning_rate": 4.193812356395099e-06, "loss": 0.0011, "step": 145860 }, { "epoch": 1.192869117226152, "grad_norm": 0.044037915766239166, "learning_rate": 4.193108070248513e-06, "loss": 0.001, "step": 145870 }, { "epoch": 1.1929508934047512, "grad_norm": 0.06775441020727158, "learning_rate": 4.192403800538973e-06, "loss": 0.0008, "step": 145880 }, { "epoch": 1.1930326695833504, "grad_norm": 0.062175750732421875, "learning_rate": 4.191699547280825e-06, "loss": 0.0015, "step": 145890 }, { "epoch": 1.1931144457619496, "grad_norm": 0.03924699127674103, "learning_rate": 4.190995310488413e-06, "loss": 0.0006, "step": 145900 }, { "epoch": 1.1931962219405488, "grad_norm": 0.0018583034398034215, "learning_rate": 4.190291090176084e-06, "loss": 0.0013, "step": 145910 }, { "epoch": 1.193277998119148, "grad_norm": 0.02303417958319187, "learning_rate": 4.189586886358184e-06, "loss": 0.0013, "step": 145920 }, { "epoch": 1.193359774297747, "grad_norm": 0.04280995577573776, "learning_rate": 4.188882699049058e-06, "loss": 0.0007, "step": 145930 }, { "epoch": 1.1934415504763463, "grad_norm": 0.06724990904331207, "learning_rate": 4.188178528263051e-06, "loss": 0.001, "step": 145940 }, { "epoch": 1.1935233266549454, "grad_norm": 0.018452830612659454, "learning_rate": 4.187474374014506e-06, "loss": 0.0007, "step": 145950 }, { "epoch": 1.1936051028335446, "grad_norm": 0.02808927185833454, "learning_rate": 4.18677023631777e-06, "loss": 0.0007, "step": 145960 }, { "epoch": 1.1936868790121438, "grad_norm": 0.06555897742509842, "learning_rate": 4.186066115187185e-06, "loss": 0.0009, "step": 145970 }, { "epoch": 1.193768655190743, "grad_norm": 0.029030120000243187, "learning_rate": 4.185362010637094e-06, "loss": 0.001, "step": 145980 }, { "epoch": 1.193850431369342, "grad_norm": 0.014126639813184738, "learning_rate": 4.18465792268184e-06, "loss": 0.001, "step": 145990 }, { "epoch": 1.1939322075479413, "grad_norm": 0.07619921118021011, "learning_rate": 4.183953851335768e-06, "loss": 0.0017, "step": 146000 }, { "epoch": 1.1940139837265404, "grad_norm": 0.039398252964019775, "learning_rate": 4.18324979661322e-06, "loss": 0.0008, "step": 146010 }, { "epoch": 1.1940957599051396, "grad_norm": 0.06389209628105164, "learning_rate": 4.182545758528538e-06, "loss": 0.0007, "step": 146020 }, { "epoch": 1.1941775360837388, "grad_norm": 0.06870179623365402, "learning_rate": 4.181841737096062e-06, "loss": 0.0009, "step": 146030 }, { "epoch": 1.194259312262338, "grad_norm": 0.04520011693239212, "learning_rate": 4.181137732330135e-06, "loss": 0.0014, "step": 146040 }, { "epoch": 1.1943410884409371, "grad_norm": 0.049252625554800034, "learning_rate": 4.1804337442450986e-06, "loss": 0.0007, "step": 146050 }, { "epoch": 1.1944228646195363, "grad_norm": 0.11111511290073395, "learning_rate": 4.179729772855293e-06, "loss": 0.0008, "step": 146060 }, { "epoch": 1.1945046407981355, "grad_norm": 0.01538731250911951, "learning_rate": 4.179025818175058e-06, "loss": 0.0006, "step": 146070 }, { "epoch": 1.1945864169767346, "grad_norm": 0.0757395476102829, "learning_rate": 4.178321880218735e-06, "loss": 0.0018, "step": 146080 }, { "epoch": 1.1946681931553338, "grad_norm": 0.02535436488687992, "learning_rate": 4.177617959000664e-06, "loss": 0.0006, "step": 146090 }, { "epoch": 1.194749969333933, "grad_norm": 0.02828376553952694, "learning_rate": 4.176914054535183e-06, "loss": 0.0014, "step": 146100 }, { "epoch": 1.1948317455125321, "grad_norm": 0.038290638476610184, "learning_rate": 4.1762101668366294e-06, "loss": 0.0009, "step": 146110 }, { "epoch": 1.1949135216911313, "grad_norm": 0.014340892434120178, "learning_rate": 4.1755062959193464e-06, "loss": 0.0009, "step": 146120 }, { "epoch": 1.1949952978697305, "grad_norm": 0.002475486369803548, "learning_rate": 4.1748024417976705e-06, "loss": 0.0006, "step": 146130 }, { "epoch": 1.1950770740483296, "grad_norm": 0.03825584426522255, "learning_rate": 4.17409860448594e-06, "loss": 0.0015, "step": 146140 }, { "epoch": 1.1951588502269288, "grad_norm": 0.02848096936941147, "learning_rate": 4.173394783998491e-06, "loss": 0.0011, "step": 146150 }, { "epoch": 1.195240626405528, "grad_norm": 0.02777412347495556, "learning_rate": 4.172690980349663e-06, "loss": 0.0016, "step": 146160 }, { "epoch": 1.1953224025841274, "grad_norm": 0.13385750353336334, "learning_rate": 4.171987193553792e-06, "loss": 0.0009, "step": 146170 }, { "epoch": 1.1954041787627263, "grad_norm": 0.039824891835451126, "learning_rate": 4.171283423625215e-06, "loss": 0.0008, "step": 146180 }, { "epoch": 1.1954859549413257, "grad_norm": 0.06615114957094193, "learning_rate": 4.170579670578268e-06, "loss": 0.0063, "step": 146190 }, { "epoch": 1.1955677311199246, "grad_norm": 0.009202306158840656, "learning_rate": 4.169875934427287e-06, "loss": 0.0006, "step": 146200 }, { "epoch": 1.195649507298524, "grad_norm": 0.08088129758834839, "learning_rate": 4.16917221518661e-06, "loss": 0.0021, "step": 146210 }, { "epoch": 1.1957312834771232, "grad_norm": 0.16271673142910004, "learning_rate": 4.168468512870569e-06, "loss": 0.0016, "step": 146220 }, { "epoch": 1.1958130596557224, "grad_norm": 0.03877749666571617, "learning_rate": 4.167764827493498e-06, "loss": 0.0014, "step": 146230 }, { "epoch": 1.1958948358343215, "grad_norm": 0.014303475618362427, "learning_rate": 4.167061159069736e-06, "loss": 0.0005, "step": 146240 }, { "epoch": 1.1959766120129207, "grad_norm": 0.0017396195326000452, "learning_rate": 4.166357507613616e-06, "loss": 0.001, "step": 146250 }, { "epoch": 1.1960583881915199, "grad_norm": 0.08488626033067703, "learning_rate": 4.16565387313947e-06, "loss": 0.0009, "step": 146260 }, { "epoch": 1.196140164370119, "grad_norm": 0.01851031556725502, "learning_rate": 4.164950255661633e-06, "loss": 0.0017, "step": 146270 }, { "epoch": 1.1962219405487182, "grad_norm": 0.04800093546509743, "learning_rate": 4.164246655194438e-06, "loss": 0.0012, "step": 146280 }, { "epoch": 1.1963037167273174, "grad_norm": 0.07955839484930038, "learning_rate": 4.163543071752219e-06, "loss": 0.001, "step": 146290 }, { "epoch": 1.1963854929059166, "grad_norm": 0.02686237171292305, "learning_rate": 4.162839505349308e-06, "loss": 0.0009, "step": 146300 }, { "epoch": 1.1964672690845157, "grad_norm": 0.012692011892795563, "learning_rate": 4.162135956000034e-06, "loss": 0.0007, "step": 146310 }, { "epoch": 1.196549045263115, "grad_norm": 0.050506774336099625, "learning_rate": 4.161432423718734e-06, "loss": 0.0016, "step": 146320 }, { "epoch": 1.196630821441714, "grad_norm": 0.03960064798593521, "learning_rate": 4.160728908519736e-06, "loss": 0.0009, "step": 146330 }, { "epoch": 1.1967125976203132, "grad_norm": 0.012595739215612411, "learning_rate": 4.160025410417373e-06, "loss": 0.0013, "step": 146340 }, { "epoch": 1.1967943737989124, "grad_norm": 0.06940296292304993, "learning_rate": 4.159321929425973e-06, "loss": 0.0015, "step": 146350 }, { "epoch": 1.1968761499775116, "grad_norm": 0.013144209049642086, "learning_rate": 4.158618465559871e-06, "loss": 0.0011, "step": 146360 }, { "epoch": 1.1969579261561107, "grad_norm": 0.1489051878452301, "learning_rate": 4.1579150188333945e-06, "loss": 0.0018, "step": 146370 }, { "epoch": 1.19703970233471, "grad_norm": 0.03841286525130272, "learning_rate": 4.157211589260874e-06, "loss": 0.0007, "step": 146380 }, { "epoch": 1.197121478513309, "grad_norm": 0.03778215870261192, "learning_rate": 4.1565081768566365e-06, "loss": 0.001, "step": 146390 }, { "epoch": 1.1972032546919082, "grad_norm": 0.045221321284770966, "learning_rate": 4.155804781635014e-06, "loss": 0.0012, "step": 146400 }, { "epoch": 1.1972850308705074, "grad_norm": 0.006342183332890272, "learning_rate": 4.155101403610335e-06, "loss": 0.0009, "step": 146410 }, { "epoch": 1.1973668070491066, "grad_norm": 0.05770581215620041, "learning_rate": 4.1543980427969274e-06, "loss": 0.0007, "step": 146420 }, { "epoch": 1.1974485832277058, "grad_norm": 0.13611766695976257, "learning_rate": 4.153694699209117e-06, "loss": 0.001, "step": 146430 }, { "epoch": 1.197530359406305, "grad_norm": 0.04614054784178734, "learning_rate": 4.1529913728612355e-06, "loss": 0.0014, "step": 146440 }, { "epoch": 1.197612135584904, "grad_norm": 0.09159992635250092, "learning_rate": 4.152288063767608e-06, "loss": 0.0015, "step": 146450 }, { "epoch": 1.1976939117635033, "grad_norm": 0.02535044215619564, "learning_rate": 4.1515847719425614e-06, "loss": 0.0012, "step": 146460 }, { "epoch": 1.1977756879421024, "grad_norm": 0.03991614654660225, "learning_rate": 4.150881497400421e-06, "loss": 0.0008, "step": 146470 }, { "epoch": 1.1978574641207016, "grad_norm": 0.0911870151758194, "learning_rate": 4.1501782401555165e-06, "loss": 0.0013, "step": 146480 }, { "epoch": 1.1979392402993008, "grad_norm": 0.0598541684448719, "learning_rate": 4.1494750002221714e-06, "loss": 0.0008, "step": 146490 }, { "epoch": 1.1980210164779, "grad_norm": 0.02086332067847252, "learning_rate": 4.1487717776147126e-06, "loss": 0.0014, "step": 146500 }, { "epoch": 1.198102792656499, "grad_norm": 0.02875855751335621, "learning_rate": 4.148068572347464e-06, "loss": 0.0015, "step": 146510 }, { "epoch": 1.1981845688350983, "grad_norm": 0.017822004854679108, "learning_rate": 4.147365384434752e-06, "loss": 0.0005, "step": 146520 }, { "epoch": 1.1982663450136974, "grad_norm": 0.036848533898591995, "learning_rate": 4.1466622138909e-06, "loss": 0.0014, "step": 146530 }, { "epoch": 1.1983481211922966, "grad_norm": 0.004684942774474621, "learning_rate": 4.145959060730231e-06, "loss": 0.0004, "step": 146540 }, { "epoch": 1.1984298973708958, "grad_norm": 0.07150838524103165, "learning_rate": 4.145255924967071e-06, "loss": 0.0008, "step": 146550 }, { "epoch": 1.198511673549495, "grad_norm": 0.025282392278313637, "learning_rate": 4.144552806615742e-06, "loss": 0.0013, "step": 146560 }, { "epoch": 1.1985934497280941, "grad_norm": 0.017613500356674194, "learning_rate": 4.143849705690569e-06, "loss": 0.0009, "step": 146570 }, { "epoch": 1.1986752259066933, "grad_norm": 0.030858635902404785, "learning_rate": 4.143146622205872e-06, "loss": 0.0027, "step": 146580 }, { "epoch": 1.1987570020852925, "grad_norm": 0.03860729932785034, "learning_rate": 4.142443556175974e-06, "loss": 0.0012, "step": 146590 }, { "epoch": 1.1988387782638918, "grad_norm": 0.05073628947138786, "learning_rate": 4.141740507615199e-06, "loss": 0.0019, "step": 146600 }, { "epoch": 1.1989205544424908, "grad_norm": 0.00864553451538086, "learning_rate": 4.141037476537868e-06, "loss": 0.0008, "step": 146610 }, { "epoch": 1.1990023306210902, "grad_norm": 0.015823030844330788, "learning_rate": 4.1403344629583024e-06, "loss": 0.0011, "step": 146620 }, { "epoch": 1.1990841067996891, "grad_norm": 0.009817196056246758, "learning_rate": 4.139631466890821e-06, "loss": 0.0006, "step": 146630 }, { "epoch": 1.1991658829782885, "grad_norm": 0.04140429198741913, "learning_rate": 4.138928488349747e-06, "loss": 0.0013, "step": 146640 }, { "epoch": 1.1992476591568877, "grad_norm": 0.06677097827196121, "learning_rate": 4.138225527349399e-06, "loss": 0.0008, "step": 146650 }, { "epoch": 1.1993294353354869, "grad_norm": 0.005114049185067415, "learning_rate": 4.1375225839040985e-06, "loss": 0.0011, "step": 146660 }, { "epoch": 1.199411211514086, "grad_norm": 0.06601561605930328, "learning_rate": 4.136819658028162e-06, "loss": 0.0008, "step": 146670 }, { "epoch": 1.1994929876926852, "grad_norm": 0.036705851554870605, "learning_rate": 4.136116749735912e-06, "loss": 0.0009, "step": 146680 }, { "epoch": 1.1995747638712844, "grad_norm": 0.7497487664222717, "learning_rate": 4.135413859041667e-06, "loss": 0.001, "step": 146690 }, { "epoch": 1.1996565400498835, "grad_norm": 0.07301872223615646, "learning_rate": 4.134710985959743e-06, "loss": 0.0009, "step": 146700 }, { "epoch": 1.1997383162284827, "grad_norm": 0.02477007545530796, "learning_rate": 4.134008130504459e-06, "loss": 0.0014, "step": 146710 }, { "epoch": 1.1998200924070819, "grad_norm": 0.020569952204823494, "learning_rate": 4.133305292690134e-06, "loss": 0.0017, "step": 146720 }, { "epoch": 1.199901868585681, "grad_norm": 0.017336204648017883, "learning_rate": 4.132602472531085e-06, "loss": 0.0008, "step": 146730 }, { "epoch": 1.1999836447642802, "grad_norm": 0.06788507103919983, "learning_rate": 4.131899670041629e-06, "loss": 0.0009, "step": 146740 }, { "epoch": 1.2000654209428794, "grad_norm": 0.0443221740424633, "learning_rate": 4.131196885236082e-06, "loss": 0.0007, "step": 146750 }, { "epoch": 1.2001471971214785, "grad_norm": 0.021470487117767334, "learning_rate": 4.130494118128761e-06, "loss": 0.0007, "step": 146760 }, { "epoch": 1.2002289733000777, "grad_norm": 0.028605137020349503, "learning_rate": 4.129791368733981e-06, "loss": 0.001, "step": 146770 }, { "epoch": 1.2003107494786769, "grad_norm": 0.05666901543736458, "learning_rate": 4.1290886370660586e-06, "loss": 0.0005, "step": 146780 }, { "epoch": 1.200392525657276, "grad_norm": 0.20953968167304993, "learning_rate": 4.128385923139307e-06, "loss": 0.0009, "step": 146790 }, { "epoch": 1.2004743018358752, "grad_norm": 0.004576048813760281, "learning_rate": 4.1276832269680445e-06, "loss": 0.0006, "step": 146800 }, { "epoch": 1.2005560780144744, "grad_norm": 0.07221746444702148, "learning_rate": 4.1269805485665835e-06, "loss": 0.0009, "step": 146810 }, { "epoch": 1.2006378541930736, "grad_norm": 0.18955886363983154, "learning_rate": 4.126277887949238e-06, "loss": 0.0012, "step": 146820 }, { "epoch": 1.2007196303716727, "grad_norm": 0.03353220969438553, "learning_rate": 4.125575245130321e-06, "loss": 0.001, "step": 146830 }, { "epoch": 1.200801406550272, "grad_norm": 0.10946479439735413, "learning_rate": 4.124872620124148e-06, "loss": 0.0012, "step": 146840 }, { "epoch": 1.200883182728871, "grad_norm": 0.09000899642705917, "learning_rate": 4.124170012945032e-06, "loss": 0.0019, "step": 146850 }, { "epoch": 1.2009649589074702, "grad_norm": 0.024615947157144547, "learning_rate": 4.123467423607284e-06, "loss": 0.0015, "step": 146860 }, { "epoch": 1.2010467350860694, "grad_norm": 0.0253690704703331, "learning_rate": 4.122764852125216e-06, "loss": 0.0014, "step": 146870 }, { "epoch": 1.2011285112646686, "grad_norm": 0.04060427471995354, "learning_rate": 4.122062298513143e-06, "loss": 0.001, "step": 146880 }, { "epoch": 1.2012102874432677, "grad_norm": 0.04083503410220146, "learning_rate": 4.121359762785374e-06, "loss": 0.0008, "step": 146890 }, { "epoch": 1.201292063621867, "grad_norm": 0.019413841888308525, "learning_rate": 4.12065724495622e-06, "loss": 0.0003, "step": 146900 }, { "epoch": 1.201373839800466, "grad_norm": 0.010772142559289932, "learning_rate": 4.119954745039993e-06, "loss": 0.0006, "step": 146910 }, { "epoch": 1.2014556159790653, "grad_norm": 0.03559397533535957, "learning_rate": 4.119252263051003e-06, "loss": 0.0013, "step": 146920 }, { "epoch": 1.2015373921576644, "grad_norm": 0.008853580802679062, "learning_rate": 4.118549799003561e-06, "loss": 0.0009, "step": 146930 }, { "epoch": 1.2016191683362636, "grad_norm": 0.02029304765164852, "learning_rate": 4.117847352911977e-06, "loss": 0.0005, "step": 146940 }, { "epoch": 1.2017009445148628, "grad_norm": 0.031822171062231064, "learning_rate": 4.117144924790556e-06, "loss": 0.0014, "step": 146950 }, { "epoch": 1.201782720693462, "grad_norm": 0.03809177502989769, "learning_rate": 4.116442514653613e-06, "loss": 0.0006, "step": 146960 }, { "epoch": 1.201864496872061, "grad_norm": 0.03385345637798309, "learning_rate": 4.115740122515454e-06, "loss": 0.0011, "step": 146970 }, { "epoch": 1.2019462730506603, "grad_norm": 0.058096520602703094, "learning_rate": 4.115037748390388e-06, "loss": 0.0016, "step": 146980 }, { "epoch": 1.2020280492292594, "grad_norm": 0.08916744589805603, "learning_rate": 4.114335392292721e-06, "loss": 0.0012, "step": 146990 }, { "epoch": 1.2021098254078586, "grad_norm": 0.10759482532739639, "learning_rate": 4.113633054236762e-06, "loss": 0.001, "step": 147000 }, { "epoch": 1.2021916015864578, "grad_norm": 0.006656798534095287, "learning_rate": 4.112930734236819e-06, "loss": 0.0007, "step": 147010 }, { "epoch": 1.202273377765057, "grad_norm": 0.0016915352316573262, "learning_rate": 4.112228432307199e-06, "loss": 0.0032, "step": 147020 }, { "epoch": 1.2023551539436563, "grad_norm": 0.04471168294548988, "learning_rate": 4.1115261484622055e-06, "loss": 0.0008, "step": 147030 }, { "epoch": 1.2024369301222553, "grad_norm": 0.1384229063987732, "learning_rate": 4.110823882716148e-06, "loss": 0.0012, "step": 147040 }, { "epoch": 1.2025187063008547, "grad_norm": 0.0464649423956871, "learning_rate": 4.11012163508333e-06, "loss": 0.0006, "step": 147050 }, { "epoch": 1.2026004824794536, "grad_norm": 0.023012040182948112, "learning_rate": 4.109419405578058e-06, "loss": 0.0008, "step": 147060 }, { "epoch": 1.202682258658053, "grad_norm": 0.06115804985165596, "learning_rate": 4.108717194214634e-06, "loss": 0.0007, "step": 147070 }, { "epoch": 1.2027640348366522, "grad_norm": 0.03818007931113243, "learning_rate": 4.108015001007368e-06, "loss": 0.0004, "step": 147080 }, { "epoch": 1.2028458110152513, "grad_norm": 0.0024206223897635937, "learning_rate": 4.1073128259705616e-06, "loss": 0.0013, "step": 147090 }, { "epoch": 1.2029275871938505, "grad_norm": 0.0022344179451465607, "learning_rate": 4.10661066911852e-06, "loss": 0.0006, "step": 147100 }, { "epoch": 1.2030093633724497, "grad_norm": 0.026227479800581932, "learning_rate": 4.105908530465541e-06, "loss": 0.0009, "step": 147110 }, { "epoch": 1.2030911395510488, "grad_norm": 0.03571507707238197, "learning_rate": 4.105206410025935e-06, "loss": 0.0014, "step": 147120 }, { "epoch": 1.203172915729648, "grad_norm": 0.012453858740627766, "learning_rate": 4.104504307814001e-06, "loss": 0.0011, "step": 147130 }, { "epoch": 1.2032546919082472, "grad_norm": 0.033648133277893066, "learning_rate": 4.103802223844043e-06, "loss": 0.0009, "step": 147140 }, { "epoch": 1.2033364680868464, "grad_norm": 0.11903161555528641, "learning_rate": 4.10310015813036e-06, "loss": 0.0011, "step": 147150 }, { "epoch": 1.2034182442654455, "grad_norm": 0.05780896544456482, "learning_rate": 4.102398110687257e-06, "loss": 0.001, "step": 147160 }, { "epoch": 1.2035000204440447, "grad_norm": 0.06776542216539383, "learning_rate": 4.101696081529035e-06, "loss": 0.0006, "step": 147170 }, { "epoch": 1.2035817966226439, "grad_norm": 0.16944319009780884, "learning_rate": 4.100994070669994e-06, "loss": 0.0018, "step": 147180 }, { "epoch": 1.203663572801243, "grad_norm": 0.16981790959835052, "learning_rate": 4.100292078124434e-06, "loss": 0.0009, "step": 147190 }, { "epoch": 1.2037453489798422, "grad_norm": 0.10867747664451599, "learning_rate": 4.099590103906653e-06, "loss": 0.0013, "step": 147200 }, { "epoch": 1.2038271251584414, "grad_norm": 0.04652769863605499, "learning_rate": 4.098888148030956e-06, "loss": 0.001, "step": 147210 }, { "epoch": 1.2039089013370405, "grad_norm": 0.06039348617196083, "learning_rate": 4.0981862105116415e-06, "loss": 0.0008, "step": 147220 }, { "epoch": 1.2039906775156397, "grad_norm": 0.03286559134721756, "learning_rate": 4.097484291363005e-06, "loss": 0.0007, "step": 147230 }, { "epoch": 1.2040724536942389, "grad_norm": 0.03671595826745033, "learning_rate": 4.096782390599346e-06, "loss": 0.0011, "step": 147240 }, { "epoch": 1.204154229872838, "grad_norm": 0.013634337112307549, "learning_rate": 4.096080508234965e-06, "loss": 0.0007, "step": 147250 }, { "epoch": 1.2042360060514372, "grad_norm": 0.011460749432444572, "learning_rate": 4.09537864428416e-06, "loss": 0.0008, "step": 147260 }, { "epoch": 1.2043177822300364, "grad_norm": 0.039263442158699036, "learning_rate": 4.094676798761227e-06, "loss": 0.0007, "step": 147270 }, { "epoch": 1.2043995584086356, "grad_norm": 0.19761522114276886, "learning_rate": 4.0939749716804614e-06, "loss": 0.0012, "step": 147280 }, { "epoch": 1.2044813345872347, "grad_norm": 0.039313748478889465, "learning_rate": 4.093273163056164e-06, "loss": 0.001, "step": 147290 }, { "epoch": 1.2045631107658339, "grad_norm": 0.08216935396194458, "learning_rate": 4.092571372902628e-06, "loss": 0.0017, "step": 147300 }, { "epoch": 1.204644886944433, "grad_norm": 0.025177808478474617, "learning_rate": 4.091869601234153e-06, "loss": 0.0016, "step": 147310 }, { "epoch": 1.2047266631230322, "grad_norm": 0.008981752209365368, "learning_rate": 4.0911678480650286e-06, "loss": 0.0011, "step": 147320 }, { "epoch": 1.2048084393016314, "grad_norm": 0.034487269818782806, "learning_rate": 4.090466113409556e-06, "loss": 0.0006, "step": 147330 }, { "epoch": 1.2048902154802306, "grad_norm": 0.02677113749086857, "learning_rate": 4.08976439728203e-06, "loss": 0.0006, "step": 147340 }, { "epoch": 1.2049719916588297, "grad_norm": 0.08883321285247803, "learning_rate": 4.089062699696741e-06, "loss": 0.0014, "step": 147350 }, { "epoch": 1.205053767837429, "grad_norm": 0.03251436725258827, "learning_rate": 4.088361020667984e-06, "loss": 0.0011, "step": 147360 }, { "epoch": 1.205135544016028, "grad_norm": 0.013265935704112053, "learning_rate": 4.0876593602100555e-06, "loss": 0.0006, "step": 147370 }, { "epoch": 1.2052173201946272, "grad_norm": 0.026653170585632324, "learning_rate": 4.086957718337248e-06, "loss": 0.0008, "step": 147380 }, { "epoch": 1.2052990963732264, "grad_norm": 0.17200538516044617, "learning_rate": 4.086256095063853e-06, "loss": 0.0029, "step": 147390 }, { "epoch": 1.2053808725518256, "grad_norm": 0.02109461836516857, "learning_rate": 4.0855544904041645e-06, "loss": 0.0007, "step": 147400 }, { "epoch": 1.2054626487304247, "grad_norm": 0.03624441474676132, "learning_rate": 4.084852904372474e-06, "loss": 0.0011, "step": 147410 }, { "epoch": 1.205544424909024, "grad_norm": 0.002817028434947133, "learning_rate": 4.084151336983075e-06, "loss": 0.0012, "step": 147420 }, { "epoch": 1.205626201087623, "grad_norm": 0.04074260592460632, "learning_rate": 4.083449788250256e-06, "loss": 0.001, "step": 147430 }, { "epoch": 1.2057079772662223, "grad_norm": 0.016713447868824005, "learning_rate": 4.082748258188309e-06, "loss": 0.0007, "step": 147440 }, { "epoch": 1.2057897534448214, "grad_norm": 0.006920781452208757, "learning_rate": 4.082046746811528e-06, "loss": 0.0008, "step": 147450 }, { "epoch": 1.2058715296234208, "grad_norm": 0.06982516497373581, "learning_rate": 4.081345254134202e-06, "loss": 0.0008, "step": 147460 }, { "epoch": 1.2059533058020198, "grad_norm": 0.0333966463804245, "learning_rate": 4.080643780170617e-06, "loss": 0.001, "step": 147470 }, { "epoch": 1.2060350819806192, "grad_norm": 0.1318802535533905, "learning_rate": 4.0799423249350646e-06, "loss": 0.0015, "step": 147480 }, { "epoch": 1.206116858159218, "grad_norm": 0.08897560089826584, "learning_rate": 4.079240888441837e-06, "loss": 0.0008, "step": 147490 }, { "epoch": 1.2061986343378175, "grad_norm": 0.011492645367980003, "learning_rate": 4.078539470705221e-06, "loss": 0.0006, "step": 147500 }, { "epoch": 1.2062804105164167, "grad_norm": 0.018158089369535446, "learning_rate": 4.077838071739505e-06, "loss": 0.0013, "step": 147510 }, { "epoch": 1.2063621866950158, "grad_norm": 0.028568275272846222, "learning_rate": 4.077136691558976e-06, "loss": 0.0009, "step": 147520 }, { "epoch": 1.206443962873615, "grad_norm": 0.02474774420261383, "learning_rate": 4.076435330177923e-06, "loss": 0.0012, "step": 147530 }, { "epoch": 1.2065257390522142, "grad_norm": 0.09377119690179825, "learning_rate": 4.075733987610634e-06, "loss": 0.0021, "step": 147540 }, { "epoch": 1.2066075152308133, "grad_norm": 0.012706766836345196, "learning_rate": 4.075032663871395e-06, "loss": 0.0007, "step": 147550 }, { "epoch": 1.2066892914094125, "grad_norm": 0.03535221517086029, "learning_rate": 4.07433135897449e-06, "loss": 0.0016, "step": 147560 }, { "epoch": 1.2067710675880117, "grad_norm": 0.020115651190280914, "learning_rate": 4.07363007293421e-06, "loss": 0.0009, "step": 147570 }, { "epoch": 1.2068528437666108, "grad_norm": 0.09045685082674026, "learning_rate": 4.072928805764838e-06, "loss": 0.0022, "step": 147580 }, { "epoch": 1.20693461994521, "grad_norm": 0.05311831086874008, "learning_rate": 4.072227557480661e-06, "loss": 0.0017, "step": 147590 }, { "epoch": 1.2070163961238092, "grad_norm": 0.05736773833632469, "learning_rate": 4.071526328095959e-06, "loss": 0.0016, "step": 147600 }, { "epoch": 1.2070981723024083, "grad_norm": 0.05983467027544975, "learning_rate": 4.0708251176250235e-06, "loss": 0.0007, "step": 147610 }, { "epoch": 1.2071799484810075, "grad_norm": 0.008298986591398716, "learning_rate": 4.070123926082135e-06, "loss": 0.0009, "step": 147620 }, { "epoch": 1.2072617246596067, "grad_norm": 0.013351578265428543, "learning_rate": 4.069422753481579e-06, "loss": 0.0006, "step": 147630 }, { "epoch": 1.2073435008382059, "grad_norm": 0.020099209621548653, "learning_rate": 4.068721599837636e-06, "loss": 0.0018, "step": 147640 }, { "epoch": 1.207425277016805, "grad_norm": 0.06867104768753052, "learning_rate": 4.068020465164592e-06, "loss": 0.0011, "step": 147650 }, { "epoch": 1.2075070531954042, "grad_norm": 0.043490197509527206, "learning_rate": 4.0673193494767286e-06, "loss": 0.0008, "step": 147660 }, { "epoch": 1.2075888293740034, "grad_norm": 0.08305522054433823, "learning_rate": 4.066618252788328e-06, "loss": 0.0007, "step": 147670 }, { "epoch": 1.2076706055526025, "grad_norm": 0.03310125693678856, "learning_rate": 4.0659171751136714e-06, "loss": 0.0014, "step": 147680 }, { "epoch": 1.2077523817312017, "grad_norm": 0.1235073059797287, "learning_rate": 4.065216116467042e-06, "loss": 0.0008, "step": 147690 }, { "epoch": 1.2078341579098009, "grad_norm": 0.02544461004436016, "learning_rate": 4.0645150768627215e-06, "loss": 0.0012, "step": 147700 }, { "epoch": 1.2079159340884, "grad_norm": 0.018429698422551155, "learning_rate": 4.063814056314989e-06, "loss": 0.0017, "step": 147710 }, { "epoch": 1.2079977102669992, "grad_norm": 0.09958641231060028, "learning_rate": 4.063113054838122e-06, "loss": 0.0009, "step": 147720 }, { "epoch": 1.2080794864455984, "grad_norm": 0.5174767971038818, "learning_rate": 4.062412072446407e-06, "loss": 0.0012, "step": 147730 }, { "epoch": 1.2081612626241975, "grad_norm": 0.013508303090929985, "learning_rate": 4.061711109154119e-06, "loss": 0.0009, "step": 147740 }, { "epoch": 1.2082430388027967, "grad_norm": 0.26214757561683655, "learning_rate": 4.061010164975539e-06, "loss": 0.0004, "step": 147750 }, { "epoch": 1.2083248149813959, "grad_norm": 0.036497872322797775, "learning_rate": 4.060309239924943e-06, "loss": 0.0004, "step": 147760 }, { "epoch": 1.208406591159995, "grad_norm": 0.009354294277727604, "learning_rate": 4.059608334016614e-06, "loss": 0.0008, "step": 147770 }, { "epoch": 1.2084883673385942, "grad_norm": 0.03995169699192047, "learning_rate": 4.058907447264827e-06, "loss": 0.0016, "step": 147780 }, { "epoch": 1.2085701435171934, "grad_norm": 0.07086855918169022, "learning_rate": 4.05820657968386e-06, "loss": 0.0009, "step": 147790 }, { "epoch": 1.2086519196957926, "grad_norm": 0.00736836576834321, "learning_rate": 4.05750573128799e-06, "loss": 0.0013, "step": 147800 }, { "epoch": 1.2087336958743917, "grad_norm": 0.02314395271241665, "learning_rate": 4.056804902091495e-06, "loss": 0.001, "step": 147810 }, { "epoch": 1.208815472052991, "grad_norm": 0.0318097323179245, "learning_rate": 4.056104092108652e-06, "loss": 0.0006, "step": 147820 }, { "epoch": 1.20889724823159, "grad_norm": 0.010098147206008434, "learning_rate": 4.055403301353735e-06, "loss": 0.0014, "step": 147830 }, { "epoch": 1.2089790244101892, "grad_norm": 0.06968849152326584, "learning_rate": 4.054702529841018e-06, "loss": 0.0005, "step": 147840 }, { "epoch": 1.2090608005887884, "grad_norm": 0.05236798897385597, "learning_rate": 4.054001777584782e-06, "loss": 0.001, "step": 147850 }, { "epoch": 1.2091425767673876, "grad_norm": 0.08878914266824722, "learning_rate": 4.053301044599298e-06, "loss": 0.001, "step": 147860 }, { "epoch": 1.209224352945987, "grad_norm": 0.01573917269706726, "learning_rate": 4.052600330898841e-06, "loss": 0.0008, "step": 147870 }, { "epoch": 1.209306129124586, "grad_norm": 0.030646678060293198, "learning_rate": 4.0518996364976844e-06, "loss": 0.0008, "step": 147880 }, { "epoch": 1.2093879053031853, "grad_norm": 0.03716409206390381, "learning_rate": 4.051198961410105e-06, "loss": 0.0006, "step": 147890 }, { "epoch": 1.2094696814817842, "grad_norm": 0.06579674780368805, "learning_rate": 4.050498305650373e-06, "loss": 0.0013, "step": 147900 }, { "epoch": 1.2095514576603836, "grad_norm": 0.0224376879632473, "learning_rate": 4.049797669232763e-06, "loss": 0.0005, "step": 147910 }, { "epoch": 1.2096332338389826, "grad_norm": 0.029529502615332603, "learning_rate": 4.0490970521715435e-06, "loss": 0.0017, "step": 147920 }, { "epoch": 1.209715010017582, "grad_norm": 0.03773393854498863, "learning_rate": 4.048396454480992e-06, "loss": 0.0008, "step": 147930 }, { "epoch": 1.2097967861961811, "grad_norm": 0.12788191437721252, "learning_rate": 4.047695876175381e-06, "loss": 0.0013, "step": 147940 }, { "epoch": 1.2098785623747803, "grad_norm": 0.0026142701972275972, "learning_rate": 4.046995317268977e-06, "loss": 0.0013, "step": 147950 }, { "epoch": 1.2099603385533795, "grad_norm": 0.05094324052333832, "learning_rate": 4.0462947777760504e-06, "loss": 0.0008, "step": 147960 }, { "epoch": 1.2100421147319786, "grad_norm": 0.02753022313117981, "learning_rate": 4.045594257710877e-06, "loss": 0.0007, "step": 147970 }, { "epoch": 1.2101238909105778, "grad_norm": 0.07225736230611801, "learning_rate": 4.044893757087725e-06, "loss": 0.0009, "step": 147980 }, { "epoch": 1.210205667089177, "grad_norm": 0.02801738865673542, "learning_rate": 4.0441932759208625e-06, "loss": 0.001, "step": 147990 }, { "epoch": 1.2102874432677762, "grad_norm": 0.07291220873594284, "learning_rate": 4.043492814224559e-06, "loss": 0.0012, "step": 148000 }, { "epoch": 1.2103692194463753, "grad_norm": 0.22345589101314545, "learning_rate": 4.042792372013085e-06, "loss": 0.0012, "step": 148010 }, { "epoch": 1.2104509956249745, "grad_norm": 0.047304488718509674, "learning_rate": 4.04209194930071e-06, "loss": 0.0008, "step": 148020 }, { "epoch": 1.2105327718035737, "grad_norm": 0.013972039334475994, "learning_rate": 4.0413915461017e-06, "loss": 0.001, "step": 148030 }, { "epoch": 1.2106145479821728, "grad_norm": 0.015332312323153019, "learning_rate": 4.040691162430321e-06, "loss": 0.001, "step": 148040 }, { "epoch": 1.210696324160772, "grad_norm": 0.048243679106235504, "learning_rate": 4.039990798300844e-06, "loss": 0.0012, "step": 148050 }, { "epoch": 1.2107781003393712, "grad_norm": 0.024355759844183922, "learning_rate": 4.039290453727538e-06, "loss": 0.0014, "step": 148060 }, { "epoch": 1.2108598765179703, "grad_norm": 0.020111151039600372, "learning_rate": 4.038590128724663e-06, "loss": 0.0017, "step": 148070 }, { "epoch": 1.2109416526965695, "grad_norm": 0.057222187519073486, "learning_rate": 4.037889823306489e-06, "loss": 0.0005, "step": 148080 }, { "epoch": 1.2110234288751687, "grad_norm": 0.07394228130578995, "learning_rate": 4.037189537487282e-06, "loss": 0.0008, "step": 148090 }, { "epoch": 1.2111052050537678, "grad_norm": 0.048662688583135605, "learning_rate": 4.0364892712813055e-06, "loss": 0.0016, "step": 148100 }, { "epoch": 1.211186981232367, "grad_norm": 0.06240006536245346, "learning_rate": 4.035789024702828e-06, "loss": 0.0007, "step": 148110 }, { "epoch": 1.2112687574109662, "grad_norm": 0.11778685450553894, "learning_rate": 4.035088797766109e-06, "loss": 0.001, "step": 148120 }, { "epoch": 1.2113505335895653, "grad_norm": 0.04177239537239075, "learning_rate": 4.034388590485418e-06, "loss": 0.0008, "step": 148130 }, { "epoch": 1.2114323097681645, "grad_norm": 0.040395643562078476, "learning_rate": 4.033688402875015e-06, "loss": 0.0009, "step": 148140 }, { "epoch": 1.2115140859467637, "grad_norm": 0.1123025044798851, "learning_rate": 4.032988234949165e-06, "loss": 0.001, "step": 148150 }, { "epoch": 1.2115958621253629, "grad_norm": 0.0737973153591156, "learning_rate": 4.032288086722129e-06, "loss": 0.0009, "step": 148160 }, { "epoch": 1.211677638303962, "grad_norm": 0.007052016444504261, "learning_rate": 4.031587958208173e-06, "loss": 0.0012, "step": 148170 }, { "epoch": 1.2117594144825612, "grad_norm": 0.031863171607255936, "learning_rate": 4.030887849421558e-06, "loss": 0.0018, "step": 148180 }, { "epoch": 1.2118411906611604, "grad_norm": 0.0057816761545836926, "learning_rate": 4.030187760376546e-06, "loss": 0.0007, "step": 148190 }, { "epoch": 1.2119229668397595, "grad_norm": 0.04406452924013138, "learning_rate": 4.029487691087394e-06, "loss": 0.0007, "step": 148200 }, { "epoch": 1.2120047430183587, "grad_norm": 0.010448693297803402, "learning_rate": 4.028787641568369e-06, "loss": 0.0005, "step": 148210 }, { "epoch": 1.2120865191969579, "grad_norm": 0.017486846074461937, "learning_rate": 4.02808761183373e-06, "loss": 0.0011, "step": 148220 }, { "epoch": 1.212168295375557, "grad_norm": 0.09921658039093018, "learning_rate": 4.0273876018977355e-06, "loss": 0.0009, "step": 148230 }, { "epoch": 1.2122500715541562, "grad_norm": 0.03989536687731743, "learning_rate": 4.0266876117746455e-06, "loss": 0.0007, "step": 148240 }, { "epoch": 1.2123318477327554, "grad_norm": 0.0058310627937316895, "learning_rate": 4.025987641478721e-06, "loss": 0.0006, "step": 148250 }, { "epoch": 1.2124136239113545, "grad_norm": 0.04887457937002182, "learning_rate": 4.025287691024219e-06, "loss": 0.0008, "step": 148260 }, { "epoch": 1.2124954000899537, "grad_norm": 0.023297814652323723, "learning_rate": 4.0245877604253995e-06, "loss": 0.0009, "step": 148270 }, { "epoch": 1.2125771762685529, "grad_norm": 0.0998276099562645, "learning_rate": 4.0238878496965186e-06, "loss": 0.0012, "step": 148280 }, { "epoch": 1.212658952447152, "grad_norm": 0.09993492066860199, "learning_rate": 4.023187958851838e-06, "loss": 0.0006, "step": 148290 }, { "epoch": 1.2127407286257514, "grad_norm": 0.13523246347904205, "learning_rate": 4.022488087905613e-06, "loss": 0.0012, "step": 148300 }, { "epoch": 1.2128225048043504, "grad_norm": 0.03305572271347046, "learning_rate": 4.021788236872099e-06, "loss": 0.0008, "step": 148310 }, { "epoch": 1.2129042809829498, "grad_norm": 0.028300980105996132, "learning_rate": 4.021088405765552e-06, "loss": 0.0014, "step": 148320 }, { "epoch": 1.2129860571615487, "grad_norm": 0.022304505109786987, "learning_rate": 4.020388594600232e-06, "loss": 0.0012, "step": 148330 }, { "epoch": 1.2130678333401481, "grad_norm": 0.05017069727182388, "learning_rate": 4.019688803390392e-06, "loss": 0.0006, "step": 148340 }, { "epoch": 1.2131496095187473, "grad_norm": 0.03298687934875488, "learning_rate": 4.018989032150289e-06, "loss": 0.0006, "step": 148350 }, { "epoch": 1.2132313856973465, "grad_norm": 0.025905989110469818, "learning_rate": 4.018289280894176e-06, "loss": 0.0005, "step": 148360 }, { "epoch": 1.2133131618759456, "grad_norm": 0.031346168369054794, "learning_rate": 4.0175895496363085e-06, "loss": 0.0009, "step": 148370 }, { "epoch": 1.2133949380545448, "grad_norm": 0.026517294347286224, "learning_rate": 4.01688983839094e-06, "loss": 0.0004, "step": 148380 }, { "epoch": 1.213476714233144, "grad_norm": 0.06993690878152847, "learning_rate": 4.016190147172325e-06, "loss": 0.0005, "step": 148390 }, { "epoch": 1.2135584904117431, "grad_norm": 0.026556292548775673, "learning_rate": 4.015490475994714e-06, "loss": 0.0009, "step": 148400 }, { "epoch": 1.2136402665903423, "grad_norm": 0.015475907362997532, "learning_rate": 4.014790824872366e-06, "loss": 0.0008, "step": 148410 }, { "epoch": 1.2137220427689415, "grad_norm": 0.038334574550390244, "learning_rate": 4.014091193819529e-06, "loss": 0.0007, "step": 148420 }, { "epoch": 1.2138038189475406, "grad_norm": 0.0598229244351387, "learning_rate": 4.0133915828504555e-06, "loss": 0.0011, "step": 148430 }, { "epoch": 1.2138855951261398, "grad_norm": 0.018693532794713974, "learning_rate": 4.0126919919793945e-06, "loss": 0.0004, "step": 148440 }, { "epoch": 1.213967371304739, "grad_norm": 0.0016696880338713527, "learning_rate": 4.011992421220603e-06, "loss": 0.0012, "step": 148450 }, { "epoch": 1.2140491474833381, "grad_norm": 0.09782783687114716, "learning_rate": 4.011292870588329e-06, "loss": 0.0008, "step": 148460 }, { "epoch": 1.2141309236619373, "grad_norm": 0.02834169939160347, "learning_rate": 4.010593340096823e-06, "loss": 0.0016, "step": 148470 }, { "epoch": 1.2142126998405365, "grad_norm": 0.009293588809669018, "learning_rate": 4.009893829760333e-06, "loss": 0.0009, "step": 148480 }, { "epoch": 1.2142944760191356, "grad_norm": 0.06434650719165802, "learning_rate": 4.009194339593112e-06, "loss": 0.0012, "step": 148490 }, { "epoch": 1.2143762521977348, "grad_norm": 0.030686568468809128, "learning_rate": 4.008494869609408e-06, "loss": 0.0006, "step": 148500 }, { "epoch": 1.214458028376334, "grad_norm": 0.0023049376904964447, "learning_rate": 4.007795419823469e-06, "loss": 0.001, "step": 148510 }, { "epoch": 1.2145398045549332, "grad_norm": 0.03534433990716934, "learning_rate": 4.007095990249542e-06, "loss": 0.0015, "step": 148520 }, { "epoch": 1.2146215807335323, "grad_norm": 0.04912208393216133, "learning_rate": 4.006396580901879e-06, "loss": 0.0015, "step": 148530 }, { "epoch": 1.2147033569121315, "grad_norm": 0.04450583457946777, "learning_rate": 4.005697191794725e-06, "loss": 0.001, "step": 148540 }, { "epoch": 1.2147851330907307, "grad_norm": 0.04154398292303085, "learning_rate": 4.004997822942328e-06, "loss": 0.001, "step": 148550 }, { "epoch": 1.2148669092693298, "grad_norm": 0.038915738463401794, "learning_rate": 4.004298474358932e-06, "loss": 0.0011, "step": 148560 }, { "epoch": 1.214948685447929, "grad_norm": 0.12914656102657318, "learning_rate": 4.003599146058787e-06, "loss": 0.0007, "step": 148570 }, { "epoch": 1.2150304616265282, "grad_norm": 0.07189708948135376, "learning_rate": 4.002899838056137e-06, "loss": 0.0009, "step": 148580 }, { "epoch": 1.2151122378051273, "grad_norm": 0.0031895071733742952, "learning_rate": 4.002200550365228e-06, "loss": 0.0009, "step": 148590 }, { "epoch": 1.2151940139837265, "grad_norm": 0.04347112029790878, "learning_rate": 4.001501283000304e-06, "loss": 0.0006, "step": 148600 }, { "epoch": 1.2152757901623257, "grad_norm": 0.028390975669026375, "learning_rate": 4.0008020359756114e-06, "loss": 0.0009, "step": 148610 }, { "epoch": 1.2153575663409248, "grad_norm": 0.010332686826586723, "learning_rate": 4.000102809305393e-06, "loss": 0.0009, "step": 148620 }, { "epoch": 1.215439342519524, "grad_norm": 0.02392827905714512, "learning_rate": 3.9994036030038945e-06, "loss": 0.0019, "step": 148630 }, { "epoch": 1.2155211186981232, "grad_norm": 0.0016850774409249425, "learning_rate": 3.998704417085357e-06, "loss": 0.0007, "step": 148640 }, { "epoch": 1.2156028948767224, "grad_norm": 0.022355351597070694, "learning_rate": 3.9980052515640225e-06, "loss": 0.0007, "step": 148650 }, { "epoch": 1.2156846710553215, "grad_norm": 0.022228341549634933, "learning_rate": 3.997306106454138e-06, "loss": 0.0005, "step": 148660 }, { "epoch": 1.2157664472339207, "grad_norm": 0.050968948751688004, "learning_rate": 3.996606981769943e-06, "loss": 0.0005, "step": 148670 }, { "epoch": 1.2158482234125199, "grad_norm": 0.01788288541138172, "learning_rate": 3.995907877525679e-06, "loss": 0.0004, "step": 148680 }, { "epoch": 1.215929999591119, "grad_norm": 0.028224758803844452, "learning_rate": 3.995208793735586e-06, "loss": 0.0005, "step": 148690 }, { "epoch": 1.2160117757697182, "grad_norm": 0.031732358038425446, "learning_rate": 3.994509730413908e-06, "loss": 0.0009, "step": 148700 }, { "epoch": 1.2160935519483174, "grad_norm": 0.1174684688448906, "learning_rate": 3.993810687574884e-06, "loss": 0.0018, "step": 148710 }, { "epoch": 1.2161753281269165, "grad_norm": 0.04933226853609085, "learning_rate": 3.993111665232755e-06, "loss": 0.0013, "step": 148720 }, { "epoch": 1.216257104305516, "grad_norm": 0.058052223175764084, "learning_rate": 3.992412663401758e-06, "loss": 0.0014, "step": 148730 }, { "epoch": 1.2163388804841149, "grad_norm": 0.1294022500514984, "learning_rate": 3.991713682096135e-06, "loss": 0.0012, "step": 148740 }, { "epoch": 1.2164206566627143, "grad_norm": 0.0694039911031723, "learning_rate": 3.991014721330125e-06, "loss": 0.0006, "step": 148750 }, { "epoch": 1.2165024328413132, "grad_norm": 0.06478247046470642, "learning_rate": 3.990315781117965e-06, "loss": 0.002, "step": 148760 }, { "epoch": 1.2165842090199126, "grad_norm": 0.020562471821904182, "learning_rate": 3.98961686147389e-06, "loss": 0.0007, "step": 148770 }, { "epoch": 1.2166659851985118, "grad_norm": 0.04967496544122696, "learning_rate": 3.988917962412145e-06, "loss": 0.0009, "step": 148780 }, { "epoch": 1.216747761377111, "grad_norm": 0.02744932286441326, "learning_rate": 3.988219083946962e-06, "loss": 0.0007, "step": 148790 }, { "epoch": 1.21682953755571, "grad_norm": 0.018070103600621223, "learning_rate": 3.987520226092578e-06, "loss": 0.0014, "step": 148800 }, { "epoch": 1.2169113137343093, "grad_norm": 0.03871649503707886, "learning_rate": 3.986821388863228e-06, "loss": 0.0006, "step": 148810 }, { "epoch": 1.2169930899129084, "grad_norm": 0.062136415392160416, "learning_rate": 3.986122572273152e-06, "loss": 0.0008, "step": 148820 }, { "epoch": 1.2170748660915076, "grad_norm": 0.04571675881743431, "learning_rate": 3.985423776336584e-06, "loss": 0.0005, "step": 148830 }, { "epoch": 1.2171566422701068, "grad_norm": 0.06129899248480797, "learning_rate": 3.984725001067757e-06, "loss": 0.0013, "step": 148840 }, { "epoch": 1.217238418448706, "grad_norm": 0.03221151605248451, "learning_rate": 3.984026246480907e-06, "loss": 0.0006, "step": 148850 }, { "epoch": 1.2173201946273051, "grad_norm": 0.012785454280674458, "learning_rate": 3.98332751259027e-06, "loss": 0.0009, "step": 148860 }, { "epoch": 1.2174019708059043, "grad_norm": 0.008965187706053257, "learning_rate": 3.982628799410076e-06, "loss": 0.0007, "step": 148870 }, { "epoch": 1.2174837469845035, "grad_norm": 0.010591563768684864, "learning_rate": 3.981930106954561e-06, "loss": 0.0006, "step": 148880 }, { "epoch": 1.2175655231631026, "grad_norm": 0.049005426466464996, "learning_rate": 3.981231435237955e-06, "loss": 0.0014, "step": 148890 }, { "epoch": 1.2176472993417018, "grad_norm": 0.04456046223640442, "learning_rate": 3.980532784274498e-06, "loss": 0.0017, "step": 148900 }, { "epoch": 1.217729075520301, "grad_norm": 0.0032343112397938967, "learning_rate": 3.979834154078414e-06, "loss": 0.0011, "step": 148910 }, { "epoch": 1.2178108516989001, "grad_norm": 0.0013883445644751191, "learning_rate": 3.9791355446639365e-06, "loss": 0.0005, "step": 148920 }, { "epoch": 1.2178926278774993, "grad_norm": 0.02209342271089554, "learning_rate": 3.978436956045297e-06, "loss": 0.0009, "step": 148930 }, { "epoch": 1.2179744040560985, "grad_norm": 0.03350052237510681, "learning_rate": 3.97773838823673e-06, "loss": 0.0011, "step": 148940 }, { "epoch": 1.2180561802346976, "grad_norm": 0.042117126286029816, "learning_rate": 3.977039841252462e-06, "loss": 0.0012, "step": 148950 }, { "epoch": 1.2181379564132968, "grad_norm": 0.0441816970705986, "learning_rate": 3.976341315106723e-06, "loss": 0.0004, "step": 148960 }, { "epoch": 1.218219732591896, "grad_norm": 0.0020546643063426018, "learning_rate": 3.975642809813743e-06, "loss": 0.0012, "step": 148970 }, { "epoch": 1.2183015087704951, "grad_norm": 0.09998591989278793, "learning_rate": 3.9749443253877535e-06, "loss": 0.0013, "step": 148980 }, { "epoch": 1.2183832849490943, "grad_norm": 0.03758619353175163, "learning_rate": 3.974245861842981e-06, "loss": 0.0013, "step": 148990 }, { "epoch": 1.2184650611276935, "grad_norm": 0.21569843590259552, "learning_rate": 3.973547419193653e-06, "loss": 0.0009, "step": 149000 }, { "epoch": 1.2185468373062927, "grad_norm": 0.014871801249682903, "learning_rate": 3.972848997453997e-06, "loss": 0.0009, "step": 149010 }, { "epoch": 1.2186286134848918, "grad_norm": 0.011506435461342335, "learning_rate": 3.972150596638246e-06, "loss": 0.001, "step": 149020 }, { "epoch": 1.218710389663491, "grad_norm": 0.03830504044890404, "learning_rate": 3.971452216760621e-06, "loss": 0.0009, "step": 149030 }, { "epoch": 1.2187921658420902, "grad_norm": 0.052500639110803604, "learning_rate": 3.97075385783535e-06, "loss": 0.0008, "step": 149040 }, { "epoch": 1.2188739420206893, "grad_norm": 0.06414638459682465, "learning_rate": 3.9700555198766575e-06, "loss": 0.0021, "step": 149050 }, { "epoch": 1.2189557181992885, "grad_norm": 0.05235438421368599, "learning_rate": 3.969357202898774e-06, "loss": 0.0006, "step": 149060 }, { "epoch": 1.2190374943778877, "grad_norm": 0.016599923372268677, "learning_rate": 3.968658906915922e-06, "loss": 0.0013, "step": 149070 }, { "epoch": 1.2191192705564868, "grad_norm": 0.01862863264977932, "learning_rate": 3.967960631942325e-06, "loss": 0.0011, "step": 149080 }, { "epoch": 1.219201046735086, "grad_norm": 0.009962867945432663, "learning_rate": 3.96726237799221e-06, "loss": 0.0011, "step": 149090 }, { "epoch": 1.2192828229136852, "grad_norm": 0.06569968909025192, "learning_rate": 3.966564145079799e-06, "loss": 0.0008, "step": 149100 }, { "epoch": 1.2193645990922843, "grad_norm": 0.10986919701099396, "learning_rate": 3.965865933219317e-06, "loss": 0.0015, "step": 149110 }, { "epoch": 1.2194463752708835, "grad_norm": 0.08857710659503937, "learning_rate": 3.965167742424986e-06, "loss": 0.0013, "step": 149120 }, { "epoch": 1.2195281514494827, "grad_norm": 0.013964112848043442, "learning_rate": 3.9644695727110275e-06, "loss": 0.0007, "step": 149130 }, { "epoch": 1.2196099276280818, "grad_norm": 0.10205040127038956, "learning_rate": 3.963771424091669e-06, "loss": 0.0013, "step": 149140 }, { "epoch": 1.219691703806681, "grad_norm": 0.015616647899150848, "learning_rate": 3.963073296581128e-06, "loss": 0.0015, "step": 149150 }, { "epoch": 1.2197734799852804, "grad_norm": 0.048374585807323456, "learning_rate": 3.962375190193626e-06, "loss": 0.0008, "step": 149160 }, { "epoch": 1.2198552561638794, "grad_norm": 0.00773992482572794, "learning_rate": 3.9616771049433834e-06, "loss": 0.0033, "step": 149170 }, { "epoch": 1.2199370323424787, "grad_norm": 0.007892890833318233, "learning_rate": 3.960979040844624e-06, "loss": 0.0007, "step": 149180 }, { "epoch": 1.2200188085210777, "grad_norm": 0.05748360976576805, "learning_rate": 3.960280997911566e-06, "loss": 0.001, "step": 149190 }, { "epoch": 1.220100584699677, "grad_norm": 0.03853501379489899, "learning_rate": 3.95958297615843e-06, "loss": 0.0006, "step": 149200 }, { "epoch": 1.2201823608782763, "grad_norm": 0.030418207868933678, "learning_rate": 3.958884975599432e-06, "loss": 0.0011, "step": 149210 }, { "epoch": 1.2202641370568754, "grad_norm": 0.061823632568120956, "learning_rate": 3.958186996248796e-06, "loss": 0.0005, "step": 149220 }, { "epoch": 1.2203459132354746, "grad_norm": 0.02748914621770382, "learning_rate": 3.957489038120736e-06, "loss": 0.0007, "step": 149230 }, { "epoch": 1.2204276894140738, "grad_norm": 0.11721968650817871, "learning_rate": 3.956791101229473e-06, "loss": 0.0005, "step": 149240 }, { "epoch": 1.220509465592673, "grad_norm": 0.0069079771637916565, "learning_rate": 3.95609318558922e-06, "loss": 0.0007, "step": 149250 }, { "epoch": 1.220591241771272, "grad_norm": 0.008939717896282673, "learning_rate": 3.955395291214201e-06, "loss": 0.0012, "step": 149260 }, { "epoch": 1.2206730179498713, "grad_norm": 0.05345241725444794, "learning_rate": 3.954697418118629e-06, "loss": 0.0006, "step": 149270 }, { "epoch": 1.2207547941284704, "grad_norm": 0.006927349604666233, "learning_rate": 3.953999566316719e-06, "loss": 0.0009, "step": 149280 }, { "epoch": 1.2208365703070696, "grad_norm": 0.0047810873948037624, "learning_rate": 3.953301735822687e-06, "loss": 0.0008, "step": 149290 }, { "epoch": 1.2209183464856688, "grad_norm": 0.05458318069577217, "learning_rate": 3.95260392665075e-06, "loss": 0.001, "step": 149300 }, { "epoch": 1.221000122664268, "grad_norm": 0.08157633990049362, "learning_rate": 3.951906138815124e-06, "loss": 0.0006, "step": 149310 }, { "epoch": 1.221081898842867, "grad_norm": 0.025376541540026665, "learning_rate": 3.951208372330021e-06, "loss": 0.0011, "step": 149320 }, { "epoch": 1.2211636750214663, "grad_norm": 0.08200429379940033, "learning_rate": 3.9505106272096544e-06, "loss": 0.0007, "step": 149330 }, { "epoch": 1.2212454512000654, "grad_norm": 0.009621953591704369, "learning_rate": 3.949812903468241e-06, "loss": 0.0009, "step": 149340 }, { "epoch": 1.2213272273786646, "grad_norm": 0.03972995653748512, "learning_rate": 3.949115201119993e-06, "loss": 0.001, "step": 149350 }, { "epoch": 1.2214090035572638, "grad_norm": 0.005192631855607033, "learning_rate": 3.948417520179122e-06, "loss": 0.0009, "step": 149360 }, { "epoch": 1.221490779735863, "grad_norm": 0.025037847459316254, "learning_rate": 3.947719860659839e-06, "loss": 0.0009, "step": 149370 }, { "epoch": 1.2215725559144621, "grad_norm": 0.01703784428536892, "learning_rate": 3.94702222257636e-06, "loss": 0.0009, "step": 149380 }, { "epoch": 1.2216543320930613, "grad_norm": 0.034451182931661606, "learning_rate": 3.946324605942894e-06, "loss": 0.0009, "step": 149390 }, { "epoch": 1.2217361082716605, "grad_norm": 0.028863616287708282, "learning_rate": 3.945627010773652e-06, "loss": 0.0014, "step": 149400 }, { "epoch": 1.2218178844502596, "grad_norm": 0.009429596364498138, "learning_rate": 3.944929437082843e-06, "loss": 0.001, "step": 149410 }, { "epoch": 1.2218996606288588, "grad_norm": 0.052813269197940826, "learning_rate": 3.944231884884681e-06, "loss": 0.0005, "step": 149420 }, { "epoch": 1.221981436807458, "grad_norm": 0.04469941183924675, "learning_rate": 3.943534354193374e-06, "loss": 0.0004, "step": 149430 }, { "epoch": 1.2220632129860571, "grad_norm": 0.00402017030864954, "learning_rate": 3.94283684502313e-06, "loss": 0.0008, "step": 149440 }, { "epoch": 1.2221449891646563, "grad_norm": 0.019151460379362106, "learning_rate": 3.942139357388158e-06, "loss": 0.0013, "step": 149450 }, { "epoch": 1.2222267653432555, "grad_norm": 0.5795890092849731, "learning_rate": 3.941441891302669e-06, "loss": 0.001, "step": 149460 }, { "epoch": 1.2223085415218546, "grad_norm": 0.10184787213802338, "learning_rate": 3.9407444467808686e-06, "loss": 0.0006, "step": 149470 }, { "epoch": 1.2223903177004538, "grad_norm": 0.010973856784403324, "learning_rate": 3.940047023836965e-06, "loss": 0.0011, "step": 149480 }, { "epoch": 1.222472093879053, "grad_norm": 0.0777214989066124, "learning_rate": 3.939349622485163e-06, "loss": 0.0012, "step": 149490 }, { "epoch": 1.2225538700576521, "grad_norm": 0.03476623445749283, "learning_rate": 3.938652242739674e-06, "loss": 0.0017, "step": 149500 }, { "epoch": 1.2226356462362513, "grad_norm": 0.09722041338682175, "learning_rate": 3.937954884614701e-06, "loss": 0.0014, "step": 149510 }, { "epoch": 1.2227174224148505, "grad_norm": 0.05398453399538994, "learning_rate": 3.9372575481244505e-06, "loss": 0.0048, "step": 149520 }, { "epoch": 1.2227991985934497, "grad_norm": 0.03364121913909912, "learning_rate": 3.936560233283124e-06, "loss": 0.0014, "step": 149530 }, { "epoch": 1.2228809747720488, "grad_norm": 0.0147801972925663, "learning_rate": 3.9358629401049335e-06, "loss": 0.0013, "step": 149540 }, { "epoch": 1.222962750950648, "grad_norm": 0.053334593772888184, "learning_rate": 3.93516566860408e-06, "loss": 0.0008, "step": 149550 }, { "epoch": 1.2230445271292472, "grad_norm": 0.09007539600133896, "learning_rate": 3.9344684187947665e-06, "loss": 0.0017, "step": 149560 }, { "epoch": 1.2231263033078463, "grad_norm": 0.03478739410638809, "learning_rate": 3.933771190691197e-06, "loss": 0.0006, "step": 149570 }, { "epoch": 1.2232080794864455, "grad_norm": 0.0740438774228096, "learning_rate": 3.933073984307576e-06, "loss": 0.0015, "step": 149580 }, { "epoch": 1.223289855665045, "grad_norm": 0.015546715818345547, "learning_rate": 3.932376799658105e-06, "loss": 0.0004, "step": 149590 }, { "epoch": 1.2233716318436438, "grad_norm": 0.08765377849340439, "learning_rate": 3.931679636756986e-06, "loss": 0.0013, "step": 149600 }, { "epoch": 1.2234534080222432, "grad_norm": 0.015756923705339432, "learning_rate": 3.9309824956184195e-06, "loss": 0.0012, "step": 149610 }, { "epoch": 1.2235351842008422, "grad_norm": 0.046016182750463486, "learning_rate": 3.930285376256612e-06, "loss": 0.0009, "step": 149620 }, { "epoch": 1.2236169603794416, "grad_norm": 0.013550704345107079, "learning_rate": 3.929588278685759e-06, "loss": 0.0006, "step": 149630 }, { "epoch": 1.2236987365580407, "grad_norm": 0.02110644243657589, "learning_rate": 3.928891202920063e-06, "loss": 0.001, "step": 149640 }, { "epoch": 1.22378051273664, "grad_norm": 0.03861241042613983, "learning_rate": 3.928194148973722e-06, "loss": 0.0023, "step": 149650 }, { "epoch": 1.223862288915239, "grad_norm": 0.035829465836286545, "learning_rate": 3.92749711686094e-06, "loss": 0.0009, "step": 149660 }, { "epoch": 1.2239440650938382, "grad_norm": 0.018238956108689308, "learning_rate": 3.926800106595913e-06, "loss": 0.0008, "step": 149670 }, { "epoch": 1.2240258412724374, "grad_norm": 0.02723545767366886, "learning_rate": 3.92610311819284e-06, "loss": 0.0008, "step": 149680 }, { "epoch": 1.2241076174510366, "grad_norm": 0.04881228134036064, "learning_rate": 3.925406151665918e-06, "loss": 0.001, "step": 149690 }, { "epoch": 1.2241893936296357, "grad_norm": 0.05651179701089859, "learning_rate": 3.924709207029349e-06, "loss": 0.0019, "step": 149700 }, { "epoch": 1.224271169808235, "grad_norm": 0.03502993658185005, "learning_rate": 3.924012284297326e-06, "loss": 0.0011, "step": 149710 }, { "epoch": 1.224352945986834, "grad_norm": 0.056467246264219284, "learning_rate": 3.923315383484048e-06, "loss": 0.001, "step": 149720 }, { "epoch": 1.2244347221654333, "grad_norm": 0.03950072452425957, "learning_rate": 3.922618504603709e-06, "loss": 0.0006, "step": 149730 }, { "epoch": 1.2245164983440324, "grad_norm": 0.02468106336891651, "learning_rate": 3.921921647670508e-06, "loss": 0.0006, "step": 149740 }, { "epoch": 1.2245982745226316, "grad_norm": 0.06711389124393463, "learning_rate": 3.92122481269864e-06, "loss": 0.0005, "step": 149750 }, { "epoch": 1.2246800507012308, "grad_norm": 0.027430608868598938, "learning_rate": 3.920527999702298e-06, "loss": 0.0011, "step": 149760 }, { "epoch": 1.22476182687983, "grad_norm": 0.03401034697890282, "learning_rate": 3.919831208695677e-06, "loss": 0.0011, "step": 149770 }, { "epoch": 1.224843603058429, "grad_norm": 0.018589558079838753, "learning_rate": 3.919134439692975e-06, "loss": 0.0006, "step": 149780 }, { "epoch": 1.2249253792370283, "grad_norm": 0.012374775484204292, "learning_rate": 3.9184376927083825e-06, "loss": 0.0006, "step": 149790 }, { "epoch": 1.2250071554156274, "grad_norm": 0.0424557700753212, "learning_rate": 3.917740967756093e-06, "loss": 0.0007, "step": 149800 }, { "epoch": 1.2250889315942266, "grad_norm": 0.01042154897004366, "learning_rate": 3.9170442648502995e-06, "loss": 0.0005, "step": 149810 }, { "epoch": 1.2251707077728258, "grad_norm": 0.001439715619198978, "learning_rate": 3.916347584005195e-06, "loss": 0.0013, "step": 149820 }, { "epoch": 1.225252483951425, "grad_norm": 0.03691041097044945, "learning_rate": 3.9156509252349706e-06, "loss": 0.0006, "step": 149830 }, { "epoch": 1.2253342601300241, "grad_norm": 0.07375072687864304, "learning_rate": 3.914954288553819e-06, "loss": 0.0007, "step": 149840 }, { "epoch": 1.2254160363086233, "grad_norm": 0.014634991064667702, "learning_rate": 3.914257673975929e-06, "loss": 0.0006, "step": 149850 }, { "epoch": 1.2254978124872224, "grad_norm": 0.06496270000934601, "learning_rate": 3.913561081515494e-06, "loss": 0.0011, "step": 149860 }, { "epoch": 1.2255795886658216, "grad_norm": 0.02512367255985737, "learning_rate": 3.912864511186703e-06, "loss": 0.0006, "step": 149870 }, { "epoch": 1.2256613648444208, "grad_norm": 0.03635793924331665, "learning_rate": 3.912167963003745e-06, "loss": 0.0007, "step": 149880 }, { "epoch": 1.22574314102302, "grad_norm": 0.028674256056547165, "learning_rate": 3.911471436980808e-06, "loss": 0.0009, "step": 149890 }, { "epoch": 1.2258249172016191, "grad_norm": 0.07205371558666229, "learning_rate": 3.910774933132086e-06, "loss": 0.0013, "step": 149900 }, { "epoch": 1.2259066933802183, "grad_norm": 0.011047621257603168, "learning_rate": 3.9100784514717625e-06, "loss": 0.0012, "step": 149910 }, { "epoch": 1.2259884695588175, "grad_norm": 0.07989970594644547, "learning_rate": 3.909381992014027e-06, "loss": 0.0016, "step": 149920 }, { "epoch": 1.2260702457374166, "grad_norm": 0.005135496612638235, "learning_rate": 3.908685554773066e-06, "loss": 0.0009, "step": 149930 }, { "epoch": 1.2261520219160158, "grad_norm": 0.05026771500706673, "learning_rate": 3.9079891397630685e-06, "loss": 0.0007, "step": 149940 }, { "epoch": 1.226233798094615, "grad_norm": 0.0878056213259697, "learning_rate": 3.9072927469982194e-06, "loss": 0.0017, "step": 149950 }, { "epoch": 1.2263155742732141, "grad_norm": 0.03615252301096916, "learning_rate": 3.906596376492706e-06, "loss": 0.001, "step": 149960 }, { "epoch": 1.2263973504518133, "grad_norm": 0.018350817263126373, "learning_rate": 3.9059000282607115e-06, "loss": 0.0011, "step": 149970 }, { "epoch": 1.2264791266304125, "grad_norm": 0.04065199941396713, "learning_rate": 3.905203702316424e-06, "loss": 0.0012, "step": 149980 }, { "epoch": 1.2265609028090116, "grad_norm": 0.012767470441758633, "learning_rate": 3.904507398674027e-06, "loss": 0.0013, "step": 149990 }, { "epoch": 1.2266426789876108, "grad_norm": 0.01738819293677807, "learning_rate": 3.903811117347705e-06, "loss": 0.0011, "step": 150000 }, { "epoch": 1.22672445516621, "grad_norm": 0.027449356392025948, "learning_rate": 3.903114858351638e-06, "loss": 0.0005, "step": 150010 }, { "epoch": 1.2268062313448094, "grad_norm": 0.02075635828077793, "learning_rate": 3.9024186217000175e-06, "loss": 0.0005, "step": 150020 }, { "epoch": 1.2268880075234083, "grad_norm": 0.01380945835262537, "learning_rate": 3.9017224074070204e-06, "loss": 0.0013, "step": 150030 }, { "epoch": 1.2269697837020077, "grad_norm": 0.03099370002746582, "learning_rate": 3.901026215486831e-06, "loss": 0.0008, "step": 150040 }, { "epoch": 1.2270515598806067, "grad_norm": 0.01995697058737278, "learning_rate": 3.900330045953629e-06, "loss": 0.0006, "step": 150050 }, { "epoch": 1.227133336059206, "grad_norm": 0.003437698818743229, "learning_rate": 3.8996338988216e-06, "loss": 0.0006, "step": 150060 }, { "epoch": 1.2272151122378052, "grad_norm": 0.054283175617456436, "learning_rate": 3.898937774104922e-06, "loss": 0.0012, "step": 150070 }, { "epoch": 1.2272968884164044, "grad_norm": 0.021664367988705635, "learning_rate": 3.898241671817777e-06, "loss": 0.0007, "step": 150080 }, { "epoch": 1.2273786645950036, "grad_norm": 0.014036215841770172, "learning_rate": 3.897545591974344e-06, "loss": 0.0006, "step": 150090 }, { "epoch": 1.2274604407736027, "grad_norm": 0.007614995818585157, "learning_rate": 3.896849534588804e-06, "loss": 0.0004, "step": 150100 }, { "epoch": 1.227542216952202, "grad_norm": 0.045720163732767105, "learning_rate": 3.896153499675334e-06, "loss": 0.0007, "step": 150110 }, { "epoch": 1.227623993130801, "grad_norm": 0.09002033621072769, "learning_rate": 3.895457487248117e-06, "loss": 0.0013, "step": 150120 }, { "epoch": 1.2277057693094002, "grad_norm": 0.0011073556961491704, "learning_rate": 3.894761497321328e-06, "loss": 0.0008, "step": 150130 }, { "epoch": 1.2277875454879994, "grad_norm": 0.006438853684812784, "learning_rate": 3.894065529909144e-06, "loss": 0.0008, "step": 150140 }, { "epoch": 1.2278693216665986, "grad_norm": 0.0014510898618027568, "learning_rate": 3.8933695850257455e-06, "loss": 0.0019, "step": 150150 }, { "epoch": 1.2279510978451977, "grad_norm": 0.04921570047736168, "learning_rate": 3.892673662685308e-06, "loss": 0.0015, "step": 150160 }, { "epoch": 1.228032874023797, "grad_norm": 0.0965329110622406, "learning_rate": 3.8919777629020085e-06, "loss": 0.0012, "step": 150170 }, { "epoch": 1.228114650202396, "grad_norm": 0.04963333532214165, "learning_rate": 3.8912818856900216e-06, "loss": 0.0007, "step": 150180 }, { "epoch": 1.2281964263809952, "grad_norm": 0.020088545978069305, "learning_rate": 3.890586031063524e-06, "loss": 0.0008, "step": 150190 }, { "epoch": 1.2282782025595944, "grad_norm": 0.04168948158621788, "learning_rate": 3.889890199036692e-06, "loss": 0.0009, "step": 150200 }, { "epoch": 1.2283599787381936, "grad_norm": 0.015740370377898216, "learning_rate": 3.889194389623698e-06, "loss": 0.001, "step": 150210 }, { "epoch": 1.2284417549167927, "grad_norm": 0.0010503075318410993, "learning_rate": 3.8884986028387175e-06, "loss": 0.0007, "step": 150220 }, { "epoch": 1.228523531095392, "grad_norm": 0.04166807606816292, "learning_rate": 3.887802838695924e-06, "loss": 0.0011, "step": 150230 }, { "epoch": 1.228605307273991, "grad_norm": 0.14926515519618988, "learning_rate": 3.88710709720949e-06, "loss": 0.0007, "step": 150240 }, { "epoch": 1.2286870834525903, "grad_norm": 0.026647403836250305, "learning_rate": 3.88641137839359e-06, "loss": 0.001, "step": 150250 }, { "epoch": 1.2287688596311894, "grad_norm": 0.1264098584651947, "learning_rate": 3.885715682262393e-06, "loss": 0.0007, "step": 150260 }, { "epoch": 1.2288506358097886, "grad_norm": 0.005353325046598911, "learning_rate": 3.885020008830076e-06, "loss": 0.0007, "step": 150270 }, { "epoch": 1.2289324119883878, "grad_norm": 0.09586083143949509, "learning_rate": 3.884324358110807e-06, "loss": 0.0008, "step": 150280 }, { "epoch": 1.229014188166987, "grad_norm": 0.0420079380273819, "learning_rate": 3.8836287301187585e-06, "loss": 0.0009, "step": 150290 }, { "epoch": 1.229095964345586, "grad_norm": 0.02055429108440876, "learning_rate": 3.882933124868099e-06, "loss": 0.0004, "step": 150300 }, { "epoch": 1.2291777405241853, "grad_norm": 0.01285608485341072, "learning_rate": 3.882237542372999e-06, "loss": 0.001, "step": 150310 }, { "epoch": 1.2292595167027844, "grad_norm": 0.10961282253265381, "learning_rate": 3.8815419826476304e-06, "loss": 0.0009, "step": 150320 }, { "epoch": 1.2293412928813836, "grad_norm": 0.05929803103208542, "learning_rate": 3.880846445706161e-06, "loss": 0.0006, "step": 150330 }, { "epoch": 1.2294230690599828, "grad_norm": 0.02248634770512581, "learning_rate": 3.880150931562757e-06, "loss": 0.0011, "step": 150340 }, { "epoch": 1.229504845238582, "grad_norm": 0.09448613971471786, "learning_rate": 3.8794554402315915e-06, "loss": 0.001, "step": 150350 }, { "epoch": 1.2295866214171811, "grad_norm": 0.06788646429777145, "learning_rate": 3.878759971726829e-06, "loss": 0.0012, "step": 150360 }, { "epoch": 1.2296683975957803, "grad_norm": 0.12128768861293793, "learning_rate": 3.878064526062636e-06, "loss": 0.0012, "step": 150370 }, { "epoch": 1.2297501737743795, "grad_norm": 0.015604335814714432, "learning_rate": 3.87736910325318e-06, "loss": 0.0017, "step": 150380 }, { "epoch": 1.2298319499529786, "grad_norm": 0.1495259553194046, "learning_rate": 3.8766737033126285e-06, "loss": 0.0011, "step": 150390 }, { "epoch": 1.2299137261315778, "grad_norm": 0.0021347098518162966, "learning_rate": 3.875978326255148e-06, "loss": 0.001, "step": 150400 }, { "epoch": 1.229995502310177, "grad_norm": 0.018147500231862068, "learning_rate": 3.875282972094902e-06, "loss": 0.001, "step": 150410 }, { "epoch": 1.2300772784887761, "grad_norm": 0.07873266935348511, "learning_rate": 3.874587640846055e-06, "loss": 0.0007, "step": 150420 }, { "epoch": 1.2301590546673755, "grad_norm": 0.041897863149642944, "learning_rate": 3.873892332522774e-06, "loss": 0.0008, "step": 150430 }, { "epoch": 1.2302408308459745, "grad_norm": 0.042183488607406616, "learning_rate": 3.873197047139221e-06, "loss": 0.0016, "step": 150440 }, { "epoch": 1.2303226070245739, "grad_norm": 0.005864225327968597, "learning_rate": 3.872501784709561e-06, "loss": 0.0006, "step": 150450 }, { "epoch": 1.2304043832031728, "grad_norm": 0.003340255469083786, "learning_rate": 3.871806545247954e-06, "loss": 0.0005, "step": 150460 }, { "epoch": 1.2304861593817722, "grad_norm": 0.02316085249185562, "learning_rate": 3.871111328768566e-06, "loss": 0.0007, "step": 150470 }, { "epoch": 1.2305679355603711, "grad_norm": 0.04875936731696129, "learning_rate": 3.8704161352855585e-06, "loss": 0.001, "step": 150480 }, { "epoch": 1.2306497117389705, "grad_norm": 0.0834466740489006, "learning_rate": 3.869720964813092e-06, "loss": 0.0011, "step": 150490 }, { "epoch": 1.2307314879175697, "grad_norm": 0.1018800288438797, "learning_rate": 3.869025817365326e-06, "loss": 0.0014, "step": 150500 }, { "epoch": 1.2308132640961689, "grad_norm": 0.06542112678289413, "learning_rate": 3.868330692956425e-06, "loss": 0.0008, "step": 150510 }, { "epoch": 1.230895040274768, "grad_norm": 0.06637176871299744, "learning_rate": 3.867635591600549e-06, "loss": 0.0006, "step": 150520 }, { "epoch": 1.2309768164533672, "grad_norm": 0.01811809465289116, "learning_rate": 3.866940513311856e-06, "loss": 0.0024, "step": 150530 }, { "epoch": 1.2310585926319664, "grad_norm": 0.2475391924381256, "learning_rate": 3.866245458104504e-06, "loss": 0.0022, "step": 150540 }, { "epoch": 1.2311403688105655, "grad_norm": 0.12338768690824509, "learning_rate": 3.865550425992655e-06, "loss": 0.0007, "step": 150550 }, { "epoch": 1.2312221449891647, "grad_norm": 0.04173719510436058, "learning_rate": 3.864855416990466e-06, "loss": 0.0011, "step": 150560 }, { "epoch": 1.2313039211677639, "grad_norm": 0.032081931829452515, "learning_rate": 3.864160431112095e-06, "loss": 0.0009, "step": 150570 }, { "epoch": 1.231385697346363, "grad_norm": 0.026530398055911064, "learning_rate": 3.8634654683716976e-06, "loss": 0.002, "step": 150580 }, { "epoch": 1.2314674735249622, "grad_norm": 0.027182642370462418, "learning_rate": 3.862770528783434e-06, "loss": 0.0007, "step": 150590 }, { "epoch": 1.2315492497035614, "grad_norm": 0.017161473631858826, "learning_rate": 3.8620756123614585e-06, "loss": 0.0011, "step": 150600 }, { "epoch": 1.2316310258821606, "grad_norm": 0.030202511698007584, "learning_rate": 3.861380719119929e-06, "loss": 0.0006, "step": 150610 }, { "epoch": 1.2317128020607597, "grad_norm": 0.006382795516401529, "learning_rate": 3.860685849072997e-06, "loss": 0.001, "step": 150620 }, { "epoch": 1.231794578239359, "grad_norm": 0.07075880467891693, "learning_rate": 3.859991002234822e-06, "loss": 0.0011, "step": 150630 }, { "epoch": 1.231876354417958, "grad_norm": 0.06091056391596794, "learning_rate": 3.859296178619558e-06, "loss": 0.0008, "step": 150640 }, { "epoch": 1.2319581305965572, "grad_norm": 0.03556251898407936, "learning_rate": 3.8586013782413564e-06, "loss": 0.0008, "step": 150650 }, { "epoch": 1.2320399067751564, "grad_norm": 0.011283783242106438, "learning_rate": 3.857906601114373e-06, "loss": 0.003, "step": 150660 }, { "epoch": 1.2321216829537556, "grad_norm": 0.052565816789865494, "learning_rate": 3.857211847252761e-06, "loss": 0.0012, "step": 150670 }, { "epoch": 1.2322034591323547, "grad_norm": 0.05341876670718193, "learning_rate": 3.856517116670672e-06, "loss": 0.001, "step": 150680 }, { "epoch": 1.232285235310954, "grad_norm": 0.02206156775355339, "learning_rate": 3.855822409382259e-06, "loss": 0.0015, "step": 150690 }, { "epoch": 1.232367011489553, "grad_norm": 0.024996057152748108, "learning_rate": 3.8551277254016734e-06, "loss": 0.0008, "step": 150700 }, { "epoch": 1.2324487876681522, "grad_norm": 0.029964156448841095, "learning_rate": 3.854433064743067e-06, "loss": 0.0013, "step": 150710 }, { "epoch": 1.2325305638467514, "grad_norm": 0.07618981599807739, "learning_rate": 3.853738427420591e-06, "loss": 0.0008, "step": 150720 }, { "epoch": 1.2326123400253506, "grad_norm": 0.039033565670251846, "learning_rate": 3.853043813448394e-06, "loss": 0.001, "step": 150730 }, { "epoch": 1.2326941162039498, "grad_norm": 0.03911631926894188, "learning_rate": 3.852349222840625e-06, "loss": 0.0009, "step": 150740 }, { "epoch": 1.232775892382549, "grad_norm": 0.011375864036381245, "learning_rate": 3.851654655611437e-06, "loss": 0.0008, "step": 150750 }, { "epoch": 1.232857668561148, "grad_norm": 0.048253558576107025, "learning_rate": 3.8509601117749774e-06, "loss": 0.001, "step": 150760 }, { "epoch": 1.2329394447397473, "grad_norm": 0.03759468346834183, "learning_rate": 3.850265591345394e-06, "loss": 0.0021, "step": 150770 }, { "epoch": 1.2330212209183464, "grad_norm": 0.03002961538732052, "learning_rate": 3.8495710943368344e-06, "loss": 0.0012, "step": 150780 }, { "epoch": 1.2331029970969456, "grad_norm": 0.08402013033628464, "learning_rate": 3.848876620763448e-06, "loss": 0.0007, "step": 150790 }, { "epoch": 1.2331847732755448, "grad_norm": 0.027477603405714035, "learning_rate": 3.8481821706393805e-06, "loss": 0.0017, "step": 150800 }, { "epoch": 1.233266549454144, "grad_norm": 0.005823400337249041, "learning_rate": 3.847487743978778e-06, "loss": 0.0017, "step": 150810 }, { "epoch": 1.233348325632743, "grad_norm": 0.010891905054450035, "learning_rate": 3.846793340795786e-06, "loss": 0.0022, "step": 150820 }, { "epoch": 1.2334301018113423, "grad_norm": 0.033979322761297226, "learning_rate": 3.846098961104552e-06, "loss": 0.0009, "step": 150830 }, { "epoch": 1.2335118779899414, "grad_norm": 0.03683554753661156, "learning_rate": 3.84540460491922e-06, "loss": 0.0007, "step": 150840 }, { "epoch": 1.2335936541685406, "grad_norm": 0.004896731581538916, "learning_rate": 3.844710272253935e-06, "loss": 0.0006, "step": 150850 }, { "epoch": 1.23367543034714, "grad_norm": 0.001587657374329865, "learning_rate": 3.844015963122839e-06, "loss": 0.0006, "step": 150860 }, { "epoch": 1.233757206525739, "grad_norm": 0.02144685946404934, "learning_rate": 3.843321677540079e-06, "loss": 0.0014, "step": 150870 }, { "epoch": 1.2338389827043383, "grad_norm": 0.07158234715461731, "learning_rate": 3.8426274155197965e-06, "loss": 0.001, "step": 150880 }, { "epoch": 1.2339207588829373, "grad_norm": 0.008325793780386448, "learning_rate": 3.841933177076135e-06, "loss": 0.0005, "step": 150890 }, { "epoch": 1.2340025350615367, "grad_norm": 0.01128159649670124, "learning_rate": 3.841238962223234e-06, "loss": 0.0009, "step": 150900 }, { "epoch": 1.2340843112401358, "grad_norm": 0.0406852662563324, "learning_rate": 3.840544770975239e-06, "loss": 0.0006, "step": 150910 }, { "epoch": 1.234166087418735, "grad_norm": 0.06979347765445709, "learning_rate": 3.83985060334629e-06, "loss": 0.0011, "step": 150920 }, { "epoch": 1.2342478635973342, "grad_norm": 0.012201677076518536, "learning_rate": 3.839156459350526e-06, "loss": 0.0006, "step": 150930 }, { "epoch": 1.2343296397759334, "grad_norm": 0.023349959403276443, "learning_rate": 3.838462339002088e-06, "loss": 0.0012, "step": 150940 }, { "epoch": 1.2344114159545325, "grad_norm": 0.04419822245836258, "learning_rate": 3.837768242315117e-06, "loss": 0.0009, "step": 150950 }, { "epoch": 1.2344931921331317, "grad_norm": 0.019962280988693237, "learning_rate": 3.837074169303753e-06, "loss": 0.001, "step": 150960 }, { "epoch": 1.2345749683117309, "grad_norm": 0.07533898204565048, "learning_rate": 3.836380119982132e-06, "loss": 0.0009, "step": 150970 }, { "epoch": 1.23465674449033, "grad_norm": 0.037383489310741425, "learning_rate": 3.835686094364392e-06, "loss": 0.0009, "step": 150980 }, { "epoch": 1.2347385206689292, "grad_norm": 0.06416109949350357, "learning_rate": 3.834992092464675e-06, "loss": 0.0008, "step": 150990 }, { "epoch": 1.2348202968475284, "grad_norm": 0.027157291769981384, "learning_rate": 3.834298114297116e-06, "loss": 0.0004, "step": 151000 }, { "epoch": 1.2349020730261275, "grad_norm": 0.05509749799966812, "learning_rate": 3.833604159875852e-06, "loss": 0.0007, "step": 151010 }, { "epoch": 1.2349838492047267, "grad_norm": 0.2127581685781479, "learning_rate": 3.8329102292150175e-06, "loss": 0.0018, "step": 151020 }, { "epoch": 1.2350656253833259, "grad_norm": 0.02710573934018612, "learning_rate": 3.832216322328752e-06, "loss": 0.0012, "step": 151030 }, { "epoch": 1.235147401561925, "grad_norm": 0.03263385593891144, "learning_rate": 3.8315224392311894e-06, "loss": 0.0007, "step": 151040 }, { "epoch": 1.2352291777405242, "grad_norm": 0.06112857535481453, "learning_rate": 3.830828579936464e-06, "loss": 0.0009, "step": 151050 }, { "epoch": 1.2353109539191234, "grad_norm": 0.00808997917920351, "learning_rate": 3.8301347444587105e-06, "loss": 0.0008, "step": 151060 }, { "epoch": 1.2353927300977225, "grad_norm": 0.20878221094608307, "learning_rate": 3.829440932812064e-06, "loss": 0.0008, "step": 151070 }, { "epoch": 1.2354745062763217, "grad_norm": 0.07838138192892075, "learning_rate": 3.828747145010657e-06, "loss": 0.0008, "step": 151080 }, { "epoch": 1.2355562824549209, "grad_norm": 0.01794286072254181, "learning_rate": 3.828053381068622e-06, "loss": 0.0005, "step": 151090 }, { "epoch": 1.23563805863352, "grad_norm": 0.04944288358092308, "learning_rate": 3.827359641000091e-06, "loss": 0.0008, "step": 151100 }, { "epoch": 1.2357198348121192, "grad_norm": 0.02598465234041214, "learning_rate": 3.826665924819199e-06, "loss": 0.0007, "step": 151110 }, { "epoch": 1.2358016109907184, "grad_norm": 0.09553568065166473, "learning_rate": 3.825972232540076e-06, "loss": 0.0009, "step": 151120 }, { "epoch": 1.2358833871693176, "grad_norm": 0.0165274478495121, "learning_rate": 3.825278564176853e-06, "loss": 0.0008, "step": 151130 }, { "epoch": 1.2359651633479167, "grad_norm": 0.014725069515407085, "learning_rate": 3.82458491974366e-06, "loss": 0.0008, "step": 151140 }, { "epoch": 1.236046939526516, "grad_norm": 0.030065830796957016, "learning_rate": 3.823891299254628e-06, "loss": 0.0008, "step": 151150 }, { "epoch": 1.236128715705115, "grad_norm": 0.02322840318083763, "learning_rate": 3.823197702723887e-06, "loss": 0.0008, "step": 151160 }, { "epoch": 1.2362104918837142, "grad_norm": 0.026123980060219765, "learning_rate": 3.822504130165564e-06, "loss": 0.0011, "step": 151170 }, { "epoch": 1.2362922680623134, "grad_norm": 0.08148414641618729, "learning_rate": 3.8218105815937885e-06, "loss": 0.0005, "step": 151180 }, { "epoch": 1.2363740442409126, "grad_norm": 0.07742930203676224, "learning_rate": 3.82111705702269e-06, "loss": 0.0022, "step": 151190 }, { "epoch": 1.2364558204195117, "grad_norm": 0.023534197360277176, "learning_rate": 3.820423556466395e-06, "loss": 0.0011, "step": 151200 }, { "epoch": 1.236537596598111, "grad_norm": 0.059382300823926926, "learning_rate": 3.819730079939032e-06, "loss": 0.0008, "step": 151210 }, { "epoch": 1.23661937277671, "grad_norm": 0.07007168978452682, "learning_rate": 3.819036627454724e-06, "loss": 0.0011, "step": 151220 }, { "epoch": 1.2367011489553092, "grad_norm": 0.05638497695326805, "learning_rate": 3.818343199027602e-06, "loss": 0.0011, "step": 151230 }, { "epoch": 1.2367829251339084, "grad_norm": 0.05960497260093689, "learning_rate": 3.817649794671789e-06, "loss": 0.0009, "step": 151240 }, { "epoch": 1.2368647013125076, "grad_norm": 0.04051664471626282, "learning_rate": 3.816956414401412e-06, "loss": 0.0012, "step": 151250 }, { "epoch": 1.2369464774911068, "grad_norm": 0.048545338213443756, "learning_rate": 3.816263058230593e-06, "loss": 0.0014, "step": 151260 }, { "epoch": 1.237028253669706, "grad_norm": 0.015938542783260345, "learning_rate": 3.815569726173458e-06, "loss": 0.0008, "step": 151270 }, { "epoch": 1.237110029848305, "grad_norm": 0.14639557898044586, "learning_rate": 3.814876418244131e-06, "loss": 0.0015, "step": 151280 }, { "epoch": 1.2371918060269045, "grad_norm": 0.07222160696983337, "learning_rate": 3.814183134456735e-06, "loss": 0.001, "step": 151290 }, { "epoch": 1.2372735822055034, "grad_norm": 0.01243432704359293, "learning_rate": 3.8134898748253903e-06, "loss": 0.0017, "step": 151300 }, { "epoch": 1.2373553583841028, "grad_norm": 0.041277747601270676, "learning_rate": 3.812796639364223e-06, "loss": 0.001, "step": 151310 }, { "epoch": 1.2374371345627018, "grad_norm": 0.05466499179601669, "learning_rate": 3.8121034280873533e-06, "loss": 0.0007, "step": 151320 }, { "epoch": 1.2375189107413012, "grad_norm": 0.0015646804822608829, "learning_rate": 3.811410241008902e-06, "loss": 0.0016, "step": 151330 }, { "epoch": 1.2376006869199003, "grad_norm": 0.02219466306269169, "learning_rate": 3.810717078142988e-06, "loss": 0.001, "step": 151340 }, { "epoch": 1.2376824630984995, "grad_norm": 0.020762652158737183, "learning_rate": 3.8100239395037365e-06, "loss": 0.0008, "step": 151350 }, { "epoch": 1.2377642392770987, "grad_norm": 0.005094216205179691, "learning_rate": 3.809330825105264e-06, "loss": 0.0006, "step": 151360 }, { "epoch": 1.2378460154556978, "grad_norm": 0.02964731492102146, "learning_rate": 3.808637734961691e-06, "loss": 0.0006, "step": 151370 }, { "epoch": 1.237927791634297, "grad_norm": 0.006736015900969505, "learning_rate": 3.807944669087135e-06, "loss": 0.0005, "step": 151380 }, { "epoch": 1.2380095678128962, "grad_norm": 0.014713296666741371, "learning_rate": 3.807251627495715e-06, "loss": 0.0006, "step": 151390 }, { "epoch": 1.2380913439914953, "grad_norm": 0.019201219081878662, "learning_rate": 3.8065586102015495e-06, "loss": 0.0007, "step": 151400 }, { "epoch": 1.2381731201700945, "grad_norm": 0.0024418935645371675, "learning_rate": 3.8058656172187547e-06, "loss": 0.0012, "step": 151410 }, { "epoch": 1.2382548963486937, "grad_norm": 0.021815722808241844, "learning_rate": 3.8051726485614476e-06, "loss": 0.0008, "step": 151420 }, { "epoch": 1.2383366725272928, "grad_norm": 0.030001480132341385, "learning_rate": 3.8044797042437454e-06, "loss": 0.0013, "step": 151430 }, { "epoch": 1.238418448705892, "grad_norm": 0.03323632851243019, "learning_rate": 3.803786784279764e-06, "loss": 0.0007, "step": 151440 }, { "epoch": 1.2385002248844912, "grad_norm": 0.028574805706739426, "learning_rate": 3.8030938886836173e-06, "loss": 0.0006, "step": 151450 }, { "epoch": 1.2385820010630904, "grad_norm": 0.09535875171422958, "learning_rate": 3.8024010174694193e-06, "loss": 0.0007, "step": 151460 }, { "epoch": 1.2386637772416895, "grad_norm": 0.04225165769457817, "learning_rate": 3.801708170651288e-06, "loss": 0.0009, "step": 151470 }, { "epoch": 1.2387455534202887, "grad_norm": 0.07612427324056625, "learning_rate": 3.8010153482433355e-06, "loss": 0.0008, "step": 151480 }, { "epoch": 1.2388273295988879, "grad_norm": 0.048625629395246506, "learning_rate": 3.8003225502596746e-06, "loss": 0.0005, "step": 151490 }, { "epoch": 1.238909105777487, "grad_norm": 0.031123086810112, "learning_rate": 3.79962977671442e-06, "loss": 0.0009, "step": 151500 }, { "epoch": 1.2389908819560862, "grad_norm": 0.030324578285217285, "learning_rate": 3.7989370276216785e-06, "loss": 0.001, "step": 151510 }, { "epoch": 1.2390726581346854, "grad_norm": 0.03588535264134407, "learning_rate": 3.798244302995569e-06, "loss": 0.0025, "step": 151520 }, { "epoch": 1.2391544343132845, "grad_norm": 0.03204195946455002, "learning_rate": 3.7975516028502e-06, "loss": 0.0011, "step": 151530 }, { "epoch": 1.2392362104918837, "grad_norm": 0.054031576961278915, "learning_rate": 3.7968589271996815e-06, "loss": 0.0015, "step": 151540 }, { "epoch": 1.2393179866704829, "grad_norm": 0.05232948437333107, "learning_rate": 3.796166276058124e-06, "loss": 0.0007, "step": 151550 }, { "epoch": 1.239399762849082, "grad_norm": 0.03128756210207939, "learning_rate": 3.7954736494396393e-06, "loss": 0.0012, "step": 151560 }, { "epoch": 1.2394815390276812, "grad_norm": 0.0061107599176466465, "learning_rate": 3.794781047358335e-06, "loss": 0.0004, "step": 151570 }, { "epoch": 1.2395633152062804, "grad_norm": 0.029904546216130257, "learning_rate": 3.794088469828321e-06, "loss": 0.0008, "step": 151580 }, { "epoch": 1.2396450913848795, "grad_norm": 0.30068501830101013, "learning_rate": 3.793395916863702e-06, "loss": 0.0034, "step": 151590 }, { "epoch": 1.2397268675634787, "grad_norm": 0.04669088497757912, "learning_rate": 3.792703388478592e-06, "loss": 0.001, "step": 151600 }, { "epoch": 1.2398086437420779, "grad_norm": 0.05825043097138405, "learning_rate": 3.7920108846870944e-06, "loss": 0.0016, "step": 151610 }, { "epoch": 1.239890419920677, "grad_norm": 0.06743483245372772, "learning_rate": 3.7913184055033186e-06, "loss": 0.0006, "step": 151620 }, { "epoch": 1.2399721960992762, "grad_norm": 0.03775855898857117, "learning_rate": 3.7906259509413662e-06, "loss": 0.0008, "step": 151630 }, { "epoch": 1.2400539722778754, "grad_norm": 0.007375222630798817, "learning_rate": 3.7899335210153485e-06, "loss": 0.0009, "step": 151640 }, { "epoch": 1.2401357484564746, "grad_norm": 0.029973652213811874, "learning_rate": 3.789241115739368e-06, "loss": 0.0012, "step": 151650 }, { "epoch": 1.2402175246350737, "grad_norm": 0.08342308551073074, "learning_rate": 3.7885487351275306e-06, "loss": 0.0008, "step": 151660 }, { "epoch": 1.240299300813673, "grad_norm": 0.0073020183481276035, "learning_rate": 3.7878563791939392e-06, "loss": 0.0005, "step": 151670 }, { "epoch": 1.240381076992272, "grad_norm": 0.08317072689533234, "learning_rate": 3.7871640479526995e-06, "loss": 0.0015, "step": 151680 }, { "epoch": 1.2404628531708712, "grad_norm": 0.09875202178955078, "learning_rate": 3.7864717414179137e-06, "loss": 0.0014, "step": 151690 }, { "epoch": 1.2405446293494704, "grad_norm": 0.049325913190841675, "learning_rate": 3.785779459603686e-06, "loss": 0.0011, "step": 151700 }, { "epoch": 1.2406264055280696, "grad_norm": 0.044317882508039474, "learning_rate": 3.7850872025241143e-06, "loss": 0.0007, "step": 151710 }, { "epoch": 1.240708181706669, "grad_norm": 0.00888146087527275, "learning_rate": 3.784394970193306e-06, "loss": 0.0009, "step": 151720 }, { "epoch": 1.240789957885268, "grad_norm": 0.047428540885448456, "learning_rate": 3.783702762625361e-06, "loss": 0.0008, "step": 151730 }, { "epoch": 1.2408717340638673, "grad_norm": 0.02621261589229107, "learning_rate": 3.7830105798343795e-06, "loss": 0.0013, "step": 151740 }, { "epoch": 1.2409535102424663, "grad_norm": 0.04490842670202255, "learning_rate": 3.7823184218344596e-06, "loss": 0.0009, "step": 151750 }, { "epoch": 1.2410352864210656, "grad_norm": 0.031794626265764236, "learning_rate": 3.7816262886397047e-06, "loss": 0.0008, "step": 151760 }, { "epoch": 1.2411170625996648, "grad_norm": 0.12361247092485428, "learning_rate": 3.780934180264213e-06, "loss": 0.0013, "step": 151770 }, { "epoch": 1.241198838778264, "grad_norm": 0.01823711395263672, "learning_rate": 3.7802420967220825e-06, "loss": 0.0007, "step": 151780 }, { "epoch": 1.2412806149568631, "grad_norm": 0.025157073512673378, "learning_rate": 3.7795500380274113e-06, "loss": 0.0014, "step": 151790 }, { "epoch": 1.2413623911354623, "grad_norm": 0.06428025662899017, "learning_rate": 3.7788580041942984e-06, "loss": 0.0013, "step": 151800 }, { "epoch": 1.2414441673140615, "grad_norm": 0.07164910435676575, "learning_rate": 3.778165995236841e-06, "loss": 0.001, "step": 151810 }, { "epoch": 1.2415259434926607, "grad_norm": 0.02270839922130108, "learning_rate": 3.7774740111691356e-06, "loss": 0.0008, "step": 151820 }, { "epoch": 1.2416077196712598, "grad_norm": 0.23002217710018158, "learning_rate": 3.7767820520052754e-06, "loss": 0.0013, "step": 151830 }, { "epoch": 1.241689495849859, "grad_norm": 0.07495633512735367, "learning_rate": 3.7760901177593623e-06, "loss": 0.0006, "step": 151840 }, { "epoch": 1.2417712720284582, "grad_norm": 0.021893490105867386, "learning_rate": 3.775398208445488e-06, "loss": 0.0011, "step": 151850 }, { "epoch": 1.2418530482070573, "grad_norm": 0.07231070846319199, "learning_rate": 3.774706324077748e-06, "loss": 0.001, "step": 151860 }, { "epoch": 1.2419348243856565, "grad_norm": 0.11883050203323364, "learning_rate": 3.7740144646702336e-06, "loss": 0.0009, "step": 151870 }, { "epoch": 1.2420166005642557, "grad_norm": 0.0211635772138834, "learning_rate": 3.773322630237043e-06, "loss": 0.0006, "step": 151880 }, { "epoch": 1.2420983767428548, "grad_norm": 0.009372363798320293, "learning_rate": 3.7726308207922673e-06, "loss": 0.0009, "step": 151890 }, { "epoch": 1.242180152921454, "grad_norm": 0.10842310637235641, "learning_rate": 3.7719390363499995e-06, "loss": 0.0007, "step": 151900 }, { "epoch": 1.2422619291000532, "grad_norm": 0.04058123379945755, "learning_rate": 3.7712472769243314e-06, "loss": 0.0009, "step": 151910 }, { "epoch": 1.2423437052786523, "grad_norm": 0.018150340765714645, "learning_rate": 3.7705555425293556e-06, "loss": 0.0008, "step": 151920 }, { "epoch": 1.2424254814572515, "grad_norm": 0.00944606401026249, "learning_rate": 3.769863833179163e-06, "loss": 0.0005, "step": 151930 }, { "epoch": 1.2425072576358507, "grad_norm": 0.03724265471100807, "learning_rate": 3.769172148887843e-06, "loss": 0.0005, "step": 151940 }, { "epoch": 1.2425890338144499, "grad_norm": 0.02387755550444126, "learning_rate": 3.768480489669486e-06, "loss": 0.0012, "step": 151950 }, { "epoch": 1.242670809993049, "grad_norm": 0.05269116908311844, "learning_rate": 3.7677888555381836e-06, "loss": 0.0011, "step": 151960 }, { "epoch": 1.2427525861716482, "grad_norm": 0.0869787409901619, "learning_rate": 3.767097246508024e-06, "loss": 0.0009, "step": 151970 }, { "epoch": 1.2428343623502474, "grad_norm": 0.024580039083957672, "learning_rate": 3.766405662593097e-06, "loss": 0.0015, "step": 151980 }, { "epoch": 1.2429161385288465, "grad_norm": 0.03867581486701965, "learning_rate": 3.765714103807486e-06, "loss": 0.0019, "step": 151990 }, { "epoch": 1.2429979147074457, "grad_norm": 0.020221147686243057, "learning_rate": 3.7650225701652844e-06, "loss": 0.0013, "step": 152000 }, { "epoch": 1.2430796908860449, "grad_norm": 0.01482499111443758, "learning_rate": 3.7643310616805766e-06, "loss": 0.0009, "step": 152010 }, { "epoch": 1.243161467064644, "grad_norm": 0.03482973575592041, "learning_rate": 3.7636395783674497e-06, "loss": 0.0006, "step": 152020 }, { "epoch": 1.2432432432432432, "grad_norm": 0.1919124573469162, "learning_rate": 3.7629481202399886e-06, "loss": 0.0011, "step": 152030 }, { "epoch": 1.2433250194218424, "grad_norm": 0.055223628878593445, "learning_rate": 3.76225668731228e-06, "loss": 0.0009, "step": 152040 }, { "epoch": 1.2434067956004415, "grad_norm": 0.014519431628286839, "learning_rate": 3.7615652795984098e-06, "loss": 0.0007, "step": 152050 }, { "epoch": 1.2434885717790407, "grad_norm": 0.018401052802801132, "learning_rate": 3.760873897112461e-06, "loss": 0.0013, "step": 152060 }, { "epoch": 1.2435703479576399, "grad_norm": 0.003931107930839062, "learning_rate": 3.760182539868516e-06, "loss": 0.0008, "step": 152070 }, { "epoch": 1.243652124136239, "grad_norm": 0.0071971220895648, "learning_rate": 3.759491207880663e-06, "loss": 0.0013, "step": 152080 }, { "epoch": 1.2437339003148382, "grad_norm": 0.0037174138706177473, "learning_rate": 3.7587999011629817e-06, "loss": 0.0014, "step": 152090 }, { "epoch": 1.2438156764934374, "grad_norm": 0.004826948046684265, "learning_rate": 3.7581086197295568e-06, "loss": 0.0008, "step": 152100 }, { "epoch": 1.2438974526720366, "grad_norm": 0.07334765791893005, "learning_rate": 3.7574173635944657e-06, "loss": 0.001, "step": 152110 }, { "epoch": 1.2439792288506357, "grad_norm": 0.14674906432628632, "learning_rate": 3.756726132771795e-06, "loss": 0.0015, "step": 152120 }, { "epoch": 1.244061005029235, "grad_norm": 0.01562853343784809, "learning_rate": 3.7560349272756235e-06, "loss": 0.001, "step": 152130 }, { "epoch": 1.244142781207834, "grad_norm": 0.03954457864165306, "learning_rate": 3.7553437471200316e-06, "loss": 0.001, "step": 152140 }, { "epoch": 1.2442245573864334, "grad_norm": 0.06646426022052765, "learning_rate": 3.754652592319098e-06, "loss": 0.0007, "step": 152150 }, { "epoch": 1.2443063335650324, "grad_norm": 0.021081523969769478, "learning_rate": 3.7539614628869046e-06, "loss": 0.0006, "step": 152160 }, { "epoch": 1.2443881097436318, "grad_norm": 0.0520305410027504, "learning_rate": 3.7532703588375296e-06, "loss": 0.0011, "step": 152170 }, { "epoch": 1.2444698859222307, "grad_norm": 0.015593132935464382, "learning_rate": 3.75257928018505e-06, "loss": 0.0009, "step": 152180 }, { "epoch": 1.2445516621008301, "grad_norm": 0.05838356912136078, "learning_rate": 3.7518882269435425e-06, "loss": 0.0008, "step": 152190 }, { "epoch": 1.2446334382794293, "grad_norm": 0.02123885788023472, "learning_rate": 3.751197199127089e-06, "loss": 0.0009, "step": 152200 }, { "epoch": 1.2447152144580285, "grad_norm": 0.055950239300727844, "learning_rate": 3.750506196749763e-06, "loss": 0.0072, "step": 152210 }, { "epoch": 1.2447969906366276, "grad_norm": 0.07135511934757233, "learning_rate": 3.7498152198256427e-06, "loss": 0.0009, "step": 152220 }, { "epoch": 1.2448787668152268, "grad_norm": 0.0050359126180410385, "learning_rate": 3.7491242683687987e-06, "loss": 0.0016, "step": 152230 }, { "epoch": 1.244960542993826, "grad_norm": 0.021006068214774132, "learning_rate": 3.7484333423933134e-06, "loss": 0.0014, "step": 152240 }, { "epoch": 1.2450423191724251, "grad_norm": 0.07053679972887039, "learning_rate": 3.747742441913258e-06, "loss": 0.0009, "step": 152250 }, { "epoch": 1.2451240953510243, "grad_norm": 0.012074030935764313, "learning_rate": 3.7470515669427065e-06, "loss": 0.0016, "step": 152260 }, { "epoch": 1.2452058715296235, "grad_norm": 0.05875369533896446, "learning_rate": 3.746360717495732e-06, "loss": 0.001, "step": 152270 }, { "epoch": 1.2452876477082226, "grad_norm": 0.021936243399977684, "learning_rate": 3.74566989358641e-06, "loss": 0.0013, "step": 152280 }, { "epoch": 1.2453694238868218, "grad_norm": 0.02499319054186344, "learning_rate": 3.7449790952288118e-06, "loss": 0.0009, "step": 152290 }, { "epoch": 1.245451200065421, "grad_norm": 0.15809029340744019, "learning_rate": 3.74428832243701e-06, "loss": 0.0007, "step": 152300 }, { "epoch": 1.2455329762440202, "grad_norm": 0.042813289910554886, "learning_rate": 3.7435975752250726e-06, "loss": 0.001, "step": 152310 }, { "epoch": 1.2456147524226193, "grad_norm": 0.03274289891123772, "learning_rate": 3.7429068536070765e-06, "loss": 0.0014, "step": 152320 }, { "epoch": 1.2456965286012185, "grad_norm": 0.09706085175275803, "learning_rate": 3.7422161575970896e-06, "loss": 0.0012, "step": 152330 }, { "epoch": 1.2457783047798177, "grad_norm": 0.008664743974804878, "learning_rate": 3.741525487209183e-06, "loss": 0.0009, "step": 152340 }, { "epoch": 1.2458600809584168, "grad_norm": 0.042100731283426285, "learning_rate": 3.740834842457422e-06, "loss": 0.0007, "step": 152350 }, { "epoch": 1.245941857137016, "grad_norm": 0.058499306440353394, "learning_rate": 3.7401442233558805e-06, "loss": 0.0017, "step": 152360 }, { "epoch": 1.2460236333156152, "grad_norm": 0.004804656375199556, "learning_rate": 3.7394536299186256e-06, "loss": 0.0016, "step": 152370 }, { "epoch": 1.2461054094942143, "grad_norm": 0.022932883352041245, "learning_rate": 3.7387630621597256e-06, "loss": 0.0009, "step": 152380 }, { "epoch": 1.2461871856728135, "grad_norm": 0.16669298708438873, "learning_rate": 3.7380725200932453e-06, "loss": 0.0011, "step": 152390 }, { "epoch": 1.2462689618514127, "grad_norm": 0.03401662036776543, "learning_rate": 3.737382003733254e-06, "loss": 0.0005, "step": 152400 }, { "epoch": 1.2463507380300118, "grad_norm": 0.05167157202959061, "learning_rate": 3.736691513093819e-06, "loss": 0.0014, "step": 152410 }, { "epoch": 1.246432514208611, "grad_norm": 0.025019319728016853, "learning_rate": 3.7360010481890043e-06, "loss": 0.001, "step": 152420 }, { "epoch": 1.2465142903872102, "grad_norm": 0.11432540416717529, "learning_rate": 3.735310609032873e-06, "loss": 0.0011, "step": 152430 }, { "epoch": 1.2465960665658093, "grad_norm": 0.03438450023531914, "learning_rate": 3.734620195639495e-06, "loss": 0.0009, "step": 152440 }, { "epoch": 1.2466778427444085, "grad_norm": 0.022126276046037674, "learning_rate": 3.7339298080229326e-06, "loss": 0.0008, "step": 152450 }, { "epoch": 1.2467596189230077, "grad_norm": 0.07650325447320938, "learning_rate": 3.7332394461972492e-06, "loss": 0.0011, "step": 152460 }, { "epoch": 1.2468413951016069, "grad_norm": 0.0481855571269989, "learning_rate": 3.7325491101765055e-06, "loss": 0.0009, "step": 152470 }, { "epoch": 1.246923171280206, "grad_norm": 0.017218468710780144, "learning_rate": 3.7318587999747684e-06, "loss": 0.0009, "step": 152480 }, { "epoch": 1.2470049474588052, "grad_norm": 0.024729320779442787, "learning_rate": 3.731168515606099e-06, "loss": 0.002, "step": 152490 }, { "epoch": 1.2470867236374044, "grad_norm": 0.03307109326124191, "learning_rate": 3.7304782570845575e-06, "loss": 0.0004, "step": 152500 }, { "epoch": 1.2471684998160035, "grad_norm": 0.036277592182159424, "learning_rate": 3.7297880244242056e-06, "loss": 0.001, "step": 152510 }, { "epoch": 1.2472502759946027, "grad_norm": 0.35220953822135925, "learning_rate": 3.7290978176391046e-06, "loss": 0.0007, "step": 152520 }, { "epoch": 1.2473320521732019, "grad_norm": 0.02797038108110428, "learning_rate": 3.728407636743314e-06, "loss": 0.0015, "step": 152530 }, { "epoch": 1.247413828351801, "grad_norm": 0.031159380450844765, "learning_rate": 3.727717481750894e-06, "loss": 0.001, "step": 152540 }, { "epoch": 1.2474956045304002, "grad_norm": 0.01981183886528015, "learning_rate": 3.7270273526759003e-06, "loss": 0.0006, "step": 152550 }, { "epoch": 1.2475773807089994, "grad_norm": 0.020805995911359787, "learning_rate": 3.7263372495323968e-06, "loss": 0.0017, "step": 152560 }, { "epoch": 1.2476591568875985, "grad_norm": 0.0425700768828392, "learning_rate": 3.7256471723344382e-06, "loss": 0.0009, "step": 152570 }, { "epoch": 1.247740933066198, "grad_norm": 0.030245084315538406, "learning_rate": 3.7249571210960846e-06, "loss": 0.0004, "step": 152580 }, { "epoch": 1.2478227092447969, "grad_norm": 0.005457005929201841, "learning_rate": 3.7242670958313864e-06, "loss": 0.0006, "step": 152590 }, { "epoch": 1.2479044854233963, "grad_norm": 0.07262226939201355, "learning_rate": 3.723577096554407e-06, "loss": 0.0009, "step": 152600 }, { "epoch": 1.2479862616019952, "grad_norm": 0.03408411890268326, "learning_rate": 3.7228871232792002e-06, "loss": 0.0007, "step": 152610 }, { "epoch": 1.2480680377805946, "grad_norm": 0.03968662768602371, "learning_rate": 3.7221971760198206e-06, "loss": 0.0007, "step": 152620 }, { "epoch": 1.2481498139591938, "grad_norm": 0.027942953631281853, "learning_rate": 3.721507254790322e-06, "loss": 0.0009, "step": 152630 }, { "epoch": 1.248231590137793, "grad_norm": 0.03140610456466675, "learning_rate": 3.7208173596047608e-06, "loss": 0.0008, "step": 152640 }, { "epoch": 1.2483133663163921, "grad_norm": 0.07080257683992386, "learning_rate": 3.72012749047719e-06, "loss": 0.0007, "step": 152650 }, { "epoch": 1.2483951424949913, "grad_norm": 0.01608707383275032, "learning_rate": 3.719437647421662e-06, "loss": 0.0009, "step": 152660 }, { "epoch": 1.2484769186735905, "grad_norm": 0.03194350376725197, "learning_rate": 3.7187478304522285e-06, "loss": 0.0012, "step": 152670 }, { "epoch": 1.2485586948521896, "grad_norm": 0.05281206592917442, "learning_rate": 3.7180580395829446e-06, "loss": 0.0013, "step": 152680 }, { "epoch": 1.2486404710307888, "grad_norm": 0.04761269688606262, "learning_rate": 3.7173682748278617e-06, "loss": 0.0009, "step": 152690 }, { "epoch": 1.248722247209388, "grad_norm": 0.03367099165916443, "learning_rate": 3.7166785362010293e-06, "loss": 0.0008, "step": 152700 }, { "epoch": 1.2488040233879871, "grad_norm": 0.08410272747278214, "learning_rate": 3.7159888237164957e-06, "loss": 0.0009, "step": 152710 }, { "epoch": 1.2488857995665863, "grad_norm": 0.009289899840950966, "learning_rate": 3.715299137388316e-06, "loss": 0.0008, "step": 152720 }, { "epoch": 1.2489675757451855, "grad_norm": 0.060544367879629135, "learning_rate": 3.7146094772305365e-06, "loss": 0.002, "step": 152730 }, { "epoch": 1.2490493519237846, "grad_norm": 0.027135014533996582, "learning_rate": 3.7139198432572064e-06, "loss": 0.0008, "step": 152740 }, { "epoch": 1.2491311281023838, "grad_norm": 0.021507106721401215, "learning_rate": 3.7132302354823746e-06, "loss": 0.0007, "step": 152750 }, { "epoch": 1.249212904280983, "grad_norm": 0.12813979387283325, "learning_rate": 3.712540653920089e-06, "loss": 0.0009, "step": 152760 }, { "epoch": 1.2492946804595821, "grad_norm": 0.006009883712977171, "learning_rate": 3.711851098584397e-06, "loss": 0.0004, "step": 152770 }, { "epoch": 1.2493764566381813, "grad_norm": 0.0017006859416142106, "learning_rate": 3.7111615694893453e-06, "loss": 0.0008, "step": 152780 }, { "epoch": 1.2494582328167805, "grad_norm": 0.0009516208083368838, "learning_rate": 3.710472066648978e-06, "loss": 0.0011, "step": 152790 }, { "epoch": 1.2495400089953796, "grad_norm": 0.023380490019917488, "learning_rate": 3.7097825900773445e-06, "loss": 0.001, "step": 152800 }, { "epoch": 1.2496217851739788, "grad_norm": 0.12383171170949936, "learning_rate": 3.709093139788489e-06, "loss": 0.0035, "step": 152810 }, { "epoch": 1.249703561352578, "grad_norm": 0.039759311825037, "learning_rate": 3.7084037157964557e-06, "loss": 0.0009, "step": 152820 }, { "epoch": 1.2497853375311772, "grad_norm": 0.07230095565319061, "learning_rate": 3.7077143181152864e-06, "loss": 0.0015, "step": 152830 }, { "epoch": 1.2498671137097763, "grad_norm": 0.029275264590978622, "learning_rate": 3.7070249467590287e-06, "loss": 0.0013, "step": 152840 }, { "epoch": 1.2499488898883755, "grad_norm": 0.1031637117266655, "learning_rate": 3.7063356017417233e-06, "loss": 0.001, "step": 152850 }, { "epoch": 1.2500306660669747, "grad_norm": 0.01232550572603941, "learning_rate": 3.7056462830774137e-06, "loss": 0.0004, "step": 152860 }, { "epoch": 1.2501124422455738, "grad_norm": 0.0134665472432971, "learning_rate": 3.7049569907801404e-06, "loss": 0.0007, "step": 152870 }, { "epoch": 1.250194218424173, "grad_norm": 0.12397777289152145, "learning_rate": 3.704267724863947e-06, "loss": 0.0019, "step": 152880 }, { "epoch": 1.2502759946027722, "grad_norm": 0.10668889433145523, "learning_rate": 3.703578485342873e-06, "loss": 0.001, "step": 152890 }, { "epoch": 1.2503577707813713, "grad_norm": 0.033596381545066833, "learning_rate": 3.702889272230959e-06, "loss": 0.0009, "step": 152900 }, { "epoch": 1.2504395469599705, "grad_norm": 0.06966749578714371, "learning_rate": 3.702200085542243e-06, "loss": 0.0011, "step": 152910 }, { "epoch": 1.2505213231385697, "grad_norm": 0.04393333941698074, "learning_rate": 3.7015109252907675e-06, "loss": 0.0008, "step": 152920 }, { "epoch": 1.2506030993171688, "grad_norm": 0.04007519781589508, "learning_rate": 3.7008217914905707e-06, "loss": 0.001, "step": 152930 }, { "epoch": 1.250684875495768, "grad_norm": 0.05715421214699745, "learning_rate": 3.7001326841556918e-06, "loss": 0.0019, "step": 152940 }, { "epoch": 1.2507666516743672, "grad_norm": 0.036713484674692154, "learning_rate": 3.6994436033001647e-06, "loss": 0.0006, "step": 152950 }, { "epoch": 1.2508484278529663, "grad_norm": 0.08978521078824997, "learning_rate": 3.6987545489380262e-06, "loss": 0.0011, "step": 152960 }, { "epoch": 1.2509302040315657, "grad_norm": 0.0011013250332325697, "learning_rate": 3.6980655210833175e-06, "loss": 0.0005, "step": 152970 }, { "epoch": 1.2510119802101647, "grad_norm": 0.026078276336193085, "learning_rate": 3.697376519750073e-06, "loss": 0.0009, "step": 152980 }, { "epoch": 1.251093756388764, "grad_norm": 0.03949500992894173, "learning_rate": 3.696687544952327e-06, "loss": 0.001, "step": 152990 }, { "epoch": 1.251175532567363, "grad_norm": 0.030238298699259758, "learning_rate": 3.6959985967041146e-06, "loss": 0.0012, "step": 153000 }, { "epoch": 1.2512573087459624, "grad_norm": 0.06596489250659943, "learning_rate": 3.6953096750194712e-06, "loss": 0.0013, "step": 153010 }, { "epoch": 1.2513390849245614, "grad_norm": 0.1172623336315155, "learning_rate": 3.6946207799124305e-06, "loss": 0.0011, "step": 153020 }, { "epoch": 1.2514208611031608, "grad_norm": 0.03697970137000084, "learning_rate": 3.6939319113970257e-06, "loss": 0.0012, "step": 153030 }, { "epoch": 1.2515026372817597, "grad_norm": 0.018289944157004356, "learning_rate": 3.6932430694872865e-06, "loss": 0.0005, "step": 153040 }, { "epoch": 1.251584413460359, "grad_norm": 0.017826102674007416, "learning_rate": 3.6925542541972514e-06, "loss": 0.0005, "step": 153050 }, { "epoch": 1.251666189638958, "grad_norm": 0.004886506590992212, "learning_rate": 3.6918654655409497e-06, "loss": 0.002, "step": 153060 }, { "epoch": 1.2517479658175574, "grad_norm": 0.15100577473640442, "learning_rate": 3.691176703532411e-06, "loss": 0.002, "step": 153070 }, { "epoch": 1.2518297419961566, "grad_norm": 0.007074335590004921, "learning_rate": 3.6904879681856637e-06, "loss": 0.0018, "step": 153080 }, { "epoch": 1.2519115181747558, "grad_norm": 0.018132008612155914, "learning_rate": 3.689799259514744e-06, "loss": 0.0017, "step": 153090 }, { "epoch": 1.251993294353355, "grad_norm": 0.02507605403661728, "learning_rate": 3.689110577533678e-06, "loss": 0.0021, "step": 153100 }, { "epoch": 1.252075070531954, "grad_norm": 0.031556643545627594, "learning_rate": 3.6884219222564945e-06, "loss": 0.0009, "step": 153110 }, { "epoch": 1.2521568467105533, "grad_norm": 0.18424664437770844, "learning_rate": 3.6877332936972223e-06, "loss": 0.0008, "step": 153120 }, { "epoch": 1.2522386228891524, "grad_norm": 0.02107390947639942, "learning_rate": 3.6870446918698897e-06, "loss": 0.0007, "step": 153130 }, { "epoch": 1.2523203990677516, "grad_norm": 0.04134945943951607, "learning_rate": 3.686356116788524e-06, "loss": 0.0006, "step": 153140 }, { "epoch": 1.2524021752463508, "grad_norm": 0.05927899107336998, "learning_rate": 3.685667568467153e-06, "loss": 0.0011, "step": 153150 }, { "epoch": 1.25248395142495, "grad_norm": 0.02418220229446888, "learning_rate": 3.6849790469197993e-06, "loss": 0.0019, "step": 153160 }, { "epoch": 1.2525657276035491, "grad_norm": 0.047815147787332535, "learning_rate": 3.6842905521604933e-06, "loss": 0.0009, "step": 153170 }, { "epoch": 1.2526475037821483, "grad_norm": 0.0021220191847532988, "learning_rate": 3.6836020842032592e-06, "loss": 0.0014, "step": 153180 }, { "epoch": 1.2527292799607475, "grad_norm": 0.031190622597932816, "learning_rate": 3.6829136430621205e-06, "loss": 0.0007, "step": 153190 }, { "epoch": 1.2528110561393466, "grad_norm": 0.007951373234391212, "learning_rate": 3.6822252287510985e-06, "loss": 0.0006, "step": 153200 }, { "epoch": 1.2528928323179458, "grad_norm": 0.04732269421219826, "learning_rate": 3.6815368412842223e-06, "loss": 0.0009, "step": 153210 }, { "epoch": 1.252974608496545, "grad_norm": 0.09428440779447556, "learning_rate": 3.680848480675512e-06, "loss": 0.0015, "step": 153220 }, { "epoch": 1.2530563846751441, "grad_norm": 0.0545048825442791, "learning_rate": 3.6801601469389902e-06, "loss": 0.0005, "step": 153230 }, { "epoch": 1.2531381608537433, "grad_norm": 0.0865592285990715, "learning_rate": 3.6794718400886784e-06, "loss": 0.0014, "step": 153240 }, { "epoch": 1.2532199370323425, "grad_norm": 0.04534945636987686, "learning_rate": 3.6787835601386003e-06, "loss": 0.0008, "step": 153250 }, { "epoch": 1.2533017132109416, "grad_norm": 0.04141493886709213, "learning_rate": 3.6780953071027735e-06, "loss": 0.0007, "step": 153260 }, { "epoch": 1.2533834893895408, "grad_norm": 0.018680403009057045, "learning_rate": 3.6774070809952214e-06, "loss": 0.0003, "step": 153270 }, { "epoch": 1.25346526556814, "grad_norm": 0.14112234115600586, "learning_rate": 3.6767188818299585e-06, "loss": 0.0019, "step": 153280 }, { "epoch": 1.2535470417467391, "grad_norm": 0.003984980285167694, "learning_rate": 3.67603070962101e-06, "loss": 0.0009, "step": 153290 }, { "epoch": 1.2536288179253383, "grad_norm": 0.05724741891026497, "learning_rate": 3.6753425643823935e-06, "loss": 0.0009, "step": 153300 }, { "epoch": 1.2537105941039375, "grad_norm": 0.04730065539479256, "learning_rate": 3.6746544461281255e-06, "loss": 0.0009, "step": 153310 }, { "epoch": 1.2537923702825367, "grad_norm": 0.03032001666724682, "learning_rate": 3.6739663548722203e-06, "loss": 0.0006, "step": 153320 }, { "epoch": 1.2538741464611358, "grad_norm": 0.02011369913816452, "learning_rate": 3.673278290628701e-06, "loss": 0.0008, "step": 153330 }, { "epoch": 1.253955922639735, "grad_norm": 0.049867890775203705, "learning_rate": 3.6725902534115817e-06, "loss": 0.0011, "step": 153340 }, { "epoch": 1.2540376988183342, "grad_norm": 0.04908376559615135, "learning_rate": 3.6719022432348773e-06, "loss": 0.0011, "step": 153350 }, { "epoch": 1.2541194749969333, "grad_norm": 0.02117021381855011, "learning_rate": 3.6712142601126026e-06, "loss": 0.0014, "step": 153360 }, { "epoch": 1.2542012511755325, "grad_norm": 0.03614208847284317, "learning_rate": 3.6705263040587747e-06, "loss": 0.0008, "step": 153370 }, { "epoch": 1.2542830273541317, "grad_norm": 0.04320281744003296, "learning_rate": 3.6698383750874066e-06, "loss": 0.0008, "step": 153380 }, { "epoch": 1.2543648035327308, "grad_norm": 0.04953921213746071, "learning_rate": 3.6691504732125127e-06, "loss": 0.001, "step": 153390 }, { "epoch": 1.2544465797113302, "grad_norm": 0.03283075988292694, "learning_rate": 3.6684625984481026e-06, "loss": 0.0006, "step": 153400 }, { "epoch": 1.2545283558899292, "grad_norm": 0.022845182567834854, "learning_rate": 3.667774750808194e-06, "loss": 0.0011, "step": 153410 }, { "epoch": 1.2546101320685286, "grad_norm": 0.05193376541137695, "learning_rate": 3.667086930306798e-06, "loss": 0.0011, "step": 153420 }, { "epoch": 1.2546919082471275, "grad_norm": 0.024142853915691376, "learning_rate": 3.666399136957923e-06, "loss": 0.0007, "step": 153430 }, { "epoch": 1.254773684425727, "grad_norm": 0.01897803507745266, "learning_rate": 3.6657113707755796e-06, "loss": 0.0012, "step": 153440 }, { "epoch": 1.2548554606043258, "grad_norm": 0.09445421397686005, "learning_rate": 3.665023631773783e-06, "loss": 0.0011, "step": 153450 }, { "epoch": 1.2549372367829252, "grad_norm": 0.16891154646873474, "learning_rate": 3.6643359199665397e-06, "loss": 0.0014, "step": 153460 }, { "epoch": 1.2550190129615242, "grad_norm": 0.007359189912676811, "learning_rate": 3.663648235367859e-06, "loss": 0.0008, "step": 153470 }, { "epoch": 1.2551007891401236, "grad_norm": 0.049895137548446655, "learning_rate": 3.662960577991749e-06, "loss": 0.0009, "step": 153480 }, { "epoch": 1.2551825653187225, "grad_norm": 0.06056131049990654, "learning_rate": 3.66227294785222e-06, "loss": 0.0006, "step": 153490 }, { "epoch": 1.255264341497322, "grad_norm": 0.012397379614412785, "learning_rate": 3.661585344963279e-06, "loss": 0.0013, "step": 153500 }, { "epoch": 1.255346117675921, "grad_norm": 0.021179817616939545, "learning_rate": 3.6608977693389316e-06, "loss": 0.0012, "step": 153510 }, { "epoch": 1.2554278938545202, "grad_norm": 0.01269776001572609, "learning_rate": 3.6602102209931833e-06, "loss": 0.0008, "step": 153520 }, { "epoch": 1.2555096700331194, "grad_norm": 0.04819919914007187, "learning_rate": 3.6595226999400437e-06, "loss": 0.0009, "step": 153530 }, { "epoch": 1.2555914462117186, "grad_norm": 0.054030612111091614, "learning_rate": 3.658835206193518e-06, "loss": 0.0018, "step": 153540 }, { "epoch": 1.2556732223903178, "grad_norm": 0.016070319339632988, "learning_rate": 3.6581477397676078e-06, "loss": 0.001, "step": 153550 }, { "epoch": 1.255754998568917, "grad_norm": 0.015167896635830402, "learning_rate": 3.6574603006763165e-06, "loss": 0.0007, "step": 153560 }, { "epoch": 1.255836774747516, "grad_norm": 0.0849972516298294, "learning_rate": 3.656772888933653e-06, "loss": 0.0012, "step": 153570 }, { "epoch": 1.2559185509261153, "grad_norm": 0.11807897686958313, "learning_rate": 3.656085504553617e-06, "loss": 0.001, "step": 153580 }, { "epoch": 1.2560003271047144, "grad_norm": 0.04560258239507675, "learning_rate": 3.655398147550212e-06, "loss": 0.0008, "step": 153590 }, { "epoch": 1.2560821032833136, "grad_norm": 0.003938178066164255, "learning_rate": 3.654710817937438e-06, "loss": 0.0016, "step": 153600 }, { "epoch": 1.2561638794619128, "grad_norm": 0.0361994206905365, "learning_rate": 3.6540235157292995e-06, "loss": 0.0005, "step": 153610 }, { "epoch": 1.256245655640512, "grad_norm": 0.03528924286365509, "learning_rate": 3.6533362409397966e-06, "loss": 0.0011, "step": 153620 }, { "epoch": 1.256327431819111, "grad_norm": 0.003454473800957203, "learning_rate": 3.6526489935829286e-06, "loss": 0.0007, "step": 153630 }, { "epoch": 1.2564092079977103, "grad_norm": 0.03906570374965668, "learning_rate": 3.6519617736726937e-06, "loss": 0.0007, "step": 153640 }, { "epoch": 1.2564909841763094, "grad_norm": 0.15003523230552673, "learning_rate": 3.6512745812230955e-06, "loss": 0.0019, "step": 153650 }, { "epoch": 1.2565727603549086, "grad_norm": 0.02948911115527153, "learning_rate": 3.6505874162481314e-06, "loss": 0.0004, "step": 153660 }, { "epoch": 1.2566545365335078, "grad_norm": 0.04379216209053993, "learning_rate": 3.6499002787617972e-06, "loss": 0.0015, "step": 153670 }, { "epoch": 1.256736312712107, "grad_norm": 0.04280755668878555, "learning_rate": 3.6492131687780896e-06, "loss": 0.0004, "step": 153680 }, { "epoch": 1.2568180888907061, "grad_norm": 0.009163801558315754, "learning_rate": 3.64852608631101e-06, "loss": 0.0006, "step": 153690 }, { "epoch": 1.2568998650693053, "grad_norm": 0.023080943152308464, "learning_rate": 3.6478390313745527e-06, "loss": 0.0009, "step": 153700 }, { "epoch": 1.2569816412479045, "grad_norm": 0.07544626295566559, "learning_rate": 3.6471520039827137e-06, "loss": 0.001, "step": 153710 }, { "epoch": 1.2570634174265036, "grad_norm": 0.015147127211093903, "learning_rate": 3.6464650041494863e-06, "loss": 0.001, "step": 153720 }, { "epoch": 1.2571451936051028, "grad_norm": 0.061009764671325684, "learning_rate": 3.645778031888868e-06, "loss": 0.0015, "step": 153730 }, { "epoch": 1.257226969783702, "grad_norm": 0.038017235696315765, "learning_rate": 3.6450910872148525e-06, "loss": 0.0004, "step": 153740 }, { "epoch": 1.2573087459623011, "grad_norm": 0.06434550881385803, "learning_rate": 3.6444041701414324e-06, "loss": 0.001, "step": 153750 }, { "epoch": 1.2573905221409003, "grad_norm": 0.03185214474797249, "learning_rate": 3.6437172806825992e-06, "loss": 0.0006, "step": 153760 }, { "epoch": 1.2574722983194995, "grad_norm": 0.0034273057244718075, "learning_rate": 3.6430304188523503e-06, "loss": 0.0005, "step": 153770 }, { "epoch": 1.2575540744980986, "grad_norm": 0.0645020380616188, "learning_rate": 3.6423435846646748e-06, "loss": 0.0009, "step": 153780 }, { "epoch": 1.2576358506766978, "grad_norm": 0.06363935023546219, "learning_rate": 3.641656778133563e-06, "loss": 0.0006, "step": 153790 }, { "epoch": 1.257717626855297, "grad_norm": 0.015166870318353176, "learning_rate": 3.640969999273005e-06, "loss": 0.0011, "step": 153800 }, { "epoch": 1.2577994030338961, "grad_norm": 0.06521903723478317, "learning_rate": 3.6402832480969953e-06, "loss": 0.0006, "step": 153810 }, { "epoch": 1.2578811792124953, "grad_norm": 0.02173074334859848, "learning_rate": 3.6395965246195207e-06, "loss": 0.0006, "step": 153820 }, { "epoch": 1.2579629553910947, "grad_norm": 0.06339781731367111, "learning_rate": 3.638909828854571e-06, "loss": 0.0008, "step": 153830 }, { "epoch": 1.2580447315696937, "grad_norm": 0.005383630283176899, "learning_rate": 3.6382231608161335e-06, "loss": 0.0017, "step": 153840 }, { "epoch": 1.258126507748293, "grad_norm": 0.006057626102119684, "learning_rate": 3.637536520518197e-06, "loss": 0.0003, "step": 153850 }, { "epoch": 1.258208283926892, "grad_norm": 0.018917188048362732, "learning_rate": 3.6368499079747498e-06, "loss": 0.0008, "step": 153860 }, { "epoch": 1.2582900601054914, "grad_norm": 0.08939718455076218, "learning_rate": 3.6361633231997783e-06, "loss": 0.0008, "step": 153870 }, { "epoch": 1.2583718362840903, "grad_norm": 0.00749351317062974, "learning_rate": 3.635476766207266e-06, "loss": 0.0005, "step": 153880 }, { "epoch": 1.2584536124626897, "grad_norm": 0.003916626330465078, "learning_rate": 3.6347902370112032e-06, "loss": 0.0011, "step": 153890 }, { "epoch": 1.2585353886412887, "grad_norm": 0.00398412998765707, "learning_rate": 3.6341037356255742e-06, "loss": 0.0011, "step": 153900 }, { "epoch": 1.258617164819888, "grad_norm": 0.020725294947624207, "learning_rate": 3.6334172620643613e-06, "loss": 0.0006, "step": 153910 }, { "epoch": 1.258698940998487, "grad_norm": 0.02089342474937439, "learning_rate": 3.6327308163415478e-06, "loss": 0.0014, "step": 153920 }, { "epoch": 1.2587807171770864, "grad_norm": 0.021370772272348404, "learning_rate": 3.63204439847112e-06, "loss": 0.0009, "step": 153930 }, { "epoch": 1.2588624933556856, "grad_norm": 0.049817804247140884, "learning_rate": 3.631358008467061e-06, "loss": 0.0008, "step": 153940 }, { "epoch": 1.2589442695342847, "grad_norm": 0.035570621490478516, "learning_rate": 3.6306716463433512e-06, "loss": 0.0011, "step": 153950 }, { "epoch": 1.259026045712884, "grad_norm": 0.09629767388105392, "learning_rate": 3.6299853121139717e-06, "loss": 0.0013, "step": 153960 }, { "epoch": 1.259107821891483, "grad_norm": 0.06523394584655762, "learning_rate": 3.629299005792907e-06, "loss": 0.0007, "step": 153970 }, { "epoch": 1.2591895980700822, "grad_norm": 0.0541529655456543, "learning_rate": 3.628612727394134e-06, "loss": 0.0009, "step": 153980 }, { "epoch": 1.2592713742486814, "grad_norm": 0.20850390195846558, "learning_rate": 3.627926476931636e-06, "loss": 0.0028, "step": 153990 }, { "epoch": 1.2593531504272806, "grad_norm": 0.004757848102599382, "learning_rate": 3.6272402544193883e-06, "loss": 0.001, "step": 154000 }, { "epoch": 1.2594349266058797, "grad_norm": 0.04514557868242264, "learning_rate": 3.6265540598713754e-06, "loss": 0.001, "step": 154010 }, { "epoch": 1.259516702784479, "grad_norm": 0.05203848332166672, "learning_rate": 3.6258678933015723e-06, "loss": 0.001, "step": 154020 }, { "epoch": 1.259598478963078, "grad_norm": 0.014085717499256134, "learning_rate": 3.6251817547239566e-06, "loss": 0.0005, "step": 154030 }, { "epoch": 1.2596802551416773, "grad_norm": 0.05769683048129082, "learning_rate": 3.624495644152504e-06, "loss": 0.0019, "step": 154040 }, { "epoch": 1.2597620313202764, "grad_norm": 0.0995282307267189, "learning_rate": 3.623809561601196e-06, "loss": 0.0009, "step": 154050 }, { "epoch": 1.2598438074988756, "grad_norm": 0.0687532052397728, "learning_rate": 3.6231235070840053e-06, "loss": 0.0012, "step": 154060 }, { "epoch": 1.2599255836774748, "grad_norm": 0.01755298487842083, "learning_rate": 3.6224374806149076e-06, "loss": 0.0015, "step": 154070 }, { "epoch": 1.260007359856074, "grad_norm": 0.06847160309553146, "learning_rate": 3.6217514822078777e-06, "loss": 0.0006, "step": 154080 }, { "epoch": 1.260089136034673, "grad_norm": 0.03178482875227928, "learning_rate": 3.621065511876891e-06, "loss": 0.0012, "step": 154090 }, { "epoch": 1.2601709122132723, "grad_norm": 0.02375287190079689, "learning_rate": 3.620379569635921e-06, "loss": 0.0007, "step": 154100 }, { "epoch": 1.2602526883918714, "grad_norm": 0.07267889380455017, "learning_rate": 3.6196936554989403e-06, "loss": 0.0011, "step": 154110 }, { "epoch": 1.2603344645704706, "grad_norm": 0.040383897721767426, "learning_rate": 3.6190077694799197e-06, "loss": 0.0009, "step": 154120 }, { "epoch": 1.2604162407490698, "grad_norm": 0.017606711015105247, "learning_rate": 3.6183219115928368e-06, "loss": 0.0004, "step": 154130 }, { "epoch": 1.260498016927669, "grad_norm": 0.05895744636654854, "learning_rate": 3.617636081851659e-06, "loss": 0.0014, "step": 154140 }, { "epoch": 1.260579793106268, "grad_norm": 0.07276492565870285, "learning_rate": 3.616950280270357e-06, "loss": 0.0008, "step": 154150 }, { "epoch": 1.2606615692848673, "grad_norm": 0.005628226324915886, "learning_rate": 3.6162645068628998e-06, "loss": 0.0012, "step": 154160 }, { "epoch": 1.2607433454634664, "grad_norm": 0.03495927155017853, "learning_rate": 3.6155787616432623e-06, "loss": 0.0008, "step": 154170 }, { "epoch": 1.2608251216420656, "grad_norm": 0.01990446075797081, "learning_rate": 3.6148930446254107e-06, "loss": 0.002, "step": 154180 }, { "epoch": 1.2609068978206648, "grad_norm": 0.01952151022851467, "learning_rate": 3.614207355823313e-06, "loss": 0.0007, "step": 154190 }, { "epoch": 1.260988673999264, "grad_norm": 0.018663180992007256, "learning_rate": 3.6135216952509367e-06, "loss": 0.001, "step": 154200 }, { "epoch": 1.2610704501778631, "grad_norm": 0.0433584563434124, "learning_rate": 3.6128360629222525e-06, "loss": 0.0007, "step": 154210 }, { "epoch": 1.2611522263564623, "grad_norm": 0.001791235408745706, "learning_rate": 3.6121504588512245e-06, "loss": 0.0011, "step": 154220 }, { "epoch": 1.2612340025350615, "grad_norm": 0.037130579352378845, "learning_rate": 3.6114648830518196e-06, "loss": 0.0011, "step": 154230 }, { "epoch": 1.2613157787136606, "grad_norm": 0.011009163223206997, "learning_rate": 3.610779335538002e-06, "loss": 0.0014, "step": 154240 }, { "epoch": 1.2613975548922598, "grad_norm": 0.011791938915848732, "learning_rate": 3.610093816323742e-06, "loss": 0.0023, "step": 154250 }, { "epoch": 1.2614793310708592, "grad_norm": 0.0538342148065567, "learning_rate": 3.609408325422999e-06, "loss": 0.0007, "step": 154260 }, { "epoch": 1.2615611072494581, "grad_norm": 0.01578475907444954, "learning_rate": 3.6087228628497388e-06, "loss": 0.0008, "step": 154270 }, { "epoch": 1.2616428834280575, "grad_norm": 0.07650598883628845, "learning_rate": 3.608037428617923e-06, "loss": 0.001, "step": 154280 }, { "epoch": 1.2617246596066565, "grad_norm": 0.05998731777071953, "learning_rate": 3.6073520227415183e-06, "loss": 0.001, "step": 154290 }, { "epoch": 1.2618064357852559, "grad_norm": 0.08915723860263824, "learning_rate": 3.6066666452344847e-06, "loss": 0.0012, "step": 154300 }, { "epoch": 1.2618882119638548, "grad_norm": 0.11204737424850464, "learning_rate": 3.6059812961107845e-06, "loss": 0.0015, "step": 154310 }, { "epoch": 1.2619699881424542, "grad_norm": 0.12332404404878616, "learning_rate": 3.605295975384377e-06, "loss": 0.002, "step": 154320 }, { "epoch": 1.2620517643210531, "grad_norm": 0.12934045493602753, "learning_rate": 3.604610683069225e-06, "loss": 0.0006, "step": 154330 }, { "epoch": 1.2621335404996525, "grad_norm": 0.00913049653172493, "learning_rate": 3.6039254191792884e-06, "loss": 0.0013, "step": 154340 }, { "epoch": 1.2622153166782515, "grad_norm": 0.015912368893623352, "learning_rate": 3.603240183728525e-06, "loss": 0.001, "step": 154350 }, { "epoch": 1.2622970928568509, "grad_norm": 0.03605727478861809, "learning_rate": 3.6025549767308948e-06, "loss": 0.001, "step": 154360 }, { "epoch": 1.26237886903545, "grad_norm": 0.051587894558906555, "learning_rate": 3.6018697982003547e-06, "loss": 0.0009, "step": 154370 }, { "epoch": 1.2624606452140492, "grad_norm": 0.025520214810967445, "learning_rate": 3.601184648150864e-06, "loss": 0.0008, "step": 154380 }, { "epoch": 1.2625424213926484, "grad_norm": 0.0383916012942791, "learning_rate": 3.6004995265963792e-06, "loss": 0.0008, "step": 154390 }, { "epoch": 1.2626241975712476, "grad_norm": 0.035848576575517654, "learning_rate": 3.599814433550857e-06, "loss": 0.0005, "step": 154400 }, { "epoch": 1.2627059737498467, "grad_norm": 0.038286611437797546, "learning_rate": 3.59912936902825e-06, "loss": 0.0005, "step": 154410 }, { "epoch": 1.262787749928446, "grad_norm": 0.022074244916439056, "learning_rate": 3.5984443330425184e-06, "loss": 0.0013, "step": 154420 }, { "epoch": 1.262869526107045, "grad_norm": 0.009514561854302883, "learning_rate": 3.597759325607616e-06, "loss": 0.0008, "step": 154430 }, { "epoch": 1.2629513022856442, "grad_norm": 0.0323067344725132, "learning_rate": 3.597074346737495e-06, "loss": 0.0007, "step": 154440 }, { "epoch": 1.2630330784642434, "grad_norm": 0.007724998518824577, "learning_rate": 3.5963893964461094e-06, "loss": 0.0004, "step": 154450 }, { "epoch": 1.2631148546428426, "grad_norm": 0.035472072660923004, "learning_rate": 3.5957044747474135e-06, "loss": 0.0006, "step": 154460 }, { "epoch": 1.2631966308214417, "grad_norm": 0.010434452444314957, "learning_rate": 3.5950195816553592e-06, "loss": 0.0009, "step": 154470 }, { "epoch": 1.263278407000041, "grad_norm": 0.005920730996876955, "learning_rate": 3.5943347171838977e-06, "loss": 0.0009, "step": 154480 }, { "epoch": 1.26336018317864, "grad_norm": 0.010890644043684006, "learning_rate": 3.59364988134698e-06, "loss": 0.0008, "step": 154490 }, { "epoch": 1.2634419593572392, "grad_norm": 0.07947846502065659, "learning_rate": 3.5929650741585577e-06, "loss": 0.0011, "step": 154500 }, { "epoch": 1.2635237355358384, "grad_norm": 0.02331381104886532, "learning_rate": 3.592280295632581e-06, "loss": 0.0006, "step": 154510 }, { "epoch": 1.2636055117144376, "grad_norm": 0.005997054278850555, "learning_rate": 3.591595545782999e-06, "loss": 0.001, "step": 154520 }, { "epoch": 1.2636872878930367, "grad_norm": 0.07644083350896835, "learning_rate": 3.5909108246237585e-06, "loss": 0.0008, "step": 154530 }, { "epoch": 1.263769064071636, "grad_norm": 0.025730649009346962, "learning_rate": 3.5902261321688114e-06, "loss": 0.0009, "step": 154540 }, { "epoch": 1.263850840250235, "grad_norm": 0.008153488859534264, "learning_rate": 3.589541468432105e-06, "loss": 0.0008, "step": 154550 }, { "epoch": 1.2639326164288343, "grad_norm": 0.010590183548629284, "learning_rate": 3.588856833427584e-06, "loss": 0.0006, "step": 154560 }, { "epoch": 1.2640143926074334, "grad_norm": 0.030903685837984085, "learning_rate": 3.5881722271691965e-06, "loss": 0.0011, "step": 154570 }, { "epoch": 1.2640961687860326, "grad_norm": 0.17306187748908997, "learning_rate": 3.587487649670889e-06, "loss": 0.0009, "step": 154580 }, { "epoch": 1.2641779449646318, "grad_norm": 0.021168801933526993, "learning_rate": 3.586803100946606e-06, "loss": 0.0008, "step": 154590 }, { "epoch": 1.264259721143231, "grad_norm": 0.01107824221253395, "learning_rate": 3.586118581010293e-06, "loss": 0.0013, "step": 154600 }, { "epoch": 1.26434149732183, "grad_norm": 0.08966333419084549, "learning_rate": 3.5854340898758917e-06, "loss": 0.0009, "step": 154610 }, { "epoch": 1.2644232735004293, "grad_norm": 0.08396955579519272, "learning_rate": 3.5847496275573497e-06, "loss": 0.0008, "step": 154620 }, { "epoch": 1.2645050496790284, "grad_norm": 0.015402568504214287, "learning_rate": 3.584065194068608e-06, "loss": 0.0008, "step": 154630 }, { "epoch": 1.2645868258576276, "grad_norm": 0.0645582303404808, "learning_rate": 3.5833807894236087e-06, "loss": 0.0016, "step": 154640 }, { "epoch": 1.2646686020362268, "grad_norm": 0.10067866742610931, "learning_rate": 3.582696413636293e-06, "loss": 0.0007, "step": 154650 }, { "epoch": 1.264750378214826, "grad_norm": 0.029610542580485344, "learning_rate": 3.582012066720605e-06, "loss": 0.0012, "step": 154660 }, { "epoch": 1.2648321543934251, "grad_norm": 0.05160140618681908, "learning_rate": 3.581327748690484e-06, "loss": 0.0006, "step": 154670 }, { "epoch": 1.2649139305720243, "grad_norm": 0.034900784492492676, "learning_rate": 3.580643459559869e-06, "loss": 0.0006, "step": 154680 }, { "epoch": 1.2649957067506237, "grad_norm": 0.02750515751540661, "learning_rate": 3.5799591993427006e-06, "loss": 0.0005, "step": 154690 }, { "epoch": 1.2650774829292226, "grad_norm": 0.0561944954097271, "learning_rate": 3.579274968052918e-06, "loss": 0.0006, "step": 154700 }, { "epoch": 1.265159259107822, "grad_norm": 0.026711519807577133, "learning_rate": 3.5785907657044593e-06, "loss": 0.0008, "step": 154710 }, { "epoch": 1.265241035286421, "grad_norm": 0.018847011029720306, "learning_rate": 3.577906592311262e-06, "loss": 0.0011, "step": 154720 }, { "epoch": 1.2653228114650203, "grad_norm": 0.09429618716239929, "learning_rate": 3.5772224478872632e-06, "loss": 0.0022, "step": 154730 }, { "epoch": 1.2654045876436193, "grad_norm": 0.03500259295105934, "learning_rate": 3.5765383324463996e-06, "loss": 0.0005, "step": 154740 }, { "epoch": 1.2654863638222187, "grad_norm": 0.024119485169649124, "learning_rate": 3.5758542460026074e-06, "loss": 0.0012, "step": 154750 }, { "epoch": 1.2655681400008176, "grad_norm": 0.022110138088464737, "learning_rate": 3.575170188569822e-06, "loss": 0.0013, "step": 154760 }, { "epoch": 1.265649916179417, "grad_norm": 0.11273450404405594, "learning_rate": 3.5744861601619764e-06, "loss": 0.0011, "step": 154770 }, { "epoch": 1.2657316923580162, "grad_norm": 0.03219767287373543, "learning_rate": 3.5738021607930073e-06, "loss": 0.0009, "step": 154780 }, { "epoch": 1.2658134685366154, "grad_norm": 0.020723780617117882, "learning_rate": 3.5731181904768485e-06, "loss": 0.0008, "step": 154790 }, { "epoch": 1.2658952447152145, "grad_norm": 0.34233009815216064, "learning_rate": 3.5724342492274312e-06, "loss": 0.0008, "step": 154800 }, { "epoch": 1.2659770208938137, "grad_norm": 0.0102914497256279, "learning_rate": 3.571750337058688e-06, "loss": 0.0008, "step": 154810 }, { "epoch": 1.2660587970724129, "grad_norm": 0.021287281066179276, "learning_rate": 3.571066453984553e-06, "loss": 0.0006, "step": 154820 }, { "epoch": 1.266140573251012, "grad_norm": 0.030621416866779327, "learning_rate": 3.5703826000189546e-06, "loss": 0.0006, "step": 154830 }, { "epoch": 1.2662223494296112, "grad_norm": 0.019403276965022087, "learning_rate": 3.5696987751758256e-06, "loss": 0.0005, "step": 154840 }, { "epoch": 1.2663041256082104, "grad_norm": 0.06392594426870346, "learning_rate": 3.569014979469094e-06, "loss": 0.0012, "step": 154850 }, { "epoch": 1.2663859017868095, "grad_norm": 0.10408302396535873, "learning_rate": 3.568331212912691e-06, "loss": 0.0008, "step": 154860 }, { "epoch": 1.2664676779654087, "grad_norm": 0.05428820103406906, "learning_rate": 3.5676474755205453e-06, "loss": 0.0008, "step": 154870 }, { "epoch": 1.2665494541440079, "grad_norm": 0.035594433546066284, "learning_rate": 3.5669637673065847e-06, "loss": 0.0007, "step": 154880 }, { "epoch": 1.266631230322607, "grad_norm": 0.00692926486954093, "learning_rate": 3.5662800882847346e-06, "loss": 0.0008, "step": 154890 }, { "epoch": 1.2667130065012062, "grad_norm": 0.03299274668097496, "learning_rate": 3.5655964384689274e-06, "loss": 0.0007, "step": 154900 }, { "epoch": 1.2667947826798054, "grad_norm": 0.07915021479129791, "learning_rate": 3.564912817873086e-06, "loss": 0.0009, "step": 154910 }, { "epoch": 1.2668765588584046, "grad_norm": 0.08797967433929443, "learning_rate": 3.564229226511138e-06, "loss": 0.0007, "step": 154920 }, { "epoch": 1.2669583350370037, "grad_norm": 0.06310345977544785, "learning_rate": 3.563545664397005e-06, "loss": 0.001, "step": 154930 }, { "epoch": 1.267040111215603, "grad_norm": 0.01645473949611187, "learning_rate": 3.5628621315446165e-06, "loss": 0.0011, "step": 154940 }, { "epoch": 1.267121887394202, "grad_norm": 0.010629656724631786, "learning_rate": 3.5621786279678944e-06, "loss": 0.0004, "step": 154950 }, { "epoch": 1.2672036635728012, "grad_norm": 0.0013730961363762617, "learning_rate": 3.5614951536807623e-06, "loss": 0.0006, "step": 154960 }, { "epoch": 1.2672854397514004, "grad_norm": 0.055619560182094574, "learning_rate": 3.5608117086971424e-06, "loss": 0.0006, "step": 154970 }, { "epoch": 1.2673672159299996, "grad_norm": 0.05133635550737381, "learning_rate": 3.5601282930309587e-06, "loss": 0.0021, "step": 154980 }, { "epoch": 1.2674489921085987, "grad_norm": 0.08320298045873642, "learning_rate": 3.559444906696132e-06, "loss": 0.0006, "step": 154990 }, { "epoch": 1.267530768287198, "grad_norm": 0.021767010912299156, "learning_rate": 3.558761549706583e-06, "loss": 0.0014, "step": 155000 }, { "epoch": 1.267612544465797, "grad_norm": 0.006236320361495018, "learning_rate": 3.5580782220762315e-06, "loss": 0.0006, "step": 155010 }, { "epoch": 1.2676943206443962, "grad_norm": 0.06186698004603386, "learning_rate": 3.5573949238190007e-06, "loss": 0.0007, "step": 155020 }, { "epoch": 1.2677760968229954, "grad_norm": 0.023794615641236305, "learning_rate": 3.5567116549488072e-06, "loss": 0.0007, "step": 155030 }, { "epoch": 1.2678578730015946, "grad_norm": 0.00448873545974493, "learning_rate": 3.556028415479571e-06, "loss": 0.0007, "step": 155040 }, { "epoch": 1.2679396491801938, "grad_norm": 0.07895268499851227, "learning_rate": 3.5553452054252085e-06, "loss": 0.0013, "step": 155050 }, { "epoch": 1.268021425358793, "grad_norm": 0.052292339503765106, "learning_rate": 3.55466202479964e-06, "loss": 0.0016, "step": 155060 }, { "epoch": 1.268103201537392, "grad_norm": 0.07707010954618454, "learning_rate": 3.5539788736167803e-06, "loss": 0.0008, "step": 155070 }, { "epoch": 1.2681849777159913, "grad_norm": 0.010959703475236893, "learning_rate": 3.5532957518905466e-06, "loss": 0.0006, "step": 155080 }, { "epoch": 1.2682667538945904, "grad_norm": 0.011329211294651031, "learning_rate": 3.5526126596348533e-06, "loss": 0.0009, "step": 155090 }, { "epoch": 1.2683485300731898, "grad_norm": 0.047270920127630234, "learning_rate": 3.551929596863617e-06, "loss": 0.0007, "step": 155100 }, { "epoch": 1.2684303062517888, "grad_norm": 0.06406304985284805, "learning_rate": 3.551246563590752e-06, "loss": 0.0012, "step": 155110 }, { "epoch": 1.2685120824303882, "grad_norm": 0.04284615069627762, "learning_rate": 3.550563559830173e-06, "loss": 0.0009, "step": 155120 }, { "epoch": 1.268593858608987, "grad_norm": 0.019240733236074448, "learning_rate": 3.5498805855957897e-06, "loss": 0.0016, "step": 155130 }, { "epoch": 1.2686756347875865, "grad_norm": 0.00831928662955761, "learning_rate": 3.5491976409015198e-06, "loss": 0.0006, "step": 155140 }, { "epoch": 1.2687574109661854, "grad_norm": 0.05315250903367996, "learning_rate": 3.5485147257612733e-06, "loss": 0.0009, "step": 155150 }, { "epoch": 1.2688391871447848, "grad_norm": 0.04416265711188316, "learning_rate": 3.5478318401889616e-06, "loss": 0.0014, "step": 155160 }, { "epoch": 1.2689209633233838, "grad_norm": 0.0055349720641970634, "learning_rate": 3.5471489841984947e-06, "loss": 0.0007, "step": 155170 }, { "epoch": 1.2690027395019832, "grad_norm": 0.05230938270688057, "learning_rate": 3.546466157803785e-06, "loss": 0.0012, "step": 155180 }, { "epoch": 1.2690845156805821, "grad_norm": 0.03938300907611847, "learning_rate": 3.5457833610187407e-06, "loss": 0.0008, "step": 155190 }, { "epoch": 1.2691662918591815, "grad_norm": 0.001552464091219008, "learning_rate": 3.5451005938572724e-06, "loss": 0.001, "step": 155200 }, { "epoch": 1.2692480680377807, "grad_norm": 0.011019508354365826, "learning_rate": 3.5444178563332853e-06, "loss": 0.0007, "step": 155210 }, { "epoch": 1.2693298442163798, "grad_norm": 0.04935489594936371, "learning_rate": 3.543735148460692e-06, "loss": 0.0018, "step": 155220 }, { "epoch": 1.269411620394979, "grad_norm": 0.019257377833127975, "learning_rate": 3.5430524702533964e-06, "loss": 0.0008, "step": 155230 }, { "epoch": 1.2694933965735782, "grad_norm": 0.02133342996239662, "learning_rate": 3.5423698217253065e-06, "loss": 0.0007, "step": 155240 }, { "epoch": 1.2695751727521773, "grad_norm": 0.03830866888165474, "learning_rate": 3.541687202890326e-06, "loss": 0.0009, "step": 155250 }, { "epoch": 1.2696569489307765, "grad_norm": 0.04942670464515686, "learning_rate": 3.5410046137623654e-06, "loss": 0.0022, "step": 155260 }, { "epoch": 1.2697387251093757, "grad_norm": 0.08135518431663513, "learning_rate": 3.5403220543553265e-06, "loss": 0.0007, "step": 155270 }, { "epoch": 1.2698205012879749, "grad_norm": 0.08647426217794418, "learning_rate": 3.5396395246831128e-06, "loss": 0.0008, "step": 155280 }, { "epoch": 1.269902277466574, "grad_norm": 0.05161096155643463, "learning_rate": 3.5389570247596288e-06, "loss": 0.0009, "step": 155290 }, { "epoch": 1.2699840536451732, "grad_norm": 0.16143850982189178, "learning_rate": 3.5382745545987785e-06, "loss": 0.0015, "step": 155300 }, { "epoch": 1.2700658298237724, "grad_norm": 0.04474244266748428, "learning_rate": 3.5375921142144643e-06, "loss": 0.0008, "step": 155310 }, { "epoch": 1.2701476060023715, "grad_norm": 0.04035259783267975, "learning_rate": 3.536909703620587e-06, "loss": 0.0006, "step": 155320 }, { "epoch": 1.2702293821809707, "grad_norm": 0.03306370973587036, "learning_rate": 3.5362273228310472e-06, "loss": 0.0012, "step": 155330 }, { "epoch": 1.2703111583595699, "grad_norm": 0.08840567618608475, "learning_rate": 3.535544971859748e-06, "loss": 0.0012, "step": 155340 }, { "epoch": 1.270392934538169, "grad_norm": 0.0758310928940773, "learning_rate": 3.5348626507205875e-06, "loss": 0.0015, "step": 155350 }, { "epoch": 1.2704747107167682, "grad_norm": 0.009587405249476433, "learning_rate": 3.5341803594274657e-06, "loss": 0.0007, "step": 155360 }, { "epoch": 1.2705564868953674, "grad_norm": 0.017781857401132584, "learning_rate": 3.53349809799428e-06, "loss": 0.0123, "step": 155370 }, { "epoch": 1.2706382630739665, "grad_norm": 0.03376918286085129, "learning_rate": 3.532815866434932e-06, "loss": 0.0008, "step": 155380 }, { "epoch": 1.2707200392525657, "grad_norm": 0.025697818025946617, "learning_rate": 3.5321336647633175e-06, "loss": 0.001, "step": 155390 }, { "epoch": 1.2708018154311649, "grad_norm": 0.06526584923267365, "learning_rate": 3.531451492993333e-06, "loss": 0.0011, "step": 155400 }, { "epoch": 1.270883591609764, "grad_norm": 0.008341707289218903, "learning_rate": 3.5307693511388745e-06, "loss": 0.0012, "step": 155410 }, { "epoch": 1.2709653677883632, "grad_norm": 0.13572756946086884, "learning_rate": 3.5300872392138397e-06, "loss": 0.0019, "step": 155420 }, { "epoch": 1.2710471439669624, "grad_norm": 0.026579635217785835, "learning_rate": 3.5294051572321226e-06, "loss": 0.0024, "step": 155430 }, { "epoch": 1.2711289201455616, "grad_norm": 0.05476703494787216, "learning_rate": 3.5287231052076176e-06, "loss": 0.0013, "step": 155440 }, { "epoch": 1.2712106963241607, "grad_norm": 0.055325958877801895, "learning_rate": 3.5280410831542176e-06, "loss": 0.0005, "step": 155450 }, { "epoch": 1.27129247250276, "grad_norm": 0.016565410420298576, "learning_rate": 3.527359091085819e-06, "loss": 0.0013, "step": 155460 }, { "epoch": 1.271374248681359, "grad_norm": 0.0198008231818676, "learning_rate": 3.526677129016312e-06, "loss": 0.0006, "step": 155470 }, { "epoch": 1.2714560248599582, "grad_norm": 0.031276196241378784, "learning_rate": 3.52599519695959e-06, "loss": 0.0008, "step": 155480 }, { "epoch": 1.2715378010385574, "grad_norm": 0.17904184758663177, "learning_rate": 3.5253132949295417e-06, "loss": 0.0019, "step": 155490 }, { "epoch": 1.2716195772171566, "grad_norm": 0.12443473935127258, "learning_rate": 3.5246314229400625e-06, "loss": 0.0013, "step": 155500 }, { "epoch": 1.2717013533957557, "grad_norm": 0.0012952238321304321, "learning_rate": 3.52394958100504e-06, "loss": 0.001, "step": 155510 }, { "epoch": 1.271783129574355, "grad_norm": 0.11642799526453018, "learning_rate": 3.523267769138364e-06, "loss": 0.0009, "step": 155520 }, { "epoch": 1.2718649057529543, "grad_norm": 0.015348026528954506, "learning_rate": 3.5225859873539234e-06, "loss": 0.0011, "step": 155530 }, { "epoch": 1.2719466819315532, "grad_norm": 0.0440613329410553, "learning_rate": 3.5219042356656086e-06, "loss": 0.0007, "step": 155540 }, { "epoch": 1.2720284581101526, "grad_norm": 0.001775973360054195, "learning_rate": 3.521222514087306e-06, "loss": 0.0008, "step": 155550 }, { "epoch": 1.2721102342887516, "grad_norm": 0.04264894127845764, "learning_rate": 3.520540822632902e-06, "loss": 0.001, "step": 155560 }, { "epoch": 1.272192010467351, "grad_norm": 0.04342980682849884, "learning_rate": 3.5198591613162837e-06, "loss": 0.0013, "step": 155570 }, { "epoch": 1.27227378664595, "grad_norm": 0.01403114665299654, "learning_rate": 3.5191775301513382e-06, "loss": 0.0008, "step": 155580 }, { "epoch": 1.2723555628245493, "grad_norm": 0.11077796667814255, "learning_rate": 3.51849592915195e-06, "loss": 0.0006, "step": 155590 }, { "epoch": 1.2724373390031483, "grad_norm": 0.1756792962551117, "learning_rate": 3.5178143583320037e-06, "loss": 0.0029, "step": 155600 }, { "epoch": 1.2725191151817477, "grad_norm": 0.047059670090675354, "learning_rate": 3.517132817705382e-06, "loss": 0.0006, "step": 155610 }, { "epoch": 1.2726008913603466, "grad_norm": 0.06127776950597763, "learning_rate": 3.5164513072859717e-06, "loss": 0.0005, "step": 155620 }, { "epoch": 1.272682667538946, "grad_norm": 0.006923572160303593, "learning_rate": 3.515769827087655e-06, "loss": 0.0008, "step": 155630 }, { "epoch": 1.2727644437175452, "grad_norm": 0.017814017832279205, "learning_rate": 3.5150883771243128e-06, "loss": 0.0004, "step": 155640 }, { "epoch": 1.2728462198961443, "grad_norm": 0.022617077454924583, "learning_rate": 3.514406957409826e-06, "loss": 0.002, "step": 155650 }, { "epoch": 1.2729279960747435, "grad_norm": 0.20738117396831512, "learning_rate": 3.513725567958079e-06, "loss": 0.0019, "step": 155660 }, { "epoch": 1.2730097722533427, "grad_norm": 0.03311406075954437, "learning_rate": 3.5130442087829498e-06, "loss": 0.0011, "step": 155670 }, { "epoch": 1.2730915484319418, "grad_norm": 0.029516778886318207, "learning_rate": 3.512362879898319e-06, "loss": 0.0004, "step": 155680 }, { "epoch": 1.273173324610541, "grad_norm": 0.007493466138839722, "learning_rate": 3.5116815813180636e-06, "loss": 0.0013, "step": 155690 }, { "epoch": 1.2732551007891402, "grad_norm": 0.015838980674743652, "learning_rate": 3.511000313056066e-06, "loss": 0.0006, "step": 155700 }, { "epoch": 1.2733368769677393, "grad_norm": 0.11458151042461395, "learning_rate": 3.5103190751262023e-06, "loss": 0.001, "step": 155710 }, { "epoch": 1.2734186531463385, "grad_norm": 0.06375478953123093, "learning_rate": 3.5096378675423498e-06, "loss": 0.0008, "step": 155720 }, { "epoch": 1.2735004293249377, "grad_norm": 0.010914376936852932, "learning_rate": 3.508956690318383e-06, "loss": 0.0014, "step": 155730 }, { "epoch": 1.2735822055035368, "grad_norm": 0.09613826125860214, "learning_rate": 3.5082755434681825e-06, "loss": 0.0015, "step": 155740 }, { "epoch": 1.273663981682136, "grad_norm": 0.11007395386695862, "learning_rate": 3.507594427005622e-06, "loss": 0.001, "step": 155750 }, { "epoch": 1.2737457578607352, "grad_norm": 0.04440571740269661, "learning_rate": 3.5069133409445768e-06, "loss": 0.0008, "step": 155760 }, { "epoch": 1.2738275340393344, "grad_norm": 0.055700790137052536, "learning_rate": 3.5062322852989185e-06, "loss": 0.0005, "step": 155770 }, { "epoch": 1.2739093102179335, "grad_norm": 0.015699397772550583, "learning_rate": 3.5055512600825236e-06, "loss": 0.0005, "step": 155780 }, { "epoch": 1.2739910863965327, "grad_norm": 0.02856123074889183, "learning_rate": 3.5048702653092646e-06, "loss": 0.0009, "step": 155790 }, { "epoch": 1.2740728625751319, "grad_norm": 0.05289386585354805, "learning_rate": 3.504189300993014e-06, "loss": 0.0007, "step": 155800 }, { "epoch": 1.274154638753731, "grad_norm": 0.04040007293224335, "learning_rate": 3.503508367147643e-06, "loss": 0.0038, "step": 155810 }, { "epoch": 1.2742364149323302, "grad_norm": 0.01347860973328352, "learning_rate": 3.502827463787022e-06, "loss": 0.0006, "step": 155820 }, { "epoch": 1.2743181911109294, "grad_norm": 0.04458596557378769, "learning_rate": 3.5021465909250228e-06, "loss": 0.0006, "step": 155830 }, { "epoch": 1.2743999672895285, "grad_norm": 0.0015412342036142945, "learning_rate": 3.5014657485755154e-06, "loss": 0.0016, "step": 155840 }, { "epoch": 1.2744817434681277, "grad_norm": 0.025892559438943863, "learning_rate": 3.5007849367523694e-06, "loss": 0.0012, "step": 155850 }, { "epoch": 1.2745635196467269, "grad_norm": 0.015813471749424934, "learning_rate": 3.5001041554694503e-06, "loss": 0.0004, "step": 155860 }, { "epoch": 1.274645295825326, "grad_norm": 0.04030200093984604, "learning_rate": 3.4994234047406305e-06, "loss": 0.0005, "step": 155870 }, { "epoch": 1.2747270720039252, "grad_norm": 0.02245258167386055, "learning_rate": 3.4987426845797766e-06, "loss": 0.0008, "step": 155880 }, { "epoch": 1.2748088481825244, "grad_norm": 0.08969828486442566, "learning_rate": 3.4980619950007538e-06, "loss": 0.0016, "step": 155890 }, { "epoch": 1.2748906243611235, "grad_norm": 0.0009768687887117267, "learning_rate": 3.497381336017428e-06, "loss": 0.0006, "step": 155900 }, { "epoch": 1.2749724005397227, "grad_norm": 0.06762968748807907, "learning_rate": 3.496700707643667e-06, "loss": 0.0018, "step": 155910 }, { "epoch": 1.2750541767183219, "grad_norm": 0.027112780138850212, "learning_rate": 3.4960201098933344e-06, "loss": 0.0012, "step": 155920 }, { "epoch": 1.275135952896921, "grad_norm": 0.14050067961215973, "learning_rate": 3.495339542780295e-06, "loss": 0.0003, "step": 155930 }, { "epoch": 1.2752177290755202, "grad_norm": 0.029293963685631752, "learning_rate": 3.4946590063184106e-06, "loss": 0.0011, "step": 155940 }, { "epoch": 1.2752995052541194, "grad_norm": 0.03481784462928772, "learning_rate": 3.4939785005215473e-06, "loss": 0.0007, "step": 155950 }, { "epoch": 1.2753812814327188, "grad_norm": 0.02445473149418831, "learning_rate": 3.493298025403566e-06, "loss": 0.0013, "step": 155960 }, { "epoch": 1.2754630576113177, "grad_norm": 0.019418150186538696, "learning_rate": 3.492617580978328e-06, "loss": 0.0006, "step": 155970 }, { "epoch": 1.2755448337899171, "grad_norm": 0.032004375010728836, "learning_rate": 3.4919371672596938e-06, "loss": 0.0009, "step": 155980 }, { "epoch": 1.275626609968516, "grad_norm": 0.07682880759239197, "learning_rate": 3.4912567842615276e-06, "loss": 0.0005, "step": 155990 }, { "epoch": 1.2757083861471155, "grad_norm": 0.0014391709119081497, "learning_rate": 3.4905764319976865e-06, "loss": 0.0009, "step": 156000 }, { "epoch": 1.2757901623257144, "grad_norm": 0.03811437264084816, "learning_rate": 3.4898961104820307e-06, "loss": 0.0003, "step": 156010 }, { "epoch": 1.2758719385043138, "grad_norm": 0.12478048354387283, "learning_rate": 3.4892158197284177e-06, "loss": 0.0008, "step": 156020 }, { "epoch": 1.2759537146829127, "grad_norm": 0.23145617544651031, "learning_rate": 3.4885355597507063e-06, "loss": 0.0014, "step": 156030 }, { "epoch": 1.2760354908615121, "grad_norm": 0.07802321761846542, "learning_rate": 3.487855330562756e-06, "loss": 0.0007, "step": 156040 }, { "epoch": 1.276117267040111, "grad_norm": 0.01042364165186882, "learning_rate": 3.487175132178421e-06, "loss": 0.0007, "step": 156050 }, { "epoch": 1.2761990432187105, "grad_norm": 0.11021357029676437, "learning_rate": 3.4864949646115565e-06, "loss": 0.0008, "step": 156060 }, { "epoch": 1.2762808193973096, "grad_norm": 0.045575857162475586, "learning_rate": 3.4858148278760216e-06, "loss": 0.0015, "step": 156070 }, { "epoch": 1.2763625955759088, "grad_norm": 0.026987269520759583, "learning_rate": 3.485134721985669e-06, "loss": 0.0007, "step": 156080 }, { "epoch": 1.276444371754508, "grad_norm": 0.11980163305997849, "learning_rate": 3.484454646954354e-06, "loss": 0.001, "step": 156090 }, { "epoch": 1.2765261479331071, "grad_norm": 0.051559220999479294, "learning_rate": 3.483774602795927e-06, "loss": 0.0006, "step": 156100 }, { "epoch": 1.2766079241117063, "grad_norm": 0.04138484597206116, "learning_rate": 3.4830945895242464e-06, "loss": 0.0011, "step": 156110 }, { "epoch": 1.2766897002903055, "grad_norm": 0.04416115581989288, "learning_rate": 3.482414607153162e-06, "loss": 0.0006, "step": 156120 }, { "epoch": 1.2767714764689047, "grad_norm": 0.010576327331364155, "learning_rate": 3.4817346556965254e-06, "loss": 0.0006, "step": 156130 }, { "epoch": 1.2768532526475038, "grad_norm": 0.01875154860317707, "learning_rate": 3.4810547351681867e-06, "loss": 0.0007, "step": 156140 }, { "epoch": 1.276935028826103, "grad_norm": 0.022390216588974, "learning_rate": 3.480374845581999e-06, "loss": 0.0007, "step": 156150 }, { "epoch": 1.2770168050047022, "grad_norm": 0.025563998147845268, "learning_rate": 3.4796949869518115e-06, "loss": 0.0005, "step": 156160 }, { "epoch": 1.2770985811833013, "grad_norm": 0.04316901043057442, "learning_rate": 3.479015159291472e-06, "loss": 0.001, "step": 156170 }, { "epoch": 1.2771803573619005, "grad_norm": 0.05722721666097641, "learning_rate": 3.478335362614829e-06, "loss": 0.0007, "step": 156180 }, { "epoch": 1.2772621335404997, "grad_norm": 0.04354033246636391, "learning_rate": 3.477655596935733e-06, "loss": 0.0024, "step": 156190 }, { "epoch": 1.2773439097190988, "grad_norm": 0.016845954582095146, "learning_rate": 3.47697586226803e-06, "loss": 0.0009, "step": 156200 }, { "epoch": 1.277425685897698, "grad_norm": 0.04071527346968651, "learning_rate": 3.476296158625566e-06, "loss": 0.0012, "step": 156210 }, { "epoch": 1.2775074620762972, "grad_norm": 0.016060860827565193, "learning_rate": 3.4756164860221864e-06, "loss": 0.0011, "step": 156220 }, { "epoch": 1.2775892382548963, "grad_norm": 0.03288668394088745, "learning_rate": 3.4749368444717404e-06, "loss": 0.0008, "step": 156230 }, { "epoch": 1.2776710144334955, "grad_norm": 0.0006824401789344847, "learning_rate": 3.47425723398807e-06, "loss": 0.002, "step": 156240 }, { "epoch": 1.2777527906120947, "grad_norm": 0.008975065313279629, "learning_rate": 3.4735776545850195e-06, "loss": 0.0005, "step": 156250 }, { "epoch": 1.2778345667906938, "grad_norm": 0.007253880146890879, "learning_rate": 3.472898106276432e-06, "loss": 0.0007, "step": 156260 }, { "epoch": 1.277916342969293, "grad_norm": 0.02779621258378029, "learning_rate": 3.472218589076153e-06, "loss": 0.001, "step": 156270 }, { "epoch": 1.2779981191478922, "grad_norm": 0.059342920780181885, "learning_rate": 3.471539102998023e-06, "loss": 0.0015, "step": 156280 }, { "epoch": 1.2780798953264914, "grad_norm": 0.019777363166213036, "learning_rate": 3.470859648055884e-06, "loss": 0.0009, "step": 156290 }, { "epoch": 1.2781616715050905, "grad_norm": 0.061253856867551804, "learning_rate": 3.470180224263575e-06, "loss": 0.0012, "step": 156300 }, { "epoch": 1.2782434476836897, "grad_norm": 0.02960120514035225, "learning_rate": 3.469500831634941e-06, "loss": 0.0012, "step": 156310 }, { "epoch": 1.2783252238622889, "grad_norm": 0.042448364198207855, "learning_rate": 3.4688214701838175e-06, "loss": 0.0007, "step": 156320 }, { "epoch": 1.278407000040888, "grad_norm": 0.02538708783686161, "learning_rate": 3.468142139924046e-06, "loss": 0.0009, "step": 156330 }, { "epoch": 1.2784887762194872, "grad_norm": 0.0010001464979723096, "learning_rate": 3.467462840869462e-06, "loss": 0.0022, "step": 156340 }, { "epoch": 1.2785705523980864, "grad_norm": 0.005635633133351803, "learning_rate": 3.4667835730339084e-06, "loss": 0.0004, "step": 156350 }, { "epoch": 1.2786523285766855, "grad_norm": 0.04580875113606453, "learning_rate": 3.4661043364312188e-06, "loss": 0.001, "step": 156360 }, { "epoch": 1.2787341047552847, "grad_norm": 0.08030049502849579, "learning_rate": 3.4654251310752306e-06, "loss": 0.0007, "step": 156370 }, { "epoch": 1.2788158809338839, "grad_norm": 0.016701240092515945, "learning_rate": 3.4647459569797794e-06, "loss": 0.0006, "step": 156380 }, { "epoch": 1.2788976571124833, "grad_norm": 0.004722790326923132, "learning_rate": 3.464066814158702e-06, "loss": 0.0007, "step": 156390 }, { "epoch": 1.2789794332910822, "grad_norm": 0.11845846474170685, "learning_rate": 3.463387702625832e-06, "loss": 0.0006, "step": 156400 }, { "epoch": 1.2790612094696816, "grad_norm": 0.0058093019761145115, "learning_rate": 3.462708622395003e-06, "loss": 0.0004, "step": 156410 }, { "epoch": 1.2791429856482806, "grad_norm": 0.02490396425127983, "learning_rate": 3.462029573480048e-06, "loss": 0.001, "step": 156420 }, { "epoch": 1.27922476182688, "grad_norm": 0.04567859321832657, "learning_rate": 3.4613505558948025e-06, "loss": 0.0009, "step": 156430 }, { "epoch": 1.2793065380054789, "grad_norm": 0.0048251161351799965, "learning_rate": 3.460671569653096e-06, "loss": 0.0007, "step": 156440 }, { "epoch": 1.2793883141840783, "grad_norm": 0.05512576550245285, "learning_rate": 3.459992614768761e-06, "loss": 0.0016, "step": 156450 }, { "epoch": 1.2794700903626772, "grad_norm": 0.004771648906171322, "learning_rate": 3.4593136912556268e-06, "loss": 0.0012, "step": 156460 }, { "epoch": 1.2795518665412766, "grad_norm": 0.04099183529615402, "learning_rate": 3.4586347991275267e-06, "loss": 0.001, "step": 156470 }, { "epoch": 1.2796336427198756, "grad_norm": 0.007197789382189512, "learning_rate": 3.4579559383982887e-06, "loss": 0.0026, "step": 156480 }, { "epoch": 1.279715418898475, "grad_norm": 0.022334594279527664, "learning_rate": 3.4572771090817414e-06, "loss": 0.0004, "step": 156490 }, { "epoch": 1.2797971950770741, "grad_norm": 0.12708799540996552, "learning_rate": 3.4565983111917123e-06, "loss": 0.0006, "step": 156500 }, { "epoch": 1.2798789712556733, "grad_norm": 0.07807903736829758, "learning_rate": 3.4559195447420313e-06, "loss": 0.0022, "step": 156510 }, { "epoch": 1.2799607474342725, "grad_norm": 0.012585427612066269, "learning_rate": 3.4552408097465245e-06, "loss": 0.001, "step": 156520 }, { "epoch": 1.2800425236128716, "grad_norm": 0.05243181437253952, "learning_rate": 3.454562106219018e-06, "loss": 0.0008, "step": 156530 }, { "epoch": 1.2801242997914708, "grad_norm": 0.06864496320486069, "learning_rate": 3.4538834341733363e-06, "loss": 0.002, "step": 156540 }, { "epoch": 1.28020607597007, "grad_norm": 0.0011815258767455816, "learning_rate": 3.453204793623307e-06, "loss": 0.0009, "step": 156550 }, { "epoch": 1.2802878521486691, "grad_norm": 0.04819653183221817, "learning_rate": 3.4525261845827533e-06, "loss": 0.0017, "step": 156560 }, { "epoch": 1.2803696283272683, "grad_norm": 0.17108894884586334, "learning_rate": 3.4518476070654995e-06, "loss": 0.0013, "step": 156570 }, { "epoch": 1.2804514045058675, "grad_norm": 0.05359647795557976, "learning_rate": 3.4511690610853652e-06, "loss": 0.0009, "step": 156580 }, { "epoch": 1.2805331806844666, "grad_norm": 0.06890679150819778, "learning_rate": 3.4504905466561795e-06, "loss": 0.0009, "step": 156590 }, { "epoch": 1.2806149568630658, "grad_norm": 0.015542012639343739, "learning_rate": 3.4498120637917596e-06, "loss": 0.0011, "step": 156600 }, { "epoch": 1.280696733041665, "grad_norm": 0.0319204144179821, "learning_rate": 3.4491336125059295e-06, "loss": 0.0006, "step": 156610 }, { "epoch": 1.2807785092202641, "grad_norm": 0.010767289437353611, "learning_rate": 3.448455192812506e-06, "loss": 0.0017, "step": 156620 }, { "epoch": 1.2808602853988633, "grad_norm": 0.021570075303316116, "learning_rate": 3.447776804725313e-06, "loss": 0.0013, "step": 156630 }, { "epoch": 1.2809420615774625, "grad_norm": 0.02219546213746071, "learning_rate": 3.4470984482581683e-06, "loss": 0.0013, "step": 156640 }, { "epoch": 1.2810238377560617, "grad_norm": 0.0182070080190897, "learning_rate": 3.4464201234248895e-06, "loss": 0.0011, "step": 156650 }, { "epoch": 1.2811056139346608, "grad_norm": 0.007130124606192112, "learning_rate": 3.4457418302392955e-06, "loss": 0.0005, "step": 156660 }, { "epoch": 1.28118739011326, "grad_norm": 0.05883046239614487, "learning_rate": 3.4450635687152044e-06, "loss": 0.0007, "step": 156670 }, { "epoch": 1.2812691662918592, "grad_norm": 0.05042679235339165, "learning_rate": 3.444385338866433e-06, "loss": 0.001, "step": 156680 }, { "epoch": 1.2813509424704583, "grad_norm": 0.05665937438607216, "learning_rate": 3.443707140706796e-06, "loss": 0.0021, "step": 156690 }, { "epoch": 1.2814327186490575, "grad_norm": 0.07046900689601898, "learning_rate": 3.443028974250108e-06, "loss": 0.0008, "step": 156700 }, { "epoch": 1.2815144948276567, "grad_norm": 0.020951468497514725, "learning_rate": 3.442350839510187e-06, "loss": 0.0008, "step": 156710 }, { "epoch": 1.2815962710062558, "grad_norm": 0.04346604645252228, "learning_rate": 3.4416727365008453e-06, "loss": 0.0011, "step": 156720 }, { "epoch": 1.281678047184855, "grad_norm": 0.024596281349658966, "learning_rate": 3.440994665235897e-06, "loss": 0.0006, "step": 156730 }, { "epoch": 1.2817598233634542, "grad_norm": 0.03188839182257652, "learning_rate": 3.440316625729154e-06, "loss": 0.0013, "step": 156740 }, { "epoch": 1.2818415995420533, "grad_norm": 0.13690432906150818, "learning_rate": 3.4396386179944297e-06, "loss": 0.0014, "step": 156750 }, { "epoch": 1.2819233757206525, "grad_norm": 0.06097007915377617, "learning_rate": 3.4389606420455346e-06, "loss": 0.0015, "step": 156760 }, { "epoch": 1.2820051518992517, "grad_norm": 0.011864440515637398, "learning_rate": 3.4382826978962807e-06, "loss": 0.0006, "step": 156770 }, { "epoch": 1.2820869280778509, "grad_norm": 0.025055034086108208, "learning_rate": 3.4376047855604764e-06, "loss": 0.0004, "step": 156780 }, { "epoch": 1.28216870425645, "grad_norm": 0.015254545025527477, "learning_rate": 3.436926905051934e-06, "loss": 0.0005, "step": 156790 }, { "epoch": 1.2822504804350492, "grad_norm": 0.030012084171175957, "learning_rate": 3.436249056384461e-06, "loss": 0.0009, "step": 156800 }, { "epoch": 1.2823322566136484, "grad_norm": 0.1891183853149414, "learning_rate": 3.4355712395718656e-06, "loss": 0.0011, "step": 156810 }, { "epoch": 1.2824140327922477, "grad_norm": 0.02159919962286949, "learning_rate": 3.4348934546279535e-06, "loss": 0.0009, "step": 156820 }, { "epoch": 1.2824958089708467, "grad_norm": 0.08095324784517288, "learning_rate": 3.4342157015665356e-06, "loss": 0.0012, "step": 156830 }, { "epoch": 1.282577585149446, "grad_norm": 0.09258992224931717, "learning_rate": 3.4335379804014173e-06, "loss": 0.001, "step": 156840 }, { "epoch": 1.282659361328045, "grad_norm": 0.06237150728702545, "learning_rate": 3.4328602911464035e-06, "loss": 0.0011, "step": 156850 }, { "epoch": 1.2827411375066444, "grad_norm": 0.0437050424516201, "learning_rate": 3.432182633815299e-06, "loss": 0.0013, "step": 156860 }, { "epoch": 1.2828229136852434, "grad_norm": 0.011203702539205551, "learning_rate": 3.431505008421908e-06, "loss": 0.0013, "step": 156870 }, { "epoch": 1.2829046898638428, "grad_norm": 0.023036936298012733, "learning_rate": 3.4308274149800366e-06, "loss": 0.0005, "step": 156880 }, { "epoch": 1.2829864660424417, "grad_norm": 0.013691097497940063, "learning_rate": 3.4301498535034847e-06, "loss": 0.0003, "step": 156890 }, { "epoch": 1.283068242221041, "grad_norm": 0.03450777009129524, "learning_rate": 3.4294723240060566e-06, "loss": 0.001, "step": 156900 }, { "epoch": 1.28315001839964, "grad_norm": 0.19381053745746613, "learning_rate": 3.4287948265015546e-06, "loss": 0.0012, "step": 156910 }, { "epoch": 1.2832317945782394, "grad_norm": 0.05525241792201996, "learning_rate": 3.4281173610037786e-06, "loss": 0.0008, "step": 156920 }, { "epoch": 1.2833135707568386, "grad_norm": 0.03825065493583679, "learning_rate": 3.42743992752653e-06, "loss": 0.001, "step": 156930 }, { "epoch": 1.2833953469354378, "grad_norm": 0.012759176082909107, "learning_rate": 3.426762526083606e-06, "loss": 0.0014, "step": 156940 }, { "epoch": 1.283477123114037, "grad_norm": 0.015482700429856777, "learning_rate": 3.426085156688811e-06, "loss": 0.0009, "step": 156950 }, { "epoch": 1.2835588992926361, "grad_norm": 0.01365247555077076, "learning_rate": 3.4254078193559397e-06, "loss": 0.0007, "step": 156960 }, { "epoch": 1.2836406754712353, "grad_norm": 0.011655700393021107, "learning_rate": 3.424730514098792e-06, "loss": 0.0012, "step": 156970 }, { "epoch": 1.2837224516498345, "grad_norm": 0.026879673823714256, "learning_rate": 3.424053240931162e-06, "loss": 0.0009, "step": 156980 }, { "epoch": 1.2838042278284336, "grad_norm": 0.0012587116798385978, "learning_rate": 3.42337599986685e-06, "loss": 0.0012, "step": 156990 }, { "epoch": 1.2838860040070328, "grad_norm": 0.056135911494493484, "learning_rate": 3.4226987909196507e-06, "loss": 0.0007, "step": 157000 }, { "epoch": 1.283967780185632, "grad_norm": 0.04844760522246361, "learning_rate": 3.422021614103359e-06, "loss": 0.0006, "step": 157010 }, { "epoch": 1.2840495563642311, "grad_norm": 0.01413871068507433, "learning_rate": 3.4213444694317687e-06, "loss": 0.0013, "step": 157020 }, { "epoch": 1.2841313325428303, "grad_norm": 0.07680302113294601, "learning_rate": 3.4206673569186753e-06, "loss": 0.0007, "step": 157030 }, { "epoch": 1.2842131087214295, "grad_norm": 0.026573115959763527, "learning_rate": 3.4199902765778727e-06, "loss": 0.0008, "step": 157040 }, { "epoch": 1.2842948849000286, "grad_norm": 0.024324458092451096, "learning_rate": 3.419313228423151e-06, "loss": 0.0008, "step": 157050 }, { "epoch": 1.2843766610786278, "grad_norm": 0.08732586354017258, "learning_rate": 3.4186362124683027e-06, "loss": 0.0013, "step": 157060 }, { "epoch": 1.284458437257227, "grad_norm": 0.033790670335292816, "learning_rate": 3.417959228727121e-06, "loss": 0.0006, "step": 157070 }, { "epoch": 1.2845402134358261, "grad_norm": 0.031044915318489075, "learning_rate": 3.4172822772133975e-06, "loss": 0.0012, "step": 157080 }, { "epoch": 1.2846219896144253, "grad_norm": 0.05454907566308975, "learning_rate": 3.4166053579409197e-06, "loss": 0.0006, "step": 157090 }, { "epoch": 1.2847037657930245, "grad_norm": 0.07462930679321289, "learning_rate": 3.4159284709234763e-06, "loss": 0.0015, "step": 157100 }, { "epoch": 1.2847855419716236, "grad_norm": 0.0676540806889534, "learning_rate": 3.415251616174859e-06, "loss": 0.0016, "step": 157110 }, { "epoch": 1.2848673181502228, "grad_norm": 0.026154721155762672, "learning_rate": 3.4145747937088546e-06, "loss": 0.0013, "step": 157120 }, { "epoch": 1.284949094328822, "grad_norm": 0.10720309615135193, "learning_rate": 3.41389800353925e-06, "loss": 0.0013, "step": 157130 }, { "epoch": 1.2850308705074212, "grad_norm": 0.0056629478931427, "learning_rate": 3.4132212456798313e-06, "loss": 0.0009, "step": 157140 }, { "epoch": 1.2851126466860203, "grad_norm": 0.009577619843184948, "learning_rate": 3.412544520144387e-06, "loss": 0.001, "step": 157150 }, { "epoch": 1.2851944228646195, "grad_norm": 0.04103482887148857, "learning_rate": 3.411867826946701e-06, "loss": 0.0006, "step": 157160 }, { "epoch": 1.2852761990432187, "grad_norm": 0.009941783733665943, "learning_rate": 3.411191166100558e-06, "loss": 0.0012, "step": 157170 }, { "epoch": 1.2853579752218178, "grad_norm": 0.06296854466199875, "learning_rate": 3.41051453761974e-06, "loss": 0.0007, "step": 157180 }, { "epoch": 1.285439751400417, "grad_norm": 0.043005459010601044, "learning_rate": 3.409837941518036e-06, "loss": 0.0009, "step": 157190 }, { "epoch": 1.2855215275790162, "grad_norm": 0.09075026959180832, "learning_rate": 3.409161377809225e-06, "loss": 0.0015, "step": 157200 }, { "epoch": 1.2856033037576153, "grad_norm": 0.009916317649185658, "learning_rate": 3.40848484650709e-06, "loss": 0.0007, "step": 157210 }, { "epoch": 1.2856850799362145, "grad_norm": 0.025207852944731712, "learning_rate": 3.4078083476254137e-06, "loss": 0.0006, "step": 157220 }, { "epoch": 1.2857668561148137, "grad_norm": 0.020291510969400406, "learning_rate": 3.407131881177972e-06, "loss": 0.0006, "step": 157230 }, { "epoch": 1.2858486322934128, "grad_norm": 0.016922323033213615, "learning_rate": 3.4064554471785505e-06, "loss": 0.001, "step": 157240 }, { "epoch": 1.2859304084720122, "grad_norm": 0.012576950713992119, "learning_rate": 3.405779045640927e-06, "loss": 0.0008, "step": 157250 }, { "epoch": 1.2860121846506112, "grad_norm": 0.05396094545722008, "learning_rate": 3.4051026765788807e-06, "loss": 0.0008, "step": 157260 }, { "epoch": 1.2860939608292106, "grad_norm": 0.004965473897755146, "learning_rate": 3.404426340006188e-06, "loss": 0.0015, "step": 157270 }, { "epoch": 1.2861757370078095, "grad_norm": 0.0046937186270952225, "learning_rate": 3.4037500359366283e-06, "loss": 0.0009, "step": 157280 }, { "epoch": 1.286257513186409, "grad_norm": 0.08945024013519287, "learning_rate": 3.4030737643839784e-06, "loss": 0.0006, "step": 157290 }, { "epoch": 1.2863392893650079, "grad_norm": 0.026754489168524742, "learning_rate": 3.4023975253620135e-06, "loss": 0.0009, "step": 157300 }, { "epoch": 1.2864210655436072, "grad_norm": 0.0844845101237297, "learning_rate": 3.401721318884508e-06, "loss": 0.0008, "step": 157310 }, { "epoch": 1.2865028417222062, "grad_norm": 0.057073622941970825, "learning_rate": 3.40104514496524e-06, "loss": 0.0008, "step": 157320 }, { "epoch": 1.2865846179008056, "grad_norm": 0.0707065612077713, "learning_rate": 3.4003690036179825e-06, "loss": 0.0006, "step": 157330 }, { "epoch": 1.2866663940794048, "grad_norm": 0.05387604236602783, "learning_rate": 3.3996928948565096e-06, "loss": 0.0009, "step": 157340 }, { "epoch": 1.286748170258004, "grad_norm": 0.013740346767008305, "learning_rate": 3.3990168186945897e-06, "loss": 0.0008, "step": 157350 }, { "epoch": 1.286829946436603, "grad_norm": 0.029351500794291496, "learning_rate": 3.3983407751460013e-06, "loss": 0.0006, "step": 157360 }, { "epoch": 1.2869117226152023, "grad_norm": 0.06860257685184479, "learning_rate": 3.397664764224513e-06, "loss": 0.0005, "step": 157370 }, { "epoch": 1.2869934987938014, "grad_norm": 0.05163457989692688, "learning_rate": 3.3969887859438955e-06, "loss": 0.0009, "step": 157380 }, { "epoch": 1.2870752749724006, "grad_norm": 0.027278341352939606, "learning_rate": 3.3963128403179186e-06, "loss": 0.0008, "step": 157390 }, { "epoch": 1.2871570511509998, "grad_norm": 0.06488297879695892, "learning_rate": 3.395636927360354e-06, "loss": 0.0009, "step": 157400 }, { "epoch": 1.287238827329599, "grad_norm": 0.036660559475421906, "learning_rate": 3.3949610470849688e-06, "loss": 0.0006, "step": 157410 }, { "epoch": 1.287320603508198, "grad_norm": 0.015363466925919056, "learning_rate": 3.394285199505532e-06, "loss": 0.0008, "step": 157420 }, { "epoch": 1.2874023796867973, "grad_norm": 0.04766828566789627, "learning_rate": 3.3936093846358085e-06, "loss": 0.0007, "step": 157430 }, { "epoch": 1.2874841558653964, "grad_norm": 0.09096749126911163, "learning_rate": 3.3929336024895697e-06, "loss": 0.0009, "step": 157440 }, { "epoch": 1.2875659320439956, "grad_norm": 0.09684277325868607, "learning_rate": 3.3922578530805804e-06, "loss": 0.0013, "step": 157450 }, { "epoch": 1.2876477082225948, "grad_norm": 0.011914437636733055, "learning_rate": 3.3915821364226058e-06, "loss": 0.0013, "step": 157460 }, { "epoch": 1.287729484401194, "grad_norm": 0.06663978844881058, "learning_rate": 3.3909064525294076e-06, "loss": 0.0007, "step": 157470 }, { "epoch": 1.2878112605797931, "grad_norm": 0.04934891685843468, "learning_rate": 3.390230801414755e-06, "loss": 0.0009, "step": 157480 }, { "epoch": 1.2878930367583923, "grad_norm": 0.015677709132432938, "learning_rate": 3.3895551830924095e-06, "loss": 0.0006, "step": 157490 }, { "epoch": 1.2879748129369915, "grad_norm": 0.017523156479001045, "learning_rate": 3.3888795975761335e-06, "loss": 0.0008, "step": 157500 }, { "epoch": 1.2880565891155906, "grad_norm": 0.06364941596984863, "learning_rate": 3.3882040448796893e-06, "loss": 0.0006, "step": 157510 }, { "epoch": 1.2881383652941898, "grad_norm": 0.0062495023012161255, "learning_rate": 3.38752852501684e-06, "loss": 0.0006, "step": 157520 }, { "epoch": 1.288220141472789, "grad_norm": 0.012554416432976723, "learning_rate": 3.386853038001344e-06, "loss": 0.0009, "step": 157530 }, { "epoch": 1.2883019176513881, "grad_norm": 0.02406911738216877, "learning_rate": 3.3861775838469647e-06, "loss": 0.0009, "step": 157540 }, { "epoch": 1.2883836938299873, "grad_norm": 0.137311190366745, "learning_rate": 3.385502162567456e-06, "loss": 0.0013, "step": 157550 }, { "epoch": 1.2884654700085865, "grad_norm": 0.0005778744234703481, "learning_rate": 3.384826774176584e-06, "loss": 0.0006, "step": 157560 }, { "epoch": 1.2885472461871856, "grad_norm": 0.047186966985464096, "learning_rate": 3.3841514186881023e-06, "loss": 0.0008, "step": 157570 }, { "epoch": 1.2886290223657848, "grad_norm": 0.0013497273903340101, "learning_rate": 3.383476096115772e-06, "loss": 0.0004, "step": 157580 }, { "epoch": 1.288710798544384, "grad_norm": 0.023339519277215004, "learning_rate": 3.382800806473343e-06, "loss": 0.0004, "step": 157590 }, { "epoch": 1.2887925747229831, "grad_norm": 0.021917065605521202, "learning_rate": 3.38212554977458e-06, "loss": 0.0011, "step": 157600 }, { "epoch": 1.2888743509015823, "grad_norm": 0.0032192785292863846, "learning_rate": 3.381450326033233e-06, "loss": 0.0009, "step": 157610 }, { "epoch": 1.2889561270801815, "grad_norm": 0.01762704737484455, "learning_rate": 3.3807751352630592e-06, "loss": 0.0005, "step": 157620 }, { "epoch": 1.2890379032587806, "grad_norm": 0.04049920663237572, "learning_rate": 3.380099977477811e-06, "loss": 0.0008, "step": 157630 }, { "epoch": 1.2891196794373798, "grad_norm": 0.023961246013641357, "learning_rate": 3.3794248526912434e-06, "loss": 0.0005, "step": 157640 }, { "epoch": 1.289201455615979, "grad_norm": 0.004984273109585047, "learning_rate": 3.3787497609171093e-06, "loss": 0.001, "step": 157650 }, { "epoch": 1.2892832317945784, "grad_norm": 0.12556491792201996, "learning_rate": 3.3780747021691608e-06, "loss": 0.0007, "step": 157660 }, { "epoch": 1.2893650079731773, "grad_norm": 0.052049994468688965, "learning_rate": 3.377399676461146e-06, "loss": 0.0014, "step": 157670 }, { "epoch": 1.2894467841517767, "grad_norm": 0.038245998322963715, "learning_rate": 3.3767246838068214e-06, "loss": 0.0011, "step": 157680 }, { "epoch": 1.2895285603303757, "grad_norm": 0.029716623947024345, "learning_rate": 3.3760497242199346e-06, "loss": 0.0012, "step": 157690 }, { "epoch": 1.289610336508975, "grad_norm": 0.022544704377651215, "learning_rate": 3.375374797714236e-06, "loss": 0.0011, "step": 157700 }, { "epoch": 1.289692112687574, "grad_norm": 0.03205842897295952, "learning_rate": 3.37469990430347e-06, "loss": 0.0012, "step": 157710 }, { "epoch": 1.2897738888661734, "grad_norm": 0.06988970935344696, "learning_rate": 3.37402504400139e-06, "loss": 0.0008, "step": 157720 }, { "epoch": 1.2898556650447723, "grad_norm": 0.10170413553714752, "learning_rate": 3.3733502168217414e-06, "loss": 0.0015, "step": 157730 }, { "epoch": 1.2899374412233717, "grad_norm": 0.06000497564673424, "learning_rate": 3.372675422778271e-06, "loss": 0.0012, "step": 157740 }, { "epoch": 1.2900192174019707, "grad_norm": 0.03459571674466133, "learning_rate": 3.3720006618847245e-06, "loss": 0.0011, "step": 157750 }, { "epoch": 1.29010099358057, "grad_norm": 0.031850285828113556, "learning_rate": 3.3713259341548487e-06, "loss": 0.0008, "step": 157760 }, { "epoch": 1.2901827697591692, "grad_norm": 0.0309478547424078, "learning_rate": 3.370651239602387e-06, "loss": 0.0011, "step": 157770 }, { "epoch": 1.2902645459377684, "grad_norm": 0.03976484760642052, "learning_rate": 3.369976578241084e-06, "loss": 0.0008, "step": 157780 }, { "epoch": 1.2903463221163676, "grad_norm": 0.05622740462422371, "learning_rate": 3.369301950084682e-06, "loss": 0.0031, "step": 157790 }, { "epoch": 1.2904280982949667, "grad_norm": 0.012421873398125172, "learning_rate": 3.3686273551469264e-06, "loss": 0.0008, "step": 157800 }, { "epoch": 1.290509874473566, "grad_norm": 0.007841584272682667, "learning_rate": 3.367952793441557e-06, "loss": 0.0007, "step": 157810 }, { "epoch": 1.290591650652165, "grad_norm": 0.08117593079805374, "learning_rate": 3.367278264982317e-06, "loss": 0.001, "step": 157820 }, { "epoch": 1.2906734268307642, "grad_norm": 0.010802729055285454, "learning_rate": 3.3666037697829436e-06, "loss": 0.0006, "step": 157830 }, { "epoch": 1.2907552030093634, "grad_norm": 0.010693194344639778, "learning_rate": 3.36592930785718e-06, "loss": 0.0007, "step": 157840 }, { "epoch": 1.2908369791879626, "grad_norm": 0.352431982755661, "learning_rate": 3.3652548792187654e-06, "loss": 0.0007, "step": 157850 }, { "epoch": 1.2909187553665618, "grad_norm": 0.007234068121761084, "learning_rate": 3.3645804838814377e-06, "loss": 0.0004, "step": 157860 }, { "epoch": 1.291000531545161, "grad_norm": 0.02101830020546913, "learning_rate": 3.363906121858933e-06, "loss": 0.0004, "step": 157870 }, { "epoch": 1.29108230772376, "grad_norm": 0.11810753494501114, "learning_rate": 3.3632317931649926e-06, "loss": 0.0011, "step": 157880 }, { "epoch": 1.2911640839023593, "grad_norm": 0.06403825432062149, "learning_rate": 3.3625574978133503e-06, "loss": 0.0005, "step": 157890 }, { "epoch": 1.2912458600809584, "grad_norm": 0.10230156034231186, "learning_rate": 3.361883235817743e-06, "loss": 0.0005, "step": 157900 }, { "epoch": 1.2913276362595576, "grad_norm": 0.009555277414619923, "learning_rate": 3.3612090071919036e-06, "loss": 0.0007, "step": 157910 }, { "epoch": 1.2914094124381568, "grad_norm": 0.02260073833167553, "learning_rate": 3.3605348119495714e-06, "loss": 0.0015, "step": 157920 }, { "epoch": 1.291491188616756, "grad_norm": 0.05897396802902222, "learning_rate": 3.3598606501044773e-06, "loss": 0.0005, "step": 157930 }, { "epoch": 1.291572964795355, "grad_norm": 0.03133727237582207, "learning_rate": 3.3591865216703555e-06, "loss": 0.0011, "step": 157940 }, { "epoch": 1.2916547409739543, "grad_norm": 0.032694559544324875, "learning_rate": 3.3585124266609352e-06, "loss": 0.0008, "step": 157950 }, { "epoch": 1.2917365171525534, "grad_norm": 0.05359846353530884, "learning_rate": 3.357838365089953e-06, "loss": 0.0004, "step": 157960 }, { "epoch": 1.2918182933311526, "grad_norm": 0.12220940738916397, "learning_rate": 3.357164336971138e-06, "loss": 0.0013, "step": 157970 }, { "epoch": 1.2919000695097518, "grad_norm": 0.04684881493449211, "learning_rate": 3.3564903423182217e-06, "loss": 0.0006, "step": 157980 }, { "epoch": 1.291981845688351, "grad_norm": 0.050594642758369446, "learning_rate": 3.3558163811449317e-06, "loss": 0.0009, "step": 157990 }, { "epoch": 1.2920636218669501, "grad_norm": 0.11325179040431976, "learning_rate": 3.355142453464999e-06, "loss": 0.0014, "step": 158000 }, { "epoch": 1.2921453980455493, "grad_norm": 0.07059406489133835, "learning_rate": 3.3544685592921516e-06, "loss": 0.0009, "step": 158010 }, { "epoch": 1.2922271742241485, "grad_norm": 0.13315294682979584, "learning_rate": 3.353794698640117e-06, "loss": 0.0024, "step": 158020 }, { "epoch": 1.2923089504027476, "grad_norm": 0.02336784452199936, "learning_rate": 3.3531208715226216e-06, "loss": 0.001, "step": 158030 }, { "epoch": 1.2923907265813468, "grad_norm": 0.05475112795829773, "learning_rate": 3.3524470779533935e-06, "loss": 0.0007, "step": 158040 }, { "epoch": 1.292472502759946, "grad_norm": 0.01858736388385296, "learning_rate": 3.351773317946158e-06, "loss": 0.001, "step": 158050 }, { "epoch": 1.2925542789385451, "grad_norm": 0.044835563749074936, "learning_rate": 3.351099591514641e-06, "loss": 0.0019, "step": 158060 }, { "epoch": 1.2926360551171443, "grad_norm": 0.00435127317905426, "learning_rate": 3.3504258986725622e-06, "loss": 0.0007, "step": 158070 }, { "epoch": 1.2927178312957435, "grad_norm": 0.016299698501825333, "learning_rate": 3.3497522394336514e-06, "loss": 0.001, "step": 158080 }, { "epoch": 1.2927996074743429, "grad_norm": 0.05245966836810112, "learning_rate": 3.3490786138116283e-06, "loss": 0.0007, "step": 158090 }, { "epoch": 1.2928813836529418, "grad_norm": 0.02951221726834774, "learning_rate": 3.3484050218202158e-06, "loss": 0.0006, "step": 158100 }, { "epoch": 1.2929631598315412, "grad_norm": 0.058944061398506165, "learning_rate": 3.3477314634731344e-06, "loss": 0.0007, "step": 158110 }, { "epoch": 1.2930449360101401, "grad_norm": 0.028615770861506462, "learning_rate": 3.3470579387841075e-06, "loss": 0.0008, "step": 158120 }, { "epoch": 1.2931267121887395, "grad_norm": 0.049629345536231995, "learning_rate": 3.346384447766854e-06, "loss": 0.001, "step": 158130 }, { "epoch": 1.2932084883673385, "grad_norm": 0.015764286741614342, "learning_rate": 3.3457109904350927e-06, "loss": 0.0008, "step": 158140 }, { "epoch": 1.2932902645459379, "grad_norm": 0.04235846921801567, "learning_rate": 3.3450375668025415e-06, "loss": 0.0012, "step": 158150 }, { "epoch": 1.2933720407245368, "grad_norm": 0.022815024480223656, "learning_rate": 3.3443641768829228e-06, "loss": 0.0008, "step": 158160 }, { "epoch": 1.2934538169031362, "grad_norm": 0.09305380284786224, "learning_rate": 3.343690820689951e-06, "loss": 0.0014, "step": 158170 }, { "epoch": 1.2935355930817352, "grad_norm": 0.010789349675178528, "learning_rate": 3.3430174982373447e-06, "loss": 0.0016, "step": 158180 }, { "epoch": 1.2936173692603345, "grad_norm": 0.0036717052571475506, "learning_rate": 3.3423442095388158e-06, "loss": 0.0006, "step": 158190 }, { "epoch": 1.2936991454389337, "grad_norm": 0.05309167131781578, "learning_rate": 3.341670954608085e-06, "loss": 0.0006, "step": 158200 }, { "epoch": 1.2937809216175329, "grad_norm": 0.0013852983247488737, "learning_rate": 3.3409977334588644e-06, "loss": 0.0009, "step": 158210 }, { "epoch": 1.293862697796132, "grad_norm": 0.027963975444436073, "learning_rate": 3.340324546104869e-06, "loss": 0.001, "step": 158220 }, { "epoch": 1.2939444739747312, "grad_norm": 0.04254429414868355, "learning_rate": 3.3396513925598103e-06, "loss": 0.001, "step": 158230 }, { "epoch": 1.2940262501533304, "grad_norm": 0.08311015367507935, "learning_rate": 3.338978272837403e-06, "loss": 0.0012, "step": 158240 }, { "epoch": 1.2941080263319296, "grad_norm": 0.1375684142112732, "learning_rate": 3.3383051869513593e-06, "loss": 0.0008, "step": 158250 }, { "epoch": 1.2941898025105287, "grad_norm": 0.1695670336484909, "learning_rate": 3.337632134915389e-06, "loss": 0.0017, "step": 158260 }, { "epoch": 1.294271578689128, "grad_norm": 0.13414201140403748, "learning_rate": 3.3369591167432014e-06, "loss": 0.0005, "step": 158270 }, { "epoch": 1.294353354867727, "grad_norm": 0.0003088552039116621, "learning_rate": 3.3362861324485105e-06, "loss": 0.0009, "step": 158280 }, { "epoch": 1.2944351310463262, "grad_norm": 0.030606785789132118, "learning_rate": 3.3356131820450245e-06, "loss": 0.0005, "step": 158290 }, { "epoch": 1.2945169072249254, "grad_norm": 0.023069363087415695, "learning_rate": 3.3349402655464496e-06, "loss": 0.0006, "step": 158300 }, { "epoch": 1.2945986834035246, "grad_norm": 0.016664370894432068, "learning_rate": 3.334267382966493e-06, "loss": 0.0019, "step": 158310 }, { "epoch": 1.2946804595821237, "grad_norm": 0.028327593579888344, "learning_rate": 3.333594534318866e-06, "loss": 0.0011, "step": 158320 }, { "epoch": 1.294762235760723, "grad_norm": 0.022194864228367805, "learning_rate": 3.3329217196172723e-06, "loss": 0.0009, "step": 158330 }, { "epoch": 1.294844011939322, "grad_norm": 0.020834021270275116, "learning_rate": 3.332248938875419e-06, "loss": 0.0007, "step": 158340 }, { "epoch": 1.2949257881179213, "grad_norm": 0.035306233912706375, "learning_rate": 3.331576192107009e-06, "loss": 0.0009, "step": 158350 }, { "epoch": 1.2950075642965204, "grad_norm": 0.0023117559030652046, "learning_rate": 3.3309034793257496e-06, "loss": 0.0009, "step": 158360 }, { "epoch": 1.2950893404751196, "grad_norm": 0.009851458482444286, "learning_rate": 3.330230800545342e-06, "loss": 0.0021, "step": 158370 }, { "epoch": 1.2951711166537188, "grad_norm": 0.06941403448581696, "learning_rate": 3.3295581557794916e-06, "loss": 0.0014, "step": 158380 }, { "epoch": 1.295252892832318, "grad_norm": 0.020050469785928726, "learning_rate": 3.3288855450418968e-06, "loss": 0.0007, "step": 158390 }, { "epoch": 1.295334669010917, "grad_norm": 0.023804737254977226, "learning_rate": 3.3282129683462635e-06, "loss": 0.0006, "step": 158400 }, { "epoch": 1.2954164451895163, "grad_norm": 0.020296653732657433, "learning_rate": 3.327540425706292e-06, "loss": 0.0044, "step": 158410 }, { "epoch": 1.2954982213681154, "grad_norm": 0.08296163380146027, "learning_rate": 3.3268679171356805e-06, "loss": 0.0015, "step": 158420 }, { "epoch": 1.2955799975467146, "grad_norm": 0.06648435443639755, "learning_rate": 3.326195442648128e-06, "loss": 0.0015, "step": 158430 }, { "epoch": 1.2956617737253138, "grad_norm": 0.022711247205734253, "learning_rate": 3.3255230022573358e-06, "loss": 0.0008, "step": 158440 }, { "epoch": 1.295743549903913, "grad_norm": 0.08170365542173386, "learning_rate": 3.3248505959770023e-06, "loss": 0.0014, "step": 158450 }, { "epoch": 1.295825326082512, "grad_norm": 0.0200444757938385, "learning_rate": 3.324178223820823e-06, "loss": 0.0004, "step": 158460 }, { "epoch": 1.2959071022611113, "grad_norm": 0.09280326962471008, "learning_rate": 3.3235058858024948e-06, "loss": 0.0014, "step": 158470 }, { "epoch": 1.2959888784397104, "grad_norm": 0.12393741309642792, "learning_rate": 3.3228335819357155e-06, "loss": 0.0013, "step": 158480 }, { "epoch": 1.2960706546183096, "grad_norm": 0.09993166476488113, "learning_rate": 3.3221613122341788e-06, "loss": 0.0012, "step": 158490 }, { "epoch": 1.2961524307969088, "grad_norm": 0.06451287865638733, "learning_rate": 3.321489076711581e-06, "loss": 0.0021, "step": 158500 }, { "epoch": 1.296234206975508, "grad_norm": 0.002726759761571884, "learning_rate": 3.320816875381613e-06, "loss": 0.0007, "step": 158510 }, { "epoch": 1.2963159831541073, "grad_norm": 0.010780265554785728, "learning_rate": 3.320144708257971e-06, "loss": 0.0015, "step": 158520 }, { "epoch": 1.2963977593327063, "grad_norm": 0.05987466126680374, "learning_rate": 3.319472575354349e-06, "loss": 0.0012, "step": 158530 }, { "epoch": 1.2964795355113057, "grad_norm": 0.019423171877861023, "learning_rate": 3.318800476684435e-06, "loss": 0.0012, "step": 158540 }, { "epoch": 1.2965613116899046, "grad_norm": 0.04403473436832428, "learning_rate": 3.3181284122619193e-06, "loss": 0.0008, "step": 158550 }, { "epoch": 1.296643087868504, "grad_norm": 0.08780152350664139, "learning_rate": 3.317456382100498e-06, "loss": 0.0022, "step": 158560 }, { "epoch": 1.296724864047103, "grad_norm": 0.03861571103334427, "learning_rate": 3.3167843862138572e-06, "loss": 0.0008, "step": 158570 }, { "epoch": 1.2968066402257024, "grad_norm": 0.10079112648963928, "learning_rate": 3.316112424615687e-06, "loss": 0.0009, "step": 158580 }, { "epoch": 1.2968884164043013, "grad_norm": 0.002440173178911209, "learning_rate": 3.315440497319674e-06, "loss": 0.0011, "step": 158590 }, { "epoch": 1.2969701925829007, "grad_norm": 0.009203184396028519, "learning_rate": 3.314768604339509e-06, "loss": 0.0008, "step": 158600 }, { "epoch": 1.2970519687614996, "grad_norm": 0.05508983135223389, "learning_rate": 3.3140967456888768e-06, "loss": 0.0005, "step": 158610 }, { "epoch": 1.297133744940099, "grad_norm": 0.006942208390682936, "learning_rate": 3.3134249213814647e-06, "loss": 0.0007, "step": 158620 }, { "epoch": 1.2972155211186982, "grad_norm": 0.030949538573622704, "learning_rate": 3.3127531314309553e-06, "loss": 0.0008, "step": 158630 }, { "epoch": 1.2972972972972974, "grad_norm": 0.0812540277838707, "learning_rate": 3.3120813758510385e-06, "loss": 0.0008, "step": 158640 }, { "epoch": 1.2973790734758965, "grad_norm": 0.0411064550280571, "learning_rate": 3.311409654655397e-06, "loss": 0.0012, "step": 158650 }, { "epoch": 1.2974608496544957, "grad_norm": 0.03403342515230179, "learning_rate": 3.3107379678577124e-06, "loss": 0.0006, "step": 158660 }, { "epoch": 1.2975426258330949, "grad_norm": 0.015692859888076782, "learning_rate": 3.3100663154716684e-06, "loss": 0.001, "step": 158670 }, { "epoch": 1.297624402011694, "grad_norm": 0.016879936680197716, "learning_rate": 3.309394697510945e-06, "loss": 0.0012, "step": 158680 }, { "epoch": 1.2977061781902932, "grad_norm": 0.0654488056898117, "learning_rate": 3.3087231139892282e-06, "loss": 0.0008, "step": 158690 }, { "epoch": 1.2977879543688924, "grad_norm": 0.021205585449934006, "learning_rate": 3.308051564920196e-06, "loss": 0.0012, "step": 158700 }, { "epoch": 1.2978697305474916, "grad_norm": 0.009061603806912899, "learning_rate": 3.307380050317529e-06, "loss": 0.0008, "step": 158710 }, { "epoch": 1.2979515067260907, "grad_norm": 0.033057719469070435, "learning_rate": 3.3067085701949052e-06, "loss": 0.001, "step": 158720 }, { "epoch": 1.29803328290469, "grad_norm": 0.044513024389743805, "learning_rate": 3.306037124566005e-06, "loss": 0.0008, "step": 158730 }, { "epoch": 1.298115059083289, "grad_norm": 0.01941060647368431, "learning_rate": 3.305365713444506e-06, "loss": 0.0008, "step": 158740 }, { "epoch": 1.2981968352618882, "grad_norm": 0.009575472213327885, "learning_rate": 3.304694336844084e-06, "loss": 0.0004, "step": 158750 }, { "epoch": 1.2982786114404874, "grad_norm": 0.03588278219103813, "learning_rate": 3.3040229947784155e-06, "loss": 0.0009, "step": 158760 }, { "epoch": 1.2983603876190866, "grad_norm": 0.008600651286542416, "learning_rate": 3.3033516872611794e-06, "loss": 0.0007, "step": 158770 }, { "epoch": 1.2984421637976857, "grad_norm": 0.017505604773759842, "learning_rate": 3.3026804143060475e-06, "loss": 0.0005, "step": 158780 }, { "epoch": 1.298523939976285, "grad_norm": 0.02341245301067829, "learning_rate": 3.302009175926695e-06, "loss": 0.0016, "step": 158790 }, { "epoch": 1.298605716154884, "grad_norm": 0.0454966202378273, "learning_rate": 3.3013379721367943e-06, "loss": 0.001, "step": 158800 }, { "epoch": 1.2986874923334832, "grad_norm": 0.06321907788515091, "learning_rate": 3.3006668029500216e-06, "loss": 0.0008, "step": 158810 }, { "epoch": 1.2987692685120824, "grad_norm": 0.007322845049202442, "learning_rate": 3.2999956683800473e-06, "loss": 0.0019, "step": 158820 }, { "epoch": 1.2988510446906816, "grad_norm": 0.06456466019153595, "learning_rate": 3.299324568440543e-06, "loss": 0.0006, "step": 158830 }, { "epoch": 1.2989328208692807, "grad_norm": 0.12878593802452087, "learning_rate": 3.2986535031451796e-06, "loss": 0.0009, "step": 158840 }, { "epoch": 1.29901459704788, "grad_norm": 0.015593770891427994, "learning_rate": 3.2979824725076272e-06, "loss": 0.0009, "step": 158850 }, { "epoch": 1.299096373226479, "grad_norm": 0.020092573016881943, "learning_rate": 3.297311476541556e-06, "loss": 0.0008, "step": 158860 }, { "epoch": 1.2991781494050783, "grad_norm": 0.0415470264852047, "learning_rate": 3.2966405152606333e-06, "loss": 0.0009, "step": 158870 }, { "epoch": 1.2992599255836774, "grad_norm": 0.03134801611304283, "learning_rate": 3.295969588678526e-06, "loss": 0.0006, "step": 158880 }, { "epoch": 1.2993417017622766, "grad_norm": 0.08786766231060028, "learning_rate": 3.2952986968089067e-06, "loss": 0.0012, "step": 158890 }, { "epoch": 1.2994234779408758, "grad_norm": 0.08921575546264648, "learning_rate": 3.2946278396654376e-06, "loss": 0.001, "step": 158900 }, { "epoch": 1.299505254119475, "grad_norm": 0.00937976036220789, "learning_rate": 3.2939570172617852e-06, "loss": 0.0005, "step": 158910 }, { "epoch": 1.299587030298074, "grad_norm": 0.07920240610837936, "learning_rate": 3.2932862296116135e-06, "loss": 0.0009, "step": 158920 }, { "epoch": 1.2996688064766733, "grad_norm": 0.003351362654939294, "learning_rate": 3.29261547672859e-06, "loss": 0.0011, "step": 158930 }, { "epoch": 1.2997505826552724, "grad_norm": 0.018460972234606743, "learning_rate": 3.291944758626378e-06, "loss": 0.0003, "step": 158940 }, { "epoch": 1.2998323588338718, "grad_norm": 0.0432366207242012, "learning_rate": 3.2912740753186388e-06, "loss": 0.0012, "step": 158950 }, { "epoch": 1.2999141350124708, "grad_norm": 0.06652242690324783, "learning_rate": 3.290603426819035e-06, "loss": 0.0017, "step": 158960 }, { "epoch": 1.2999959111910702, "grad_norm": 0.038916442543268204, "learning_rate": 3.28993281314123e-06, "loss": 0.001, "step": 158970 }, { "epoch": 1.300077687369669, "grad_norm": 0.0007800350431352854, "learning_rate": 3.289262234298883e-06, "loss": 0.003, "step": 158980 }, { "epoch": 1.3001594635482685, "grad_norm": 0.06210607662796974, "learning_rate": 3.288591690305656e-06, "loss": 0.0007, "step": 158990 }, { "epoch": 1.3002412397268674, "grad_norm": 0.07916007936000824, "learning_rate": 3.2879211811752045e-06, "loss": 0.0013, "step": 159000 }, { "epoch": 1.3003230159054668, "grad_norm": 0.18969839811325073, "learning_rate": 3.2872507069211936e-06, "loss": 0.0009, "step": 159010 }, { "epoch": 1.3004047920840658, "grad_norm": 0.008844379335641861, "learning_rate": 3.2865802675572766e-06, "loss": 0.0008, "step": 159020 }, { "epoch": 1.3004865682626652, "grad_norm": 0.023058272898197174, "learning_rate": 3.285909863097112e-06, "loss": 0.001, "step": 159030 }, { "epoch": 1.3005683444412641, "grad_norm": 0.04867898300290108, "learning_rate": 3.285239493554355e-06, "loss": 0.0008, "step": 159040 }, { "epoch": 1.3006501206198635, "grad_norm": 0.10247723758220673, "learning_rate": 3.284569158942666e-06, "loss": 0.0012, "step": 159050 }, { "epoch": 1.3007318967984627, "grad_norm": 0.005389743950217962, "learning_rate": 3.283898859275697e-06, "loss": 0.0005, "step": 159060 }, { "epoch": 1.3008136729770619, "grad_norm": 0.011556167155504227, "learning_rate": 3.2832285945671033e-06, "loss": 0.0005, "step": 159070 }, { "epoch": 1.300895449155661, "grad_norm": 0.0171912033110857, "learning_rate": 3.2825583648305377e-06, "loss": 0.0012, "step": 159080 }, { "epoch": 1.3009772253342602, "grad_norm": 0.0034527538809925318, "learning_rate": 3.2818881700796545e-06, "loss": 0.0007, "step": 159090 }, { "epoch": 1.3010590015128594, "grad_norm": 0.04038343206048012, "learning_rate": 3.2812180103281066e-06, "loss": 0.0012, "step": 159100 }, { "epoch": 1.3011407776914585, "grad_norm": 0.03414652496576309, "learning_rate": 3.2805478855895452e-06, "loss": 0.0011, "step": 159110 }, { "epoch": 1.3012225538700577, "grad_norm": 0.13962993025779724, "learning_rate": 3.2798777958776186e-06, "loss": 0.001, "step": 159120 }, { "epoch": 1.3013043300486569, "grad_norm": 0.03540470823645592, "learning_rate": 3.279207741205982e-06, "loss": 0.0007, "step": 159130 }, { "epoch": 1.301386106227256, "grad_norm": 0.028498489409685135, "learning_rate": 3.2785377215882817e-06, "loss": 0.0006, "step": 159140 }, { "epoch": 1.3014678824058552, "grad_norm": 0.06875348836183548, "learning_rate": 3.277867737038168e-06, "loss": 0.0013, "step": 159150 }, { "epoch": 1.3015496585844544, "grad_norm": 0.10433157533407211, "learning_rate": 3.277197787569285e-06, "loss": 0.0012, "step": 159160 }, { "epoch": 1.3016314347630535, "grad_norm": 0.009755769744515419, "learning_rate": 3.2765278731952865e-06, "loss": 0.0005, "step": 159170 }, { "epoch": 1.3017132109416527, "grad_norm": 0.09929478168487549, "learning_rate": 3.2758579939298163e-06, "loss": 0.0012, "step": 159180 }, { "epoch": 1.3017949871202519, "grad_norm": 0.07050034403800964, "learning_rate": 3.2751881497865197e-06, "loss": 0.0004, "step": 159190 }, { "epoch": 1.301876763298851, "grad_norm": 0.05957883223891258, "learning_rate": 3.2745183407790414e-06, "loss": 0.0013, "step": 159200 }, { "epoch": 1.3019585394774502, "grad_norm": 0.009182780981063843, "learning_rate": 3.2738485669210286e-06, "loss": 0.0009, "step": 159210 }, { "epoch": 1.3020403156560494, "grad_norm": 0.004937098827213049, "learning_rate": 3.2731788282261223e-06, "loss": 0.0006, "step": 159220 }, { "epoch": 1.3021220918346486, "grad_norm": 0.04434005543589592, "learning_rate": 3.2725091247079684e-06, "loss": 0.0008, "step": 159230 }, { "epoch": 1.3022038680132477, "grad_norm": 0.05666821077466011, "learning_rate": 3.271839456380205e-06, "loss": 0.0008, "step": 159240 }, { "epoch": 1.302285644191847, "grad_norm": 0.00959441252052784, "learning_rate": 3.27116982325648e-06, "loss": 0.0025, "step": 159250 }, { "epoch": 1.302367420370446, "grad_norm": 0.038812246173620224, "learning_rate": 3.270500225350429e-06, "loss": 0.0011, "step": 159260 }, { "epoch": 1.3024491965490452, "grad_norm": 0.01240916270762682, "learning_rate": 3.269830662675695e-06, "loss": 0.0013, "step": 159270 }, { "epoch": 1.3025309727276444, "grad_norm": 0.021220536902546883, "learning_rate": 3.269161135245915e-06, "loss": 0.0003, "step": 159280 }, { "epoch": 1.3026127489062436, "grad_norm": 0.012060478329658508, "learning_rate": 3.2684916430747316e-06, "loss": 0.0015, "step": 159290 }, { "epoch": 1.3026945250848427, "grad_norm": 0.02002190798521042, "learning_rate": 3.26782218617578e-06, "loss": 0.0005, "step": 159300 }, { "epoch": 1.302776301263442, "grad_norm": 0.04635128006339073, "learning_rate": 3.2671527645626992e-06, "loss": 0.0007, "step": 159310 }, { "epoch": 1.302858077442041, "grad_norm": 0.01554796751588583, "learning_rate": 3.266483378249124e-06, "loss": 0.0006, "step": 159320 }, { "epoch": 1.3029398536206402, "grad_norm": 0.008307092823088169, "learning_rate": 3.2658140272486926e-06, "loss": 0.0006, "step": 159330 }, { "epoch": 1.3030216297992394, "grad_norm": 0.023182054981589317, "learning_rate": 3.2651447115750388e-06, "loss": 0.001, "step": 159340 }, { "epoch": 1.3031034059778386, "grad_norm": 0.003501386847347021, "learning_rate": 3.264475431241797e-06, "loss": 0.0006, "step": 159350 }, { "epoch": 1.3031851821564377, "grad_norm": 0.04347583279013634, "learning_rate": 3.2638061862626003e-06, "loss": 0.0008, "step": 159360 }, { "epoch": 1.303266958335037, "grad_norm": 0.0186948012560606, "learning_rate": 3.263136976651085e-06, "loss": 0.0004, "step": 159370 }, { "epoch": 1.3033487345136363, "grad_norm": 0.022203955799341202, "learning_rate": 3.262467802420881e-06, "loss": 0.0006, "step": 159380 }, { "epoch": 1.3034305106922353, "grad_norm": 0.08062372356653214, "learning_rate": 3.2617986635856197e-06, "loss": 0.0006, "step": 159390 }, { "epoch": 1.3035122868708346, "grad_norm": 0.06589780002832413, "learning_rate": 3.2611295601589306e-06, "loss": 0.0006, "step": 159400 }, { "epoch": 1.3035940630494336, "grad_norm": 0.053481366485357285, "learning_rate": 3.260460492154448e-06, "loss": 0.0007, "step": 159410 }, { "epoch": 1.303675839228033, "grad_norm": 0.016598565503954887, "learning_rate": 3.2597914595857984e-06, "loss": 0.001, "step": 159420 }, { "epoch": 1.303757615406632, "grad_norm": 0.01564449444413185, "learning_rate": 3.2591224624666116e-06, "loss": 0.0004, "step": 159430 }, { "epoch": 1.3038393915852313, "grad_norm": 0.07392585277557373, "learning_rate": 3.258453500810515e-06, "loss": 0.001, "step": 159440 }, { "epoch": 1.3039211677638303, "grad_norm": 0.027339033782482147, "learning_rate": 3.2577845746311367e-06, "loss": 0.0008, "step": 159450 }, { "epoch": 1.3040029439424297, "grad_norm": 0.08850377798080444, "learning_rate": 3.2571156839421026e-06, "loss": 0.0006, "step": 159460 }, { "epoch": 1.3040847201210286, "grad_norm": 0.020044293254613876, "learning_rate": 3.2564468287570394e-06, "loss": 0.0008, "step": 159470 }, { "epoch": 1.304166496299628, "grad_norm": 0.013840219005942345, "learning_rate": 3.2557780090895686e-06, "loss": 0.0009, "step": 159480 }, { "epoch": 1.3042482724782272, "grad_norm": 0.023501068353652954, "learning_rate": 3.255109224953321e-06, "loss": 0.0009, "step": 159490 }, { "epoch": 1.3043300486568263, "grad_norm": 0.015182167291641235, "learning_rate": 3.254440476361916e-06, "loss": 0.0011, "step": 159500 }, { "epoch": 1.3044118248354255, "grad_norm": 0.04709954559803009, "learning_rate": 3.253771763328977e-06, "loss": 0.0006, "step": 159510 }, { "epoch": 1.3044936010140247, "grad_norm": 0.02706710249185562, "learning_rate": 3.2531030858681244e-06, "loss": 0.0015, "step": 159520 }, { "epoch": 1.3045753771926238, "grad_norm": 0.06769177317619324, "learning_rate": 3.2524344439929844e-06, "loss": 0.0005, "step": 159530 }, { "epoch": 1.304657153371223, "grad_norm": 0.07939453423023224, "learning_rate": 3.251765837717175e-06, "loss": 0.0013, "step": 159540 }, { "epoch": 1.3047389295498222, "grad_norm": 0.01925705000758171, "learning_rate": 3.251097267054317e-06, "loss": 0.0008, "step": 159550 }, { "epoch": 1.3048207057284213, "grad_norm": 0.044285546988248825, "learning_rate": 3.2504287320180273e-06, "loss": 0.0007, "step": 159560 }, { "epoch": 1.3049024819070205, "grad_norm": 0.002262844704091549, "learning_rate": 3.249760232621928e-06, "loss": 0.0007, "step": 159570 }, { "epoch": 1.3049842580856197, "grad_norm": 0.050847213715314865, "learning_rate": 3.2490917688796354e-06, "loss": 0.0008, "step": 159580 }, { "epoch": 1.3050660342642189, "grad_norm": 0.009546421468257904, "learning_rate": 3.2484233408047673e-06, "loss": 0.0005, "step": 159590 }, { "epoch": 1.305147810442818, "grad_norm": 0.03253455460071564, "learning_rate": 3.247754948410936e-06, "loss": 0.0015, "step": 159600 }, { "epoch": 1.3052295866214172, "grad_norm": 0.013371420092880726, "learning_rate": 3.247086591711765e-06, "loss": 0.0034, "step": 159610 }, { "epoch": 1.3053113628000164, "grad_norm": 0.027584806084632874, "learning_rate": 3.2464182707208635e-06, "loss": 0.0005, "step": 159620 }, { "epoch": 1.3053931389786155, "grad_norm": 0.03896315395832062, "learning_rate": 3.2457499854518472e-06, "loss": 0.0007, "step": 159630 }, { "epoch": 1.3054749151572147, "grad_norm": 0.021777957677841187, "learning_rate": 3.2450817359183277e-06, "loss": 0.0015, "step": 159640 }, { "epoch": 1.3055566913358139, "grad_norm": 0.03916545212268829, "learning_rate": 3.2444135221339218e-06, "loss": 0.0006, "step": 159650 }, { "epoch": 1.305638467514413, "grad_norm": 0.051847390830516815, "learning_rate": 3.243745344112239e-06, "loss": 0.0009, "step": 159660 }, { "epoch": 1.3057202436930122, "grad_norm": 0.0029515624046325684, "learning_rate": 3.243077201866892e-06, "loss": 0.0008, "step": 159670 }, { "epoch": 1.3058020198716114, "grad_norm": 0.030603893101215363, "learning_rate": 3.2424090954114883e-06, "loss": 0.001, "step": 159680 }, { "epoch": 1.3058837960502105, "grad_norm": 0.013556285761296749, "learning_rate": 3.241741024759642e-06, "loss": 0.0019, "step": 159690 }, { "epoch": 1.3059655722288097, "grad_norm": 0.03020298108458519, "learning_rate": 3.24107298992496e-06, "loss": 0.0006, "step": 159700 }, { "epoch": 1.3060473484074089, "grad_norm": 0.0645107701420784, "learning_rate": 3.240404990921051e-06, "loss": 0.0012, "step": 159710 }, { "epoch": 1.306129124586008, "grad_norm": 0.029640929773449898, "learning_rate": 3.23973702776152e-06, "loss": 0.0009, "step": 159720 }, { "epoch": 1.3062109007646072, "grad_norm": 0.0836172103881836, "learning_rate": 3.23906910045998e-06, "loss": 0.0011, "step": 159730 }, { "epoch": 1.3062926769432064, "grad_norm": 0.053517553955316544, "learning_rate": 3.2384012090300326e-06, "loss": 0.0012, "step": 159740 }, { "epoch": 1.3063744531218056, "grad_norm": 0.06196670979261398, "learning_rate": 3.237733353485285e-06, "loss": 0.0007, "step": 159750 }, { "epoch": 1.3064562293004047, "grad_norm": 0.09529460221529007, "learning_rate": 3.2370655338393386e-06, "loss": 0.0012, "step": 159760 }, { "epoch": 1.306538005479004, "grad_norm": 0.022537363693118095, "learning_rate": 3.236397750105803e-06, "loss": 0.0012, "step": 159770 }, { "epoch": 1.306619781657603, "grad_norm": 0.02130502089858055, "learning_rate": 3.2357300022982784e-06, "loss": 0.0007, "step": 159780 }, { "epoch": 1.3067015578362022, "grad_norm": 0.04161927103996277, "learning_rate": 3.2350622904303677e-06, "loss": 0.0004, "step": 159790 }, { "epoch": 1.3067833340148014, "grad_norm": 0.018404902890324593, "learning_rate": 3.234394614515671e-06, "loss": 0.0006, "step": 159800 }, { "epoch": 1.3068651101934008, "grad_norm": 0.04207047075033188, "learning_rate": 3.2337269745677936e-06, "loss": 0.0011, "step": 159810 }, { "epoch": 1.3069468863719997, "grad_norm": 0.062284231185913086, "learning_rate": 3.2330593706003327e-06, "loss": 0.0012, "step": 159820 }, { "epoch": 1.3070286625505991, "grad_norm": 0.0464034304022789, "learning_rate": 3.2323918026268885e-06, "loss": 0.0009, "step": 159830 }, { "epoch": 1.307110438729198, "grad_norm": 0.012441159226000309, "learning_rate": 3.2317242706610585e-06, "loss": 0.0006, "step": 159840 }, { "epoch": 1.3071922149077975, "grad_norm": 0.030317815020680428, "learning_rate": 3.2310567747164454e-06, "loss": 0.0007, "step": 159850 }, { "epoch": 1.3072739910863964, "grad_norm": 0.02321953885257244, "learning_rate": 3.230389314806642e-06, "loss": 0.0007, "step": 159860 }, { "epoch": 1.3073557672649958, "grad_norm": 0.01567012257874012, "learning_rate": 3.229721890945248e-06, "loss": 0.0009, "step": 159870 }, { "epoch": 1.3074375434435948, "grad_norm": 0.04813413321971893, "learning_rate": 3.2290545031458548e-06, "loss": 0.0014, "step": 159880 }, { "epoch": 1.3075193196221941, "grad_norm": 0.024632185697555542, "learning_rate": 3.2283871514220633e-06, "loss": 0.0008, "step": 159890 }, { "epoch": 1.3076010958007933, "grad_norm": 0.0038224004674702883, "learning_rate": 3.2277198357874654e-06, "loss": 0.0004, "step": 159900 }, { "epoch": 1.3076828719793925, "grad_norm": 0.03808845579624176, "learning_rate": 3.2270525562556555e-06, "loss": 0.0013, "step": 159910 }, { "epoch": 1.3077646481579916, "grad_norm": 0.07036179304122925, "learning_rate": 3.2263853128402243e-06, "loss": 0.002, "step": 159920 }, { "epoch": 1.3078464243365908, "grad_norm": 0.04160253331065178, "learning_rate": 3.2257181055547683e-06, "loss": 0.0012, "step": 159930 }, { "epoch": 1.30792820051519, "grad_norm": 0.007882264442741871, "learning_rate": 3.2250509344128756e-06, "loss": 0.0013, "step": 159940 }, { "epoch": 1.3080099766937892, "grad_norm": 0.030062176287174225, "learning_rate": 3.2243837994281385e-06, "loss": 0.001, "step": 159950 }, { "epoch": 1.3080917528723883, "grad_norm": 0.060190197080373764, "learning_rate": 3.2237167006141444e-06, "loss": 0.0006, "step": 159960 }, { "epoch": 1.3081735290509875, "grad_norm": 0.07374649494886398, "learning_rate": 3.2230496379844885e-06, "loss": 0.0009, "step": 159970 }, { "epoch": 1.3082553052295867, "grad_norm": 0.0608815960586071, "learning_rate": 3.2223826115527546e-06, "loss": 0.0008, "step": 159980 }, { "epoch": 1.3083370814081858, "grad_norm": 0.0020882743410766125, "learning_rate": 3.2217156213325314e-06, "loss": 0.0008, "step": 159990 }, { "epoch": 1.308418857586785, "grad_norm": 0.0037864046171307564, "learning_rate": 3.2210486673374048e-06, "loss": 0.0009, "step": 160000 }, { "epoch": 1.3085006337653842, "grad_norm": 0.005572703666985035, "learning_rate": 3.2203817495809645e-06, "loss": 0.0006, "step": 160010 }, { "epoch": 1.3085824099439833, "grad_norm": 0.1927938014268875, "learning_rate": 3.2197148680767954e-06, "loss": 0.001, "step": 160020 }, { "epoch": 1.3086641861225825, "grad_norm": 0.09088399261236191, "learning_rate": 3.219048022838481e-06, "loss": 0.001, "step": 160030 }, { "epoch": 1.3087459623011817, "grad_norm": 0.018773863092064857, "learning_rate": 3.2183812138796046e-06, "loss": 0.0007, "step": 160040 }, { "epoch": 1.3088277384797808, "grad_norm": 0.050124138593673706, "learning_rate": 3.2177144412137525e-06, "loss": 0.0005, "step": 160050 }, { "epoch": 1.30890951465838, "grad_norm": 0.008307511918246746, "learning_rate": 3.2170477048545056e-06, "loss": 0.0005, "step": 160060 }, { "epoch": 1.3089912908369792, "grad_norm": 0.01625196821987629, "learning_rate": 3.2163810048154466e-06, "loss": 0.0005, "step": 160070 }, { "epoch": 1.3090730670155784, "grad_norm": 0.08647921681404114, "learning_rate": 3.2157143411101564e-06, "loss": 0.0015, "step": 160080 }, { "epoch": 1.3091548431941775, "grad_norm": 0.2114701122045517, "learning_rate": 3.215047713752214e-06, "loss": 0.0017, "step": 160090 }, { "epoch": 1.3092366193727767, "grad_norm": 0.1473839282989502, "learning_rate": 3.214381122755202e-06, "loss": 0.0032, "step": 160100 }, { "epoch": 1.3093183955513759, "grad_norm": 0.0028368993662297726, "learning_rate": 3.2137145681326976e-06, "loss": 0.0005, "step": 160110 }, { "epoch": 1.309400171729975, "grad_norm": 0.11198826879262924, "learning_rate": 3.21304804989828e-06, "loss": 0.0014, "step": 160120 }, { "epoch": 1.3094819479085742, "grad_norm": 0.007870323024690151, "learning_rate": 3.2123815680655235e-06, "loss": 0.0007, "step": 160130 }, { "epoch": 1.3095637240871734, "grad_norm": 0.04783451557159424, "learning_rate": 3.211715122648009e-06, "loss": 0.0007, "step": 160140 }, { "epoch": 1.3096455002657725, "grad_norm": 0.04428200423717499, "learning_rate": 3.211048713659312e-06, "loss": 0.0005, "step": 160150 }, { "epoch": 1.3097272764443717, "grad_norm": 0.07301648706197739, "learning_rate": 3.2103823411130065e-06, "loss": 0.0008, "step": 160160 }, { "epoch": 1.3098090526229709, "grad_norm": 0.01568782515823841, "learning_rate": 3.209716005022666e-06, "loss": 0.0006, "step": 160170 }, { "epoch": 1.30989082880157, "grad_norm": 0.034435756504535675, "learning_rate": 3.2090497054018675e-06, "loss": 0.001, "step": 160180 }, { "epoch": 1.3099726049801692, "grad_norm": 0.09952697902917862, "learning_rate": 3.2083834422641823e-06, "loss": 0.001, "step": 160190 }, { "epoch": 1.3100543811587684, "grad_norm": 0.044986601918935776, "learning_rate": 3.2077172156231817e-06, "loss": 0.0009, "step": 160200 }, { "epoch": 1.3101361573373675, "grad_norm": 0.07520173490047455, "learning_rate": 3.207051025492438e-06, "loss": 0.0026, "step": 160210 }, { "epoch": 1.310217933515967, "grad_norm": 0.05528285354375839, "learning_rate": 3.2063848718855236e-06, "loss": 0.0008, "step": 160220 }, { "epoch": 1.3102997096945659, "grad_norm": 0.020339485257864, "learning_rate": 3.2057187548160063e-06, "loss": 0.0004, "step": 160230 }, { "epoch": 1.3103814858731653, "grad_norm": 0.025946078822016716, "learning_rate": 3.2050526742974573e-06, "loss": 0.007, "step": 160240 }, { "epoch": 1.3104632620517642, "grad_norm": 0.0032581640407443047, "learning_rate": 3.204386630343442e-06, "loss": 0.0007, "step": 160250 }, { "epoch": 1.3105450382303636, "grad_norm": 0.08945204317569733, "learning_rate": 3.2037206229675323e-06, "loss": 0.0021, "step": 160260 }, { "epoch": 1.3106268144089626, "grad_norm": 0.10070201754570007, "learning_rate": 3.203054652183294e-06, "loss": 0.0009, "step": 160270 }, { "epoch": 1.310708590587562, "grad_norm": 0.021897759288549423, "learning_rate": 3.2023887180042927e-06, "loss": 0.0005, "step": 160280 }, { "epoch": 1.310790366766161, "grad_norm": 0.021027758717536926, "learning_rate": 3.201722820444094e-06, "loss": 0.0006, "step": 160290 }, { "epoch": 1.3108721429447603, "grad_norm": 0.06265755742788315, "learning_rate": 3.201056959516263e-06, "loss": 0.0012, "step": 160300 }, { "epoch": 1.3109539191233592, "grad_norm": 0.04108351841568947, "learning_rate": 3.200391135234365e-06, "loss": 0.0013, "step": 160310 }, { "epoch": 1.3110356953019586, "grad_norm": 0.024115774780511856, "learning_rate": 3.199725347611963e-06, "loss": 0.001, "step": 160320 }, { "epoch": 1.3111174714805578, "grad_norm": 0.06737498939037323, "learning_rate": 3.1990595966626165e-06, "loss": 0.0008, "step": 160330 }, { "epoch": 1.311199247659157, "grad_norm": 0.029398158192634583, "learning_rate": 3.1983938823998917e-06, "loss": 0.0004, "step": 160340 }, { "epoch": 1.3112810238377561, "grad_norm": 0.008638410829007626, "learning_rate": 3.1977282048373477e-06, "loss": 0.0013, "step": 160350 }, { "epoch": 1.3113628000163553, "grad_norm": 0.044866617769002914, "learning_rate": 3.197062563988545e-06, "loss": 0.0007, "step": 160360 }, { "epoch": 1.3114445761949545, "grad_norm": 0.08520027995109558, "learning_rate": 3.196396959867042e-06, "loss": 0.0035, "step": 160370 }, { "epoch": 1.3115263523735536, "grad_norm": 0.04706559330224991, "learning_rate": 3.1957313924864e-06, "loss": 0.0011, "step": 160380 }, { "epoch": 1.3116081285521528, "grad_norm": 0.02882457710802555, "learning_rate": 3.195065861860177e-06, "loss": 0.0005, "step": 160390 }, { "epoch": 1.311689904730752, "grad_norm": 0.031541381031274796, "learning_rate": 3.1944003680019286e-06, "loss": 0.0007, "step": 160400 }, { "epoch": 1.3117716809093511, "grad_norm": 0.05976065620779991, "learning_rate": 3.1937349109252126e-06, "loss": 0.0009, "step": 160410 }, { "epoch": 1.3118534570879503, "grad_norm": 0.07503050565719604, "learning_rate": 3.1930694906435846e-06, "loss": 0.0009, "step": 160420 }, { "epoch": 1.3119352332665495, "grad_norm": 0.1152757853269577, "learning_rate": 3.1924041071706004e-06, "loss": 0.0014, "step": 160430 }, { "epoch": 1.3120170094451487, "grad_norm": 0.04413694888353348, "learning_rate": 3.1917387605198134e-06, "loss": 0.001, "step": 160440 }, { "epoch": 1.3120987856237478, "grad_norm": 0.008696825243532658, "learning_rate": 3.191073450704777e-06, "loss": 0.001, "step": 160450 }, { "epoch": 1.312180561802347, "grad_norm": 0.028021961450576782, "learning_rate": 3.1904081777390463e-06, "loss": 0.0007, "step": 160460 }, { "epoch": 1.3122623379809462, "grad_norm": 0.011450800113379955, "learning_rate": 3.189742941636171e-06, "loss": 0.0013, "step": 160470 }, { "epoch": 1.3123441141595453, "grad_norm": 0.048628926277160645, "learning_rate": 3.1890777424097035e-06, "loss": 0.0007, "step": 160480 }, { "epoch": 1.3124258903381445, "grad_norm": 0.06705190241336823, "learning_rate": 3.1884125800731923e-06, "loss": 0.0012, "step": 160490 }, { "epoch": 1.3125076665167437, "grad_norm": 0.02457340434193611, "learning_rate": 3.187747454640192e-06, "loss": 0.001, "step": 160500 }, { "epoch": 1.3125894426953428, "grad_norm": 0.003512959461659193, "learning_rate": 3.187082366124249e-06, "loss": 0.001, "step": 160510 }, { "epoch": 1.312671218873942, "grad_norm": 0.0184041615575552, "learning_rate": 3.186417314538912e-06, "loss": 0.0013, "step": 160520 }, { "epoch": 1.3127529950525412, "grad_norm": 0.02614440582692623, "learning_rate": 3.185752299897727e-06, "loss": 0.0013, "step": 160530 }, { "epoch": 1.3128347712311403, "grad_norm": 0.05103670060634613, "learning_rate": 3.185087322214243e-06, "loss": 0.0007, "step": 160540 }, { "epoch": 1.3129165474097395, "grad_norm": 0.05402839928865433, "learning_rate": 3.184422381502006e-06, "loss": 0.0017, "step": 160550 }, { "epoch": 1.3129983235883387, "grad_norm": 0.029546789824962616, "learning_rate": 3.183757477774561e-06, "loss": 0.0005, "step": 160560 }, { "epoch": 1.3130800997669378, "grad_norm": 0.06589794903993607, "learning_rate": 3.1830926110454514e-06, "loss": 0.0006, "step": 160570 }, { "epoch": 1.313161875945537, "grad_norm": 0.046744152903556824, "learning_rate": 3.1824277813282232e-06, "loss": 0.0015, "step": 160580 }, { "epoch": 1.3132436521241362, "grad_norm": 0.14590725302696228, "learning_rate": 3.1817629886364184e-06, "loss": 0.0015, "step": 160590 }, { "epoch": 1.3133254283027354, "grad_norm": 0.009583861567080021, "learning_rate": 3.181098232983579e-06, "loss": 0.0006, "step": 160600 }, { "epoch": 1.3134072044813345, "grad_norm": 0.0015624869847670197, "learning_rate": 3.1804335143832455e-06, "loss": 0.0004, "step": 160610 }, { "epoch": 1.3134889806599337, "grad_norm": 0.019512569531798363, "learning_rate": 3.179768832848963e-06, "loss": 0.0005, "step": 160620 }, { "epoch": 1.3135707568385329, "grad_norm": 0.02756306901574135, "learning_rate": 3.1791041883942673e-06, "loss": 0.0009, "step": 160630 }, { "epoch": 1.313652533017132, "grad_norm": 0.05500507354736328, "learning_rate": 3.178439581032701e-06, "loss": 0.0005, "step": 160640 }, { "epoch": 1.3137343091957314, "grad_norm": 0.003823460079729557, "learning_rate": 3.1777750107777995e-06, "loss": 0.0005, "step": 160650 }, { "epoch": 1.3138160853743304, "grad_norm": 0.0044460603967309, "learning_rate": 3.177110477643103e-06, "loss": 0.0008, "step": 160660 }, { "epoch": 1.3138978615529298, "grad_norm": 0.0009358422248624265, "learning_rate": 3.176445981642148e-06, "loss": 0.0009, "step": 160670 }, { "epoch": 1.3139796377315287, "grad_norm": 0.028487956151366234, "learning_rate": 3.17578152278847e-06, "loss": 0.001, "step": 160680 }, { "epoch": 1.314061413910128, "grad_norm": 0.0911979004740715, "learning_rate": 3.175117101095605e-06, "loss": 0.0012, "step": 160690 }, { "epoch": 1.314143190088727, "grad_norm": 0.0764986202120781, "learning_rate": 3.174452716577089e-06, "loss": 0.0008, "step": 160700 }, { "epoch": 1.3142249662673264, "grad_norm": 0.012949598953127861, "learning_rate": 3.1737883692464545e-06, "loss": 0.0017, "step": 160710 }, { "epoch": 1.3143067424459254, "grad_norm": 0.0017952547641471028, "learning_rate": 3.1731240591172356e-06, "loss": 0.0014, "step": 160720 }, { "epoch": 1.3143885186245248, "grad_norm": 0.024789411574602127, "learning_rate": 3.172459786202963e-06, "loss": 0.0018, "step": 160730 }, { "epoch": 1.3144702948031237, "grad_norm": 0.0021061052102595568, "learning_rate": 3.1717955505171717e-06, "loss": 0.0016, "step": 160740 }, { "epoch": 1.314552070981723, "grad_norm": 0.048922087997198105, "learning_rate": 3.1711313520733906e-06, "loss": 0.0012, "step": 160750 }, { "epoch": 1.3146338471603223, "grad_norm": 0.022552264854311943, "learning_rate": 3.170467190885151e-06, "loss": 0.0011, "step": 160760 }, { "epoch": 1.3147156233389214, "grad_norm": 0.031069830060005188, "learning_rate": 3.1698030669659805e-06, "loss": 0.0008, "step": 160770 }, { "epoch": 1.3147973995175206, "grad_norm": 0.009890897199511528, "learning_rate": 3.1691389803294105e-06, "loss": 0.001, "step": 160780 }, { "epoch": 1.3148791756961198, "grad_norm": 0.02589406818151474, "learning_rate": 3.1684749309889674e-06, "loss": 0.0022, "step": 160790 }, { "epoch": 1.314960951874719, "grad_norm": 0.04686252772808075, "learning_rate": 3.1678109189581785e-06, "loss": 0.0008, "step": 160800 }, { "epoch": 1.3150427280533181, "grad_norm": 0.0312822125852108, "learning_rate": 3.16714694425057e-06, "loss": 0.0007, "step": 160810 }, { "epoch": 1.3151245042319173, "grad_norm": 0.053681038320064545, "learning_rate": 3.166483006879669e-06, "loss": 0.0006, "step": 160820 }, { "epoch": 1.3152062804105165, "grad_norm": 0.09761599451303482, "learning_rate": 3.1658191068589993e-06, "loss": 0.0006, "step": 160830 }, { "epoch": 1.3152880565891156, "grad_norm": 0.08461210876703262, "learning_rate": 3.165155244202085e-06, "loss": 0.0009, "step": 160840 }, { "epoch": 1.3153698327677148, "grad_norm": 0.035361580550670624, "learning_rate": 3.1644914189224485e-06, "loss": 0.001, "step": 160850 }, { "epoch": 1.315451608946314, "grad_norm": 0.006279125809669495, "learning_rate": 3.1638276310336154e-06, "loss": 0.0011, "step": 160860 }, { "epoch": 1.3155333851249131, "grad_norm": 0.06409426033496857, "learning_rate": 3.163163880549106e-06, "loss": 0.0011, "step": 160870 }, { "epoch": 1.3156151613035123, "grad_norm": 0.05061221495270729, "learning_rate": 3.1625001674824412e-06, "loss": 0.0008, "step": 160880 }, { "epoch": 1.3156969374821115, "grad_norm": 0.05127289146184921, "learning_rate": 3.1618364918471412e-06, "loss": 0.0015, "step": 160890 }, { "epoch": 1.3157787136607106, "grad_norm": 0.04072953015565872, "learning_rate": 3.1611728536567267e-06, "loss": 0.0009, "step": 160900 }, { "epoch": 1.3158604898393098, "grad_norm": 0.05109873786568642, "learning_rate": 3.160509252924715e-06, "loss": 0.001, "step": 160910 }, { "epoch": 1.315942266017909, "grad_norm": 0.12128312140703201, "learning_rate": 3.1598456896646264e-06, "loss": 0.0006, "step": 160920 }, { "epoch": 1.3160240421965081, "grad_norm": 0.009102546609938145, "learning_rate": 3.159182163889975e-06, "loss": 0.0005, "step": 160930 }, { "epoch": 1.3161058183751073, "grad_norm": 0.06708214432001114, "learning_rate": 3.1585186756142806e-06, "loss": 0.0009, "step": 160940 }, { "epoch": 1.3161875945537065, "grad_norm": 0.03776460886001587, "learning_rate": 3.1578552248510573e-06, "loss": 0.0007, "step": 160950 }, { "epoch": 1.3162693707323057, "grad_norm": 0.06463941931724548, "learning_rate": 3.1571918116138207e-06, "loss": 0.0011, "step": 160960 }, { "epoch": 1.3163511469109048, "grad_norm": 0.0557781420648098, "learning_rate": 3.1565284359160823e-06, "loss": 0.0009, "step": 160970 }, { "epoch": 1.316432923089504, "grad_norm": 0.015430506318807602, "learning_rate": 3.1558650977713608e-06, "loss": 0.0021, "step": 160980 }, { "epoch": 1.3165146992681032, "grad_norm": 0.04845498874783516, "learning_rate": 3.155201797193166e-06, "loss": 0.0014, "step": 160990 }, { "epoch": 1.3165964754467023, "grad_norm": 0.005412273108959198, "learning_rate": 3.1545385341950097e-06, "loss": 0.0009, "step": 161000 }, { "epoch": 1.3166782516253015, "grad_norm": 0.09756031632423401, "learning_rate": 3.1538753087904026e-06, "loss": 0.0006, "step": 161010 }, { "epoch": 1.3167600278039007, "grad_norm": 0.05309959873557091, "learning_rate": 3.1532121209928578e-06, "loss": 0.0008, "step": 161020 }, { "epoch": 1.3168418039824998, "grad_norm": 0.0715612918138504, "learning_rate": 3.152548970815883e-06, "loss": 0.0004, "step": 161030 }, { "epoch": 1.316923580161099, "grad_norm": 0.05859751254320145, "learning_rate": 3.1518858582729873e-06, "loss": 0.0011, "step": 161040 }, { "epoch": 1.3170053563396982, "grad_norm": 0.11308814585208893, "learning_rate": 3.1512227833776778e-06, "loss": 0.0004, "step": 161050 }, { "epoch": 1.3170871325182973, "grad_norm": 0.1393653005361557, "learning_rate": 3.1505597461434635e-06, "loss": 0.0013, "step": 161060 }, { "epoch": 1.3171689086968965, "grad_norm": 0.03419938683509827, "learning_rate": 3.149896746583851e-06, "loss": 0.0009, "step": 161070 }, { "epoch": 1.317250684875496, "grad_norm": 0.03733814135193825, "learning_rate": 3.1492337847123454e-06, "loss": 0.0009, "step": 161080 }, { "epoch": 1.3173324610540948, "grad_norm": 0.0037314712535589933, "learning_rate": 3.1485708605424505e-06, "loss": 0.0009, "step": 161090 }, { "epoch": 1.3174142372326942, "grad_norm": 0.10955346375703812, "learning_rate": 3.147907974087673e-06, "loss": 0.0007, "step": 161100 }, { "epoch": 1.3174960134112932, "grad_norm": 0.02278660610318184, "learning_rate": 3.1472451253615164e-06, "loss": 0.0008, "step": 161110 }, { "epoch": 1.3175777895898926, "grad_norm": 0.01410652231425047, "learning_rate": 3.146582314377482e-06, "loss": 0.0007, "step": 161120 }, { "epoch": 1.3176595657684915, "grad_norm": 0.16640262305736542, "learning_rate": 3.1459195411490717e-06, "loss": 0.0009, "step": 161130 }, { "epoch": 1.317741341947091, "grad_norm": 0.008741995319724083, "learning_rate": 3.145256805689789e-06, "loss": 0.0004, "step": 161140 }, { "epoch": 1.3178231181256899, "grad_norm": 0.016051258891820908, "learning_rate": 3.144594108013132e-06, "loss": 0.0009, "step": 161150 }, { "epoch": 1.3179048943042893, "grad_norm": 0.032713472843170166, "learning_rate": 3.1439314481326016e-06, "loss": 0.0008, "step": 161160 }, { "epoch": 1.3179866704828882, "grad_norm": 0.05493002012372017, "learning_rate": 3.143268826061695e-06, "loss": 0.0005, "step": 161170 }, { "epoch": 1.3180684466614876, "grad_norm": 0.06298622488975525, "learning_rate": 3.1426062418139126e-06, "loss": 0.0012, "step": 161180 }, { "epoch": 1.3181502228400868, "grad_norm": 0.00488954596221447, "learning_rate": 3.1419436954027516e-06, "loss": 0.0004, "step": 161190 }, { "epoch": 1.318231999018686, "grad_norm": 0.057319462299346924, "learning_rate": 3.141281186841707e-06, "loss": 0.0007, "step": 161200 }, { "epoch": 1.318313775197285, "grad_norm": 0.004955845419317484, "learning_rate": 3.1406187161442736e-06, "loss": 0.0005, "step": 161210 }, { "epoch": 1.3183955513758843, "grad_norm": 0.044700466096401215, "learning_rate": 3.1399562833239506e-06, "loss": 0.0006, "step": 161220 }, { "epoch": 1.3184773275544834, "grad_norm": 0.008858327753841877, "learning_rate": 3.139293888394229e-06, "loss": 0.0009, "step": 161230 }, { "epoch": 1.3185591037330826, "grad_norm": 0.05791419744491577, "learning_rate": 3.1386315313686046e-06, "loss": 0.0012, "step": 161240 }, { "epoch": 1.3186408799116818, "grad_norm": 0.005632174201309681, "learning_rate": 3.1379692122605676e-06, "loss": 0.0015, "step": 161250 }, { "epoch": 1.318722656090281, "grad_norm": 0.009434182196855545, "learning_rate": 3.1373069310836126e-06, "loss": 0.0007, "step": 161260 }, { "epoch": 1.3188044322688801, "grad_norm": 0.02456708997488022, "learning_rate": 3.1366446878512285e-06, "loss": 0.0009, "step": 161270 }, { "epoch": 1.3188862084474793, "grad_norm": 0.04276110604405403, "learning_rate": 3.1359824825769077e-06, "loss": 0.0011, "step": 161280 }, { "epoch": 1.3189679846260784, "grad_norm": 0.05852401256561279, "learning_rate": 3.1353203152741375e-06, "loss": 0.0008, "step": 161290 }, { "epoch": 1.3190497608046776, "grad_norm": 0.0022806923370808363, "learning_rate": 3.1346581859564083e-06, "loss": 0.0006, "step": 161300 }, { "epoch": 1.3191315369832768, "grad_norm": 0.013674545101821423, "learning_rate": 3.1339960946372093e-06, "loss": 0.0013, "step": 161310 }, { "epoch": 1.319213313161876, "grad_norm": 0.05386845022439957, "learning_rate": 3.1333340413300263e-06, "loss": 0.0014, "step": 161320 }, { "epoch": 1.3192950893404751, "grad_norm": 0.024692879989743233, "learning_rate": 3.132672026048344e-06, "loss": 0.001, "step": 161330 }, { "epoch": 1.3193768655190743, "grad_norm": 0.05756399780511856, "learning_rate": 3.1320100488056527e-06, "loss": 0.0011, "step": 161340 }, { "epoch": 1.3194586416976735, "grad_norm": 0.03918179124593735, "learning_rate": 3.1313481096154346e-06, "loss": 0.0008, "step": 161350 }, { "epoch": 1.3195404178762726, "grad_norm": 0.06409991532564163, "learning_rate": 3.130686208491175e-06, "loss": 0.0013, "step": 161360 }, { "epoch": 1.3196221940548718, "grad_norm": 0.0461358018219471, "learning_rate": 3.1300243454463552e-06, "loss": 0.0007, "step": 161370 }, { "epoch": 1.319703970233471, "grad_norm": 0.003313399152830243, "learning_rate": 3.129362520494461e-06, "loss": 0.0008, "step": 161380 }, { "epoch": 1.3197857464120701, "grad_norm": 0.013568916358053684, "learning_rate": 3.1287007336489727e-06, "loss": 0.0008, "step": 161390 }, { "epoch": 1.3198675225906693, "grad_norm": 0.03932905197143555, "learning_rate": 3.1280389849233713e-06, "loss": 0.0012, "step": 161400 }, { "epoch": 1.3199492987692685, "grad_norm": 0.04692702740430832, "learning_rate": 3.127377274331136e-06, "loss": 0.0007, "step": 161410 }, { "epoch": 1.3200310749478676, "grad_norm": 0.13138844072818756, "learning_rate": 3.1267156018857494e-06, "loss": 0.0012, "step": 161420 }, { "epoch": 1.3201128511264668, "grad_norm": 0.03550130873918533, "learning_rate": 3.1260539676006887e-06, "loss": 0.0007, "step": 161430 }, { "epoch": 1.320194627305066, "grad_norm": 0.016137562692165375, "learning_rate": 3.1253923714894314e-06, "loss": 0.0009, "step": 161440 }, { "epoch": 1.3202764034836652, "grad_norm": 0.010824284516274929, "learning_rate": 3.1247308135654536e-06, "loss": 0.0007, "step": 161450 }, { "epoch": 1.3203581796622643, "grad_norm": 0.05114402621984482, "learning_rate": 3.124069293842235e-06, "loss": 0.0005, "step": 161460 }, { "epoch": 1.3204399558408635, "grad_norm": 0.05948394536972046, "learning_rate": 3.1234078123332503e-06, "loss": 0.0014, "step": 161470 }, { "epoch": 1.3205217320194627, "grad_norm": 0.061706651002168655, "learning_rate": 3.1227463690519732e-06, "loss": 0.0006, "step": 161480 }, { "epoch": 1.3206035081980618, "grad_norm": 0.05421283841133118, "learning_rate": 3.122084964011878e-06, "loss": 0.0011, "step": 161490 }, { "epoch": 1.320685284376661, "grad_norm": 0.09655576199293137, "learning_rate": 3.1214235972264397e-06, "loss": 0.001, "step": 161500 }, { "epoch": 1.3207670605552604, "grad_norm": 0.08614516258239746, "learning_rate": 3.1207622687091294e-06, "loss": 0.0006, "step": 161510 }, { "epoch": 1.3208488367338593, "grad_norm": 0.03568374738097191, "learning_rate": 3.1201009784734194e-06, "loss": 0.0007, "step": 161520 }, { "epoch": 1.3209306129124587, "grad_norm": 0.042107392102479935, "learning_rate": 3.1194397265327804e-06, "loss": 0.0017, "step": 161530 }, { "epoch": 1.3210123890910577, "grad_norm": 0.03444843366742134, "learning_rate": 3.1187785129006816e-06, "loss": 0.0006, "step": 161540 }, { "epoch": 1.321094165269657, "grad_norm": 0.11788344383239746, "learning_rate": 3.118117337590595e-06, "loss": 0.0014, "step": 161550 }, { "epoch": 1.321175941448256, "grad_norm": 0.01394800003618002, "learning_rate": 3.117456200615987e-06, "loss": 0.0008, "step": 161560 }, { "epoch": 1.3212577176268554, "grad_norm": 0.013148646801710129, "learning_rate": 3.1167951019903275e-06, "loss": 0.0007, "step": 161570 }, { "epoch": 1.3213394938054543, "grad_norm": 0.049094267189502716, "learning_rate": 3.11613404172708e-06, "loss": 0.0008, "step": 161580 }, { "epoch": 1.3214212699840537, "grad_norm": 0.016984375193715096, "learning_rate": 3.1154730198397156e-06, "loss": 0.0016, "step": 161590 }, { "epoch": 1.3215030461626527, "grad_norm": 0.06770016252994537, "learning_rate": 3.114812036341697e-06, "loss": 0.0018, "step": 161600 }, { "epoch": 1.321584822341252, "grad_norm": 0.06256281584501266, "learning_rate": 3.11415109124649e-06, "loss": 0.0009, "step": 161610 }, { "epoch": 1.3216665985198512, "grad_norm": 0.0066482615657150745, "learning_rate": 3.113490184567557e-06, "loss": 0.0007, "step": 161620 }, { "epoch": 1.3217483746984504, "grad_norm": 0.01693350449204445, "learning_rate": 3.1128293163183642e-06, "loss": 0.0004, "step": 161630 }, { "epoch": 1.3218301508770496, "grad_norm": 0.034366901963949203, "learning_rate": 3.1121684865123714e-06, "loss": 0.0005, "step": 161640 }, { "epoch": 1.3219119270556487, "grad_norm": 0.035257697105407715, "learning_rate": 3.1115076951630406e-06, "loss": 0.0006, "step": 161650 }, { "epoch": 1.321993703234248, "grad_norm": 0.01964062824845314, "learning_rate": 3.110846942283833e-06, "loss": 0.0013, "step": 161660 }, { "epoch": 1.322075479412847, "grad_norm": 0.05761353299021721, "learning_rate": 3.1101862278882093e-06, "loss": 0.0009, "step": 161670 }, { "epoch": 1.3221572555914463, "grad_norm": 0.09105180948972702, "learning_rate": 3.1095255519896285e-06, "loss": 0.0012, "step": 161680 }, { "epoch": 1.3222390317700454, "grad_norm": 0.1632968783378601, "learning_rate": 3.1088649146015492e-06, "loss": 0.0007, "step": 161690 }, { "epoch": 1.3223208079486446, "grad_norm": 0.0030675746966153383, "learning_rate": 3.108204315737426e-06, "loss": 0.0016, "step": 161700 }, { "epoch": 1.3224025841272438, "grad_norm": 0.053926315158605576, "learning_rate": 3.107543755410721e-06, "loss": 0.0004, "step": 161710 }, { "epoch": 1.322484360305843, "grad_norm": 0.027762439101934433, "learning_rate": 3.1068832336348875e-06, "loss": 0.0008, "step": 161720 }, { "epoch": 1.322566136484442, "grad_norm": 0.07169635593891144, "learning_rate": 3.1062227504233824e-06, "loss": 0.0011, "step": 161730 }, { "epoch": 1.3226479126630413, "grad_norm": 0.037158139050006866, "learning_rate": 3.1055623057896574e-06, "loss": 0.0008, "step": 161740 }, { "epoch": 1.3227296888416404, "grad_norm": 0.026352936401963234, "learning_rate": 3.104901899747169e-06, "loss": 0.0007, "step": 161750 }, { "epoch": 1.3228114650202396, "grad_norm": 0.16402924060821533, "learning_rate": 3.10424153230937e-06, "loss": 0.0009, "step": 161760 }, { "epoch": 1.3228932411988388, "grad_norm": 0.020490610972046852, "learning_rate": 3.1035812034897116e-06, "loss": 0.0005, "step": 161770 }, { "epoch": 1.322975017377438, "grad_norm": 0.02979307249188423, "learning_rate": 3.102920913301644e-06, "loss": 0.0007, "step": 161780 }, { "epoch": 1.3230567935560371, "grad_norm": 0.001141460845246911, "learning_rate": 3.1022606617586216e-06, "loss": 0.0006, "step": 161790 }, { "epoch": 1.3231385697346363, "grad_norm": 0.17739491164684296, "learning_rate": 3.101600448874091e-06, "loss": 0.002, "step": 161800 }, { "epoch": 1.3232203459132355, "grad_norm": 0.10596966743469238, "learning_rate": 3.1009402746615027e-06, "loss": 0.001, "step": 161810 }, { "epoch": 1.3233021220918346, "grad_norm": 0.013049169443547726, "learning_rate": 3.100280139134303e-06, "loss": 0.0005, "step": 161820 }, { "epoch": 1.3233838982704338, "grad_norm": 0.005428019445389509, "learning_rate": 3.0996200423059424e-06, "loss": 0.0005, "step": 161830 }, { "epoch": 1.323465674449033, "grad_norm": 0.03554693982005119, "learning_rate": 3.0989599841898667e-06, "loss": 0.0008, "step": 161840 }, { "epoch": 1.3235474506276321, "grad_norm": 0.029016118496656418, "learning_rate": 3.098299964799521e-06, "loss": 0.001, "step": 161850 }, { "epoch": 1.3236292268062313, "grad_norm": 0.002120853867381811, "learning_rate": 3.09763998414835e-06, "loss": 0.0006, "step": 161860 }, { "epoch": 1.3237110029848305, "grad_norm": 0.01226078625768423, "learning_rate": 3.0969800422497997e-06, "loss": 0.001, "step": 161870 }, { "epoch": 1.3237927791634296, "grad_norm": 0.030234627425670624, "learning_rate": 3.096320139117314e-06, "loss": 0.0007, "step": 161880 }, { "epoch": 1.3238745553420288, "grad_norm": 0.023526674136519432, "learning_rate": 3.095660274764333e-06, "loss": 0.0004, "step": 161890 }, { "epoch": 1.323956331520628, "grad_norm": 0.022788917645812035, "learning_rate": 3.0950004492042995e-06, "loss": 0.0006, "step": 161900 }, { "epoch": 1.3240381076992271, "grad_norm": 0.032104942947626114, "learning_rate": 3.094340662450656e-06, "loss": 0.0014, "step": 161910 }, { "epoch": 1.3241198838778263, "grad_norm": 0.0024637465830892324, "learning_rate": 3.0936809145168427e-06, "loss": 0.0011, "step": 161920 }, { "epoch": 1.3242016600564255, "grad_norm": 0.011164600029587746, "learning_rate": 3.093021205416299e-06, "loss": 0.0004, "step": 161930 }, { "epoch": 1.3242834362350249, "grad_norm": 0.003490482922643423, "learning_rate": 3.0923615351624602e-06, "loss": 0.0009, "step": 161940 }, { "epoch": 1.3243652124136238, "grad_norm": 0.1129409447312355, "learning_rate": 3.0917019037687707e-06, "loss": 0.0014, "step": 161950 }, { "epoch": 1.3244469885922232, "grad_norm": 0.027588291093707085, "learning_rate": 3.0910423112486643e-06, "loss": 0.0014, "step": 161960 }, { "epoch": 1.3245287647708222, "grad_norm": 0.04209599643945694, "learning_rate": 3.0903827576155775e-06, "loss": 0.0006, "step": 161970 }, { "epoch": 1.3246105409494215, "grad_norm": 0.003217203775420785, "learning_rate": 3.089723242882945e-06, "loss": 0.0005, "step": 161980 }, { "epoch": 1.3246923171280205, "grad_norm": 0.0035577367525547743, "learning_rate": 3.0890637670642044e-06, "loss": 0.0008, "step": 161990 }, { "epoch": 1.3247740933066199, "grad_norm": 0.04477487877011299, "learning_rate": 3.0884043301727873e-06, "loss": 0.0003, "step": 162000 }, { "epoch": 1.3248558694852188, "grad_norm": 0.006460798904299736, "learning_rate": 3.0877449322221285e-06, "loss": 0.0006, "step": 162010 }, { "epoch": 1.3249376456638182, "grad_norm": 0.034168679267168045, "learning_rate": 3.087085573225659e-06, "loss": 0.0013, "step": 162020 }, { "epoch": 1.3250194218424172, "grad_norm": 0.03570510819554329, "learning_rate": 3.0864262531968115e-06, "loss": 0.0015, "step": 162030 }, { "epoch": 1.3251011980210166, "grad_norm": 0.053789373487234116, "learning_rate": 3.085766972149017e-06, "loss": 0.0009, "step": 162040 }, { "epoch": 1.3251829741996157, "grad_norm": 0.08381195366382599, "learning_rate": 3.0851077300957056e-06, "loss": 0.0006, "step": 162050 }, { "epoch": 1.325264750378215, "grad_norm": 0.0713798925280571, "learning_rate": 3.084448527050303e-06, "loss": 0.0009, "step": 162060 }, { "epoch": 1.325346526556814, "grad_norm": 0.04570326581597328, "learning_rate": 3.0837893630262445e-06, "loss": 0.0007, "step": 162070 }, { "epoch": 1.3254283027354132, "grad_norm": 0.00992625206708908, "learning_rate": 3.083130238036953e-06, "loss": 0.0014, "step": 162080 }, { "epoch": 1.3255100789140124, "grad_norm": 0.07645608484745026, "learning_rate": 3.082471152095858e-06, "loss": 0.0015, "step": 162090 }, { "epoch": 1.3255918550926116, "grad_norm": 0.01983334869146347, "learning_rate": 3.081812105216382e-06, "loss": 0.0004, "step": 162100 }, { "epoch": 1.3256736312712107, "grad_norm": 0.08071599155664444, "learning_rate": 3.0811530974119543e-06, "loss": 0.0009, "step": 162110 }, { "epoch": 1.32575540744981, "grad_norm": 0.0087579982355237, "learning_rate": 3.080494128695998e-06, "loss": 0.0007, "step": 162120 }, { "epoch": 1.325837183628409, "grad_norm": 0.039187099784612656, "learning_rate": 3.0798351990819352e-06, "loss": 0.0008, "step": 162130 }, { "epoch": 1.3259189598070082, "grad_norm": 0.10773469507694244, "learning_rate": 3.0791763085831904e-06, "loss": 0.001, "step": 162140 }, { "epoch": 1.3260007359856074, "grad_norm": 0.023352183401584625, "learning_rate": 3.078517457213186e-06, "loss": 0.0004, "step": 162150 }, { "epoch": 1.3260825121642066, "grad_norm": 0.04245869070291519, "learning_rate": 3.077858644985343e-06, "loss": 0.0019, "step": 162160 }, { "epoch": 1.3261642883428058, "grad_norm": 0.02550002932548523, "learning_rate": 3.0771998719130825e-06, "loss": 0.0008, "step": 162170 }, { "epoch": 1.326246064521405, "grad_norm": 0.003098198678344488, "learning_rate": 3.076541138009821e-06, "loss": 0.0007, "step": 162180 }, { "epoch": 1.326327840700004, "grad_norm": 0.01532187219709158, "learning_rate": 3.075882443288981e-06, "loss": 0.0012, "step": 162190 }, { "epoch": 1.3264096168786033, "grad_norm": 0.008758696727454662, "learning_rate": 3.075223787763981e-06, "loss": 0.0004, "step": 162200 }, { "epoch": 1.3264913930572024, "grad_norm": 0.022020505741238594, "learning_rate": 3.0745651714482364e-06, "loss": 0.0009, "step": 162210 }, { "epoch": 1.3265731692358016, "grad_norm": 0.1976104974746704, "learning_rate": 3.0739065943551638e-06, "loss": 0.0018, "step": 162220 }, { "epoch": 1.3266549454144008, "grad_norm": 0.027093229815363884, "learning_rate": 3.07324805649818e-06, "loss": 0.0012, "step": 162230 }, { "epoch": 1.326736721593, "grad_norm": 0.02521185390651226, "learning_rate": 3.0725895578906993e-06, "loss": 0.0017, "step": 162240 }, { "epoch": 1.326818497771599, "grad_norm": 0.020704414695501328, "learning_rate": 3.071931098546136e-06, "loss": 0.0004, "step": 162250 }, { "epoch": 1.3269002739501983, "grad_norm": 0.0858989879488945, "learning_rate": 3.0712726784779027e-06, "loss": 0.0014, "step": 162260 }, { "epoch": 1.3269820501287974, "grad_norm": 0.06132226809859276, "learning_rate": 3.0706142976994137e-06, "loss": 0.0007, "step": 162270 }, { "epoch": 1.3270638263073966, "grad_norm": 0.03814845159649849, "learning_rate": 3.0699559562240798e-06, "loss": 0.0005, "step": 162280 }, { "epoch": 1.3271456024859958, "grad_norm": 0.051434408873319626, "learning_rate": 3.069297654065312e-06, "loss": 0.001, "step": 162290 }, { "epoch": 1.327227378664595, "grad_norm": 0.05362860858440399, "learning_rate": 3.0686393912365174e-06, "loss": 0.0007, "step": 162300 }, { "epoch": 1.3273091548431941, "grad_norm": 0.035595331341028214, "learning_rate": 3.0679811677511116e-06, "loss": 0.0031, "step": 162310 }, { "epoch": 1.3273909310217933, "grad_norm": 0.018122868612408638, "learning_rate": 3.0673229836224992e-06, "loss": 0.0005, "step": 162320 }, { "epoch": 1.3274727072003925, "grad_norm": 0.04964030906558037, "learning_rate": 3.0666648388640883e-06, "loss": 0.0007, "step": 162330 }, { "epoch": 1.3275544833789916, "grad_norm": 0.03788314387202263, "learning_rate": 3.0660067334892857e-06, "loss": 0.0008, "step": 162340 }, { "epoch": 1.3276362595575908, "grad_norm": 0.012705035507678986, "learning_rate": 3.0653486675114984e-06, "loss": 0.0008, "step": 162350 }, { "epoch": 1.32771803573619, "grad_norm": 0.126350998878479, "learning_rate": 3.0646906409441317e-06, "loss": 0.001, "step": 162360 }, { "epoch": 1.3277998119147894, "grad_norm": 0.023197147995233536, "learning_rate": 3.0640326538005897e-06, "loss": 0.0007, "step": 162370 }, { "epoch": 1.3278815880933883, "grad_norm": 0.04390260949730873, "learning_rate": 3.0633747060942753e-06, "loss": 0.0009, "step": 162380 }, { "epoch": 1.3279633642719877, "grad_norm": 0.07092698663473129, "learning_rate": 3.062716797838593e-06, "loss": 0.0016, "step": 162390 }, { "epoch": 1.3280451404505866, "grad_norm": 0.1851717233657837, "learning_rate": 3.062058929046945e-06, "loss": 0.0028, "step": 162400 }, { "epoch": 1.328126916629186, "grad_norm": 0.13322025537490845, "learning_rate": 3.0614010997327304e-06, "loss": 0.0011, "step": 162410 }, { "epoch": 1.328208692807785, "grad_norm": 0.0031261928379535675, "learning_rate": 3.0607433099093505e-06, "loss": 0.0004, "step": 162420 }, { "epoch": 1.3282904689863844, "grad_norm": 0.017389941960573196, "learning_rate": 3.060085559590207e-06, "loss": 0.0013, "step": 162430 }, { "epoch": 1.3283722451649833, "grad_norm": 0.044172611087560654, "learning_rate": 3.0594278487886975e-06, "loss": 0.002, "step": 162440 }, { "epoch": 1.3284540213435827, "grad_norm": 0.019747179001569748, "learning_rate": 3.0587701775182207e-06, "loss": 0.0007, "step": 162450 }, { "epoch": 1.3285357975221819, "grad_norm": 0.1334136575460434, "learning_rate": 3.0581125457921715e-06, "loss": 0.0012, "step": 162460 }, { "epoch": 1.328617573700781, "grad_norm": 0.04766683280467987, "learning_rate": 3.0574549536239494e-06, "loss": 0.0005, "step": 162470 }, { "epoch": 1.3286993498793802, "grad_norm": 0.06776492297649384, "learning_rate": 3.0567974010269495e-06, "loss": 0.0004, "step": 162480 }, { "epoch": 1.3287811260579794, "grad_norm": 0.03328418731689453, "learning_rate": 3.0561398880145653e-06, "loss": 0.0027, "step": 162490 }, { "epoch": 1.3288629022365785, "grad_norm": 0.013785271905362606, "learning_rate": 3.055482414600191e-06, "loss": 0.0019, "step": 162500 }, { "epoch": 1.3289446784151777, "grad_norm": 0.0531330369412899, "learning_rate": 3.0548249807972214e-06, "loss": 0.001, "step": 162510 }, { "epoch": 1.3290264545937769, "grad_norm": 0.013958039693534374, "learning_rate": 3.054167586619048e-06, "loss": 0.001, "step": 162520 }, { "epoch": 1.329108230772376, "grad_norm": 0.007903278805315495, "learning_rate": 3.053510232079063e-06, "loss": 0.0003, "step": 162530 }, { "epoch": 1.3291900069509752, "grad_norm": 0.10049404948949814, "learning_rate": 3.0528529171906542e-06, "loss": 0.0011, "step": 162540 }, { "epoch": 1.3292717831295744, "grad_norm": 0.03520221635699272, "learning_rate": 3.0521956419672163e-06, "loss": 0.0011, "step": 162550 }, { "epoch": 1.3293535593081736, "grad_norm": 0.023281380534172058, "learning_rate": 3.0515384064221363e-06, "loss": 0.0013, "step": 162560 }, { "epoch": 1.3294353354867727, "grad_norm": 0.04233593866229057, "learning_rate": 3.050881210568804e-06, "loss": 0.0007, "step": 162570 }, { "epoch": 1.329517111665372, "grad_norm": 0.0262399110943079, "learning_rate": 3.050224054420603e-06, "loss": 0.0013, "step": 162580 }, { "epoch": 1.329598887843971, "grad_norm": 0.0024735734332352877, "learning_rate": 3.049566937990924e-06, "loss": 0.0012, "step": 162590 }, { "epoch": 1.3296806640225702, "grad_norm": 0.06705228984355927, "learning_rate": 3.0489098612931524e-06, "loss": 0.0009, "step": 162600 }, { "epoch": 1.3297624402011694, "grad_norm": 0.05973506346344948, "learning_rate": 3.048252824340673e-06, "loss": 0.0008, "step": 162610 }, { "epoch": 1.3298442163797686, "grad_norm": 0.009746037423610687, "learning_rate": 3.047595827146869e-06, "loss": 0.0007, "step": 162620 }, { "epoch": 1.3299259925583677, "grad_norm": 0.024196233600378036, "learning_rate": 3.0469388697251257e-06, "loss": 0.0011, "step": 162630 }, { "epoch": 1.330007768736967, "grad_norm": 0.03407725691795349, "learning_rate": 3.0462819520888255e-06, "loss": 0.0014, "step": 162640 }, { "epoch": 1.330089544915566, "grad_norm": 0.13408653438091278, "learning_rate": 3.04562507425135e-06, "loss": 0.0006, "step": 162650 }, { "epoch": 1.3301713210941652, "grad_norm": 0.0035708784125745296, "learning_rate": 3.044968236226078e-06, "loss": 0.0005, "step": 162660 }, { "epoch": 1.3302530972727644, "grad_norm": 0.02636735327541828, "learning_rate": 3.0443114380263944e-06, "loss": 0.0005, "step": 162670 }, { "epoch": 1.3303348734513636, "grad_norm": 0.040110357105731964, "learning_rate": 3.043654679665676e-06, "loss": 0.0024, "step": 162680 }, { "epoch": 1.3304166496299628, "grad_norm": 0.023533444851636887, "learning_rate": 3.0429979611573034e-06, "loss": 0.0009, "step": 162690 }, { "epoch": 1.330498425808562, "grad_norm": 0.031032467260956764, "learning_rate": 3.04234128251465e-06, "loss": 0.0005, "step": 162700 }, { "epoch": 1.330580201987161, "grad_norm": 0.059989042580127716, "learning_rate": 3.0416846437510983e-06, "loss": 0.0011, "step": 162710 }, { "epoch": 1.3306619781657603, "grad_norm": 0.057220395654439926, "learning_rate": 3.041028044880022e-06, "loss": 0.0009, "step": 162720 }, { "epoch": 1.3307437543443594, "grad_norm": 0.16876555979251862, "learning_rate": 3.0403714859147972e-06, "loss": 0.0012, "step": 162730 }, { "epoch": 1.3308255305229586, "grad_norm": 0.11039633303880692, "learning_rate": 3.0397149668687964e-06, "loss": 0.0009, "step": 162740 }, { "epoch": 1.3309073067015578, "grad_norm": 0.09483373910188675, "learning_rate": 3.039058487755396e-06, "loss": 0.0009, "step": 162750 }, { "epoch": 1.330989082880157, "grad_norm": 0.01775895431637764, "learning_rate": 3.0384020485879686e-06, "loss": 0.0005, "step": 162760 }, { "epoch": 1.331070859058756, "grad_norm": 0.08722550421953201, "learning_rate": 3.037745649379885e-06, "loss": 0.0005, "step": 162770 }, { "epoch": 1.3311526352373555, "grad_norm": 0.01489859726279974, "learning_rate": 3.037089290144517e-06, "loss": 0.0031, "step": 162780 }, { "epoch": 1.3312344114159544, "grad_norm": 0.05115772411227226, "learning_rate": 3.0364329708952365e-06, "loss": 0.0007, "step": 162790 }, { "epoch": 1.3313161875945538, "grad_norm": 0.03278624266386032, "learning_rate": 3.0357766916454133e-06, "loss": 0.0006, "step": 162800 }, { "epoch": 1.3313979637731528, "grad_norm": 0.02635219693183899, "learning_rate": 3.035120452408416e-06, "loss": 0.0011, "step": 162810 }, { "epoch": 1.3314797399517522, "grad_norm": 0.15424008667469025, "learning_rate": 3.034464253197609e-06, "loss": 0.001, "step": 162820 }, { "epoch": 1.3315615161303511, "grad_norm": 0.006924258545041084, "learning_rate": 3.033808094026365e-06, "loss": 0.0005, "step": 162830 }, { "epoch": 1.3316432923089505, "grad_norm": 0.04236973077058792, "learning_rate": 3.033151974908049e-06, "loss": 0.001, "step": 162840 }, { "epoch": 1.3317250684875495, "grad_norm": 0.0655781701207161, "learning_rate": 3.0324958958560257e-06, "loss": 0.0011, "step": 162850 }, { "epoch": 1.3318068446661488, "grad_norm": 0.027727726846933365, "learning_rate": 3.0318398568836587e-06, "loss": 0.0007, "step": 162860 }, { "epoch": 1.3318886208447478, "grad_norm": 0.017880864441394806, "learning_rate": 3.031183858004315e-06, "loss": 0.0008, "step": 162870 }, { "epoch": 1.3319703970233472, "grad_norm": 0.0005620882729999721, "learning_rate": 3.030527899231356e-06, "loss": 0.0011, "step": 162880 }, { "epoch": 1.3320521732019464, "grad_norm": 0.0013399749295786023, "learning_rate": 3.0298719805781453e-06, "loss": 0.0006, "step": 162890 }, { "epoch": 1.3321339493805455, "grad_norm": 0.005459307227283716, "learning_rate": 3.029216102058041e-06, "loss": 0.0005, "step": 162900 }, { "epoch": 1.3322157255591447, "grad_norm": 0.013392412103712559, "learning_rate": 3.02856026368441e-06, "loss": 0.0009, "step": 162910 }, { "epoch": 1.3322975017377439, "grad_norm": 0.014753258787095547, "learning_rate": 3.027904465470608e-06, "loss": 0.0008, "step": 162920 }, { "epoch": 1.332379277916343, "grad_norm": 0.018878618255257607, "learning_rate": 3.0272487074299956e-06, "loss": 0.0014, "step": 162930 }, { "epoch": 1.3324610540949422, "grad_norm": 0.008345107547938824, "learning_rate": 3.0265929895759284e-06, "loss": 0.001, "step": 162940 }, { "epoch": 1.3325428302735414, "grad_norm": 0.007165615446865559, "learning_rate": 3.025937311921767e-06, "loss": 0.0012, "step": 162950 }, { "epoch": 1.3326246064521405, "grad_norm": 0.031228400766849518, "learning_rate": 3.0252816744808677e-06, "loss": 0.0007, "step": 162960 }, { "epoch": 1.3327063826307397, "grad_norm": 0.045694295316934586, "learning_rate": 3.0246260772665857e-06, "loss": 0.0011, "step": 162970 }, { "epoch": 1.3327881588093389, "grad_norm": 0.04563559219241142, "learning_rate": 3.023970520292277e-06, "loss": 0.0013, "step": 162980 }, { "epoch": 1.332869934987938, "grad_norm": 0.03840414434671402, "learning_rate": 3.0233150035712935e-06, "loss": 0.0014, "step": 162990 }, { "epoch": 1.3329517111665372, "grad_norm": 0.036640577018260956, "learning_rate": 3.022659527116991e-06, "loss": 0.001, "step": 163000 }, { "epoch": 1.3330334873451364, "grad_norm": 0.01636810414493084, "learning_rate": 3.022004090942721e-06, "loss": 0.0008, "step": 163010 }, { "epoch": 1.3331152635237355, "grad_norm": 0.06711176037788391, "learning_rate": 3.0213486950618354e-06, "loss": 0.0013, "step": 163020 }, { "epoch": 1.3331970397023347, "grad_norm": 0.008603579364717007, "learning_rate": 3.0206933394876837e-06, "loss": 0.0007, "step": 163030 }, { "epoch": 1.3332788158809339, "grad_norm": 0.10435950011014938, "learning_rate": 3.020038024233619e-06, "loss": 0.0007, "step": 163040 }, { "epoch": 1.333360592059533, "grad_norm": 0.023876070976257324, "learning_rate": 3.019382749312991e-06, "loss": 0.0008, "step": 163050 }, { "epoch": 1.3334423682381322, "grad_norm": 0.024317443370819092, "learning_rate": 3.0187275147391447e-06, "loss": 0.0005, "step": 163060 }, { "epoch": 1.3335241444167314, "grad_norm": 0.001605544239282608, "learning_rate": 3.018072320525427e-06, "loss": 0.0004, "step": 163070 }, { "epoch": 1.3336059205953306, "grad_norm": 0.04731268063187599, "learning_rate": 3.01741716668519e-06, "loss": 0.0006, "step": 163080 }, { "epoch": 1.3336876967739297, "grad_norm": 0.03414035588502884, "learning_rate": 3.0167620532317765e-06, "loss": 0.0006, "step": 163090 }, { "epoch": 1.333769472952529, "grad_norm": 0.012430784292519093, "learning_rate": 3.0161069801785314e-06, "loss": 0.0004, "step": 163100 }, { "epoch": 1.333851249131128, "grad_norm": 0.049718476831912994, "learning_rate": 3.0154519475388e-06, "loss": 0.0008, "step": 163110 }, { "epoch": 1.3339330253097272, "grad_norm": 0.036428846418857574, "learning_rate": 3.0147969553259256e-06, "loss": 0.0008, "step": 163120 }, { "epoch": 1.3340148014883264, "grad_norm": 0.012841018848121166, "learning_rate": 3.014142003553251e-06, "loss": 0.001, "step": 163130 }, { "epoch": 1.3340965776669256, "grad_norm": 0.032508205622434616, "learning_rate": 3.013487092234118e-06, "loss": 0.0009, "step": 163140 }, { "epoch": 1.3341783538455247, "grad_norm": 0.006293805316090584, "learning_rate": 3.0128322213818654e-06, "loss": 0.0015, "step": 163150 }, { "epoch": 1.334260130024124, "grad_norm": 0.025485431775450706, "learning_rate": 3.0121773910098383e-06, "loss": 0.0007, "step": 163160 }, { "epoch": 1.334341906202723, "grad_norm": 0.10141830891370773, "learning_rate": 3.011522601131374e-06, "loss": 0.0014, "step": 163170 }, { "epoch": 1.3344236823813223, "grad_norm": 0.03757558390498161, "learning_rate": 3.01086785175981e-06, "loss": 0.0041, "step": 163180 }, { "epoch": 1.3345054585599214, "grad_norm": 0.0160318985581398, "learning_rate": 3.0102131429084822e-06, "loss": 0.0008, "step": 163190 }, { "epoch": 1.3345872347385206, "grad_norm": 0.006831225007772446, "learning_rate": 3.0095584745907325e-06, "loss": 0.0014, "step": 163200 }, { "epoch": 1.33466901091712, "grad_norm": 0.015358944423496723, "learning_rate": 3.008903846819895e-06, "loss": 0.0006, "step": 163210 }, { "epoch": 1.334750787095719, "grad_norm": 0.013749059289693832, "learning_rate": 3.0082492596093037e-06, "loss": 0.0017, "step": 163220 }, { "epoch": 1.3348325632743183, "grad_norm": 0.017594557255506516, "learning_rate": 3.007594712972294e-06, "loss": 0.0004, "step": 163230 }, { "epoch": 1.3349143394529173, "grad_norm": 0.033454861491918564, "learning_rate": 3.0069402069222e-06, "loss": 0.0012, "step": 163240 }, { "epoch": 1.3349961156315167, "grad_norm": 0.21144932508468628, "learning_rate": 3.0062857414723536e-06, "loss": 0.0011, "step": 163250 }, { "epoch": 1.3350778918101156, "grad_norm": 0.031880758702754974, "learning_rate": 3.005631316636087e-06, "loss": 0.0009, "step": 163260 }, { "epoch": 1.335159667988715, "grad_norm": 0.032442107796669006, "learning_rate": 3.0049769324267303e-06, "loss": 0.0003, "step": 163270 }, { "epoch": 1.335241444167314, "grad_norm": 0.027294985949993134, "learning_rate": 3.0043225888576166e-06, "loss": 0.0016, "step": 163280 }, { "epoch": 1.3353232203459133, "grad_norm": 0.05505502223968506, "learning_rate": 3.0036682859420757e-06, "loss": 0.0011, "step": 163290 }, { "epoch": 1.3354049965245123, "grad_norm": 0.017082009464502335, "learning_rate": 3.003014023693433e-06, "loss": 0.0008, "step": 163300 }, { "epoch": 1.3354867727031117, "grad_norm": 0.036963045597076416, "learning_rate": 3.0023598021250156e-06, "loss": 0.0006, "step": 163310 }, { "epoch": 1.3355685488817108, "grad_norm": 0.022675665095448494, "learning_rate": 3.0017056212501555e-06, "loss": 0.0008, "step": 163320 }, { "epoch": 1.33565032506031, "grad_norm": 0.025586625561118126, "learning_rate": 3.0010514810821758e-06, "loss": 0.0009, "step": 163330 }, { "epoch": 1.3357321012389092, "grad_norm": 0.021051879972219467, "learning_rate": 3.0003973816344025e-06, "loss": 0.0005, "step": 163340 }, { "epoch": 1.3358138774175083, "grad_norm": 0.07408939301967621, "learning_rate": 2.9997433229201582e-06, "loss": 0.0009, "step": 163350 }, { "epoch": 1.3358956535961075, "grad_norm": 0.03977934271097183, "learning_rate": 2.99908930495277e-06, "loss": 0.0014, "step": 163360 }, { "epoch": 1.3359774297747067, "grad_norm": 0.02185201831161976, "learning_rate": 2.998435327745559e-06, "loss": 0.0007, "step": 163370 }, { "epoch": 1.3360592059533059, "grad_norm": 0.05635222792625427, "learning_rate": 2.997781391311847e-06, "loss": 0.0006, "step": 163380 }, { "epoch": 1.336140982131905, "grad_norm": 0.026288919150829315, "learning_rate": 2.997127495664954e-06, "loss": 0.0007, "step": 163390 }, { "epoch": 1.3362227583105042, "grad_norm": 0.14334405958652496, "learning_rate": 2.9964736408182035e-06, "loss": 0.0033, "step": 163400 }, { "epoch": 1.3363045344891034, "grad_norm": 0.1493511199951172, "learning_rate": 2.995819826784915e-06, "loss": 0.0049, "step": 163410 }, { "epoch": 1.3363863106677025, "grad_norm": 0.018122458830475807, "learning_rate": 2.9951660535784038e-06, "loss": 0.0006, "step": 163420 }, { "epoch": 1.3364680868463017, "grad_norm": 0.03637511283159256, "learning_rate": 2.9945123212119886e-06, "loss": 0.0012, "step": 163430 }, { "epoch": 1.3365498630249009, "grad_norm": 0.040024641901254654, "learning_rate": 2.993858629698989e-06, "loss": 0.0052, "step": 163440 }, { "epoch": 1.3366316392035, "grad_norm": 0.015697382390499115, "learning_rate": 2.9932049790527197e-06, "loss": 0.0007, "step": 163450 }, { "epoch": 1.3367134153820992, "grad_norm": 0.0004610777832567692, "learning_rate": 2.9925513692864966e-06, "loss": 0.0007, "step": 163460 }, { "epoch": 1.3367951915606984, "grad_norm": 0.041103947907686234, "learning_rate": 2.991897800413633e-06, "loss": 0.0009, "step": 163470 }, { "epoch": 1.3368769677392975, "grad_norm": 0.02910454384982586, "learning_rate": 2.991244272447444e-06, "loss": 0.0011, "step": 163480 }, { "epoch": 1.3369587439178967, "grad_norm": 0.1326621025800705, "learning_rate": 2.990590785401242e-06, "loss": 0.0013, "step": 163490 }, { "epoch": 1.3370405200964959, "grad_norm": 0.05974744260311127, "learning_rate": 2.989937339288339e-06, "loss": 0.0011, "step": 163500 }, { "epoch": 1.337122296275095, "grad_norm": 0.051606517285108566, "learning_rate": 2.9892839341220446e-06, "loss": 0.0008, "step": 163510 }, { "epoch": 1.3372040724536942, "grad_norm": 0.17044873535633087, "learning_rate": 2.9886305699156726e-06, "loss": 0.0018, "step": 163520 }, { "epoch": 1.3372858486322934, "grad_norm": 0.03546467050909996, "learning_rate": 2.987977246682532e-06, "loss": 0.0005, "step": 163530 }, { "epoch": 1.3373676248108926, "grad_norm": 0.034937888383865356, "learning_rate": 2.9873239644359296e-06, "loss": 0.0008, "step": 163540 }, { "epoch": 1.3374494009894917, "grad_norm": 0.04849632456898689, "learning_rate": 2.986670723189171e-06, "loss": 0.0006, "step": 163550 }, { "epoch": 1.337531177168091, "grad_norm": 0.019550131633877754, "learning_rate": 2.9860175229555697e-06, "loss": 0.0008, "step": 163560 }, { "epoch": 1.33761295334669, "grad_norm": 0.03613657131791115, "learning_rate": 2.9853643637484276e-06, "loss": 0.0008, "step": 163570 }, { "epoch": 1.3376947295252892, "grad_norm": 0.05357304587960243, "learning_rate": 2.984711245581051e-06, "loss": 0.0008, "step": 163580 }, { "epoch": 1.3377765057038884, "grad_norm": 0.04572611674666405, "learning_rate": 2.9840581684667438e-06, "loss": 0.0009, "step": 163590 }, { "epoch": 1.3378582818824876, "grad_norm": 0.027063123881816864, "learning_rate": 2.9834051324188113e-06, "loss": 0.0007, "step": 163600 }, { "epoch": 1.3379400580610867, "grad_norm": 0.010307037271559238, "learning_rate": 2.9827521374505554e-06, "loss": 0.001, "step": 163610 }, { "epoch": 1.338021834239686, "grad_norm": 0.04523877054452896, "learning_rate": 2.9820991835752776e-06, "loss": 0.0006, "step": 163620 }, { "epoch": 1.338103610418285, "grad_norm": 0.05321341007947922, "learning_rate": 2.9814462708062787e-06, "loss": 0.0011, "step": 163630 }, { "epoch": 1.3381853865968845, "grad_norm": 0.01984414830803871, "learning_rate": 2.980793399156861e-06, "loss": 0.0006, "step": 163640 }, { "epoch": 1.3382671627754834, "grad_norm": 0.022902561351656914, "learning_rate": 2.9801405686403246e-06, "loss": 0.0008, "step": 163650 }, { "epoch": 1.3383489389540828, "grad_norm": 0.016230834648013115, "learning_rate": 2.9794877792699658e-06, "loss": 0.0006, "step": 163660 }, { "epoch": 1.3384307151326817, "grad_norm": 0.04165614768862724, "learning_rate": 2.978835031059081e-06, "loss": 0.0008, "step": 163670 }, { "epoch": 1.3385124913112811, "grad_norm": 0.039691440761089325, "learning_rate": 2.9781823240209712e-06, "loss": 0.0012, "step": 163680 }, { "epoch": 1.33859426748988, "grad_norm": 0.013475766405463219, "learning_rate": 2.977529658168931e-06, "loss": 0.0011, "step": 163690 }, { "epoch": 1.3386760436684795, "grad_norm": 0.003950190730392933, "learning_rate": 2.9768770335162563e-06, "loss": 0.0005, "step": 163700 }, { "epoch": 1.3387578198470784, "grad_norm": 0.010197783820331097, "learning_rate": 2.9762244500762393e-06, "loss": 0.001, "step": 163710 }, { "epoch": 1.3388395960256778, "grad_norm": 0.018864799290895462, "learning_rate": 2.975571907862176e-06, "loss": 0.0008, "step": 163720 }, { "epoch": 1.3389213722042768, "grad_norm": 0.004191399086266756, "learning_rate": 2.974919406887359e-06, "loss": 0.0007, "step": 163730 }, { "epoch": 1.3390031483828762, "grad_norm": 0.08288513123989105, "learning_rate": 2.9742669471650796e-06, "loss": 0.001, "step": 163740 }, { "epoch": 1.3390849245614753, "grad_norm": 0.016841217875480652, "learning_rate": 2.9736145287086273e-06, "loss": 0.0005, "step": 163750 }, { "epoch": 1.3391667007400745, "grad_norm": 0.03252020478248596, "learning_rate": 2.9729621515312957e-06, "loss": 0.0007, "step": 163760 }, { "epoch": 1.3392484769186737, "grad_norm": 0.08818969130516052, "learning_rate": 2.9723098156463743e-06, "loss": 0.0021, "step": 163770 }, { "epoch": 1.3393302530972728, "grad_norm": 0.09214260429143906, "learning_rate": 2.9716575210671484e-06, "loss": 0.0013, "step": 163780 }, { "epoch": 1.339412029275872, "grad_norm": 0.03642633929848671, "learning_rate": 2.971005267806906e-06, "loss": 0.0016, "step": 163790 }, { "epoch": 1.3394938054544712, "grad_norm": 0.008514086715877056, "learning_rate": 2.9703530558789373e-06, "loss": 0.0007, "step": 163800 }, { "epoch": 1.3395755816330703, "grad_norm": 0.004733850713819265, "learning_rate": 2.969700885296527e-06, "loss": 0.0011, "step": 163810 }, { "epoch": 1.3396573578116695, "grad_norm": 0.049913037568330765, "learning_rate": 2.9690487560729597e-06, "loss": 0.0012, "step": 163820 }, { "epoch": 1.3397391339902687, "grad_norm": 0.02617219090461731, "learning_rate": 2.9683966682215195e-06, "loss": 0.001, "step": 163830 }, { "epoch": 1.3398209101688678, "grad_norm": 0.04804380610585213, "learning_rate": 2.967744621755491e-06, "loss": 0.0015, "step": 163840 }, { "epoch": 1.339902686347467, "grad_norm": 0.02108139917254448, "learning_rate": 2.967092616688157e-06, "loss": 0.0011, "step": 163850 }, { "epoch": 1.3399844625260662, "grad_norm": 0.031027184799313545, "learning_rate": 2.9664406530327985e-06, "loss": 0.0037, "step": 163860 }, { "epoch": 1.3400662387046653, "grad_norm": 0.02657601796090603, "learning_rate": 2.9657887308026946e-06, "loss": 0.0009, "step": 163870 }, { "epoch": 1.3401480148832645, "grad_norm": 0.11090819537639618, "learning_rate": 2.96513685001113e-06, "loss": 0.0007, "step": 163880 }, { "epoch": 1.3402297910618637, "grad_norm": 0.03892411291599274, "learning_rate": 2.964485010671383e-06, "loss": 0.0015, "step": 163890 }, { "epoch": 1.3403115672404629, "grad_norm": 0.0038213443476706743, "learning_rate": 2.9638332127967294e-06, "loss": 0.0015, "step": 163900 }, { "epoch": 1.340393343419062, "grad_norm": 0.0100035909563303, "learning_rate": 2.963181456400447e-06, "loss": 0.0006, "step": 163910 }, { "epoch": 1.3404751195976612, "grad_norm": 0.07560717314481735, "learning_rate": 2.9625297414958156e-06, "loss": 0.001, "step": 163920 }, { "epoch": 1.3405568957762604, "grad_norm": 0.16226927936077118, "learning_rate": 2.961878068096109e-06, "loss": 0.0009, "step": 163930 }, { "epoch": 1.3406386719548595, "grad_norm": 0.0148249426856637, "learning_rate": 2.9612264362146037e-06, "loss": 0.0006, "step": 163940 }, { "epoch": 1.3407204481334587, "grad_norm": 0.03920050337910652, "learning_rate": 2.960574845864572e-06, "loss": 0.0009, "step": 163950 }, { "epoch": 1.3408022243120579, "grad_norm": 0.08647710829973221, "learning_rate": 2.9599232970592884e-06, "loss": 0.002, "step": 163960 }, { "epoch": 1.340884000490657, "grad_norm": 0.09397958219051361, "learning_rate": 2.9592717898120266e-06, "loss": 0.0048, "step": 163970 }, { "epoch": 1.3409657766692562, "grad_norm": 0.0314958356320858, "learning_rate": 2.9586203241360567e-06, "loss": 0.0008, "step": 163980 }, { "epoch": 1.3410475528478554, "grad_norm": 0.04370234161615372, "learning_rate": 2.957968900044648e-06, "loss": 0.0006, "step": 163990 }, { "epoch": 1.3411293290264545, "grad_norm": 0.0066141593270003796, "learning_rate": 2.957317517551075e-06, "loss": 0.0006, "step": 164000 }, { "epoch": 1.3412111052050537, "grad_norm": 0.0033439684193581343, "learning_rate": 2.9566661766686056e-06, "loss": 0.0006, "step": 164010 }, { "epoch": 1.3412928813836529, "grad_norm": 0.02049165964126587, "learning_rate": 2.956014877410506e-06, "loss": 0.0006, "step": 164020 }, { "epoch": 1.341374657562252, "grad_norm": 0.025663210079073906, "learning_rate": 2.9553636197900424e-06, "loss": 0.0009, "step": 164030 }, { "epoch": 1.3414564337408512, "grad_norm": 0.0374993234872818, "learning_rate": 2.9547124038204866e-06, "loss": 0.0011, "step": 164040 }, { "epoch": 1.3415382099194504, "grad_norm": 0.0053270128555595875, "learning_rate": 2.9540612295151007e-06, "loss": 0.0009, "step": 164050 }, { "epoch": 1.3416199860980496, "grad_norm": 0.013531639240682125, "learning_rate": 2.9534100968871516e-06, "loss": 0.0023, "step": 164060 }, { "epoch": 1.341701762276649, "grad_norm": 0.0510520301759243, "learning_rate": 2.9527590059499e-06, "loss": 0.0004, "step": 164070 }, { "epoch": 1.341783538455248, "grad_norm": 0.004839703440666199, "learning_rate": 2.9521079567166137e-06, "loss": 0.0005, "step": 164080 }, { "epoch": 1.3418653146338473, "grad_norm": 0.023979969322681427, "learning_rate": 2.951456949200553e-06, "loss": 0.0008, "step": 164090 }, { "epoch": 1.3419470908124462, "grad_norm": 0.007569883018732071, "learning_rate": 2.9508059834149784e-06, "loss": 0.0009, "step": 164100 }, { "epoch": 1.3420288669910456, "grad_norm": 0.022043734788894653, "learning_rate": 2.9501550593731505e-06, "loss": 0.0014, "step": 164110 }, { "epoch": 1.3421106431696446, "grad_norm": 0.0201051477342844, "learning_rate": 2.9495041770883317e-06, "loss": 0.0004, "step": 164120 }, { "epoch": 1.342192419348244, "grad_norm": 0.0037223531398922205, "learning_rate": 2.948853336573781e-06, "loss": 0.0004, "step": 164130 }, { "epoch": 1.342274195526843, "grad_norm": 0.0016987440176308155, "learning_rate": 2.9482025378427535e-06, "loss": 0.0005, "step": 164140 }, { "epoch": 1.3423559717054423, "grad_norm": 0.006474109832197428, "learning_rate": 2.9475517809085063e-06, "loss": 0.001, "step": 164150 }, { "epoch": 1.3424377478840412, "grad_norm": 0.008817203342914581, "learning_rate": 2.9469010657843e-06, "loss": 0.0018, "step": 164160 }, { "epoch": 1.3425195240626406, "grad_norm": 0.012608642689883709, "learning_rate": 2.9462503924833874e-06, "loss": 0.0008, "step": 164170 }, { "epoch": 1.3426013002412398, "grad_norm": 0.003385016927495599, "learning_rate": 2.945599761019024e-06, "loss": 0.0004, "step": 164180 }, { "epoch": 1.342683076419839, "grad_norm": 0.007995855994522572, "learning_rate": 2.944949171404462e-06, "loss": 0.0017, "step": 164190 }, { "epoch": 1.3427648525984381, "grad_norm": 0.09627237915992737, "learning_rate": 2.944298623652957e-06, "loss": 0.0015, "step": 164200 }, { "epoch": 1.3428466287770373, "grad_norm": 0.039109114557504654, "learning_rate": 2.9436481177777605e-06, "loss": 0.0007, "step": 164210 }, { "epoch": 1.3429284049556365, "grad_norm": 0.019506637006998062, "learning_rate": 2.9429976537921222e-06, "loss": 0.0006, "step": 164220 }, { "epoch": 1.3430101811342356, "grad_norm": 0.0027689861599355936, "learning_rate": 2.9423472317092928e-06, "loss": 0.0008, "step": 164230 }, { "epoch": 1.3430919573128348, "grad_norm": 0.019700467586517334, "learning_rate": 2.9416968515425244e-06, "loss": 0.0006, "step": 164240 }, { "epoch": 1.343173733491434, "grad_norm": 0.04263180121779442, "learning_rate": 2.941046513305065e-06, "loss": 0.001, "step": 164250 }, { "epoch": 1.3432555096700332, "grad_norm": 0.035948459059000015, "learning_rate": 2.9403962170101606e-06, "loss": 0.002, "step": 164260 }, { "epoch": 1.3433372858486323, "grad_norm": 0.031189551576972008, "learning_rate": 2.9397459626710577e-06, "loss": 0.0008, "step": 164270 }, { "epoch": 1.3434190620272315, "grad_norm": 0.02848348207771778, "learning_rate": 2.9390957503010063e-06, "loss": 0.002, "step": 164280 }, { "epoch": 1.3435008382058307, "grad_norm": 0.031084349378943443, "learning_rate": 2.93844557991325e-06, "loss": 0.0009, "step": 164290 }, { "epoch": 1.3435826143844298, "grad_norm": 0.05119512230157852, "learning_rate": 2.9377954515210326e-06, "loss": 0.0007, "step": 164300 }, { "epoch": 1.343664390563029, "grad_norm": 0.02176537550985813, "learning_rate": 2.937145365137597e-06, "loss": 0.0013, "step": 164310 }, { "epoch": 1.3437461667416282, "grad_norm": 0.03283898904919624, "learning_rate": 2.936495320776188e-06, "loss": 0.0008, "step": 164320 }, { "epoch": 1.3438279429202273, "grad_norm": 0.021155107766389847, "learning_rate": 2.935845318450047e-06, "loss": 0.0008, "step": 164330 }, { "epoch": 1.3439097190988265, "grad_norm": 0.0012460134457796812, "learning_rate": 2.9351953581724146e-06, "loss": 0.0005, "step": 164340 }, { "epoch": 1.3439914952774257, "grad_norm": 0.0019441861659288406, "learning_rate": 2.9345454399565287e-06, "loss": 0.001, "step": 164350 }, { "epoch": 1.3440732714560248, "grad_norm": 0.012471478432416916, "learning_rate": 2.933895563815633e-06, "loss": 0.0009, "step": 164360 }, { "epoch": 1.344155047634624, "grad_norm": 0.02956628054380417, "learning_rate": 2.9332457297629657e-06, "loss": 0.0011, "step": 164370 }, { "epoch": 1.3442368238132232, "grad_norm": 0.10821162909269333, "learning_rate": 2.9325959378117613e-06, "loss": 0.0019, "step": 164380 }, { "epoch": 1.3443185999918223, "grad_norm": 0.019487421959638596, "learning_rate": 2.9319461879752587e-06, "loss": 0.0017, "step": 164390 }, { "epoch": 1.3444003761704215, "grad_norm": 0.019803887233138084, "learning_rate": 2.9312964802666906e-06, "loss": 0.0012, "step": 164400 }, { "epoch": 1.3444821523490207, "grad_norm": 0.0065869675017893314, "learning_rate": 2.9306468146992974e-06, "loss": 0.0007, "step": 164410 }, { "epoch": 1.3445639285276199, "grad_norm": 0.005127611570060253, "learning_rate": 2.9299971912863102e-06, "loss": 0.0005, "step": 164420 }, { "epoch": 1.344645704706219, "grad_norm": 0.05550827085971832, "learning_rate": 2.9293476100409623e-06, "loss": 0.0005, "step": 164430 }, { "epoch": 1.3447274808848182, "grad_norm": 0.050516460090875626, "learning_rate": 2.9286980709764867e-06, "loss": 0.001, "step": 164440 }, { "epoch": 1.3448092570634174, "grad_norm": 0.051369454711675644, "learning_rate": 2.928048574106115e-06, "loss": 0.0006, "step": 164450 }, { "epoch": 1.3448910332420165, "grad_norm": 0.06902850419282913, "learning_rate": 2.9273991194430783e-06, "loss": 0.0005, "step": 164460 }, { "epoch": 1.3449728094206157, "grad_norm": 0.008540629409253597, "learning_rate": 2.9267497070006068e-06, "loss": 0.0006, "step": 164470 }, { "epoch": 1.3450545855992149, "grad_norm": 0.10969685763120651, "learning_rate": 2.9261003367919273e-06, "loss": 0.0007, "step": 164480 }, { "epoch": 1.345136361777814, "grad_norm": 0.0524822361767292, "learning_rate": 2.925451008830272e-06, "loss": 0.0011, "step": 164490 }, { "epoch": 1.3452181379564134, "grad_norm": 0.031247051432728767, "learning_rate": 2.924801723128865e-06, "loss": 0.0009, "step": 164500 }, { "epoch": 1.3452999141350124, "grad_norm": 0.0386735238134861, "learning_rate": 2.9241524797009337e-06, "loss": 0.0013, "step": 164510 }, { "epoch": 1.3453816903136118, "grad_norm": 0.01791275665163994, "learning_rate": 2.923503278559702e-06, "loss": 0.001, "step": 164520 }, { "epoch": 1.3454634664922107, "grad_norm": 0.09106244891881943, "learning_rate": 2.922854119718398e-06, "loss": 0.0014, "step": 164530 }, { "epoch": 1.34554524267081, "grad_norm": 0.025555038824677467, "learning_rate": 2.922205003190244e-06, "loss": 0.0009, "step": 164540 }, { "epoch": 1.345627018849409, "grad_norm": 0.0160282664000988, "learning_rate": 2.9215559289884632e-06, "loss": 0.0009, "step": 164550 }, { "epoch": 1.3457087950280084, "grad_norm": 0.02301991917192936, "learning_rate": 2.920906897126276e-06, "loss": 0.0007, "step": 164560 }, { "epoch": 1.3457905712066074, "grad_norm": 0.03263247385621071, "learning_rate": 2.920257907616907e-06, "loss": 0.0008, "step": 164570 }, { "epoch": 1.3458723473852068, "grad_norm": 0.058608923107385635, "learning_rate": 2.9196089604735743e-06, "loss": 0.0012, "step": 164580 }, { "epoch": 1.3459541235638057, "grad_norm": 0.05887222662568092, "learning_rate": 2.9189600557094976e-06, "loss": 0.0013, "step": 164590 }, { "epoch": 1.3460358997424051, "grad_norm": 0.025782931596040726, "learning_rate": 2.918311193337897e-06, "loss": 0.0013, "step": 164600 }, { "epoch": 1.3461176759210043, "grad_norm": 0.019866900518536568, "learning_rate": 2.9176623733719896e-06, "loss": 0.0008, "step": 164610 }, { "epoch": 1.3461994520996035, "grad_norm": 0.026793325319886208, "learning_rate": 2.9170135958249924e-06, "loss": 0.0012, "step": 164620 }, { "epoch": 1.3462812282782026, "grad_norm": 0.002435697242617607, "learning_rate": 2.916364860710121e-06, "loss": 0.0011, "step": 164630 }, { "epoch": 1.3463630044568018, "grad_norm": 0.04957248270511627, "learning_rate": 2.9157161680405886e-06, "loss": 0.001, "step": 164640 }, { "epoch": 1.346444780635401, "grad_norm": 0.03798248618841171, "learning_rate": 2.915067517829615e-06, "loss": 0.0009, "step": 164650 }, { "epoch": 1.3465265568140001, "grad_norm": 0.02870061621069908, "learning_rate": 2.914418910090411e-06, "loss": 0.0013, "step": 164660 }, { "epoch": 1.3466083329925993, "grad_norm": 0.022784875705838203, "learning_rate": 2.9137703448361875e-06, "loss": 0.0011, "step": 164670 }, { "epoch": 1.3466901091711985, "grad_norm": 0.07338468730449677, "learning_rate": 2.913121822080157e-06, "loss": 0.0007, "step": 164680 }, { "epoch": 1.3467718853497976, "grad_norm": 0.10302343964576721, "learning_rate": 2.9124733418355334e-06, "loss": 0.0009, "step": 164690 }, { "epoch": 1.3468536615283968, "grad_norm": 0.04367348179221153, "learning_rate": 2.9118249041155246e-06, "loss": 0.0014, "step": 164700 }, { "epoch": 1.346935437706996, "grad_norm": 0.022539030760526657, "learning_rate": 2.911176508933341e-06, "loss": 0.0012, "step": 164710 }, { "epoch": 1.3470172138855951, "grad_norm": 0.05916169285774231, "learning_rate": 2.9105281563021866e-06, "loss": 0.0014, "step": 164720 }, { "epoch": 1.3470989900641943, "grad_norm": 0.10229924321174622, "learning_rate": 2.909879846235274e-06, "loss": 0.0013, "step": 164730 }, { "epoch": 1.3471807662427935, "grad_norm": 0.05480713024735451, "learning_rate": 2.9092315787458074e-06, "loss": 0.0011, "step": 164740 }, { "epoch": 1.3472625424213927, "grad_norm": 0.039526212960481644, "learning_rate": 2.908583353846993e-06, "loss": 0.0011, "step": 164750 }, { "epoch": 1.3473443185999918, "grad_norm": 0.03487008810043335, "learning_rate": 2.907935171552034e-06, "loss": 0.0008, "step": 164760 }, { "epoch": 1.347426094778591, "grad_norm": 0.038741014897823334, "learning_rate": 2.9072870318741386e-06, "loss": 0.0005, "step": 164770 }, { "epoch": 1.3475078709571902, "grad_norm": 0.03400886058807373, "learning_rate": 2.9066389348265066e-06, "loss": 0.0013, "step": 164780 }, { "epoch": 1.3475896471357893, "grad_norm": 0.06236845999956131, "learning_rate": 2.9059908804223414e-06, "loss": 0.0004, "step": 164790 }, { "epoch": 1.3476714233143885, "grad_norm": 0.010900094173848629, "learning_rate": 2.9053428686748418e-06, "loss": 0.0008, "step": 164800 }, { "epoch": 1.3477531994929877, "grad_norm": 0.04114017263054848, "learning_rate": 2.904694899597213e-06, "loss": 0.0012, "step": 164810 }, { "epoch": 1.3478349756715868, "grad_norm": 0.033044807612895966, "learning_rate": 2.9040469732026523e-06, "loss": 0.0009, "step": 164820 }, { "epoch": 1.347916751850186, "grad_norm": 0.06746096163988113, "learning_rate": 2.9033990895043594e-06, "loss": 0.0011, "step": 164830 }, { "epoch": 1.3479985280287852, "grad_norm": 0.018446486443281174, "learning_rate": 2.9027512485155285e-06, "loss": 0.0004, "step": 164840 }, { "epoch": 1.3480803042073843, "grad_norm": 0.004037330858409405, "learning_rate": 2.9021034502493607e-06, "loss": 0.0007, "step": 164850 }, { "epoch": 1.3481620803859835, "grad_norm": 0.02033049240708351, "learning_rate": 2.9014556947190514e-06, "loss": 0.0015, "step": 164860 }, { "epoch": 1.3482438565645827, "grad_norm": 0.08802440762519836, "learning_rate": 2.900807981937795e-06, "loss": 0.0011, "step": 164870 }, { "epoch": 1.3483256327431818, "grad_norm": 0.023872287943959236, "learning_rate": 2.900160311918784e-06, "loss": 0.0005, "step": 164880 }, { "epoch": 1.348407408921781, "grad_norm": 0.08588593453168869, "learning_rate": 2.8995126846752168e-06, "loss": 0.0013, "step": 164890 }, { "epoch": 1.3484891851003802, "grad_norm": 0.09701838344335556, "learning_rate": 2.898865100220284e-06, "loss": 0.0017, "step": 164900 }, { "epoch": 1.3485709612789794, "grad_norm": 0.06561687588691711, "learning_rate": 2.8982175585671762e-06, "loss": 0.0009, "step": 164910 }, { "epoch": 1.3486527374575785, "grad_norm": 0.07197292149066925, "learning_rate": 2.8975700597290835e-06, "loss": 0.0009, "step": 164920 }, { "epoch": 1.348734513636178, "grad_norm": 0.035078056156635284, "learning_rate": 2.8969226037191993e-06, "loss": 0.0007, "step": 164930 }, { "epoch": 1.3488162898147769, "grad_norm": 0.01804298721253872, "learning_rate": 2.8962751905507113e-06, "loss": 0.0007, "step": 164940 }, { "epoch": 1.3488980659933762, "grad_norm": 0.023811444640159607, "learning_rate": 2.89562782023681e-06, "loss": 0.0007, "step": 164950 }, { "epoch": 1.3489798421719752, "grad_norm": 0.032188404351472855, "learning_rate": 2.8949804927906762e-06, "loss": 0.0007, "step": 164960 }, { "epoch": 1.3490616183505746, "grad_norm": 0.0776485800743103, "learning_rate": 2.8943332082255028e-06, "loss": 0.0009, "step": 164970 }, { "epoch": 1.3491433945291735, "grad_norm": 0.049443427473306656, "learning_rate": 2.8936859665544737e-06, "loss": 0.0019, "step": 164980 }, { "epoch": 1.349225170707773, "grad_norm": 0.03259820118546486, "learning_rate": 2.8930387677907732e-06, "loss": 0.001, "step": 164990 }, { "epoch": 1.3493069468863719, "grad_norm": 0.08358021080493927, "learning_rate": 2.8923916119475836e-06, "loss": 0.0012, "step": 165000 }, { "epoch": 1.3493887230649713, "grad_norm": 0.005028135143220425, "learning_rate": 2.8917444990380927e-06, "loss": 0.0011, "step": 165010 }, { "epoch": 1.3494704992435702, "grad_norm": 0.04254880174994469, "learning_rate": 2.891097429075479e-06, "loss": 0.0013, "step": 165020 }, { "epoch": 1.3495522754221696, "grad_norm": 0.003247599583119154, "learning_rate": 2.8904504020729257e-06, "loss": 0.0007, "step": 165030 }, { "epoch": 1.3496340516007688, "grad_norm": 0.011028176173567772, "learning_rate": 2.88980341804361e-06, "loss": 0.0014, "step": 165040 }, { "epoch": 1.349715827779368, "grad_norm": 0.11714824289083481, "learning_rate": 2.8891564770007164e-06, "loss": 0.0011, "step": 165050 }, { "epoch": 1.349797603957967, "grad_norm": 0.031138576567173004, "learning_rate": 2.8885095789574206e-06, "loss": 0.0008, "step": 165060 }, { "epoch": 1.3498793801365663, "grad_norm": 0.021406525745987892, "learning_rate": 2.887862723926903e-06, "loss": 0.001, "step": 165070 }, { "epoch": 1.3499611563151654, "grad_norm": 0.06423438340425491, "learning_rate": 2.8872159119223353e-06, "loss": 0.0008, "step": 165080 }, { "epoch": 1.3500429324937646, "grad_norm": 0.03719073534011841, "learning_rate": 2.8865691429568983e-06, "loss": 0.0005, "step": 165090 }, { "epoch": 1.3501247086723638, "grad_norm": 0.07192932814359665, "learning_rate": 2.8859224170437663e-06, "loss": 0.0014, "step": 165100 }, { "epoch": 1.350206484850963, "grad_norm": 0.016971595585346222, "learning_rate": 2.885275734196112e-06, "loss": 0.0004, "step": 165110 }, { "epoch": 1.3502882610295621, "grad_norm": 0.03227374702692032, "learning_rate": 2.884629094427108e-06, "loss": 0.0008, "step": 165120 }, { "epoch": 1.3503700372081613, "grad_norm": 0.02474377118051052, "learning_rate": 2.883982497749932e-06, "loss": 0.0007, "step": 165130 }, { "epoch": 1.3504518133867605, "grad_norm": 0.03210046514868736, "learning_rate": 2.8833359441777515e-06, "loss": 0.0005, "step": 165140 }, { "epoch": 1.3505335895653596, "grad_norm": 0.02202031947672367, "learning_rate": 2.882689433723739e-06, "loss": 0.0011, "step": 165150 }, { "epoch": 1.3506153657439588, "grad_norm": 0.07447841763496399, "learning_rate": 2.8820429664010607e-06, "loss": 0.001, "step": 165160 }, { "epoch": 1.350697141922558, "grad_norm": 0.023156076669692993, "learning_rate": 2.8813965422228916e-06, "loss": 0.0004, "step": 165170 }, { "epoch": 1.3507789181011571, "grad_norm": 0.035789571702480316, "learning_rate": 2.8807501612023968e-06, "loss": 0.0006, "step": 165180 }, { "epoch": 1.3508606942797563, "grad_norm": 0.09351778775453568, "learning_rate": 2.8801038233527458e-06, "loss": 0.0007, "step": 165190 }, { "epoch": 1.3509424704583555, "grad_norm": 0.03318792209029198, "learning_rate": 2.8794575286871e-06, "loss": 0.0006, "step": 165200 }, { "epoch": 1.3510242466369546, "grad_norm": 0.08996395021677017, "learning_rate": 2.878811277218628e-06, "loss": 0.0013, "step": 165210 }, { "epoch": 1.3511060228155538, "grad_norm": 0.04676978662610054, "learning_rate": 2.8781650689604957e-06, "loss": 0.0016, "step": 165220 }, { "epoch": 1.351187798994153, "grad_norm": 0.04232945293188095, "learning_rate": 2.8775189039258655e-06, "loss": 0.0007, "step": 165230 }, { "epoch": 1.3512695751727521, "grad_norm": 0.042349621653556824, "learning_rate": 2.8768727821278987e-06, "loss": 0.0006, "step": 165240 }, { "epoch": 1.3513513513513513, "grad_norm": 0.07483477890491486, "learning_rate": 2.8762267035797607e-06, "loss": 0.001, "step": 165250 }, { "epoch": 1.3514331275299505, "grad_norm": 0.001665801159106195, "learning_rate": 2.875580668294612e-06, "loss": 0.0004, "step": 165260 }, { "epoch": 1.3515149037085497, "grad_norm": 0.013803469017148018, "learning_rate": 2.8749346762856112e-06, "loss": 0.0008, "step": 165270 }, { "epoch": 1.3515966798871488, "grad_norm": 0.273188054561615, "learning_rate": 2.874288727565916e-06, "loss": 0.0007, "step": 165280 }, { "epoch": 1.351678456065748, "grad_norm": 0.06442185491323471, "learning_rate": 2.8736428221486894e-06, "loss": 0.0009, "step": 165290 }, { "epoch": 1.3517602322443472, "grad_norm": 0.03507879376411438, "learning_rate": 2.8729969600470877e-06, "loss": 0.0007, "step": 165300 }, { "epoch": 1.3518420084229463, "grad_norm": 0.01814350299537182, "learning_rate": 2.872351141274268e-06, "loss": 0.0024, "step": 165310 }, { "epoch": 1.3519237846015455, "grad_norm": 0.02355884201824665, "learning_rate": 2.8717053658433812e-06, "loss": 0.0006, "step": 165320 }, { "epoch": 1.3520055607801447, "grad_norm": 0.01055169478058815, "learning_rate": 2.871059633767588e-06, "loss": 0.0009, "step": 165330 }, { "epoch": 1.3520873369587438, "grad_norm": 0.013179756700992584, "learning_rate": 2.870413945060041e-06, "loss": 0.0006, "step": 165340 }, { "epoch": 1.352169113137343, "grad_norm": 0.03274077549576759, "learning_rate": 2.8697682997338926e-06, "loss": 0.0007, "step": 165350 }, { "epoch": 1.3522508893159424, "grad_norm": 0.029347732663154602, "learning_rate": 2.8691226978022933e-06, "loss": 0.002, "step": 165360 }, { "epoch": 1.3523326654945413, "grad_norm": 0.08120748400688171, "learning_rate": 2.868477139278399e-06, "loss": 0.0009, "step": 165370 }, { "epoch": 1.3524144416731407, "grad_norm": 0.060058653354644775, "learning_rate": 2.8678316241753577e-06, "loss": 0.0008, "step": 165380 }, { "epoch": 1.3524962178517397, "grad_norm": 0.0007500966894440353, "learning_rate": 2.8671861525063195e-06, "loss": 0.0006, "step": 165390 }, { "epoch": 1.352577994030339, "grad_norm": 0.03811407834291458, "learning_rate": 2.866540724284431e-06, "loss": 0.0024, "step": 165400 }, { "epoch": 1.352659770208938, "grad_norm": 0.033998943865299225, "learning_rate": 2.865895339522844e-06, "loss": 0.0009, "step": 165410 }, { "epoch": 1.3527415463875374, "grad_norm": 0.008029619231820107, "learning_rate": 2.865249998234704e-06, "loss": 0.0009, "step": 165420 }, { "epoch": 1.3528233225661364, "grad_norm": 0.0022494986187666655, "learning_rate": 2.864604700433159e-06, "loss": 0.0006, "step": 165430 }, { "epoch": 1.3529050987447357, "grad_norm": 0.04256988316774368, "learning_rate": 2.863959446131347e-06, "loss": 0.001, "step": 165440 }, { "epoch": 1.352986874923335, "grad_norm": 0.07642373442649841, "learning_rate": 2.8633142353424197e-06, "loss": 0.0008, "step": 165450 }, { "epoch": 1.353068651101934, "grad_norm": 0.05206122249364853, "learning_rate": 2.8626690680795188e-06, "loss": 0.0008, "step": 165460 }, { "epoch": 1.3531504272805333, "grad_norm": 0.007341454271227121, "learning_rate": 2.8620239443557855e-06, "loss": 0.0011, "step": 165470 }, { "epoch": 1.3532322034591324, "grad_norm": 0.03288924694061279, "learning_rate": 2.8613788641843608e-06, "loss": 0.0008, "step": 165480 }, { "epoch": 1.3533139796377316, "grad_norm": 0.090645931661129, "learning_rate": 2.8607338275783892e-06, "loss": 0.001, "step": 165490 }, { "epoch": 1.3533957558163308, "grad_norm": 0.017798790708184242, "learning_rate": 2.8600888345510075e-06, "loss": 0.0007, "step": 165500 }, { "epoch": 1.35347753199493, "grad_norm": 0.01016379427164793, "learning_rate": 2.859443885115356e-06, "loss": 0.0014, "step": 165510 }, { "epoch": 1.353559308173529, "grad_norm": 0.004527513403445482, "learning_rate": 2.858798979284571e-06, "loss": 0.0005, "step": 165520 }, { "epoch": 1.3536410843521283, "grad_norm": 0.0016633364139124751, "learning_rate": 2.858154117071793e-06, "loss": 0.0012, "step": 165530 }, { "epoch": 1.3537228605307274, "grad_norm": 0.028651200234889984, "learning_rate": 2.8575092984901566e-06, "loss": 0.0006, "step": 165540 }, { "epoch": 1.3538046367093266, "grad_norm": 0.0042498549446463585, "learning_rate": 2.8568645235527994e-06, "loss": 0.0008, "step": 165550 }, { "epoch": 1.3538864128879258, "grad_norm": 0.03573790192604065, "learning_rate": 2.8562197922728497e-06, "loss": 0.0006, "step": 165560 }, { "epoch": 1.353968189066525, "grad_norm": 0.030856914818286896, "learning_rate": 2.855575104663447e-06, "loss": 0.0005, "step": 165570 }, { "epoch": 1.354049965245124, "grad_norm": 0.006525895558297634, "learning_rate": 2.8549304607377224e-06, "loss": 0.0007, "step": 165580 }, { "epoch": 1.3541317414237233, "grad_norm": 0.01586788333952427, "learning_rate": 2.8542858605088088e-06, "loss": 0.0007, "step": 165590 }, { "epoch": 1.3542135176023224, "grad_norm": 0.04789334908127785, "learning_rate": 2.8536413039898343e-06, "loss": 0.001, "step": 165600 }, { "epoch": 1.3542952937809216, "grad_norm": 0.02812715247273445, "learning_rate": 2.852996791193933e-06, "loss": 0.0008, "step": 165610 }, { "epoch": 1.3543770699595208, "grad_norm": 0.19424621760845184, "learning_rate": 2.852352322134232e-06, "loss": 0.0007, "step": 165620 }, { "epoch": 1.35445884613812, "grad_norm": 0.006187009159475565, "learning_rate": 2.85170789682386e-06, "loss": 0.001, "step": 165630 }, { "epoch": 1.3545406223167191, "grad_norm": 0.044084686785936356, "learning_rate": 2.8510635152759413e-06, "loss": 0.0012, "step": 165640 }, { "epoch": 1.3546223984953183, "grad_norm": 0.03048894926905632, "learning_rate": 2.850419177503609e-06, "loss": 0.0011, "step": 165650 }, { "epoch": 1.3547041746739175, "grad_norm": 0.030307726934552193, "learning_rate": 2.8497748835199845e-06, "loss": 0.0014, "step": 165660 }, { "epoch": 1.3547859508525166, "grad_norm": 0.11191344261169434, "learning_rate": 2.8491306333381957e-06, "loss": 0.0008, "step": 165670 }, { "epoch": 1.3548677270311158, "grad_norm": 0.06568749248981476, "learning_rate": 2.84848642697136e-06, "loss": 0.0009, "step": 165680 }, { "epoch": 1.354949503209715, "grad_norm": 0.011839455924928188, "learning_rate": 2.8478422644326078e-06, "loss": 0.0007, "step": 165690 }, { "epoch": 1.3550312793883141, "grad_norm": 0.038260914385318756, "learning_rate": 2.847198145735056e-06, "loss": 0.0006, "step": 165700 }, { "epoch": 1.3551130555669133, "grad_norm": 0.038779426366090775, "learning_rate": 2.846554070891829e-06, "loss": 0.0005, "step": 165710 }, { "epoch": 1.3551948317455125, "grad_norm": 0.04293280467391014, "learning_rate": 2.8459100399160444e-06, "loss": 0.0009, "step": 165720 }, { "epoch": 1.3552766079241116, "grad_norm": 0.055108580738306046, "learning_rate": 2.8452660528208247e-06, "loss": 0.0017, "step": 165730 }, { "epoch": 1.3553583841027108, "grad_norm": 0.00884106196463108, "learning_rate": 2.8446221096192873e-06, "loss": 0.0013, "step": 165740 }, { "epoch": 1.35544016028131, "grad_norm": 0.058879703283309937, "learning_rate": 2.843978210324549e-06, "loss": 0.0008, "step": 165750 }, { "epoch": 1.3555219364599091, "grad_norm": 0.027467040345072746, "learning_rate": 2.843334354949725e-06, "loss": 0.0013, "step": 165760 }, { "epoch": 1.3556037126385085, "grad_norm": 0.11823461949825287, "learning_rate": 2.8426905435079356e-06, "loss": 0.0012, "step": 165770 }, { "epoch": 1.3556854888171075, "grad_norm": 0.013655278831720352, "learning_rate": 2.8420467760122933e-06, "loss": 0.0008, "step": 165780 }, { "epoch": 1.3557672649957069, "grad_norm": 0.012619223445653915, "learning_rate": 2.841403052475914e-06, "loss": 0.0004, "step": 165790 }, { "epoch": 1.3558490411743058, "grad_norm": 0.05357133969664574, "learning_rate": 2.8407593729119055e-06, "loss": 0.0005, "step": 165800 }, { "epoch": 1.3559308173529052, "grad_norm": 0.08287514746189117, "learning_rate": 2.8401157373333853e-06, "loss": 0.0007, "step": 165810 }, { "epoch": 1.3560125935315042, "grad_norm": 0.10575028508901596, "learning_rate": 2.8394721457534635e-06, "loss": 0.0007, "step": 165820 }, { "epoch": 1.3560943697101036, "grad_norm": 0.065816730260849, "learning_rate": 2.8388285981852504e-06, "loss": 0.0007, "step": 165830 }, { "epoch": 1.3561761458887025, "grad_norm": 0.02481050416827202, "learning_rate": 2.8381850946418554e-06, "loss": 0.0019, "step": 165840 }, { "epoch": 1.356257922067302, "grad_norm": 0.05174378305673599, "learning_rate": 2.837541635136385e-06, "loss": 0.0009, "step": 165850 }, { "epoch": 1.3563396982459008, "grad_norm": 0.03943121060729027, "learning_rate": 2.8368982196819504e-06, "loss": 0.002, "step": 165860 }, { "epoch": 1.3564214744245002, "grad_norm": 0.011552052572369576, "learning_rate": 2.8362548482916584e-06, "loss": 0.0005, "step": 165870 }, { "epoch": 1.3565032506030994, "grad_norm": 0.09553120285272598, "learning_rate": 2.835611520978614e-06, "loss": 0.0025, "step": 165880 }, { "epoch": 1.3565850267816986, "grad_norm": 0.10091099143028259, "learning_rate": 2.8349682377559196e-06, "loss": 0.0013, "step": 165890 }, { "epoch": 1.3566668029602977, "grad_norm": 0.0007103994721546769, "learning_rate": 2.834324998636685e-06, "loss": 0.0009, "step": 165900 }, { "epoch": 1.356748579138897, "grad_norm": 0.028342777863144875, "learning_rate": 2.833681803634012e-06, "loss": 0.0011, "step": 165910 }, { "epoch": 1.356830355317496, "grad_norm": 0.03550915792584419, "learning_rate": 2.833038652760999e-06, "loss": 0.0005, "step": 165920 }, { "epoch": 1.3569121314960952, "grad_norm": 0.03471377491950989, "learning_rate": 2.8323955460307485e-06, "loss": 0.0005, "step": 165930 }, { "epoch": 1.3569939076746944, "grad_norm": 0.019895115867257118, "learning_rate": 2.8317524834563647e-06, "loss": 0.0012, "step": 165940 }, { "epoch": 1.3570756838532936, "grad_norm": 0.008746284991502762, "learning_rate": 2.831109465050945e-06, "loss": 0.0015, "step": 165950 }, { "epoch": 1.3571574600318927, "grad_norm": 0.030331406742334366, "learning_rate": 2.830466490827588e-06, "loss": 0.0014, "step": 165960 }, { "epoch": 1.357239236210492, "grad_norm": 0.01291785016655922, "learning_rate": 2.829823560799391e-06, "loss": 0.0009, "step": 165970 }, { "epoch": 1.357321012389091, "grad_norm": 0.030722416937351227, "learning_rate": 2.8291806749794538e-06, "loss": 0.0007, "step": 165980 }, { "epoch": 1.3574027885676903, "grad_norm": 0.0777963325381279, "learning_rate": 2.828537833380871e-06, "loss": 0.0009, "step": 165990 }, { "epoch": 1.3574845647462894, "grad_norm": 0.05979016050696373, "learning_rate": 2.827895036016737e-06, "loss": 0.0008, "step": 166000 }, { "epoch": 1.3575663409248886, "grad_norm": 0.08014866709709167, "learning_rate": 2.827252282900145e-06, "loss": 0.0013, "step": 166010 }, { "epoch": 1.3576481171034878, "grad_norm": 0.0482763908803463, "learning_rate": 2.8266095740441922e-06, "loss": 0.0011, "step": 166020 }, { "epoch": 1.357729893282087, "grad_norm": 0.014552460983395576, "learning_rate": 2.825966909461971e-06, "loss": 0.0007, "step": 166030 }, { "epoch": 1.357811669460686, "grad_norm": 0.08602488040924072, "learning_rate": 2.825324289166569e-06, "loss": 0.0014, "step": 166040 }, { "epoch": 1.3578934456392853, "grad_norm": 0.08920378237962723, "learning_rate": 2.824681713171077e-06, "loss": 0.0009, "step": 166050 }, { "epoch": 1.3579752218178844, "grad_norm": 0.04547814279794693, "learning_rate": 2.8240391814885882e-06, "loss": 0.0005, "step": 166060 }, { "epoch": 1.3580569979964836, "grad_norm": 0.03154262527823448, "learning_rate": 2.82339669413219e-06, "loss": 0.0007, "step": 166070 }, { "epoch": 1.3581387741750828, "grad_norm": 0.030085256323218346, "learning_rate": 2.822754251114971e-06, "loss": 0.0004, "step": 166080 }, { "epoch": 1.358220550353682, "grad_norm": 0.011496784165501595, "learning_rate": 2.8221118524500145e-06, "loss": 0.0012, "step": 166090 }, { "epoch": 1.3583023265322811, "grad_norm": 0.0425092913210392, "learning_rate": 2.8214694981504133e-06, "loss": 0.0005, "step": 166100 }, { "epoch": 1.3583841027108803, "grad_norm": 0.014063279144465923, "learning_rate": 2.8208271882292483e-06, "loss": 0.0005, "step": 166110 }, { "epoch": 1.3584658788894795, "grad_norm": 0.007558380253612995, "learning_rate": 2.8201849226996046e-06, "loss": 0.0006, "step": 166120 }, { "epoch": 1.3585476550680786, "grad_norm": 0.016546573489904404, "learning_rate": 2.8195427015745645e-06, "loss": 0.0006, "step": 166130 }, { "epoch": 1.3586294312466778, "grad_norm": 0.13865593075752258, "learning_rate": 2.818900524867214e-06, "loss": 0.0012, "step": 166140 }, { "epoch": 1.358711207425277, "grad_norm": 0.02279459498822689, "learning_rate": 2.818258392590635e-06, "loss": 0.0007, "step": 166150 }, { "epoch": 1.3587929836038761, "grad_norm": 0.024175524711608887, "learning_rate": 2.817616304757903e-06, "loss": 0.0009, "step": 166160 }, { "epoch": 1.3588747597824753, "grad_norm": 0.02779889665544033, "learning_rate": 2.8169742613820995e-06, "loss": 0.0005, "step": 166170 }, { "epoch": 1.3589565359610745, "grad_norm": 0.023343879729509354, "learning_rate": 2.816332262476306e-06, "loss": 0.0007, "step": 166180 }, { "epoch": 1.3590383121396736, "grad_norm": 0.022312521934509277, "learning_rate": 2.8156903080536002e-06, "loss": 0.0005, "step": 166190 }, { "epoch": 1.359120088318273, "grad_norm": 0.03486783429980278, "learning_rate": 2.8150483981270572e-06, "loss": 0.0006, "step": 166200 }, { "epoch": 1.359201864496872, "grad_norm": 0.022751890122890472, "learning_rate": 2.8144065327097536e-06, "loss": 0.0012, "step": 166210 }, { "epoch": 1.3592836406754714, "grad_norm": 0.06644567102193832, "learning_rate": 2.8137647118147666e-06, "loss": 0.0011, "step": 166220 }, { "epoch": 1.3593654168540703, "grad_norm": 0.0010805029887706041, "learning_rate": 2.8131229354551698e-06, "loss": 0.0003, "step": 166230 }, { "epoch": 1.3594471930326697, "grad_norm": 0.034733206033706665, "learning_rate": 2.8124812036440363e-06, "loss": 0.0009, "step": 166240 }, { "epoch": 1.3595289692112686, "grad_norm": 0.0777580514550209, "learning_rate": 2.8118395163944367e-06, "loss": 0.0008, "step": 166250 }, { "epoch": 1.359610745389868, "grad_norm": 0.0383797362446785, "learning_rate": 2.8111978737194466e-06, "loss": 0.001, "step": 166260 }, { "epoch": 1.359692521568467, "grad_norm": 0.049916014075279236, "learning_rate": 2.810556275632138e-06, "loss": 0.0017, "step": 166270 }, { "epoch": 1.3597742977470664, "grad_norm": 0.048194028437137604, "learning_rate": 2.8099147221455747e-06, "loss": 0.0011, "step": 166280 }, { "epoch": 1.3598560739256653, "grad_norm": 0.044856857508420944, "learning_rate": 2.8092732132728273e-06, "loss": 0.0004, "step": 166290 }, { "epoch": 1.3599378501042647, "grad_norm": 0.044597044587135315, "learning_rate": 2.8086317490269665e-06, "loss": 0.001, "step": 166300 }, { "epoch": 1.3600196262828639, "grad_norm": 0.18157099187374115, "learning_rate": 2.807990329421059e-06, "loss": 0.0022, "step": 166310 }, { "epoch": 1.360101402461463, "grad_norm": 0.0255842674523592, "learning_rate": 2.8073489544681696e-06, "loss": 0.0008, "step": 166320 }, { "epoch": 1.3601831786400622, "grad_norm": 0.040683433413505554, "learning_rate": 2.8067076241813628e-06, "loss": 0.0007, "step": 166330 }, { "epoch": 1.3602649548186614, "grad_norm": 0.01272600144147873, "learning_rate": 2.806066338573706e-06, "loss": 0.0007, "step": 166340 }, { "epoch": 1.3603467309972606, "grad_norm": 0.0353107824921608, "learning_rate": 2.805425097658262e-06, "loss": 0.0013, "step": 166350 }, { "epoch": 1.3604285071758597, "grad_norm": 0.015661384910345078, "learning_rate": 2.8047839014480916e-06, "loss": 0.0013, "step": 166360 }, { "epoch": 1.360510283354459, "grad_norm": 0.00834641419351101, "learning_rate": 2.804142749956256e-06, "loss": 0.0009, "step": 166370 }, { "epoch": 1.360592059533058, "grad_norm": 0.24381032586097717, "learning_rate": 2.8035016431958197e-06, "loss": 0.0005, "step": 166380 }, { "epoch": 1.3606738357116572, "grad_norm": 0.10201849043369293, "learning_rate": 2.802860581179842e-06, "loss": 0.001, "step": 166390 }, { "epoch": 1.3607556118902564, "grad_norm": 0.016613030806183815, "learning_rate": 2.8022195639213783e-06, "loss": 0.001, "step": 166400 }, { "epoch": 1.3608373880688556, "grad_norm": 0.03818840906023979, "learning_rate": 2.8015785914334864e-06, "loss": 0.0012, "step": 166410 }, { "epoch": 1.3609191642474547, "grad_norm": 0.03529728949069977, "learning_rate": 2.8009376637292273e-06, "loss": 0.0004, "step": 166420 }, { "epoch": 1.361000940426054, "grad_norm": 0.004042297601699829, "learning_rate": 2.8002967808216565e-06, "loss": 0.0008, "step": 166430 }, { "epoch": 1.361082716604653, "grad_norm": 0.06589416414499283, "learning_rate": 2.7996559427238275e-06, "loss": 0.0008, "step": 166440 }, { "epoch": 1.3611644927832522, "grad_norm": 0.24009448289871216, "learning_rate": 2.7990151494487936e-06, "loss": 0.001, "step": 166450 }, { "epoch": 1.3612462689618514, "grad_norm": 0.020224768668413162, "learning_rate": 2.798374401009612e-06, "loss": 0.0013, "step": 166460 }, { "epoch": 1.3613280451404506, "grad_norm": 0.02161412499845028, "learning_rate": 2.7977336974193335e-06, "loss": 0.0007, "step": 166470 }, { "epoch": 1.3614098213190498, "grad_norm": 0.0034014214761555195, "learning_rate": 2.79709303869101e-06, "loss": 0.001, "step": 166480 }, { "epoch": 1.361491597497649, "grad_norm": 0.040107518434524536, "learning_rate": 2.7964524248376894e-06, "loss": 0.0007, "step": 166490 }, { "epoch": 1.361573373676248, "grad_norm": 0.02072463370859623, "learning_rate": 2.795811855872427e-06, "loss": 0.0007, "step": 166500 }, { "epoch": 1.3616551498548473, "grad_norm": 0.008455964736640453, "learning_rate": 2.7951713318082696e-06, "loss": 0.0005, "step": 166510 }, { "epoch": 1.3617369260334464, "grad_norm": 0.055794790387153625, "learning_rate": 2.7945308526582625e-06, "loss": 0.0006, "step": 166520 }, { "epoch": 1.3618187022120456, "grad_norm": 0.032798152416944504, "learning_rate": 2.793890418435453e-06, "loss": 0.0007, "step": 166530 }, { "epoch": 1.3619004783906448, "grad_norm": 0.03482097387313843, "learning_rate": 2.7932500291528906e-06, "loss": 0.0018, "step": 166540 }, { "epoch": 1.361982254569244, "grad_norm": 0.04009511321783066, "learning_rate": 2.7926096848236194e-06, "loss": 0.0012, "step": 166550 }, { "epoch": 1.362064030747843, "grad_norm": 0.0013454984873533249, "learning_rate": 2.791969385460683e-06, "loss": 0.0006, "step": 166560 }, { "epoch": 1.3621458069264423, "grad_norm": 0.020007850602269173, "learning_rate": 2.791329131077123e-06, "loss": 0.0005, "step": 166570 }, { "epoch": 1.3622275831050414, "grad_norm": 0.04117446392774582, "learning_rate": 2.7906889216859855e-06, "loss": 0.0015, "step": 166580 }, { "epoch": 1.3623093592836406, "grad_norm": 0.04891594126820564, "learning_rate": 2.7900487573003106e-06, "loss": 0.0014, "step": 166590 }, { "epoch": 1.3623911354622398, "grad_norm": 0.02318606711924076, "learning_rate": 2.7894086379331387e-06, "loss": 0.0006, "step": 166600 }, { "epoch": 1.362472911640839, "grad_norm": 0.031993478536605835, "learning_rate": 2.7887685635975083e-06, "loss": 0.0006, "step": 166610 }, { "epoch": 1.3625546878194381, "grad_norm": 0.0330960676074028, "learning_rate": 2.788128534306461e-06, "loss": 0.0005, "step": 166620 }, { "epoch": 1.3626364639980375, "grad_norm": 0.004504469223320484, "learning_rate": 2.787488550073036e-06, "loss": 0.0005, "step": 166630 }, { "epoch": 1.3627182401766365, "grad_norm": 0.09281255304813385, "learning_rate": 2.7868486109102653e-06, "loss": 0.0012, "step": 166640 }, { "epoch": 1.3628000163552358, "grad_norm": 0.005440437234938145, "learning_rate": 2.7862087168311853e-06, "loss": 0.001, "step": 166650 }, { "epoch": 1.3628817925338348, "grad_norm": 0.020214099436998367, "learning_rate": 2.7855688678488353e-06, "loss": 0.0007, "step": 166660 }, { "epoch": 1.3629635687124342, "grad_norm": 0.024001289159059525, "learning_rate": 2.7849290639762473e-06, "loss": 0.0009, "step": 166670 }, { "epoch": 1.3630453448910331, "grad_norm": 0.06091807037591934, "learning_rate": 2.7842893052264553e-06, "loss": 0.0015, "step": 166680 }, { "epoch": 1.3631271210696325, "grad_norm": 0.03688296303153038, "learning_rate": 2.783649591612488e-06, "loss": 0.0005, "step": 166690 }, { "epoch": 1.3632088972482315, "grad_norm": 0.010243624448776245, "learning_rate": 2.783009923147383e-06, "loss": 0.0007, "step": 166700 }, { "epoch": 1.3632906734268309, "grad_norm": 0.03252546861767769, "learning_rate": 2.782370299844168e-06, "loss": 0.0006, "step": 166710 }, { "epoch": 1.3633724496054298, "grad_norm": 0.06975095719099045, "learning_rate": 2.781730721715872e-06, "loss": 0.0007, "step": 166720 }, { "epoch": 1.3634542257840292, "grad_norm": 0.006899638567119837, "learning_rate": 2.7810911887755228e-06, "loss": 0.0007, "step": 166730 }, { "epoch": 1.3635360019626284, "grad_norm": 0.030210673809051514, "learning_rate": 2.7804517010361513e-06, "loss": 0.0008, "step": 166740 }, { "epoch": 1.3636177781412275, "grad_norm": 0.03176207095384598, "learning_rate": 2.7798122585107853e-06, "loss": 0.0011, "step": 166750 }, { "epoch": 1.3636995543198267, "grad_norm": 0.07323642820119858, "learning_rate": 2.7791728612124457e-06, "loss": 0.0022, "step": 166760 }, { "epoch": 1.3637813304984259, "grad_norm": 0.033268895000219345, "learning_rate": 2.778533509154159e-06, "loss": 0.0006, "step": 166770 }, { "epoch": 1.363863106677025, "grad_norm": 0.05797487497329712, "learning_rate": 2.777894202348951e-06, "loss": 0.001, "step": 166780 }, { "epoch": 1.3639448828556242, "grad_norm": 0.04451120272278786, "learning_rate": 2.777254940809845e-06, "loss": 0.0016, "step": 166790 }, { "epoch": 1.3640266590342234, "grad_norm": 0.02202623337507248, "learning_rate": 2.7766157245498625e-06, "loss": 0.0009, "step": 166800 }, { "epoch": 1.3641084352128225, "grad_norm": 0.023509373888373375, "learning_rate": 2.7759765535820236e-06, "loss": 0.0014, "step": 166810 }, { "epoch": 1.3641902113914217, "grad_norm": 0.00617616344243288, "learning_rate": 2.775337427919351e-06, "loss": 0.0009, "step": 166820 }, { "epoch": 1.3642719875700209, "grad_norm": 0.049678388983011246, "learning_rate": 2.7746983475748647e-06, "loss": 0.0009, "step": 166830 }, { "epoch": 1.36435376374862, "grad_norm": 0.03371317312121391, "learning_rate": 2.774059312561581e-06, "loss": 0.0011, "step": 166840 }, { "epoch": 1.3644355399272192, "grad_norm": 0.04390523582696915, "learning_rate": 2.773420322892516e-06, "loss": 0.0008, "step": 166850 }, { "epoch": 1.3645173161058184, "grad_norm": 0.49728572368621826, "learning_rate": 2.772781378580692e-06, "loss": 0.0006, "step": 166860 }, { "epoch": 1.3645990922844176, "grad_norm": 0.0060451147146523, "learning_rate": 2.772142479639123e-06, "loss": 0.001, "step": 166870 }, { "epoch": 1.3646808684630167, "grad_norm": 0.020127050578594208, "learning_rate": 2.7715036260808204e-06, "loss": 0.001, "step": 166880 }, { "epoch": 1.364762644641616, "grad_norm": 0.03889716789126396, "learning_rate": 2.7708648179187984e-06, "loss": 0.0005, "step": 166890 }, { "epoch": 1.364844420820215, "grad_norm": 0.04070860892534256, "learning_rate": 2.7702260551660743e-06, "loss": 0.0005, "step": 166900 }, { "epoch": 1.3649261969988142, "grad_norm": 0.03637334331870079, "learning_rate": 2.769587337835658e-06, "loss": 0.0005, "step": 166910 }, { "epoch": 1.3650079731774134, "grad_norm": 0.005375795532017946, "learning_rate": 2.7689486659405607e-06, "loss": 0.0006, "step": 166920 }, { "epoch": 1.3650897493560126, "grad_norm": 0.02818233333528042, "learning_rate": 2.76831003949379e-06, "loss": 0.0009, "step": 166930 }, { "epoch": 1.3651715255346117, "grad_norm": 0.007154558785259724, "learning_rate": 2.7676714585083597e-06, "loss": 0.0004, "step": 166940 }, { "epoch": 1.365253301713211, "grad_norm": 0.03189359977841377, "learning_rate": 2.767032922997277e-06, "loss": 0.0007, "step": 166950 }, { "epoch": 1.36533507789181, "grad_norm": 0.15697546303272247, "learning_rate": 2.7663944329735484e-06, "loss": 0.0014, "step": 166960 }, { "epoch": 1.3654168540704092, "grad_norm": 0.034798283129930496, "learning_rate": 2.7657559884501784e-06, "loss": 0.001, "step": 166970 }, { "epoch": 1.3654986302490084, "grad_norm": 0.029047857969999313, "learning_rate": 2.7651175894401778e-06, "loss": 0.0005, "step": 166980 }, { "epoch": 1.3655804064276076, "grad_norm": 0.06920856982469559, "learning_rate": 2.7644792359565498e-06, "loss": 0.0029, "step": 166990 }, { "epoch": 1.3656621826062068, "grad_norm": 0.07522626966238022, "learning_rate": 2.763840928012295e-06, "loss": 0.0009, "step": 167000 }, { "epoch": 1.365743958784806, "grad_norm": 0.01784411631524563, "learning_rate": 2.763202665620417e-06, "loss": 0.0006, "step": 167010 }, { "epoch": 1.365825734963405, "grad_norm": 0.057111360132694244, "learning_rate": 2.7625644487939196e-06, "loss": 0.0013, "step": 167020 }, { "epoch": 1.3659075111420043, "grad_norm": 0.04213905707001686, "learning_rate": 2.761926277545804e-06, "loss": 0.0013, "step": 167030 }, { "epoch": 1.3659892873206034, "grad_norm": 0.102623850107193, "learning_rate": 2.7612881518890687e-06, "loss": 0.0005, "step": 167040 }, { "epoch": 1.3660710634992026, "grad_norm": 0.006543587427586317, "learning_rate": 2.7606500718367118e-06, "loss": 0.0004, "step": 167050 }, { "epoch": 1.366152839677802, "grad_norm": 0.044986214488744736, "learning_rate": 2.760012037401735e-06, "loss": 0.0006, "step": 167060 }, { "epoch": 1.366234615856401, "grad_norm": 0.030268078669905663, "learning_rate": 2.7593740485971338e-06, "loss": 0.0005, "step": 167070 }, { "epoch": 1.3663163920350003, "grad_norm": 0.06644408404827118, "learning_rate": 2.7587361054359047e-06, "loss": 0.0007, "step": 167080 }, { "epoch": 1.3663981682135993, "grad_norm": 0.06391576677560806, "learning_rate": 2.7580982079310403e-06, "loss": 0.002, "step": 167090 }, { "epoch": 1.3664799443921987, "grad_norm": 0.1702258586883545, "learning_rate": 2.7574603560955403e-06, "loss": 0.0007, "step": 167100 }, { "epoch": 1.3665617205707976, "grad_norm": 0.02936585061252117, "learning_rate": 2.756822549942398e-06, "loss": 0.0011, "step": 167110 }, { "epoch": 1.366643496749397, "grad_norm": 0.022186968475580215, "learning_rate": 2.7561847894846013e-06, "loss": 0.0006, "step": 167120 }, { "epoch": 1.366725272927996, "grad_norm": 0.01704499125480652, "learning_rate": 2.755547074735142e-06, "loss": 0.0007, "step": 167130 }, { "epoch": 1.3668070491065953, "grad_norm": 0.05207577720284462, "learning_rate": 2.7549094057070152e-06, "loss": 0.0009, "step": 167140 }, { "epoch": 1.3668888252851943, "grad_norm": 0.0020590131171047688, "learning_rate": 2.7542717824132093e-06, "loss": 0.0011, "step": 167150 }, { "epoch": 1.3669706014637937, "grad_norm": 0.055747874081134796, "learning_rate": 2.753634204866712e-06, "loss": 0.0016, "step": 167160 }, { "epoch": 1.3670523776423928, "grad_norm": 0.0161744337528944, "learning_rate": 2.7529966730805103e-06, "loss": 0.0014, "step": 167170 }, { "epoch": 1.367134153820992, "grad_norm": 0.023952171206474304, "learning_rate": 2.752359187067595e-06, "loss": 0.0012, "step": 167180 }, { "epoch": 1.3672159299995912, "grad_norm": 0.0010076714679598808, "learning_rate": 2.7517217468409497e-06, "loss": 0.0006, "step": 167190 }, { "epoch": 1.3672977061781904, "grad_norm": 0.048897553235292435, "learning_rate": 2.7510843524135594e-06, "loss": 0.0006, "step": 167200 }, { "epoch": 1.3673794823567895, "grad_norm": 0.0013512863079085946, "learning_rate": 2.7504470037984076e-06, "loss": 0.0006, "step": 167210 }, { "epoch": 1.3674612585353887, "grad_norm": 0.04470442607998848, "learning_rate": 2.74980970100848e-06, "loss": 0.001, "step": 167220 }, { "epoch": 1.3675430347139879, "grad_norm": 0.020461421459913254, "learning_rate": 2.749172444056761e-06, "loss": 0.0007, "step": 167230 }, { "epoch": 1.367624810892587, "grad_norm": 0.0551295168697834, "learning_rate": 2.7485352329562264e-06, "loss": 0.0016, "step": 167240 }, { "epoch": 1.3677065870711862, "grad_norm": 0.022901661694049835, "learning_rate": 2.7478980677198584e-06, "loss": 0.0009, "step": 167250 }, { "epoch": 1.3677883632497854, "grad_norm": 0.06845570355653763, "learning_rate": 2.7472609483606367e-06, "loss": 0.0011, "step": 167260 }, { "epoch": 1.3678701394283845, "grad_norm": 0.03236600011587143, "learning_rate": 2.746623874891542e-06, "loss": 0.0007, "step": 167270 }, { "epoch": 1.3679519156069837, "grad_norm": 0.019655399024486542, "learning_rate": 2.7459868473255502e-06, "loss": 0.0004, "step": 167280 }, { "epoch": 1.3680336917855829, "grad_norm": 0.11955540627241135, "learning_rate": 2.7453498656756394e-06, "loss": 0.0005, "step": 167290 }, { "epoch": 1.368115467964182, "grad_norm": 0.0250803604722023, "learning_rate": 2.7447129299547814e-06, "loss": 0.0012, "step": 167300 }, { "epoch": 1.3681972441427812, "grad_norm": 0.027741609141230583, "learning_rate": 2.744076040175957e-06, "loss": 0.0013, "step": 167310 }, { "epoch": 1.3682790203213804, "grad_norm": 0.03522232919931412, "learning_rate": 2.7434391963521367e-06, "loss": 0.0004, "step": 167320 }, { "epoch": 1.3683607964999795, "grad_norm": 0.12957166135311127, "learning_rate": 2.7428023984962946e-06, "loss": 0.0014, "step": 167330 }, { "epoch": 1.3684425726785787, "grad_norm": 0.03396281972527504, "learning_rate": 2.742165646621403e-06, "loss": 0.0007, "step": 167340 }, { "epoch": 1.3685243488571779, "grad_norm": 0.027003508061170578, "learning_rate": 2.7415289407404316e-06, "loss": 0.0009, "step": 167350 }, { "epoch": 1.368606125035777, "grad_norm": 0.09858465194702148, "learning_rate": 2.740892280866352e-06, "loss": 0.0013, "step": 167360 }, { "epoch": 1.3686879012143762, "grad_norm": 0.022307518869638443, "learning_rate": 2.7402556670121325e-06, "loss": 0.0006, "step": 167370 }, { "epoch": 1.3687696773929754, "grad_norm": 0.016259973868727684, "learning_rate": 2.73961909919074e-06, "loss": 0.0003, "step": 167380 }, { "epoch": 1.3688514535715746, "grad_norm": 0.0034042401239275932, "learning_rate": 2.738982577415146e-06, "loss": 0.0011, "step": 167390 }, { "epoch": 1.3689332297501737, "grad_norm": 0.03895476460456848, "learning_rate": 2.7383461016983142e-06, "loss": 0.0011, "step": 167400 }, { "epoch": 1.369015005928773, "grad_norm": 0.021705253049731255, "learning_rate": 2.7377096720532115e-06, "loss": 0.0009, "step": 167410 }, { "epoch": 1.369096782107372, "grad_norm": 0.03870847448706627, "learning_rate": 2.737073288492799e-06, "loss": 0.0006, "step": 167420 }, { "epoch": 1.3691785582859712, "grad_norm": 0.005798390135169029, "learning_rate": 2.7364369510300448e-06, "loss": 0.001, "step": 167430 }, { "epoch": 1.3692603344645704, "grad_norm": 0.06977426260709763, "learning_rate": 2.7358006596779106e-06, "loss": 0.0012, "step": 167440 }, { "epoch": 1.3693421106431696, "grad_norm": 0.0010252326028421521, "learning_rate": 2.7351644144493573e-06, "loss": 0.0004, "step": 167450 }, { "epoch": 1.3694238868217687, "grad_norm": 0.0020474023185670376, "learning_rate": 2.7345282153573448e-06, "loss": 0.0008, "step": 167460 }, { "epoch": 1.369505663000368, "grad_norm": 0.0028335903771221638, "learning_rate": 2.7338920624148352e-06, "loss": 0.0012, "step": 167470 }, { "epoch": 1.369587439178967, "grad_norm": 0.03071664087474346, "learning_rate": 2.733255955634786e-06, "loss": 0.0007, "step": 167480 }, { "epoch": 1.3696692153575665, "grad_norm": 0.0012865832541137934, "learning_rate": 2.7326198950301554e-06, "loss": 0.0005, "step": 167490 }, { "epoch": 1.3697509915361654, "grad_norm": 0.08692555874586105, "learning_rate": 2.7319838806138984e-06, "loss": 0.0005, "step": 167500 }, { "epoch": 1.3698327677147648, "grad_norm": 0.01579260267317295, "learning_rate": 2.731347912398975e-06, "loss": 0.0005, "step": 167510 }, { "epoch": 1.3699145438933638, "grad_norm": 0.04285987466573715, "learning_rate": 2.7307119903983393e-06, "loss": 0.0011, "step": 167520 }, { "epoch": 1.3699963200719631, "grad_norm": 0.022128188982605934, "learning_rate": 2.730076114624944e-06, "loss": 0.0011, "step": 167530 }, { "epoch": 1.370078096250562, "grad_norm": 0.03149439021945, "learning_rate": 2.729440285091742e-06, "loss": 0.0005, "step": 167540 }, { "epoch": 1.3701598724291615, "grad_norm": 0.07701141387224197, "learning_rate": 2.7288045018116894e-06, "loss": 0.0004, "step": 167550 }, { "epoch": 1.3702416486077604, "grad_norm": 0.001591630163602531, "learning_rate": 2.728168764797735e-06, "loss": 0.0004, "step": 167560 }, { "epoch": 1.3703234247863598, "grad_norm": 0.05948513001203537, "learning_rate": 2.727533074062829e-06, "loss": 0.0008, "step": 167570 }, { "epoch": 1.3704052009649588, "grad_norm": 0.05027545616030693, "learning_rate": 2.7268974296199222e-06, "loss": 0.0018, "step": 167580 }, { "epoch": 1.3704869771435582, "grad_norm": 0.02050066366791725, "learning_rate": 2.7262618314819624e-06, "loss": 0.0005, "step": 167590 }, { "epoch": 1.3705687533221573, "grad_norm": 0.02896595187485218, "learning_rate": 2.7256262796618965e-06, "loss": 0.0016, "step": 167600 }, { "epoch": 1.3706505295007565, "grad_norm": 0.03747636452317238, "learning_rate": 2.7249907741726723e-06, "loss": 0.0007, "step": 167610 }, { "epoch": 1.3707323056793557, "grad_norm": 0.051117051392793655, "learning_rate": 2.7243553150272333e-06, "loss": 0.0006, "step": 167620 }, { "epoch": 1.3708140818579548, "grad_norm": 0.13730984926223755, "learning_rate": 2.7237199022385286e-06, "loss": 0.0005, "step": 167630 }, { "epoch": 1.370895858036554, "grad_norm": 0.019178297370672226, "learning_rate": 2.723084535819499e-06, "loss": 0.0009, "step": 167640 }, { "epoch": 1.3709776342151532, "grad_norm": 0.012909014709293842, "learning_rate": 2.7224492157830886e-06, "loss": 0.0007, "step": 167650 }, { "epoch": 1.3710594103937523, "grad_norm": 0.018401918932795525, "learning_rate": 2.7218139421422375e-06, "loss": 0.0008, "step": 167660 }, { "epoch": 1.3711411865723515, "grad_norm": 0.004132880829274654, "learning_rate": 2.721178714909889e-06, "loss": 0.0005, "step": 167670 }, { "epoch": 1.3712229627509507, "grad_norm": 0.028675546869635582, "learning_rate": 2.720543534098983e-06, "loss": 0.001, "step": 167680 }, { "epoch": 1.3713047389295498, "grad_norm": 0.076117143034935, "learning_rate": 2.7199083997224586e-06, "loss": 0.0006, "step": 167690 }, { "epoch": 1.371386515108149, "grad_norm": 0.013448584824800491, "learning_rate": 2.7192733117932525e-06, "loss": 0.0007, "step": 167700 }, { "epoch": 1.3714682912867482, "grad_norm": 0.004711383022367954, "learning_rate": 2.7186382703243037e-06, "loss": 0.002, "step": 167710 }, { "epoch": 1.3715500674653474, "grad_norm": 0.01682247593998909, "learning_rate": 2.7180032753285473e-06, "loss": 0.0008, "step": 167720 }, { "epoch": 1.3716318436439465, "grad_norm": 0.021629713475704193, "learning_rate": 2.7173683268189188e-06, "loss": 0.0005, "step": 167730 }, { "epoch": 1.3717136198225457, "grad_norm": 0.03962108865380287, "learning_rate": 2.7167334248083514e-06, "loss": 0.0006, "step": 167740 }, { "epoch": 1.3717953960011449, "grad_norm": 0.03250819444656372, "learning_rate": 2.716098569309782e-06, "loss": 0.0005, "step": 167750 }, { "epoch": 1.371877172179744, "grad_norm": 0.010556383058428764, "learning_rate": 2.715463760336141e-06, "loss": 0.001, "step": 167760 }, { "epoch": 1.3719589483583432, "grad_norm": 0.05589265003800392, "learning_rate": 2.7148289979003607e-06, "loss": 0.0007, "step": 167770 }, { "epoch": 1.3720407245369424, "grad_norm": 0.05099781975150108, "learning_rate": 2.714194282015369e-06, "loss": 0.0005, "step": 167780 }, { "epoch": 1.3721225007155415, "grad_norm": 0.08907698094844818, "learning_rate": 2.713559612694099e-06, "loss": 0.0008, "step": 167790 }, { "epoch": 1.3722042768941407, "grad_norm": 0.026061661541461945, "learning_rate": 2.7129249899494793e-06, "loss": 0.0009, "step": 167800 }, { "epoch": 1.3722860530727399, "grad_norm": 0.007941474206745625, "learning_rate": 2.7122904137944357e-06, "loss": 0.0008, "step": 167810 }, { "epoch": 1.372367829251339, "grad_norm": 0.03792097046971321, "learning_rate": 2.711655884241896e-06, "loss": 0.0008, "step": 167820 }, { "epoch": 1.3724496054299382, "grad_norm": 0.04324707016348839, "learning_rate": 2.7110214013047866e-06, "loss": 0.0008, "step": 167830 }, { "epoch": 1.3725313816085374, "grad_norm": 0.009334707632660866, "learning_rate": 2.710386964996031e-06, "loss": 0.0009, "step": 167840 }, { "epoch": 1.3726131577871366, "grad_norm": 0.025670833885669708, "learning_rate": 2.7097525753285547e-06, "loss": 0.0007, "step": 167850 }, { "epoch": 1.3726949339657357, "grad_norm": 0.024056222289800644, "learning_rate": 2.709118232315278e-06, "loss": 0.0012, "step": 167860 }, { "epoch": 1.3727767101443349, "grad_norm": 0.014100649394094944, "learning_rate": 2.708483935969127e-06, "loss": 0.0004, "step": 167870 }, { "epoch": 1.372858486322934, "grad_norm": 0.03931988775730133, "learning_rate": 2.7078496863030207e-06, "loss": 0.0007, "step": 167880 }, { "epoch": 1.3729402625015332, "grad_norm": 0.035056471824645996, "learning_rate": 2.707215483329879e-06, "loss": 0.0005, "step": 167890 }, { "epoch": 1.3730220386801324, "grad_norm": 0.11644256114959717, "learning_rate": 2.7065813270626205e-06, "loss": 0.0005, "step": 167900 }, { "epoch": 1.3731038148587316, "grad_norm": 0.030526451766490936, "learning_rate": 2.705947217514166e-06, "loss": 0.0012, "step": 167910 }, { "epoch": 1.373185591037331, "grad_norm": 0.07074502110481262, "learning_rate": 2.7053131546974315e-06, "loss": 0.001, "step": 167920 }, { "epoch": 1.37326736721593, "grad_norm": 0.04820237308740616, "learning_rate": 2.7046791386253336e-06, "loss": 0.0012, "step": 167930 }, { "epoch": 1.3733491433945293, "grad_norm": 0.034373652189970016, "learning_rate": 2.7040451693107867e-06, "loss": 0.001, "step": 167940 }, { "epoch": 1.3734309195731282, "grad_norm": 0.03179178759455681, "learning_rate": 2.703411246766706e-06, "loss": 0.0024, "step": 167950 }, { "epoch": 1.3735126957517276, "grad_norm": 0.03651341795921326, "learning_rate": 2.702777371006006e-06, "loss": 0.0008, "step": 167960 }, { "epoch": 1.3735944719303266, "grad_norm": 0.047744251787662506, "learning_rate": 2.702143542041597e-06, "loss": 0.0008, "step": 167970 }, { "epoch": 1.373676248108926, "grad_norm": 0.04492897167801857, "learning_rate": 2.7015097598863906e-06, "loss": 0.0012, "step": 167980 }, { "epoch": 1.373758024287525, "grad_norm": 0.06871497631072998, "learning_rate": 2.7008760245533e-06, "loss": 0.0007, "step": 167990 }, { "epoch": 1.3738398004661243, "grad_norm": 0.006740089040249586, "learning_rate": 2.7002423360552344e-06, "loss": 0.0009, "step": 168000 }, { "epoch": 1.3739215766447235, "grad_norm": 0.0039803399704396725, "learning_rate": 2.6996086944051015e-06, "loss": 0.0005, "step": 168010 }, { "epoch": 1.3740033528233226, "grad_norm": 0.020702850073575974, "learning_rate": 2.6989750996158072e-06, "loss": 0.0008, "step": 168020 }, { "epoch": 1.3740851290019218, "grad_norm": 0.02225390449166298, "learning_rate": 2.698341551700262e-06, "loss": 0.0009, "step": 168030 }, { "epoch": 1.374166905180521, "grad_norm": 0.004426086787134409, "learning_rate": 2.697708050671371e-06, "loss": 0.0008, "step": 168040 }, { "epoch": 1.3742486813591201, "grad_norm": 0.1331426501274109, "learning_rate": 2.697074596542038e-06, "loss": 0.0017, "step": 168050 }, { "epoch": 1.3743304575377193, "grad_norm": 0.03560319170355797, "learning_rate": 2.6964411893251674e-06, "loss": 0.0007, "step": 168060 }, { "epoch": 1.3744122337163185, "grad_norm": 0.001196139259263873, "learning_rate": 2.6958078290336624e-06, "loss": 0.0007, "step": 168070 }, { "epoch": 1.3744940098949177, "grad_norm": 0.059236787259578705, "learning_rate": 2.695174515680425e-06, "loss": 0.001, "step": 168080 }, { "epoch": 1.3745757860735168, "grad_norm": 0.03061237744987011, "learning_rate": 2.6945412492783564e-06, "loss": 0.0007, "step": 168090 }, { "epoch": 1.374657562252116, "grad_norm": 0.08207716047763824, "learning_rate": 2.6939080298403536e-06, "loss": 0.0009, "step": 168100 }, { "epoch": 1.3747393384307152, "grad_norm": 0.0465933121740818, "learning_rate": 2.693274857379321e-06, "loss": 0.002, "step": 168110 }, { "epoch": 1.3748211146093143, "grad_norm": 0.0223739892244339, "learning_rate": 2.692641731908155e-06, "loss": 0.0005, "step": 168120 }, { "epoch": 1.3749028907879135, "grad_norm": 0.03127234801650047, "learning_rate": 2.692008653439752e-06, "loss": 0.0006, "step": 168130 }, { "epoch": 1.3749846669665127, "grad_norm": 0.027558494359254837, "learning_rate": 2.691375621987007e-06, "loss": 0.0011, "step": 168140 }, { "epoch": 1.3750664431451118, "grad_norm": 0.030883777886629105, "learning_rate": 2.6907426375628194e-06, "loss": 0.0006, "step": 168150 }, { "epoch": 1.375148219323711, "grad_norm": 0.03508972376585007, "learning_rate": 2.6901097001800812e-06, "loss": 0.0012, "step": 168160 }, { "epoch": 1.3752299955023102, "grad_norm": 0.028932034969329834, "learning_rate": 2.6894768098516856e-06, "loss": 0.001, "step": 168170 }, { "epoch": 1.3753117716809093, "grad_norm": 0.025955528020858765, "learning_rate": 2.688843966590526e-06, "loss": 0.0011, "step": 168180 }, { "epoch": 1.3753935478595085, "grad_norm": 0.05604095011949539, "learning_rate": 2.6882111704094926e-06, "loss": 0.0009, "step": 168190 }, { "epoch": 1.3754753240381077, "grad_norm": 0.055011190474033356, "learning_rate": 2.6875784213214772e-06, "loss": 0.0012, "step": 168200 }, { "epoch": 1.3755571002167069, "grad_norm": 0.013969499617815018, "learning_rate": 2.686945719339369e-06, "loss": 0.001, "step": 168210 }, { "epoch": 1.375638876395306, "grad_norm": 0.04182858392596245, "learning_rate": 2.686313064476055e-06, "loss": 0.0014, "step": 168220 }, { "epoch": 1.3757206525739052, "grad_norm": 0.0954054594039917, "learning_rate": 2.685680456744426e-06, "loss": 0.0011, "step": 168230 }, { "epoch": 1.3758024287525044, "grad_norm": 0.0289116520434618, "learning_rate": 2.685047896157368e-06, "loss": 0.0006, "step": 168240 }, { "epoch": 1.3758842049311035, "grad_norm": 0.021260054782032967, "learning_rate": 2.6844153827277654e-06, "loss": 0.0004, "step": 168250 }, { "epoch": 1.3759659811097027, "grad_norm": 0.012857104651629925, "learning_rate": 2.6837829164685014e-06, "loss": 0.0011, "step": 168260 }, { "epoch": 1.3760477572883019, "grad_norm": 0.029516667127609253, "learning_rate": 2.683150497392465e-06, "loss": 0.0032, "step": 168270 }, { "epoch": 1.376129533466901, "grad_norm": 0.017875386402010918, "learning_rate": 2.6825181255125355e-06, "loss": 0.0008, "step": 168280 }, { "epoch": 1.3762113096455002, "grad_norm": 0.014934654347598553, "learning_rate": 2.681885800841596e-06, "loss": 0.0005, "step": 168290 }, { "epoch": 1.3762930858240994, "grad_norm": 0.07580383121967316, "learning_rate": 2.681253523392526e-06, "loss": 0.001, "step": 168300 }, { "epoch": 1.3763748620026985, "grad_norm": 0.007838163524866104, "learning_rate": 2.6806212931782075e-06, "loss": 0.0006, "step": 168310 }, { "epoch": 1.3764566381812977, "grad_norm": 0.00810608547180891, "learning_rate": 2.6799891102115184e-06, "loss": 0.0005, "step": 168320 }, { "epoch": 1.376538414359897, "grad_norm": 0.04395807906985283, "learning_rate": 2.679356974505337e-06, "loss": 0.0012, "step": 168330 }, { "epoch": 1.376620190538496, "grad_norm": 0.030773401260375977, "learning_rate": 2.678724886072538e-06, "loss": 0.0011, "step": 168340 }, { "epoch": 1.3767019667170954, "grad_norm": 0.07847330719232559, "learning_rate": 2.6780928449260024e-06, "loss": 0.001, "step": 168350 }, { "epoch": 1.3767837428956944, "grad_norm": 0.0018448070622980595, "learning_rate": 2.6774608510786023e-06, "loss": 0.0004, "step": 168360 }, { "epoch": 1.3768655190742938, "grad_norm": 0.0411444827914238, "learning_rate": 2.6768289045432126e-06, "loss": 0.0009, "step": 168370 }, { "epoch": 1.3769472952528927, "grad_norm": 0.01095134299248457, "learning_rate": 2.6761970053327047e-06, "loss": 0.0028, "step": 168380 }, { "epoch": 1.3770290714314921, "grad_norm": 0.04031096398830414, "learning_rate": 2.6755651534599545e-06, "loss": 0.0006, "step": 168390 }, { "epoch": 1.377110847610091, "grad_norm": 0.05531308427453041, "learning_rate": 2.6749333489378313e-06, "loss": 0.0018, "step": 168400 }, { "epoch": 1.3771926237886905, "grad_norm": 0.059345535933971405, "learning_rate": 2.6743015917792063e-06, "loss": 0.0005, "step": 168410 }, { "epoch": 1.3772743999672894, "grad_norm": 0.012540043331682682, "learning_rate": 2.6736698819969483e-06, "loss": 0.001, "step": 168420 }, { "epoch": 1.3773561761458888, "grad_norm": 0.051611561328172684, "learning_rate": 2.673038219603925e-06, "loss": 0.0008, "step": 168430 }, { "epoch": 1.377437952324488, "grad_norm": 0.027964860200881958, "learning_rate": 2.672406604613005e-06, "loss": 0.0007, "step": 168440 }, { "epoch": 1.3775197285030871, "grad_norm": 0.01410621777176857, "learning_rate": 2.6717750370370547e-06, "loss": 0.001, "step": 168450 }, { "epoch": 1.3776015046816863, "grad_norm": 0.024927208200097084, "learning_rate": 2.671143516888938e-06, "loss": 0.0008, "step": 168460 }, { "epoch": 1.3776832808602855, "grad_norm": 0.04534343630075455, "learning_rate": 2.6705120441815214e-06, "loss": 0.0011, "step": 168470 }, { "epoch": 1.3777650570388846, "grad_norm": 0.018059397116303444, "learning_rate": 2.6698806189276686e-06, "loss": 0.0009, "step": 168480 }, { "epoch": 1.3778468332174838, "grad_norm": 0.00687803141772747, "learning_rate": 2.6692492411402416e-06, "loss": 0.0008, "step": 168490 }, { "epoch": 1.377928609396083, "grad_norm": 0.03749596327543259, "learning_rate": 2.6686179108321003e-06, "loss": 0.0009, "step": 168500 }, { "epoch": 1.3780103855746821, "grad_norm": 0.007954475469887257, "learning_rate": 2.667986628016109e-06, "loss": 0.0004, "step": 168510 }, { "epoch": 1.3780921617532813, "grad_norm": 0.030046887695789337, "learning_rate": 2.6673553927051253e-06, "loss": 0.0008, "step": 168520 }, { "epoch": 1.3781739379318805, "grad_norm": 0.056700076907873154, "learning_rate": 2.666724204912009e-06, "loss": 0.0008, "step": 168530 }, { "epoch": 1.3782557141104796, "grad_norm": 0.04118311405181885, "learning_rate": 2.666093064649617e-06, "loss": 0.0007, "step": 168540 }, { "epoch": 1.3783374902890788, "grad_norm": 0.04969441145658493, "learning_rate": 2.665461971930806e-06, "loss": 0.0008, "step": 168550 }, { "epoch": 1.378419266467678, "grad_norm": 0.0030564661137759686, "learning_rate": 2.6648309267684326e-06, "loss": 0.0008, "step": 168560 }, { "epoch": 1.3785010426462772, "grad_norm": 0.13656064867973328, "learning_rate": 2.6641999291753514e-06, "loss": 0.0011, "step": 168570 }, { "epoch": 1.3785828188248763, "grad_norm": 0.02152085304260254, "learning_rate": 2.6635689791644138e-06, "loss": 0.0004, "step": 168580 }, { "epoch": 1.3786645950034755, "grad_norm": 0.03607060760259628, "learning_rate": 2.662938076748478e-06, "loss": 0.0007, "step": 168590 }, { "epoch": 1.3787463711820747, "grad_norm": 0.03130153939127922, "learning_rate": 2.6623072219403922e-06, "loss": 0.0008, "step": 168600 }, { "epoch": 1.3788281473606738, "grad_norm": 0.08465393632650375, "learning_rate": 2.6616764147530094e-06, "loss": 0.0012, "step": 168610 }, { "epoch": 1.378909923539273, "grad_norm": 0.01403503306210041, "learning_rate": 2.6610456551991766e-06, "loss": 0.0007, "step": 168620 }, { "epoch": 1.3789916997178722, "grad_norm": 0.01803109608590603, "learning_rate": 2.6604149432917466e-06, "loss": 0.0008, "step": 168630 }, { "epoch": 1.3790734758964713, "grad_norm": 0.05588684603571892, "learning_rate": 2.6597842790435656e-06, "loss": 0.0009, "step": 168640 }, { "epoch": 1.3791552520750705, "grad_norm": 0.07598719000816345, "learning_rate": 2.659153662467482e-06, "loss": 0.0005, "step": 168650 }, { "epoch": 1.3792370282536697, "grad_norm": 0.060577090829610825, "learning_rate": 2.6585230935763405e-06, "loss": 0.0009, "step": 168660 }, { "epoch": 1.3793188044322688, "grad_norm": 0.05712449178099632, "learning_rate": 2.657892572382987e-06, "loss": 0.0008, "step": 168670 }, { "epoch": 1.379400580610868, "grad_norm": 0.01717958226799965, "learning_rate": 2.657262098900265e-06, "loss": 0.0008, "step": 168680 }, { "epoch": 1.3794823567894672, "grad_norm": 0.022755229845643044, "learning_rate": 2.6566316731410193e-06, "loss": 0.0006, "step": 168690 }, { "epoch": 1.3795641329680663, "grad_norm": 0.03225482627749443, "learning_rate": 2.65600129511809e-06, "loss": 0.0011, "step": 168700 }, { "epoch": 1.3796459091466655, "grad_norm": 0.02300264686346054, "learning_rate": 2.6553709648443188e-06, "loss": 0.0011, "step": 168710 }, { "epoch": 1.3797276853252647, "grad_norm": 0.011347700841724873, "learning_rate": 2.6547406823325483e-06, "loss": 0.0007, "step": 168720 }, { "epoch": 1.3798094615038639, "grad_norm": 0.0380985252559185, "learning_rate": 2.654110447595617e-06, "loss": 0.0006, "step": 168730 }, { "epoch": 1.379891237682463, "grad_norm": 0.0012866539182141423, "learning_rate": 2.6534802606463616e-06, "loss": 0.0007, "step": 168740 }, { "epoch": 1.3799730138610622, "grad_norm": 0.025164857506752014, "learning_rate": 2.65285012149762e-06, "loss": 0.0011, "step": 168750 }, { "epoch": 1.3800547900396616, "grad_norm": 0.015665119513869286, "learning_rate": 2.6522200301622303e-06, "loss": 0.0011, "step": 168760 }, { "epoch": 1.3801365662182605, "grad_norm": 0.019816521555185318, "learning_rate": 2.651589986653027e-06, "loss": 0.0006, "step": 168770 }, { "epoch": 1.38021834239686, "grad_norm": 0.006584859918802977, "learning_rate": 2.650959990982844e-06, "loss": 0.0006, "step": 168780 }, { "epoch": 1.3803001185754589, "grad_norm": 0.030848268419504166, "learning_rate": 2.6503300431645168e-06, "loss": 0.0011, "step": 168790 }, { "epoch": 1.3803818947540583, "grad_norm": 0.004499126225709915, "learning_rate": 2.649700143210876e-06, "loss": 0.0005, "step": 168800 }, { "epoch": 1.3804636709326572, "grad_norm": 0.07231569290161133, "learning_rate": 2.6490702911347533e-06, "loss": 0.0011, "step": 168810 }, { "epoch": 1.3805454471112566, "grad_norm": 0.03750612214207649, "learning_rate": 2.6484404869489797e-06, "loss": 0.0016, "step": 168820 }, { "epoch": 1.3806272232898555, "grad_norm": 0.030259178951382637, "learning_rate": 2.6478107306663837e-06, "loss": 0.0014, "step": 168830 }, { "epoch": 1.380708999468455, "grad_norm": 0.07714596390724182, "learning_rate": 2.647181022299796e-06, "loss": 0.0011, "step": 168840 }, { "epoch": 1.3807907756470539, "grad_norm": 0.009743746370077133, "learning_rate": 2.6465513618620443e-06, "loss": 0.0004, "step": 168850 }, { "epoch": 1.3808725518256533, "grad_norm": 0.02451174333691597, "learning_rate": 2.6459217493659538e-06, "loss": 0.0008, "step": 168860 }, { "epoch": 1.3809543280042524, "grad_norm": 0.016103563830256462, "learning_rate": 2.6452921848243485e-06, "loss": 0.0007, "step": 168870 }, { "epoch": 1.3810361041828516, "grad_norm": 0.07135401666164398, "learning_rate": 2.6446626682500576e-06, "loss": 0.0018, "step": 168880 }, { "epoch": 1.3811178803614508, "grad_norm": 0.04169461503624916, "learning_rate": 2.644033199655903e-06, "loss": 0.0009, "step": 168890 }, { "epoch": 1.38119965654005, "grad_norm": 0.041136302053928375, "learning_rate": 2.6434037790547067e-06, "loss": 0.0007, "step": 168900 }, { "epoch": 1.3812814327186491, "grad_norm": 0.12116795778274536, "learning_rate": 2.6427744064592914e-06, "loss": 0.0006, "step": 168910 }, { "epoch": 1.3813632088972483, "grad_norm": 0.050954654812812805, "learning_rate": 2.6421450818824766e-06, "loss": 0.0012, "step": 168920 }, { "epoch": 1.3814449850758475, "grad_norm": 0.07197967171669006, "learning_rate": 2.6415158053370845e-06, "loss": 0.0011, "step": 168930 }, { "epoch": 1.3815267612544466, "grad_norm": 0.06320711970329285, "learning_rate": 2.6408865768359317e-06, "loss": 0.0013, "step": 168940 }, { "epoch": 1.3816085374330458, "grad_norm": 0.039557620882987976, "learning_rate": 2.6402573963918345e-06, "loss": 0.0007, "step": 168950 }, { "epoch": 1.381690313611645, "grad_norm": 0.03943333029747009, "learning_rate": 2.639628264017615e-06, "loss": 0.0011, "step": 168960 }, { "epoch": 1.3817720897902441, "grad_norm": 0.006621513515710831, "learning_rate": 2.638999179726086e-06, "loss": 0.0004, "step": 168970 }, { "epoch": 1.3818538659688433, "grad_norm": 0.0410003699362278, "learning_rate": 2.6383701435300624e-06, "loss": 0.0008, "step": 168980 }, { "epoch": 1.3819356421474425, "grad_norm": 0.10130149871110916, "learning_rate": 2.637741155442357e-06, "loss": 0.001, "step": 168990 }, { "epoch": 1.3820174183260416, "grad_norm": 0.02210984379053116, "learning_rate": 2.6371122154757865e-06, "loss": 0.0011, "step": 169000 }, { "epoch": 1.3820991945046408, "grad_norm": 0.03512110561132431, "learning_rate": 2.636483323643161e-06, "loss": 0.0006, "step": 169010 }, { "epoch": 1.38218097068324, "grad_norm": 0.013825451955199242, "learning_rate": 2.6358544799572914e-06, "loss": 0.0006, "step": 169020 }, { "epoch": 1.3822627468618391, "grad_norm": 0.013503522612154484, "learning_rate": 2.6352256844309875e-06, "loss": 0.0014, "step": 169030 }, { "epoch": 1.3823445230404383, "grad_norm": 0.06225898861885071, "learning_rate": 2.6345969370770583e-06, "loss": 0.0007, "step": 169040 }, { "epoch": 1.3824262992190375, "grad_norm": 0.0381816104054451, "learning_rate": 2.6339682379083124e-06, "loss": 0.0005, "step": 169050 }, { "epoch": 1.3825080753976366, "grad_norm": 0.016785847023129463, "learning_rate": 2.6333395869375567e-06, "loss": 0.0005, "step": 169060 }, { "epoch": 1.3825898515762358, "grad_norm": 0.06880509853363037, "learning_rate": 2.6327109841775955e-06, "loss": 0.001, "step": 169070 }, { "epoch": 1.382671627754835, "grad_norm": 0.011607474647462368, "learning_rate": 2.6320824296412377e-06, "loss": 0.001, "step": 169080 }, { "epoch": 1.3827534039334342, "grad_norm": 0.031141871586441994, "learning_rate": 2.6314539233412856e-06, "loss": 0.0005, "step": 169090 }, { "epoch": 1.3828351801120333, "grad_norm": 0.013806197792291641, "learning_rate": 2.630825465290542e-06, "loss": 0.0008, "step": 169100 }, { "epoch": 1.3829169562906325, "grad_norm": 0.003619101596996188, "learning_rate": 2.630197055501808e-06, "loss": 0.0007, "step": 169110 }, { "epoch": 1.3829987324692317, "grad_norm": 0.0297411922365427, "learning_rate": 2.6295686939878878e-06, "loss": 0.0008, "step": 169120 }, { "epoch": 1.3830805086478308, "grad_norm": 0.053646087646484375, "learning_rate": 2.6289403807615803e-06, "loss": 0.0007, "step": 169130 }, { "epoch": 1.38316228482643, "grad_norm": 0.08194009214639664, "learning_rate": 2.6283121158356843e-06, "loss": 0.001, "step": 169140 }, { "epoch": 1.3832440610050292, "grad_norm": 0.03215866908431053, "learning_rate": 2.6276838992229988e-06, "loss": 0.0009, "step": 169150 }, { "epoch": 1.3833258371836283, "grad_norm": 0.003839881392195821, "learning_rate": 2.62705573093632e-06, "loss": 0.0006, "step": 169160 }, { "epoch": 1.3834076133622275, "grad_norm": 0.010568462312221527, "learning_rate": 2.626427610988445e-06, "loss": 0.001, "step": 169170 }, { "epoch": 1.3834893895408267, "grad_norm": 0.022405648604035378, "learning_rate": 2.6257995393921687e-06, "loss": 0.0008, "step": 169180 }, { "epoch": 1.383571165719426, "grad_norm": 0.020421499386429787, "learning_rate": 2.6251715161602844e-06, "loss": 0.001, "step": 169190 }, { "epoch": 1.383652941898025, "grad_norm": 0.05330774188041687, "learning_rate": 2.624543541305588e-06, "loss": 0.0007, "step": 169200 }, { "epoch": 1.3837347180766244, "grad_norm": 0.027118723839521408, "learning_rate": 2.6239156148408704e-06, "loss": 0.0013, "step": 169210 }, { "epoch": 1.3838164942552234, "grad_norm": 0.027740739285945892, "learning_rate": 2.6232877367789234e-06, "loss": 0.0006, "step": 169220 }, { "epoch": 1.3838982704338227, "grad_norm": 0.007504796143621206, "learning_rate": 2.6226599071325355e-06, "loss": 0.0009, "step": 169230 }, { "epoch": 1.3839800466124217, "grad_norm": 0.008627229370176792, "learning_rate": 2.6220321259144987e-06, "loss": 0.0017, "step": 169240 }, { "epoch": 1.384061822791021, "grad_norm": 0.05497477203607559, "learning_rate": 2.621404393137601e-06, "loss": 0.0009, "step": 169250 }, { "epoch": 1.38414359896962, "grad_norm": 0.02139524184167385, "learning_rate": 2.6207767088146295e-06, "loss": 0.0011, "step": 169260 }, { "epoch": 1.3842253751482194, "grad_norm": 0.019470810890197754, "learning_rate": 2.62014907295837e-06, "loss": 0.0011, "step": 169270 }, { "epoch": 1.3843071513268184, "grad_norm": 0.024447612464427948, "learning_rate": 2.619521485581609e-06, "loss": 0.0007, "step": 169280 }, { "epoch": 1.3843889275054178, "grad_norm": 0.02363511547446251, "learning_rate": 2.618893946697129e-06, "loss": 0.0004, "step": 169290 }, { "epoch": 1.384470703684017, "grad_norm": 0.0042250188998878, "learning_rate": 2.6182664563177156e-06, "loss": 0.001, "step": 169300 }, { "epoch": 1.384552479862616, "grad_norm": 0.07259685546159744, "learning_rate": 2.617639014456148e-06, "loss": 0.0009, "step": 169310 }, { "epoch": 1.3846342560412153, "grad_norm": 0.0015190730337053537, "learning_rate": 2.617011621125213e-06, "loss": 0.0006, "step": 169320 }, { "epoch": 1.3847160322198144, "grad_norm": 0.02350107580423355, "learning_rate": 2.616384276337688e-06, "loss": 0.0009, "step": 169330 }, { "epoch": 1.3847978083984136, "grad_norm": 0.1084207072854042, "learning_rate": 2.615756980106352e-06, "loss": 0.0022, "step": 169340 }, { "epoch": 1.3848795845770128, "grad_norm": 0.015896718949079514, "learning_rate": 2.615129732443983e-06, "loss": 0.0006, "step": 169350 }, { "epoch": 1.384961360755612, "grad_norm": 0.020346224308013916, "learning_rate": 2.6145025333633618e-06, "loss": 0.0007, "step": 169360 }, { "epoch": 1.385043136934211, "grad_norm": 0.06709977984428406, "learning_rate": 2.6138753828772626e-06, "loss": 0.0007, "step": 169370 }, { "epoch": 1.3851249131128103, "grad_norm": 0.038026563823223114, "learning_rate": 2.6132482809984617e-06, "loss": 0.0005, "step": 169380 }, { "epoch": 1.3852066892914094, "grad_norm": 0.1035701334476471, "learning_rate": 2.6126212277397336e-06, "loss": 0.0017, "step": 169390 }, { "epoch": 1.3852884654700086, "grad_norm": 0.13053888082504272, "learning_rate": 2.611994223113852e-06, "loss": 0.0016, "step": 169400 }, { "epoch": 1.3853702416486078, "grad_norm": 0.01954558491706848, "learning_rate": 2.6113672671335877e-06, "loss": 0.0009, "step": 169410 }, { "epoch": 1.385452017827207, "grad_norm": 0.031116575002670288, "learning_rate": 2.6107403598117153e-06, "loss": 0.0009, "step": 169420 }, { "epoch": 1.3855337940058061, "grad_norm": 0.006037144921720028, "learning_rate": 2.6101135011610014e-06, "loss": 0.001, "step": 169430 }, { "epoch": 1.3856155701844053, "grad_norm": 0.040795180946588516, "learning_rate": 2.6094866911942194e-06, "loss": 0.0009, "step": 169440 }, { "epoch": 1.3856973463630045, "grad_norm": 0.011416531167924404, "learning_rate": 2.608859929924137e-06, "loss": 0.0005, "step": 169450 }, { "epoch": 1.3857791225416036, "grad_norm": 0.03222140297293663, "learning_rate": 2.6082332173635215e-06, "loss": 0.0004, "step": 169460 }, { "epoch": 1.3858608987202028, "grad_norm": 0.03314128518104553, "learning_rate": 2.6076065535251377e-06, "loss": 0.0004, "step": 169470 }, { "epoch": 1.385942674898802, "grad_norm": 0.024144692346453667, "learning_rate": 2.6069799384217547e-06, "loss": 0.0006, "step": 169480 }, { "epoch": 1.3860244510774011, "grad_norm": 0.08033142238855362, "learning_rate": 2.606353372066136e-06, "loss": 0.0003, "step": 169490 }, { "epoch": 1.3861062272560003, "grad_norm": 0.027488132938742638, "learning_rate": 2.6057268544710444e-06, "loss": 0.0005, "step": 169500 }, { "epoch": 1.3861880034345995, "grad_norm": 0.020510399714112282, "learning_rate": 2.605100385649243e-06, "loss": 0.0006, "step": 169510 }, { "epoch": 1.3862697796131986, "grad_norm": 0.014277461916208267, "learning_rate": 2.6044739656134934e-06, "loss": 0.0006, "step": 169520 }, { "epoch": 1.3863515557917978, "grad_norm": 0.03370159491896629, "learning_rate": 2.603847594376556e-06, "loss": 0.0003, "step": 169530 }, { "epoch": 1.386433331970397, "grad_norm": 0.02474387176334858, "learning_rate": 2.6032212719511906e-06, "loss": 0.0009, "step": 169540 }, { "epoch": 1.3865151081489961, "grad_norm": 0.0026016554329544306, "learning_rate": 2.602594998350155e-06, "loss": 0.0006, "step": 169550 }, { "epoch": 1.3865968843275953, "grad_norm": 0.005051490850746632, "learning_rate": 2.60196877358621e-06, "loss": 0.001, "step": 169560 }, { "epoch": 1.3866786605061945, "grad_norm": 0.03989580273628235, "learning_rate": 2.6013425976721094e-06, "loss": 0.0008, "step": 169570 }, { "epoch": 1.3867604366847937, "grad_norm": 0.06274783611297607, "learning_rate": 2.60071647062061e-06, "loss": 0.0019, "step": 169580 }, { "epoch": 1.3868422128633928, "grad_norm": 0.01469886489212513, "learning_rate": 2.6000903924444655e-06, "loss": 0.0011, "step": 169590 }, { "epoch": 1.386923989041992, "grad_norm": 0.009598297998309135, "learning_rate": 2.5994643631564307e-06, "loss": 0.0009, "step": 169600 }, { "epoch": 1.3870057652205912, "grad_norm": 0.04412408545613289, "learning_rate": 2.5988383827692593e-06, "loss": 0.0007, "step": 169610 }, { "epoch": 1.3870875413991905, "grad_norm": 0.05804939568042755, "learning_rate": 2.5982124512957012e-06, "loss": 0.0011, "step": 169620 }, { "epoch": 1.3871693175777895, "grad_norm": 0.027390407398343086, "learning_rate": 2.597586568748508e-06, "loss": 0.0008, "step": 169630 }, { "epoch": 1.3872510937563889, "grad_norm": 0.03249971568584442, "learning_rate": 2.596960735140429e-06, "loss": 0.0008, "step": 169640 }, { "epoch": 1.3873328699349878, "grad_norm": 0.04106226935982704, "learning_rate": 2.596334950484214e-06, "loss": 0.0011, "step": 169650 }, { "epoch": 1.3874146461135872, "grad_norm": 0.03483196347951889, "learning_rate": 2.5957092147926087e-06, "loss": 0.0005, "step": 169660 }, { "epoch": 1.3874964222921862, "grad_norm": 0.05761219933629036, "learning_rate": 2.5950835280783593e-06, "loss": 0.0008, "step": 169670 }, { "epoch": 1.3875781984707856, "grad_norm": 0.0011058544041588902, "learning_rate": 2.5944578903542154e-06, "loss": 0.0004, "step": 169680 }, { "epoch": 1.3876599746493845, "grad_norm": 0.008057442493736744, "learning_rate": 2.5938323016329198e-06, "loss": 0.0008, "step": 169690 }, { "epoch": 1.387741750827984, "grad_norm": 0.0018594452412799, "learning_rate": 2.5932067619272155e-06, "loss": 0.0006, "step": 169700 }, { "epoch": 1.3878235270065828, "grad_norm": 0.04214112088084221, "learning_rate": 2.5925812712498445e-06, "loss": 0.0007, "step": 169710 }, { "epoch": 1.3879053031851822, "grad_norm": 0.016965966671705246, "learning_rate": 2.591955829613552e-06, "loss": 0.0006, "step": 169720 }, { "epoch": 1.3879870793637814, "grad_norm": 0.22506041824817657, "learning_rate": 2.5913304370310754e-06, "loss": 0.0014, "step": 169730 }, { "epoch": 1.3880688555423806, "grad_norm": 0.11987290531396866, "learning_rate": 2.5907050935151563e-06, "loss": 0.0012, "step": 169740 }, { "epoch": 1.3881506317209797, "grad_norm": 0.058022964745759964, "learning_rate": 2.590079799078533e-06, "loss": 0.0008, "step": 169750 }, { "epoch": 1.388232407899579, "grad_norm": 0.13010965287685394, "learning_rate": 2.5894545537339428e-06, "loss": 0.001, "step": 169760 }, { "epoch": 1.388314184078178, "grad_norm": 0.030689360573887825, "learning_rate": 2.5888293574941224e-06, "loss": 0.0012, "step": 169770 }, { "epoch": 1.3883959602567773, "grad_norm": 0.03350318223237991, "learning_rate": 2.5882042103718084e-06, "loss": 0.0007, "step": 169780 }, { "epoch": 1.3884777364353764, "grad_norm": 0.040323108434677124, "learning_rate": 2.5875791123797334e-06, "loss": 0.0014, "step": 169790 }, { "epoch": 1.3885595126139756, "grad_norm": 0.022638345137238503, "learning_rate": 2.5869540635306347e-06, "loss": 0.0011, "step": 169800 }, { "epoch": 1.3886412887925748, "grad_norm": 0.02152782864868641, "learning_rate": 2.5863290638372423e-06, "loss": 0.0014, "step": 169810 }, { "epoch": 1.388723064971174, "grad_norm": 0.03908616304397583, "learning_rate": 2.5857041133122897e-06, "loss": 0.0008, "step": 169820 }, { "epoch": 1.388804841149773, "grad_norm": 0.1707538366317749, "learning_rate": 2.5850792119685044e-06, "loss": 0.0005, "step": 169830 }, { "epoch": 1.3888866173283723, "grad_norm": 0.05181536450982094, "learning_rate": 2.58445435981862e-06, "loss": 0.0007, "step": 169840 }, { "epoch": 1.3889683935069714, "grad_norm": 0.04582078754901886, "learning_rate": 2.5838295568753647e-06, "loss": 0.0009, "step": 169850 }, { "epoch": 1.3890501696855706, "grad_norm": 0.06204221397638321, "learning_rate": 2.5832048031514646e-06, "loss": 0.0004, "step": 169860 }, { "epoch": 1.3891319458641698, "grad_norm": 0.04368734359741211, "learning_rate": 2.5825800986596473e-06, "loss": 0.0008, "step": 169870 }, { "epoch": 1.389213722042769, "grad_norm": 0.001751984702423215, "learning_rate": 2.5819554434126383e-06, "loss": 0.0013, "step": 169880 }, { "epoch": 1.389295498221368, "grad_norm": 0.03408098965883255, "learning_rate": 2.5813308374231625e-06, "loss": 0.0009, "step": 169890 }, { "epoch": 1.3893772743999673, "grad_norm": 0.04325628653168678, "learning_rate": 2.580706280703944e-06, "loss": 0.0012, "step": 169900 }, { "epoch": 1.3894590505785664, "grad_norm": 0.052939705550670624, "learning_rate": 2.5800817732677017e-06, "loss": 0.0006, "step": 169910 }, { "epoch": 1.3895408267571656, "grad_norm": 0.13272012770175934, "learning_rate": 2.579457315127164e-06, "loss": 0.0012, "step": 169920 }, { "epoch": 1.3896226029357648, "grad_norm": 0.026658104732632637, "learning_rate": 2.5788329062950478e-06, "loss": 0.0009, "step": 169930 }, { "epoch": 1.389704379114364, "grad_norm": 0.0363055057823658, "learning_rate": 2.5782085467840734e-06, "loss": 0.0012, "step": 169940 }, { "epoch": 1.3897861552929631, "grad_norm": 0.03633525222539902, "learning_rate": 2.577584236606958e-06, "loss": 0.0008, "step": 169950 }, { "epoch": 1.3898679314715623, "grad_norm": 0.040780093520879745, "learning_rate": 2.576959975776422e-06, "loss": 0.0009, "step": 169960 }, { "epoch": 1.3899497076501615, "grad_norm": 0.002740077208727598, "learning_rate": 2.5763357643051812e-06, "loss": 0.0003, "step": 169970 }, { "epoch": 1.3900314838287606, "grad_norm": 0.12044597417116165, "learning_rate": 2.5757116022059503e-06, "loss": 0.0017, "step": 169980 }, { "epoch": 1.3901132600073598, "grad_norm": 0.010556467808783054, "learning_rate": 2.5750874894914455e-06, "loss": 0.0008, "step": 169990 }, { "epoch": 1.390195036185959, "grad_norm": 0.002257623942568898, "learning_rate": 2.5744634261743797e-06, "loss": 0.0006, "step": 170000 }, { "epoch": 1.3902768123645581, "grad_norm": 0.03707021474838257, "learning_rate": 2.5738394122674647e-06, "loss": 0.0007, "step": 170010 }, { "epoch": 1.3903585885431573, "grad_norm": 0.027233555912971497, "learning_rate": 2.5732154477834127e-06, "loss": 0.0012, "step": 170020 }, { "epoch": 1.3904403647217565, "grad_norm": 0.01669301465153694, "learning_rate": 2.572591532734934e-06, "loss": 0.0009, "step": 170030 }, { "epoch": 1.3905221409003556, "grad_norm": 0.04359278455376625, "learning_rate": 2.5719676671347395e-06, "loss": 0.0011, "step": 170040 }, { "epoch": 1.390603917078955, "grad_norm": 0.013440067879855633, "learning_rate": 2.571343850995538e-06, "loss": 0.001, "step": 170050 }, { "epoch": 1.390685693257554, "grad_norm": 0.034373898059129715, "learning_rate": 2.570720084330036e-06, "loss": 0.0007, "step": 170060 }, { "epoch": 1.3907674694361534, "grad_norm": 0.01445690356194973, "learning_rate": 2.570096367150938e-06, "loss": 0.0005, "step": 170070 }, { "epoch": 1.3908492456147523, "grad_norm": 0.0743071511387825, "learning_rate": 2.5694726994709553e-06, "loss": 0.0007, "step": 170080 }, { "epoch": 1.3909310217933517, "grad_norm": 0.0324191078543663, "learning_rate": 2.568849081302789e-06, "loss": 0.0012, "step": 170090 }, { "epoch": 1.3910127979719507, "grad_norm": 0.14899954199790955, "learning_rate": 2.568225512659143e-06, "loss": 0.0017, "step": 170100 }, { "epoch": 1.39109457415055, "grad_norm": 0.062070731073617935, "learning_rate": 2.5676019935527196e-06, "loss": 0.0015, "step": 170110 }, { "epoch": 1.391176350329149, "grad_norm": 0.0774560272693634, "learning_rate": 2.5669785239962207e-06, "loss": 0.001, "step": 170120 }, { "epoch": 1.3912581265077484, "grad_norm": 0.059935688972473145, "learning_rate": 2.566355104002347e-06, "loss": 0.0009, "step": 170130 }, { "epoch": 1.3913399026863473, "grad_norm": 0.041179075837135315, "learning_rate": 2.5657317335837985e-06, "loss": 0.0011, "step": 170140 }, { "epoch": 1.3914216788649467, "grad_norm": 0.0106895100325346, "learning_rate": 2.5651084127532734e-06, "loss": 0.0007, "step": 170150 }, { "epoch": 1.391503455043546, "grad_norm": 0.031729139387607574, "learning_rate": 2.5644851415234663e-06, "loss": 0.0007, "step": 170160 }, { "epoch": 1.391585231222145, "grad_norm": 0.03216487914323807, "learning_rate": 2.5638619199070793e-06, "loss": 0.0004, "step": 170170 }, { "epoch": 1.3916670074007442, "grad_norm": 0.10015790909528732, "learning_rate": 2.5632387479168042e-06, "loss": 0.0008, "step": 170180 }, { "epoch": 1.3917487835793434, "grad_norm": 0.013994148001074791, "learning_rate": 2.5626156255653365e-06, "loss": 0.0004, "step": 170190 }, { "epoch": 1.3918305597579426, "grad_norm": 0.016724275425076485, "learning_rate": 2.561992552865369e-06, "loss": 0.001, "step": 170200 }, { "epoch": 1.3919123359365417, "grad_norm": 0.07822653651237488, "learning_rate": 2.561369529829596e-06, "loss": 0.0008, "step": 170210 }, { "epoch": 1.391994112115141, "grad_norm": 0.027599003165960312, "learning_rate": 2.560746556470708e-06, "loss": 0.0005, "step": 170220 }, { "epoch": 1.39207588829374, "grad_norm": 0.032728422433137894, "learning_rate": 2.5601236328013957e-06, "loss": 0.0007, "step": 170230 }, { "epoch": 1.3921576644723392, "grad_norm": 0.03418101742863655, "learning_rate": 2.559500758834348e-06, "loss": 0.0006, "step": 170240 }, { "epoch": 1.3922394406509384, "grad_norm": 0.022582340985536575, "learning_rate": 2.5588779345822534e-06, "loss": 0.0006, "step": 170250 }, { "epoch": 1.3923212168295376, "grad_norm": 0.03461640328168869, "learning_rate": 2.5582551600578e-06, "loss": 0.0013, "step": 170260 }, { "epoch": 1.3924029930081367, "grad_norm": 0.003884807927533984, "learning_rate": 2.5576324352736737e-06, "loss": 0.0005, "step": 170270 }, { "epoch": 1.392484769186736, "grad_norm": 0.010639824904501438, "learning_rate": 2.5570097602425577e-06, "loss": 0.0005, "step": 170280 }, { "epoch": 1.392566545365335, "grad_norm": 0.02876795455813408, "learning_rate": 2.556387134977141e-06, "loss": 0.0005, "step": 170290 }, { "epoch": 1.3926483215439343, "grad_norm": 0.02154502272605896, "learning_rate": 2.555764559490104e-06, "loss": 0.0009, "step": 170300 }, { "epoch": 1.3927300977225334, "grad_norm": 0.028116244822740555, "learning_rate": 2.5551420337941296e-06, "loss": 0.0011, "step": 170310 }, { "epoch": 1.3928118739011326, "grad_norm": 0.0051157367415726185, "learning_rate": 2.5545195579018978e-06, "loss": 0.0006, "step": 170320 }, { "epoch": 1.3928936500797318, "grad_norm": 0.0009346058359369636, "learning_rate": 2.553897131826092e-06, "loss": 0.0013, "step": 170330 }, { "epoch": 1.392975426258331, "grad_norm": 0.048871733248233795, "learning_rate": 2.5532747555793902e-06, "loss": 0.0005, "step": 170340 }, { "epoch": 1.39305720243693, "grad_norm": 0.0105540556833148, "learning_rate": 2.5526524291744704e-06, "loss": 0.001, "step": 170350 }, { "epoch": 1.3931389786155293, "grad_norm": 0.023635009303689003, "learning_rate": 2.55203015262401e-06, "loss": 0.0006, "step": 170360 }, { "epoch": 1.3932207547941284, "grad_norm": 0.005160682834684849, "learning_rate": 2.551407925940685e-06, "loss": 0.0009, "step": 170370 }, { "epoch": 1.3933025309727276, "grad_norm": 0.040299247950315475, "learning_rate": 2.550785749137171e-06, "loss": 0.0011, "step": 170380 }, { "epoch": 1.3933843071513268, "grad_norm": 0.049091894179582596, "learning_rate": 2.550163622226142e-06, "loss": 0.001, "step": 170390 }, { "epoch": 1.393466083329926, "grad_norm": 0.03913465514779091, "learning_rate": 2.5495415452202694e-06, "loss": 0.0009, "step": 170400 }, { "epoch": 1.393547859508525, "grad_norm": 0.03575320541858673, "learning_rate": 2.548919518132229e-06, "loss": 0.0008, "step": 170410 }, { "epoch": 1.3936296356871243, "grad_norm": 0.027662722393870354, "learning_rate": 2.548297540974691e-06, "loss": 0.0005, "step": 170420 }, { "epoch": 1.3937114118657234, "grad_norm": 0.030054287984967232, "learning_rate": 2.547675613760325e-06, "loss": 0.0008, "step": 170430 }, { "epoch": 1.3937931880443226, "grad_norm": 0.05430605635046959, "learning_rate": 2.5470537365017976e-06, "loss": 0.0009, "step": 170440 }, { "epoch": 1.3938749642229218, "grad_norm": 0.05420469865202904, "learning_rate": 2.546431909211782e-06, "loss": 0.0007, "step": 170450 }, { "epoch": 1.393956740401521, "grad_norm": 0.11839805543422699, "learning_rate": 2.5458101319029416e-06, "loss": 0.0015, "step": 170460 }, { "epoch": 1.3940385165801201, "grad_norm": 0.005769736133515835, "learning_rate": 2.545188404587945e-06, "loss": 0.0007, "step": 170470 }, { "epoch": 1.3941202927587195, "grad_norm": 0.03450373560190201, "learning_rate": 2.544566727279455e-06, "loss": 0.0009, "step": 170480 }, { "epoch": 1.3942020689373185, "grad_norm": 0.0038030417636036873, "learning_rate": 2.543945099990137e-06, "loss": 0.0007, "step": 170490 }, { "epoch": 1.3942838451159179, "grad_norm": 0.022099651396274567, "learning_rate": 2.5433235227326537e-06, "loss": 0.0016, "step": 170500 }, { "epoch": 1.3943656212945168, "grad_norm": 0.0708225667476654, "learning_rate": 2.5427019955196674e-06, "loss": 0.0019, "step": 170510 }, { "epoch": 1.3944473974731162, "grad_norm": 0.06005106493830681, "learning_rate": 2.5420805183638363e-06, "loss": 0.0009, "step": 170520 }, { "epoch": 1.3945291736517151, "grad_norm": 0.04966645687818527, "learning_rate": 2.541459091277826e-06, "loss": 0.0006, "step": 170530 }, { "epoch": 1.3946109498303145, "grad_norm": 0.06582725048065186, "learning_rate": 2.5408377142742914e-06, "loss": 0.0006, "step": 170540 }, { "epoch": 1.3946927260089135, "grad_norm": 0.026994653046131134, "learning_rate": 2.5402163873658912e-06, "loss": 0.0007, "step": 170550 }, { "epoch": 1.3947745021875129, "grad_norm": 0.05993238091468811, "learning_rate": 2.5395951105652815e-06, "loss": 0.0013, "step": 170560 }, { "epoch": 1.394856278366112, "grad_norm": 0.043905921280384064, "learning_rate": 2.5389738838851206e-06, "loss": 0.0004, "step": 170570 }, { "epoch": 1.3949380545447112, "grad_norm": 0.11548218876123428, "learning_rate": 2.5383527073380633e-06, "loss": 0.0011, "step": 170580 }, { "epoch": 1.3950198307233104, "grad_norm": 0.08568145334720612, "learning_rate": 2.537731580936761e-06, "loss": 0.0013, "step": 170590 }, { "epoch": 1.3951016069019095, "grad_norm": 0.022003449499607086, "learning_rate": 2.537110504693868e-06, "loss": 0.0009, "step": 170600 }, { "epoch": 1.3951833830805087, "grad_norm": 0.010274735279381275, "learning_rate": 2.5364894786220364e-06, "loss": 0.0005, "step": 170610 }, { "epoch": 1.3952651592591079, "grad_norm": 0.007390561047941446, "learning_rate": 2.5358685027339164e-06, "loss": 0.0006, "step": 170620 }, { "epoch": 1.395346935437707, "grad_norm": 0.036453768610954285, "learning_rate": 2.535247577042158e-06, "loss": 0.0002, "step": 170630 }, { "epoch": 1.3954287116163062, "grad_norm": 0.011359071359038353, "learning_rate": 2.5346267015594077e-06, "loss": 0.0004, "step": 170640 }, { "epoch": 1.3955104877949054, "grad_norm": 0.07562439888715744, "learning_rate": 2.5340058762983177e-06, "loss": 0.0015, "step": 170650 }, { "epoch": 1.3955922639735046, "grad_norm": 0.0012498752912506461, "learning_rate": 2.5333851012715327e-06, "loss": 0.0005, "step": 170660 }, { "epoch": 1.3956740401521037, "grad_norm": 0.013446136377751827, "learning_rate": 2.5327643764916976e-06, "loss": 0.0011, "step": 170670 }, { "epoch": 1.395755816330703, "grad_norm": 0.0106374965980649, "learning_rate": 2.5321437019714556e-06, "loss": 0.001, "step": 170680 }, { "epoch": 1.395837592509302, "grad_norm": 0.019538812339305878, "learning_rate": 2.5315230777234546e-06, "loss": 0.0009, "step": 170690 }, { "epoch": 1.3959193686879012, "grad_norm": 0.04437660053372383, "learning_rate": 2.5309025037603354e-06, "loss": 0.0007, "step": 170700 }, { "epoch": 1.3960011448665004, "grad_norm": 0.05539178475737572, "learning_rate": 2.5302819800947385e-06, "loss": 0.0009, "step": 170710 }, { "epoch": 1.3960829210450996, "grad_norm": 0.1483447253704071, "learning_rate": 2.5296615067393056e-06, "loss": 0.0005, "step": 170720 }, { "epoch": 1.3961646972236987, "grad_norm": 0.03880321606993675, "learning_rate": 2.5290410837066753e-06, "loss": 0.0013, "step": 170730 }, { "epoch": 1.396246473402298, "grad_norm": 0.03692355751991272, "learning_rate": 2.5284207110094873e-06, "loss": 0.0024, "step": 170740 }, { "epoch": 1.396328249580897, "grad_norm": 0.053731970489025116, "learning_rate": 2.5278003886603773e-06, "loss": 0.0007, "step": 170750 }, { "epoch": 1.3964100257594962, "grad_norm": 0.01987350359559059, "learning_rate": 2.527180116671982e-06, "loss": 0.0005, "step": 170760 }, { "epoch": 1.3964918019380954, "grad_norm": 0.03451592102646828, "learning_rate": 2.5265598950569392e-06, "loss": 0.0017, "step": 170770 }, { "epoch": 1.3965735781166946, "grad_norm": 0.002126144478097558, "learning_rate": 2.525939723827883e-06, "loss": 0.0016, "step": 170780 }, { "epoch": 1.3966553542952937, "grad_norm": 0.03906013071537018, "learning_rate": 2.5253196029974443e-06, "loss": 0.0008, "step": 170790 }, { "epoch": 1.396737130473893, "grad_norm": 0.03000384010374546, "learning_rate": 2.524699532578255e-06, "loss": 0.0005, "step": 170800 }, { "epoch": 1.396818906652492, "grad_norm": 0.05741901323199272, "learning_rate": 2.5240795125829504e-06, "loss": 0.0012, "step": 170810 }, { "epoch": 1.3969006828310913, "grad_norm": 0.1806543618440628, "learning_rate": 2.5234595430241586e-06, "loss": 0.0016, "step": 170820 }, { "epoch": 1.3969824590096904, "grad_norm": 0.03342859074473381, "learning_rate": 2.5228396239145094e-06, "loss": 0.0006, "step": 170830 }, { "epoch": 1.3970642351882896, "grad_norm": 0.005862031131982803, "learning_rate": 2.52221975526663e-06, "loss": 0.0006, "step": 170840 }, { "epoch": 1.3971460113668888, "grad_norm": 0.04200815409421921, "learning_rate": 2.521599937093149e-06, "loss": 0.0012, "step": 170850 }, { "epoch": 1.397227787545488, "grad_norm": 0.03177257627248764, "learning_rate": 2.5209801694066906e-06, "loss": 0.0006, "step": 170860 }, { "epoch": 1.397309563724087, "grad_norm": 0.023878078907728195, "learning_rate": 2.5203604522198823e-06, "loss": 0.0011, "step": 170870 }, { "epoch": 1.3973913399026863, "grad_norm": 0.016135117039084435, "learning_rate": 2.519740785545344e-06, "loss": 0.0008, "step": 170880 }, { "epoch": 1.3974731160812857, "grad_norm": 0.04026375710964203, "learning_rate": 2.519121169395705e-06, "loss": 0.0006, "step": 170890 }, { "epoch": 1.3975548922598846, "grad_norm": 0.03985292464494705, "learning_rate": 2.518501603783584e-06, "loss": 0.0014, "step": 170900 }, { "epoch": 1.397636668438484, "grad_norm": 0.001087264739908278, "learning_rate": 2.5178820887216025e-06, "loss": 0.0009, "step": 170910 }, { "epoch": 1.397718444617083, "grad_norm": 0.004363447427749634, "learning_rate": 2.5172626242223787e-06, "loss": 0.0012, "step": 170920 }, { "epoch": 1.3978002207956823, "grad_norm": 0.06320507079362869, "learning_rate": 2.516643210298535e-06, "loss": 0.0012, "step": 170930 }, { "epoch": 1.3978819969742813, "grad_norm": 0.030867066234350204, "learning_rate": 2.5160238469626873e-06, "loss": 0.0007, "step": 170940 }, { "epoch": 1.3979637731528807, "grad_norm": 0.01844383217394352, "learning_rate": 2.515404534227453e-06, "loss": 0.0011, "step": 170950 }, { "epoch": 1.3980455493314796, "grad_norm": 0.1367611289024353, "learning_rate": 2.5147852721054484e-06, "loss": 0.0029, "step": 170960 }, { "epoch": 1.398127325510079, "grad_norm": 0.038696687668561935, "learning_rate": 2.514166060609288e-06, "loss": 0.0006, "step": 170970 }, { "epoch": 1.398209101688678, "grad_norm": 0.03344763442873955, "learning_rate": 2.5135468997515843e-06, "loss": 0.0008, "step": 170980 }, { "epoch": 1.3982908778672773, "grad_norm": 0.025060517713427544, "learning_rate": 2.5129277895449522e-06, "loss": 0.0005, "step": 170990 }, { "epoch": 1.3983726540458765, "grad_norm": 0.10784570872783661, "learning_rate": 2.512308730002001e-06, "loss": 0.0006, "step": 171000 }, { "epoch": 1.3984544302244757, "grad_norm": 0.007891705259680748, "learning_rate": 2.5116897211353448e-06, "loss": 0.001, "step": 171010 }, { "epoch": 1.3985362064030749, "grad_norm": 0.04686693102121353, "learning_rate": 2.5110707629575915e-06, "loss": 0.0009, "step": 171020 }, { "epoch": 1.398617982581674, "grad_norm": 0.06781071424484253, "learning_rate": 2.5104518554813496e-06, "loss": 0.001, "step": 171030 }, { "epoch": 1.3986997587602732, "grad_norm": 0.01880974881350994, "learning_rate": 2.5098329987192256e-06, "loss": 0.0015, "step": 171040 }, { "epoch": 1.3987815349388724, "grad_norm": 0.01298182550817728, "learning_rate": 2.5092141926838295e-06, "loss": 0.0012, "step": 171050 }, { "epoch": 1.3988633111174715, "grad_norm": 0.03516183793544769, "learning_rate": 2.508595437387764e-06, "loss": 0.0005, "step": 171060 }, { "epoch": 1.3989450872960707, "grad_norm": 0.03442687168717384, "learning_rate": 2.5079767328436353e-06, "loss": 0.0005, "step": 171070 }, { "epoch": 1.3990268634746699, "grad_norm": 0.024826280772686005, "learning_rate": 2.5073580790640463e-06, "loss": 0.0006, "step": 171080 }, { "epoch": 1.399108639653269, "grad_norm": 0.012475035153329372, "learning_rate": 2.506739476061599e-06, "loss": 0.0006, "step": 171090 }, { "epoch": 1.3991904158318682, "grad_norm": 0.01871308498084545, "learning_rate": 2.506120923848895e-06, "loss": 0.0003, "step": 171100 }, { "epoch": 1.3992721920104674, "grad_norm": 0.03657611459493637, "learning_rate": 2.505502422438535e-06, "loss": 0.0011, "step": 171110 }, { "epoch": 1.3993539681890665, "grad_norm": 0.09912052750587463, "learning_rate": 2.5048839718431166e-06, "loss": 0.0006, "step": 171120 }, { "epoch": 1.3994357443676657, "grad_norm": 0.04011593386530876, "learning_rate": 2.504265572075242e-06, "loss": 0.0014, "step": 171130 }, { "epoch": 1.3995175205462649, "grad_norm": 0.0020067687146365643, "learning_rate": 2.503647223147506e-06, "loss": 0.0005, "step": 171140 }, { "epoch": 1.399599296724864, "grad_norm": 0.026980500668287277, "learning_rate": 2.503028925072505e-06, "loss": 0.001, "step": 171150 }, { "epoch": 1.3996810729034632, "grad_norm": 0.02813536301255226, "learning_rate": 2.502410677862833e-06, "loss": 0.0015, "step": 171160 }, { "epoch": 1.3997628490820624, "grad_norm": 0.008980894461274147, "learning_rate": 2.5017924815310868e-06, "loss": 0.001, "step": 171170 }, { "epoch": 1.3998446252606616, "grad_norm": 0.007160540204495192, "learning_rate": 2.5011743360898596e-06, "loss": 0.0008, "step": 171180 }, { "epoch": 1.3999264014392607, "grad_norm": 0.060002733021974564, "learning_rate": 2.5005562415517414e-06, "loss": 0.0007, "step": 171190 }, { "epoch": 1.40000817761786, "grad_norm": 0.02522212453186512, "learning_rate": 2.4999381979293243e-06, "loss": 0.001, "step": 171200 }, { "epoch": 1.400089953796459, "grad_norm": 0.08728710561990738, "learning_rate": 2.4993202052351985e-06, "loss": 0.0014, "step": 171210 }, { "epoch": 1.4001717299750582, "grad_norm": 0.08421334624290466, "learning_rate": 2.4987022634819526e-06, "loss": 0.0008, "step": 171220 }, { "epoch": 1.4002535061536574, "grad_norm": 0.02934619039297104, "learning_rate": 2.498084372682175e-06, "loss": 0.0008, "step": 171230 }, { "epoch": 1.4003352823322566, "grad_norm": 0.035324618220329285, "learning_rate": 2.49746653284845e-06, "loss": 0.0007, "step": 171240 }, { "epoch": 1.4004170585108557, "grad_norm": 0.0017315112054347992, "learning_rate": 2.496848743993368e-06, "loss": 0.002, "step": 171250 }, { "epoch": 1.400498834689455, "grad_norm": 0.024823995307087898, "learning_rate": 2.496231006129512e-06, "loss": 0.0014, "step": 171260 }, { "epoch": 1.400580610868054, "grad_norm": 0.0710967481136322, "learning_rate": 2.4956133192694653e-06, "loss": 0.0009, "step": 171270 }, { "epoch": 1.4006623870466532, "grad_norm": 0.03393752872943878, "learning_rate": 2.4949956834258093e-06, "loss": 0.0008, "step": 171280 }, { "epoch": 1.4007441632252524, "grad_norm": 0.03511596471071243, "learning_rate": 2.494378098611129e-06, "loss": 0.0003, "step": 171290 }, { "epoch": 1.4008259394038516, "grad_norm": 0.06846553087234497, "learning_rate": 2.493760564838004e-06, "loss": 0.0008, "step": 171300 }, { "epoch": 1.4009077155824508, "grad_norm": 0.035230569541454315, "learning_rate": 2.493143082119013e-06, "loss": 0.0006, "step": 171310 }, { "epoch": 1.4009894917610501, "grad_norm": 0.02318717911839485, "learning_rate": 2.492525650466735e-06, "loss": 0.0017, "step": 171320 }, { "epoch": 1.401071267939649, "grad_norm": 0.011282633990049362, "learning_rate": 2.491908269893748e-06, "loss": 0.0004, "step": 171330 }, { "epoch": 1.4011530441182485, "grad_norm": 0.020677238702774048, "learning_rate": 2.491290940412628e-06, "loss": 0.0008, "step": 171340 }, { "epoch": 1.4012348202968474, "grad_norm": 0.04369896650314331, "learning_rate": 2.4906736620359507e-06, "loss": 0.0006, "step": 171350 }, { "epoch": 1.4013165964754468, "grad_norm": 0.02263268642127514, "learning_rate": 2.490056434776289e-06, "loss": 0.0004, "step": 171360 }, { "epoch": 1.4013983726540458, "grad_norm": 0.08471202850341797, "learning_rate": 2.48943925864622e-06, "loss": 0.0004, "step": 171370 }, { "epoch": 1.4014801488326452, "grad_norm": 0.03725215792655945, "learning_rate": 2.4888221336583135e-06, "loss": 0.0008, "step": 171380 }, { "epoch": 1.401561925011244, "grad_norm": 0.0422818660736084, "learning_rate": 2.488205059825141e-06, "loss": 0.0014, "step": 171390 }, { "epoch": 1.4016437011898435, "grad_norm": 0.036938078701496124, "learning_rate": 2.487588037159272e-06, "loss": 0.001, "step": 171400 }, { "epoch": 1.4017254773684424, "grad_norm": 0.13476651906967163, "learning_rate": 2.486971065673278e-06, "loss": 0.0007, "step": 171410 }, { "epoch": 1.4018072535470418, "grad_norm": 0.055812347680330276, "learning_rate": 2.486354145379727e-06, "loss": 0.0008, "step": 171420 }, { "epoch": 1.401889029725641, "grad_norm": 0.034876152873039246, "learning_rate": 2.485737276291185e-06, "loss": 0.0008, "step": 171430 }, { "epoch": 1.4019708059042402, "grad_norm": 0.03055652603507042, "learning_rate": 2.4851204584202177e-06, "loss": 0.0014, "step": 171440 }, { "epoch": 1.4020525820828393, "grad_norm": 0.07279264181852341, "learning_rate": 2.4845036917793915e-06, "loss": 0.0006, "step": 171450 }, { "epoch": 1.4021343582614385, "grad_norm": 0.039834748953580856, "learning_rate": 2.4838869763812697e-06, "loss": 0.0007, "step": 171460 }, { "epoch": 1.4022161344400377, "grad_norm": 0.023292748257517815, "learning_rate": 2.483270312238415e-06, "loss": 0.0012, "step": 171470 }, { "epoch": 1.4022979106186368, "grad_norm": 0.028778525069355965, "learning_rate": 2.482653699363388e-06, "loss": 0.0006, "step": 171480 }, { "epoch": 1.402379686797236, "grad_norm": 0.022331347689032555, "learning_rate": 2.4820371377687536e-06, "loss": 0.0025, "step": 171490 }, { "epoch": 1.4024614629758352, "grad_norm": 0.042714837938547134, "learning_rate": 2.4814206274670693e-06, "loss": 0.0008, "step": 171500 }, { "epoch": 1.4025432391544344, "grad_norm": 0.007823679596185684, "learning_rate": 2.480804168470894e-06, "loss": 0.0007, "step": 171510 }, { "epoch": 1.4026250153330335, "grad_norm": 0.03611085191369057, "learning_rate": 2.480187760792783e-06, "loss": 0.001, "step": 171520 }, { "epoch": 1.4027067915116327, "grad_norm": 0.01182086393237114, "learning_rate": 2.4795714044452983e-06, "loss": 0.0004, "step": 171530 }, { "epoch": 1.4027885676902319, "grad_norm": 0.028281960636377335, "learning_rate": 2.4789550994409922e-06, "loss": 0.0006, "step": 171540 }, { "epoch": 1.402870343868831, "grad_norm": 0.0043920180760324, "learning_rate": 2.4783388457924208e-06, "loss": 0.0014, "step": 171550 }, { "epoch": 1.4029521200474302, "grad_norm": 0.02460610866546631, "learning_rate": 2.477722643512136e-06, "loss": 0.0005, "step": 171560 }, { "epoch": 1.4030338962260294, "grad_norm": 0.04037301242351532, "learning_rate": 2.4771064926126913e-06, "loss": 0.0014, "step": 171570 }, { "epoch": 1.4031156724046285, "grad_norm": 0.03946169093251228, "learning_rate": 2.476490393106638e-06, "loss": 0.0005, "step": 171580 }, { "epoch": 1.4031974485832277, "grad_norm": 0.050484564155340195, "learning_rate": 2.475874345006527e-06, "loss": 0.0009, "step": 171590 }, { "epoch": 1.4032792247618269, "grad_norm": 0.03565344959497452, "learning_rate": 2.4752583483249065e-06, "loss": 0.0009, "step": 171600 }, { "epoch": 1.403361000940426, "grad_norm": 0.050217531621456146, "learning_rate": 2.4746424030743244e-06, "loss": 0.001, "step": 171610 }, { "epoch": 1.4034427771190252, "grad_norm": 0.016586128622293472, "learning_rate": 2.474026509267331e-06, "loss": 0.0007, "step": 171620 }, { "epoch": 1.4035245532976244, "grad_norm": 0.054249223321676254, "learning_rate": 2.473410666916471e-06, "loss": 0.0019, "step": 171630 }, { "epoch": 1.4036063294762235, "grad_norm": 0.002619457896798849, "learning_rate": 2.4727948760342895e-06, "loss": 0.0009, "step": 171640 }, { "epoch": 1.4036881056548227, "grad_norm": 0.051376745104789734, "learning_rate": 2.472179136633328e-06, "loss": 0.0006, "step": 171650 }, { "epoch": 1.4037698818334219, "grad_norm": 0.005221865139901638, "learning_rate": 2.471563448726135e-06, "loss": 0.0004, "step": 171660 }, { "epoch": 1.403851658012021, "grad_norm": 0.13155212998390198, "learning_rate": 2.4709478123252494e-06, "loss": 0.001, "step": 171670 }, { "epoch": 1.4039334341906202, "grad_norm": 0.04563797265291214, "learning_rate": 2.4703322274432125e-06, "loss": 0.0012, "step": 171680 }, { "epoch": 1.4040152103692194, "grad_norm": 0.07598964869976044, "learning_rate": 2.4697166940925642e-06, "loss": 0.001, "step": 171690 }, { "epoch": 1.4040969865478186, "grad_norm": 0.021721310913562775, "learning_rate": 2.469101212285844e-06, "loss": 0.0008, "step": 171700 }, { "epoch": 1.4041787627264177, "grad_norm": 0.008781726472079754, "learning_rate": 2.468485782035589e-06, "loss": 0.0005, "step": 171710 }, { "epoch": 1.404260538905017, "grad_norm": 0.0368831567466259, "learning_rate": 2.467870403354336e-06, "loss": 0.0012, "step": 171720 }, { "epoch": 1.404342315083616, "grad_norm": 0.021721888333559036, "learning_rate": 2.46725507625462e-06, "loss": 0.0007, "step": 171730 }, { "epoch": 1.4044240912622152, "grad_norm": 0.04922179877758026, "learning_rate": 2.4666398007489785e-06, "loss": 0.0006, "step": 171740 }, { "epoch": 1.4045058674408146, "grad_norm": 0.015164118260145187, "learning_rate": 2.466024576849943e-06, "loss": 0.0005, "step": 171750 }, { "epoch": 1.4045876436194136, "grad_norm": 0.028083352372050285, "learning_rate": 2.4654094045700468e-06, "loss": 0.0007, "step": 171760 }, { "epoch": 1.404669419798013, "grad_norm": 0.003053059335798025, "learning_rate": 2.46479428392182e-06, "loss": 0.0006, "step": 171770 }, { "epoch": 1.404751195976612, "grad_norm": 0.0020929554011672735, "learning_rate": 2.464179214917795e-06, "loss": 0.0004, "step": 171780 }, { "epoch": 1.4048329721552113, "grad_norm": 0.10100527852773666, "learning_rate": 2.4635641975705017e-06, "loss": 0.0019, "step": 171790 }, { "epoch": 1.4049147483338102, "grad_norm": 0.005754108540713787, "learning_rate": 2.462949231892467e-06, "loss": 0.0006, "step": 171800 }, { "epoch": 1.4049965245124096, "grad_norm": 0.07111438363790512, "learning_rate": 2.4623343178962184e-06, "loss": 0.0007, "step": 171810 }, { "epoch": 1.4050783006910086, "grad_norm": 0.028059333562850952, "learning_rate": 2.461719455594283e-06, "loss": 0.0005, "step": 171820 }, { "epoch": 1.405160076869608, "grad_norm": 0.023545142263174057, "learning_rate": 2.461104644999185e-06, "loss": 0.0008, "step": 171830 }, { "epoch": 1.405241853048207, "grad_norm": 0.019447658210992813, "learning_rate": 2.4604898861234495e-06, "loss": 0.0007, "step": 171840 }, { "epoch": 1.4053236292268063, "grad_norm": 0.06271956861019135, "learning_rate": 2.4598751789795973e-06, "loss": 0.0012, "step": 171850 }, { "epoch": 1.4054054054054055, "grad_norm": 0.06890065222978592, "learning_rate": 2.4592605235801544e-06, "loss": 0.0009, "step": 171860 }, { "epoch": 1.4054871815840047, "grad_norm": 0.01663045957684517, "learning_rate": 2.4586459199376395e-06, "loss": 0.0006, "step": 171870 }, { "epoch": 1.4055689577626038, "grad_norm": 0.03726379945874214, "learning_rate": 2.458031368064573e-06, "loss": 0.0019, "step": 171880 }, { "epoch": 1.405650733941203, "grad_norm": 0.012954125180840492, "learning_rate": 2.4574168679734715e-06, "loss": 0.0006, "step": 171890 }, { "epoch": 1.4057325101198022, "grad_norm": 0.005799912381917238, "learning_rate": 2.456802419676858e-06, "loss": 0.0012, "step": 171900 }, { "epoch": 1.4058142862984013, "grad_norm": 0.10166779160499573, "learning_rate": 2.456188023187246e-06, "loss": 0.0004, "step": 171910 }, { "epoch": 1.4058960624770005, "grad_norm": 0.04426100105047226, "learning_rate": 2.455573678517151e-06, "loss": 0.0007, "step": 171920 }, { "epoch": 1.4059778386555997, "grad_norm": 0.010778079740703106, "learning_rate": 2.4549593856790893e-06, "loss": 0.0011, "step": 171930 }, { "epoch": 1.4060596148341988, "grad_norm": 0.03655586391687393, "learning_rate": 2.454345144685573e-06, "loss": 0.0003, "step": 171940 }, { "epoch": 1.406141391012798, "grad_norm": 0.051968131214380264, "learning_rate": 2.453730955549116e-06, "loss": 0.001, "step": 171950 }, { "epoch": 1.4062231671913972, "grad_norm": 0.0513589009642601, "learning_rate": 2.4531168182822285e-06, "loss": 0.0012, "step": 171960 }, { "epoch": 1.4063049433699963, "grad_norm": 0.015374336391687393, "learning_rate": 2.45250273289742e-06, "loss": 0.0014, "step": 171970 }, { "epoch": 1.4063867195485955, "grad_norm": 0.01950581558048725, "learning_rate": 2.4518886994072034e-06, "loss": 0.0012, "step": 171980 }, { "epoch": 1.4064684957271947, "grad_norm": 0.035640817135572433, "learning_rate": 2.4512747178240854e-06, "loss": 0.001, "step": 171990 }, { "epoch": 1.4065502719057938, "grad_norm": 0.03374118730425835, "learning_rate": 2.4506607881605726e-06, "loss": 0.0003, "step": 172000 }, { "epoch": 1.406632048084393, "grad_norm": 0.036846134811639786, "learning_rate": 2.4500469104291704e-06, "loss": 0.0007, "step": 172010 }, { "epoch": 1.4067138242629922, "grad_norm": 0.009195180609822273, "learning_rate": 2.4494330846423868e-06, "loss": 0.0007, "step": 172020 }, { "epoch": 1.4067956004415914, "grad_norm": 0.08787943422794342, "learning_rate": 2.4488193108127244e-06, "loss": 0.0007, "step": 172030 }, { "epoch": 1.4068773766201905, "grad_norm": 0.00864889845252037, "learning_rate": 2.448205588952687e-06, "loss": 0.0008, "step": 172040 }, { "epoch": 1.4069591527987897, "grad_norm": 0.06202404573559761, "learning_rate": 2.4475919190747753e-06, "loss": 0.0009, "step": 172050 }, { "epoch": 1.4070409289773889, "grad_norm": 0.07245306670665741, "learning_rate": 2.4469783011914917e-06, "loss": 0.001, "step": 172060 }, { "epoch": 1.407122705155988, "grad_norm": 0.0022287522442638874, "learning_rate": 2.4463647353153348e-06, "loss": 0.001, "step": 172070 }, { "epoch": 1.4072044813345872, "grad_norm": 0.03527611494064331, "learning_rate": 2.445751221458804e-06, "loss": 0.0005, "step": 172080 }, { "epoch": 1.4072862575131864, "grad_norm": 0.026799572631716728, "learning_rate": 2.4451377596343946e-06, "loss": 0.0011, "step": 172090 }, { "epoch": 1.4073680336917855, "grad_norm": 0.08406742662191391, "learning_rate": 2.4445243498546077e-06, "loss": 0.0007, "step": 172100 }, { "epoch": 1.4074498098703847, "grad_norm": 0.17785710096359253, "learning_rate": 2.4439109921319377e-06, "loss": 0.001, "step": 172110 }, { "epoch": 1.4075315860489839, "grad_norm": 0.013577771373093128, "learning_rate": 2.4432976864788784e-06, "loss": 0.0005, "step": 172120 }, { "epoch": 1.407613362227583, "grad_norm": 0.034828510135412216, "learning_rate": 2.4426844329079213e-06, "loss": 0.001, "step": 172130 }, { "epoch": 1.4076951384061822, "grad_norm": 0.004214616026729345, "learning_rate": 2.4420712314315627e-06, "loss": 0.0008, "step": 172140 }, { "epoch": 1.4077769145847814, "grad_norm": 0.04163610562682152, "learning_rate": 2.4414580820622925e-06, "loss": 0.0006, "step": 172150 }, { "epoch": 1.4078586907633805, "grad_norm": 0.019196921959519386, "learning_rate": 2.4408449848126008e-06, "loss": 0.0007, "step": 172160 }, { "epoch": 1.4079404669419797, "grad_norm": 0.0037321492563933134, "learning_rate": 2.440231939694977e-06, "loss": 0.0006, "step": 172170 }, { "epoch": 1.408022243120579, "grad_norm": 0.03777125105261803, "learning_rate": 2.439618946721909e-06, "loss": 0.0009, "step": 172180 }, { "epoch": 1.408104019299178, "grad_norm": 0.041256729513406754, "learning_rate": 2.4390060059058847e-06, "loss": 0.0007, "step": 172190 }, { "epoch": 1.4081857954777774, "grad_norm": 0.04059237241744995, "learning_rate": 2.4383931172593896e-06, "loss": 0.0008, "step": 172200 }, { "epoch": 1.4082675716563764, "grad_norm": 0.07559031993150711, "learning_rate": 2.4377802807949068e-06, "loss": 0.0008, "step": 172210 }, { "epoch": 1.4083493478349758, "grad_norm": 0.001507600536569953, "learning_rate": 2.437167496524924e-06, "loss": 0.0007, "step": 172220 }, { "epoch": 1.4084311240135747, "grad_norm": 0.06869281083345413, "learning_rate": 2.436554764461923e-06, "loss": 0.001, "step": 172230 }, { "epoch": 1.4085129001921741, "grad_norm": 0.005860339850187302, "learning_rate": 2.435942084618385e-06, "loss": 0.0004, "step": 172240 }, { "epoch": 1.408594676370773, "grad_norm": 0.07606241852045059, "learning_rate": 2.435329457006789e-06, "loss": 0.001, "step": 172250 }, { "epoch": 1.4086764525493725, "grad_norm": 0.03762504458427429, "learning_rate": 2.434716881639618e-06, "loss": 0.0009, "step": 172260 }, { "epoch": 1.4087582287279714, "grad_norm": 0.04314012452960014, "learning_rate": 2.4341043585293507e-06, "loss": 0.0006, "step": 172270 }, { "epoch": 1.4088400049065708, "grad_norm": 0.022714029997587204, "learning_rate": 2.4334918876884624e-06, "loss": 0.0013, "step": 172280 }, { "epoch": 1.40892178108517, "grad_norm": 0.05110928416252136, "learning_rate": 2.43287946912943e-06, "loss": 0.001, "step": 172290 }, { "epoch": 1.4090035572637691, "grad_norm": 0.05622675269842148, "learning_rate": 2.43226710286473e-06, "loss": 0.0008, "step": 172300 }, { "epoch": 1.4090853334423683, "grad_norm": 0.028322529047727585, "learning_rate": 2.431654788906836e-06, "loss": 0.0008, "step": 172310 }, { "epoch": 1.4091671096209675, "grad_norm": 0.0023638561833649874, "learning_rate": 2.431042527268222e-06, "loss": 0.0008, "step": 172320 }, { "epoch": 1.4092488857995666, "grad_norm": 0.13070198893547058, "learning_rate": 2.430430317961358e-06, "loss": 0.0006, "step": 172330 }, { "epoch": 1.4093306619781658, "grad_norm": 0.004695461597293615, "learning_rate": 2.4298181609987186e-06, "loss": 0.0011, "step": 172340 }, { "epoch": 1.409412438156765, "grad_norm": 0.04821166768670082, "learning_rate": 2.429206056392773e-06, "loss": 0.0004, "step": 172350 }, { "epoch": 1.4094942143353641, "grad_norm": 0.05115129426121712, "learning_rate": 2.4285940041559885e-06, "loss": 0.0006, "step": 172360 }, { "epoch": 1.4095759905139633, "grad_norm": 0.005258290097117424, "learning_rate": 2.4279820043008333e-06, "loss": 0.0014, "step": 172370 }, { "epoch": 1.4096577666925625, "grad_norm": 0.047962453216314316, "learning_rate": 2.4273700568397773e-06, "loss": 0.0006, "step": 172380 }, { "epoch": 1.4097395428711617, "grad_norm": 0.03736269474029541, "learning_rate": 2.426758161785284e-06, "loss": 0.0011, "step": 172390 }, { "epoch": 1.4098213190497608, "grad_norm": 0.02600039355456829, "learning_rate": 2.4261463191498185e-06, "loss": 0.0018, "step": 172400 }, { "epoch": 1.40990309522836, "grad_norm": 0.04568582773208618, "learning_rate": 2.4255345289458446e-06, "loss": 0.0008, "step": 172410 }, { "epoch": 1.4099848714069592, "grad_norm": 0.03078564628958702, "learning_rate": 2.424922791185826e-06, "loss": 0.0011, "step": 172420 }, { "epoch": 1.4100666475855583, "grad_norm": 0.0334961861371994, "learning_rate": 2.4243111058822223e-06, "loss": 0.0004, "step": 172430 }, { "epoch": 1.4101484237641575, "grad_norm": 0.022091679275035858, "learning_rate": 2.423699473047495e-06, "loss": 0.0013, "step": 172440 }, { "epoch": 1.4102301999427567, "grad_norm": 0.0022972291335463524, "learning_rate": 2.423087892694102e-06, "loss": 0.0002, "step": 172450 }, { "epoch": 1.4103119761213558, "grad_norm": 0.050654299557209015, "learning_rate": 2.4224763648345052e-06, "loss": 0.0011, "step": 172460 }, { "epoch": 1.410393752299955, "grad_norm": 0.01885085366666317, "learning_rate": 2.4218648894811592e-06, "loss": 0.0007, "step": 172470 }, { "epoch": 1.4104755284785542, "grad_norm": 0.027538632974028587, "learning_rate": 2.4212534666465216e-06, "loss": 0.0007, "step": 172480 }, { "epoch": 1.4105573046571533, "grad_norm": 0.017872100695967674, "learning_rate": 2.4206420963430454e-06, "loss": 0.0006, "step": 172490 }, { "epoch": 1.4106390808357525, "grad_norm": 0.07513105124235153, "learning_rate": 2.420030778583188e-06, "loss": 0.0003, "step": 172500 }, { "epoch": 1.4107208570143517, "grad_norm": 0.02603522315621376, "learning_rate": 2.4194195133794e-06, "loss": 0.0004, "step": 172510 }, { "epoch": 1.4108026331929509, "grad_norm": 0.0002491999475751072, "learning_rate": 2.418808300744135e-06, "loss": 0.0011, "step": 172520 }, { "epoch": 1.41088440937155, "grad_norm": 0.024839432910084724, "learning_rate": 2.418197140689843e-06, "loss": 0.0008, "step": 172530 }, { "epoch": 1.4109661855501492, "grad_norm": 0.006652708165347576, "learning_rate": 2.4175860332289738e-06, "loss": 0.0003, "step": 172540 }, { "epoch": 1.4110479617287484, "grad_norm": 0.26124241948127747, "learning_rate": 2.4169749783739756e-06, "loss": 0.0029, "step": 172550 }, { "epoch": 1.4111297379073475, "grad_norm": 0.0025037198793143034, "learning_rate": 2.416363976137297e-06, "loss": 0.0007, "step": 172560 }, { "epoch": 1.4112115140859467, "grad_norm": 0.005986401811242104, "learning_rate": 2.415753026531382e-06, "loss": 0.0008, "step": 172570 }, { "epoch": 1.4112932902645459, "grad_norm": 0.12380372732877731, "learning_rate": 2.415142129568681e-06, "loss": 0.0016, "step": 172580 }, { "epoch": 1.411375066443145, "grad_norm": 0.04422373324632645, "learning_rate": 2.414531285261636e-06, "loss": 0.0014, "step": 172590 }, { "epoch": 1.4114568426217442, "grad_norm": 0.05242180451750755, "learning_rate": 2.4139204936226895e-06, "loss": 0.0008, "step": 172600 }, { "epoch": 1.4115386188003436, "grad_norm": 0.035748712718486786, "learning_rate": 2.4133097546642837e-06, "loss": 0.0009, "step": 172610 }, { "epoch": 1.4116203949789425, "grad_norm": 0.001338534988462925, "learning_rate": 2.4126990683988617e-06, "loss": 0.0007, "step": 172620 }, { "epoch": 1.411702171157542, "grad_norm": 0.06305183470249176, "learning_rate": 2.4120884348388637e-06, "loss": 0.0007, "step": 172630 }, { "epoch": 1.4117839473361409, "grad_norm": 0.01760285347700119, "learning_rate": 2.411477853996727e-06, "loss": 0.0016, "step": 172640 }, { "epoch": 1.4118657235147403, "grad_norm": 0.024080701172351837, "learning_rate": 2.4108673258848907e-06, "loss": 0.0006, "step": 172650 }, { "epoch": 1.4119474996933392, "grad_norm": 0.009015137329697609, "learning_rate": 2.410256850515792e-06, "loss": 0.0006, "step": 172660 }, { "epoch": 1.4120292758719386, "grad_norm": 0.006676601245999336, "learning_rate": 2.4096464279018654e-06, "loss": 0.0007, "step": 172670 }, { "epoch": 1.4121110520505376, "grad_norm": 0.06642947345972061, "learning_rate": 2.409036058055547e-06, "loss": 0.0006, "step": 172680 }, { "epoch": 1.412192828229137, "grad_norm": 0.08420063555240631, "learning_rate": 2.4084257409892687e-06, "loss": 0.0008, "step": 172690 }, { "epoch": 1.412274604407736, "grad_norm": 0.04592492803931236, "learning_rate": 2.407815476715466e-06, "loss": 0.0008, "step": 172700 }, { "epoch": 1.4123563805863353, "grad_norm": 0.01086699403822422, "learning_rate": 2.4072052652465695e-06, "loss": 0.0008, "step": 172710 }, { "epoch": 1.4124381567649344, "grad_norm": 0.10771556198596954, "learning_rate": 2.4065951065950087e-06, "loss": 0.0009, "step": 172720 }, { "epoch": 1.4125199329435336, "grad_norm": 0.014016630128026009, "learning_rate": 2.405985000773212e-06, "loss": 0.0006, "step": 172730 }, { "epoch": 1.4126017091221328, "grad_norm": 0.07329162955284119, "learning_rate": 2.4053749477936116e-06, "loss": 0.0006, "step": 172740 }, { "epoch": 1.412683485300732, "grad_norm": 0.054639365524053574, "learning_rate": 2.404764947668632e-06, "loss": 0.0008, "step": 172750 }, { "epoch": 1.4127652614793311, "grad_norm": 0.04021741822361946, "learning_rate": 2.4041550004107e-06, "loss": 0.0003, "step": 172760 }, { "epoch": 1.4128470376579303, "grad_norm": 0.013925338163971901, "learning_rate": 2.4035451060322413e-06, "loss": 0.0005, "step": 172770 }, { "epoch": 1.4129288138365295, "grad_norm": 0.11204516887664795, "learning_rate": 2.402935264545679e-06, "loss": 0.001, "step": 172780 }, { "epoch": 1.4130105900151286, "grad_norm": 0.16721759736537933, "learning_rate": 2.4023254759634356e-06, "loss": 0.0041, "step": 172790 }, { "epoch": 1.4130923661937278, "grad_norm": 0.004087876528501511, "learning_rate": 2.4017157402979347e-06, "loss": 0.0006, "step": 172800 }, { "epoch": 1.413174142372327, "grad_norm": 0.05880354344844818, "learning_rate": 2.4011060575615935e-06, "loss": 0.001, "step": 172810 }, { "epoch": 1.4132559185509261, "grad_norm": 0.02357306517660618, "learning_rate": 2.4004964277668367e-06, "loss": 0.0016, "step": 172820 }, { "epoch": 1.4133376947295253, "grad_norm": 0.07846984267234802, "learning_rate": 2.399886850926081e-06, "loss": 0.0017, "step": 172830 }, { "epoch": 1.4134194709081245, "grad_norm": 0.05944264680147171, "learning_rate": 2.3992773270517428e-06, "loss": 0.0007, "step": 172840 }, { "epoch": 1.4135012470867236, "grad_norm": 0.08589984476566315, "learning_rate": 2.398667856156238e-06, "loss": 0.0012, "step": 172850 }, { "epoch": 1.4135830232653228, "grad_norm": 0.009010257199406624, "learning_rate": 2.398058438251985e-06, "loss": 0.0007, "step": 172860 }, { "epoch": 1.413664799443922, "grad_norm": 0.027483170852065086, "learning_rate": 2.3974490733513973e-06, "loss": 0.0014, "step": 172870 }, { "epoch": 1.4137465756225212, "grad_norm": 0.0416051521897316, "learning_rate": 2.396839761466887e-06, "loss": 0.0008, "step": 172880 }, { "epoch": 1.4138283518011203, "grad_norm": 0.008538836613297462, "learning_rate": 2.396230502610867e-06, "loss": 0.0006, "step": 172890 }, { "epoch": 1.4139101279797195, "grad_norm": 0.028602922335267067, "learning_rate": 2.395621296795748e-06, "loss": 0.0009, "step": 172900 }, { "epoch": 1.4139919041583187, "grad_norm": 0.013728509657084942, "learning_rate": 2.3950121440339393e-06, "loss": 0.0005, "step": 172910 }, { "epoch": 1.4140736803369178, "grad_norm": 0.05163797363638878, "learning_rate": 2.3944030443378513e-06, "loss": 0.0015, "step": 172920 }, { "epoch": 1.414155456515517, "grad_norm": 0.030789149925112724, "learning_rate": 2.3937939977198895e-06, "loss": 0.0003, "step": 172930 }, { "epoch": 1.4142372326941162, "grad_norm": 0.00993337295949459, "learning_rate": 2.393185004192464e-06, "loss": 0.0007, "step": 172940 }, { "epoch": 1.4143190088727153, "grad_norm": 0.01669169031083584, "learning_rate": 2.3925760637679792e-06, "loss": 0.0005, "step": 172950 }, { "epoch": 1.4144007850513145, "grad_norm": 0.021399179473519325, "learning_rate": 2.391967176458839e-06, "loss": 0.0005, "step": 172960 }, { "epoch": 1.4144825612299137, "grad_norm": 0.007276637014001608, "learning_rate": 2.3913583422774477e-06, "loss": 0.0006, "step": 172970 }, { "epoch": 1.4145643374085128, "grad_norm": 0.041097670793533325, "learning_rate": 2.3907495612362052e-06, "loss": 0.0006, "step": 172980 }, { "epoch": 1.414646113587112, "grad_norm": 0.023419717326760292, "learning_rate": 2.3901408333475173e-06, "loss": 0.0005, "step": 172990 }, { "epoch": 1.4147278897657112, "grad_norm": 0.000578518258407712, "learning_rate": 2.389532158623782e-06, "loss": 0.0007, "step": 173000 }, { "epoch": 1.4148096659443103, "grad_norm": 0.011969017796218395, "learning_rate": 2.3889235370773983e-06, "loss": 0.0007, "step": 173010 }, { "epoch": 1.4148914421229095, "grad_norm": 0.02607318013906479, "learning_rate": 2.3883149687207646e-06, "loss": 0.0007, "step": 173020 }, { "epoch": 1.4149732183015087, "grad_norm": 0.12007526308298111, "learning_rate": 2.3877064535662776e-06, "loss": 0.0007, "step": 173030 }, { "epoch": 1.415054994480108, "grad_norm": 0.02565067820250988, "learning_rate": 2.387097991626334e-06, "loss": 0.0005, "step": 173040 }, { "epoch": 1.415136770658707, "grad_norm": 0.05440770089626312, "learning_rate": 2.3864895829133277e-06, "loss": 0.0015, "step": 173050 }, { "epoch": 1.4152185468373064, "grad_norm": 0.00955151580274105, "learning_rate": 2.3858812274396513e-06, "loss": 0.0007, "step": 173060 }, { "epoch": 1.4153003230159054, "grad_norm": 0.01868641935288906, "learning_rate": 2.3852729252177016e-06, "loss": 0.0024, "step": 173070 }, { "epoch": 1.4153820991945048, "grad_norm": 0.04842476174235344, "learning_rate": 2.3846646762598667e-06, "loss": 0.0012, "step": 173080 }, { "epoch": 1.4154638753731037, "grad_norm": 0.013873042538762093, "learning_rate": 2.3840564805785386e-06, "loss": 0.0009, "step": 173090 }, { "epoch": 1.415545651551703, "grad_norm": 0.0031080401968210936, "learning_rate": 2.383448338186105e-06, "loss": 0.0004, "step": 173100 }, { "epoch": 1.415627427730302, "grad_norm": 0.01460013072937727, "learning_rate": 2.382840249094957e-06, "loss": 0.0006, "step": 173110 }, { "epoch": 1.4157092039089014, "grad_norm": 0.0026774751022458076, "learning_rate": 2.3822322133174808e-06, "loss": 0.0005, "step": 173120 }, { "epoch": 1.4157909800875004, "grad_norm": 0.026179077103734016, "learning_rate": 2.381624230866062e-06, "loss": 0.0009, "step": 173130 }, { "epoch": 1.4158727562660998, "grad_norm": 0.06751076132059097, "learning_rate": 2.381016301753086e-06, "loss": 0.0009, "step": 173140 }, { "epoch": 1.415954532444699, "grad_norm": 0.07372019439935684, "learning_rate": 2.3804084259909367e-06, "loss": 0.0006, "step": 173150 }, { "epoch": 1.416036308623298, "grad_norm": 0.009487764909863472, "learning_rate": 2.3798006035919967e-06, "loss": 0.0006, "step": 173160 }, { "epoch": 1.4161180848018973, "grad_norm": 0.06948099285364151, "learning_rate": 2.379192834568649e-06, "loss": 0.0005, "step": 173170 }, { "epoch": 1.4161998609804964, "grad_norm": 0.001217906130477786, "learning_rate": 2.378585118933271e-06, "loss": 0.001, "step": 173180 }, { "epoch": 1.4162816371590956, "grad_norm": 0.012133250944316387, "learning_rate": 2.377977456698247e-06, "loss": 0.0008, "step": 173190 }, { "epoch": 1.4163634133376948, "grad_norm": 0.06392958015203476, "learning_rate": 2.377369847875953e-06, "loss": 0.0009, "step": 173200 }, { "epoch": 1.416445189516294, "grad_norm": 0.07715100795030594, "learning_rate": 2.3767622924787683e-06, "loss": 0.001, "step": 173210 }, { "epoch": 1.4165269656948931, "grad_norm": 0.08118265867233276, "learning_rate": 2.3761547905190647e-06, "loss": 0.0009, "step": 173220 }, { "epoch": 1.4166087418734923, "grad_norm": 0.037620168179273605, "learning_rate": 2.3755473420092233e-06, "loss": 0.0007, "step": 173230 }, { "epoch": 1.4166905180520915, "grad_norm": 0.0616605281829834, "learning_rate": 2.374939946961616e-06, "loss": 0.0004, "step": 173240 }, { "epoch": 1.4167722942306906, "grad_norm": 0.04265661537647247, "learning_rate": 2.3743326053886156e-06, "loss": 0.0011, "step": 173250 }, { "epoch": 1.4168540704092898, "grad_norm": 0.06543727219104767, "learning_rate": 2.373725317302594e-06, "loss": 0.0011, "step": 173260 }, { "epoch": 1.416935846587889, "grad_norm": 0.02606896497309208, "learning_rate": 2.3731180827159228e-06, "loss": 0.0025, "step": 173270 }, { "epoch": 1.4170176227664881, "grad_norm": 0.038990896195173264, "learning_rate": 2.3725109016409712e-06, "loss": 0.0006, "step": 173280 }, { "epoch": 1.4170993989450873, "grad_norm": 0.044473107904195786, "learning_rate": 2.3719037740901087e-06, "loss": 0.0006, "step": 173290 }, { "epoch": 1.4171811751236865, "grad_norm": 0.00898129865527153, "learning_rate": 2.3712967000756997e-06, "loss": 0.0017, "step": 173300 }, { "epoch": 1.4172629513022856, "grad_norm": 0.025671185925602913, "learning_rate": 2.3706896796101165e-06, "loss": 0.0007, "step": 173310 }, { "epoch": 1.4173447274808848, "grad_norm": 0.003057942260056734, "learning_rate": 2.370082712705721e-06, "loss": 0.0005, "step": 173320 }, { "epoch": 1.417426503659484, "grad_norm": 0.00315915048122406, "learning_rate": 2.369475799374879e-06, "loss": 0.0012, "step": 173330 }, { "epoch": 1.4175082798380831, "grad_norm": 0.024933841079473495, "learning_rate": 2.3688689396299515e-06, "loss": 0.0015, "step": 173340 }, { "epoch": 1.4175900560166823, "grad_norm": 0.091666579246521, "learning_rate": 2.3682621334833035e-06, "loss": 0.001, "step": 173350 }, { "epoch": 1.4176718321952815, "grad_norm": 0.007984346710145473, "learning_rate": 2.367655380947295e-06, "loss": 0.0005, "step": 173360 }, { "epoch": 1.4177536083738806, "grad_norm": 0.009270250797271729, "learning_rate": 2.3670486820342868e-06, "loss": 0.0004, "step": 173370 }, { "epoch": 1.4178353845524798, "grad_norm": 0.01612183451652527, "learning_rate": 2.3664420367566367e-06, "loss": 0.0006, "step": 173380 }, { "epoch": 1.417917160731079, "grad_norm": 0.015144037082791328, "learning_rate": 2.365835445126703e-06, "loss": 0.0008, "step": 173390 }, { "epoch": 1.4179989369096782, "grad_norm": 0.02556651085615158, "learning_rate": 2.3652289071568423e-06, "loss": 0.0004, "step": 173400 }, { "epoch": 1.4180807130882773, "grad_norm": 0.008121980354189873, "learning_rate": 2.3646224228594106e-06, "loss": 0.0008, "step": 173410 }, { "epoch": 1.4181624892668765, "grad_norm": 0.003714177990332246, "learning_rate": 2.3640159922467603e-06, "loss": 0.0009, "step": 173420 }, { "epoch": 1.4182442654454757, "grad_norm": 0.0015716298948973417, "learning_rate": 2.3634096153312487e-06, "loss": 0.0014, "step": 173430 }, { "epoch": 1.4183260416240748, "grad_norm": 0.05886304751038551, "learning_rate": 2.3628032921252263e-06, "loss": 0.0008, "step": 173440 }, { "epoch": 1.418407817802674, "grad_norm": 0.02164587937295437, "learning_rate": 2.362197022641045e-06, "loss": 0.0011, "step": 173450 }, { "epoch": 1.4184895939812732, "grad_norm": 0.1723622828722, "learning_rate": 2.361590806891052e-06, "loss": 0.0006, "step": 173460 }, { "epoch": 1.4185713701598726, "grad_norm": 0.03193105012178421, "learning_rate": 2.3609846448876013e-06, "loss": 0.0005, "step": 173470 }, { "epoch": 1.4186531463384715, "grad_norm": 0.06392481923103333, "learning_rate": 2.360378536643038e-06, "loss": 0.0004, "step": 173480 }, { "epoch": 1.418734922517071, "grad_norm": 0.09056124836206436, "learning_rate": 2.35977248216971e-06, "loss": 0.0007, "step": 173490 }, { "epoch": 1.4188166986956698, "grad_norm": 0.023629607632756233, "learning_rate": 2.359166481479962e-06, "loss": 0.0007, "step": 173500 }, { "epoch": 1.4188984748742692, "grad_norm": 0.022403845563530922, "learning_rate": 2.3585605345861395e-06, "loss": 0.0011, "step": 173510 }, { "epoch": 1.4189802510528682, "grad_norm": 0.019881777465343475, "learning_rate": 2.357954641500586e-06, "loss": 0.0007, "step": 173520 }, { "epoch": 1.4190620272314676, "grad_norm": 0.0036904332228004932, "learning_rate": 2.3573488022356446e-06, "loss": 0.0006, "step": 173530 }, { "epoch": 1.4191438034100665, "grad_norm": 0.009670404717326164, "learning_rate": 2.356743016803654e-06, "loss": 0.0005, "step": 173540 }, { "epoch": 1.419225579588666, "grad_norm": 0.036399610340595245, "learning_rate": 2.356137285216958e-06, "loss": 0.0019, "step": 173550 }, { "epoch": 1.419307355767265, "grad_norm": 0.028601668775081635, "learning_rate": 2.355531607487895e-06, "loss": 0.0006, "step": 173560 }, { "epoch": 1.4193891319458642, "grad_norm": 0.04562666267156601, "learning_rate": 2.3549259836288025e-06, "loss": 0.0004, "step": 173570 }, { "epoch": 1.4194709081244634, "grad_norm": 0.0024870422203093767, "learning_rate": 2.3543204136520158e-06, "loss": 0.0006, "step": 173580 }, { "epoch": 1.4195526843030626, "grad_norm": 0.02186894416809082, "learning_rate": 2.3537148975698746e-06, "loss": 0.001, "step": 173590 }, { "epoch": 1.4196344604816618, "grad_norm": 0.01453747320920229, "learning_rate": 2.3531094353947127e-06, "loss": 0.0003, "step": 173600 }, { "epoch": 1.419716236660261, "grad_norm": 0.027873605489730835, "learning_rate": 2.352504027138862e-06, "loss": 0.0005, "step": 173610 }, { "epoch": 1.41979801283886, "grad_norm": 0.002993113361299038, "learning_rate": 2.351898672814657e-06, "loss": 0.0007, "step": 173620 }, { "epoch": 1.4198797890174593, "grad_norm": 0.03441904857754707, "learning_rate": 2.3512933724344283e-06, "loss": 0.0004, "step": 173630 }, { "epoch": 1.4199615651960584, "grad_norm": 0.005750157404690981, "learning_rate": 2.3506881260105067e-06, "loss": 0.0005, "step": 173640 }, { "epoch": 1.4200433413746576, "grad_norm": 0.027063315734267235, "learning_rate": 2.3500829335552216e-06, "loss": 0.0008, "step": 173650 }, { "epoch": 1.4201251175532568, "grad_norm": 0.00438678590580821, "learning_rate": 2.349477795080899e-06, "loss": 0.0008, "step": 173660 }, { "epoch": 1.420206893731856, "grad_norm": 0.06653036177158356, "learning_rate": 2.34887271059987e-06, "loss": 0.0014, "step": 173670 }, { "epoch": 1.420288669910455, "grad_norm": 0.0020224794279783964, "learning_rate": 2.3482676801244585e-06, "loss": 0.0007, "step": 173680 }, { "epoch": 1.4203704460890543, "grad_norm": 0.05841166526079178, "learning_rate": 2.34766270366699e-06, "loss": 0.0007, "step": 173690 }, { "epoch": 1.4204522222676534, "grad_norm": 0.03555435314774513, "learning_rate": 2.347057781239787e-06, "loss": 0.0004, "step": 173700 }, { "epoch": 1.4205339984462526, "grad_norm": 0.07931827008724213, "learning_rate": 2.3464529128551744e-06, "loss": 0.0008, "step": 173710 }, { "epoch": 1.4206157746248518, "grad_norm": 0.0597221702337265, "learning_rate": 2.3458480985254732e-06, "loss": 0.0013, "step": 173720 }, { "epoch": 1.420697550803451, "grad_norm": 0.0850694477558136, "learning_rate": 2.345243338263004e-06, "loss": 0.0006, "step": 173730 }, { "epoch": 1.4207793269820501, "grad_norm": 0.08025845140218735, "learning_rate": 2.3446386320800856e-06, "loss": 0.0006, "step": 173740 }, { "epoch": 1.4208611031606493, "grad_norm": 0.06592871248722076, "learning_rate": 2.344033979989037e-06, "loss": 0.001, "step": 173750 }, { "epoch": 1.4209428793392485, "grad_norm": 0.03844500705599785, "learning_rate": 2.3434293820021743e-06, "loss": 0.001, "step": 173760 }, { "epoch": 1.4210246555178476, "grad_norm": 0.0052621387876570225, "learning_rate": 2.3428248381318148e-06, "loss": 0.0005, "step": 173770 }, { "epoch": 1.4211064316964468, "grad_norm": 0.030564090237021446, "learning_rate": 2.342220348390271e-06, "loss": 0.0007, "step": 173780 }, { "epoch": 1.421188207875046, "grad_norm": 0.023744771257042885, "learning_rate": 2.341615912789862e-06, "loss": 0.0007, "step": 173790 }, { "epoch": 1.4212699840536451, "grad_norm": 0.007069590501487255, "learning_rate": 2.3410115313428964e-06, "loss": 0.0004, "step": 173800 }, { "epoch": 1.4213517602322443, "grad_norm": 0.3177192807197571, "learning_rate": 2.340407204061688e-06, "loss": 0.0009, "step": 173810 }, { "epoch": 1.4214335364108435, "grad_norm": 0.036144670099020004, "learning_rate": 2.3398029309585447e-06, "loss": 0.0005, "step": 173820 }, { "epoch": 1.4215153125894426, "grad_norm": 0.06475530564785004, "learning_rate": 2.33919871204578e-06, "loss": 0.0006, "step": 173830 }, { "epoch": 1.4215970887680418, "grad_norm": 0.06904949992895126, "learning_rate": 2.3385945473357e-06, "loss": 0.0004, "step": 173840 }, { "epoch": 1.421678864946641, "grad_norm": 0.004559728782624006, "learning_rate": 2.3379904368406124e-06, "loss": 0.0007, "step": 173850 }, { "epoch": 1.4217606411252401, "grad_norm": 0.05712204426527023, "learning_rate": 2.3373863805728242e-06, "loss": 0.0008, "step": 173860 }, { "epoch": 1.4218424173038393, "grad_norm": 0.037301525473594666, "learning_rate": 2.336782378544639e-06, "loss": 0.0012, "step": 173870 }, { "epoch": 1.4219241934824387, "grad_norm": 0.030911969020962715, "learning_rate": 2.3361784307683614e-06, "loss": 0.0007, "step": 173880 }, { "epoch": 1.4220059696610376, "grad_norm": 0.056122951209545135, "learning_rate": 2.335574537256295e-06, "loss": 0.0008, "step": 173890 }, { "epoch": 1.422087745839637, "grad_norm": 0.0351475365459919, "learning_rate": 2.3349706980207382e-06, "loss": 0.0006, "step": 173900 }, { "epoch": 1.422169522018236, "grad_norm": 0.005089126527309418, "learning_rate": 2.334366913073997e-06, "loss": 0.0008, "step": 173910 }, { "epoch": 1.4222512981968354, "grad_norm": 0.04449397325515747, "learning_rate": 2.3337631824283683e-06, "loss": 0.0007, "step": 173920 }, { "epoch": 1.4223330743754343, "grad_norm": 0.037118904292583466, "learning_rate": 2.3331595060961516e-06, "loss": 0.0007, "step": 173930 }, { "epoch": 1.4224148505540337, "grad_norm": 0.031673673540353775, "learning_rate": 2.3325558840896406e-06, "loss": 0.001, "step": 173940 }, { "epoch": 1.4224966267326327, "grad_norm": 0.011300632730126381, "learning_rate": 2.3319523164211373e-06, "loss": 0.0006, "step": 173950 }, { "epoch": 1.422578402911232, "grad_norm": 0.16675227880477905, "learning_rate": 2.331348803102933e-06, "loss": 0.0008, "step": 173960 }, { "epoch": 1.422660179089831, "grad_norm": 0.010336405597627163, "learning_rate": 2.3307453441473233e-06, "loss": 0.0005, "step": 173970 }, { "epoch": 1.4227419552684304, "grad_norm": 0.005978373810648918, "learning_rate": 2.3301419395666004e-06, "loss": 0.0008, "step": 173980 }, { "epoch": 1.4228237314470296, "grad_norm": 0.056596480309963226, "learning_rate": 2.3295385893730566e-06, "loss": 0.0007, "step": 173990 }, { "epoch": 1.4229055076256287, "grad_norm": 0.04442818835377693, "learning_rate": 2.328935293578982e-06, "loss": 0.0005, "step": 174000 }, { "epoch": 1.422987283804228, "grad_norm": 0.07154848426580429, "learning_rate": 2.3283320521966675e-06, "loss": 0.0009, "step": 174010 }, { "epoch": 1.423069059982827, "grad_norm": 0.0014684954658150673, "learning_rate": 2.327728865238398e-06, "loss": 0.0007, "step": 174020 }, { "epoch": 1.4231508361614262, "grad_norm": 0.02994620054960251, "learning_rate": 2.327125732716466e-06, "loss": 0.0011, "step": 174030 }, { "epoch": 1.4232326123400254, "grad_norm": 0.03405230864882469, "learning_rate": 2.3265226546431554e-06, "loss": 0.0005, "step": 174040 }, { "epoch": 1.4233143885186246, "grad_norm": 0.0078055281192064285, "learning_rate": 2.325919631030751e-06, "loss": 0.0004, "step": 174050 }, { "epoch": 1.4233961646972237, "grad_norm": 0.03687021881341934, "learning_rate": 2.325316661891536e-06, "loss": 0.001, "step": 174060 }, { "epoch": 1.423477940875823, "grad_norm": 0.00842955894768238, "learning_rate": 2.3247137472377956e-06, "loss": 0.0003, "step": 174070 }, { "epoch": 1.423559717054422, "grad_norm": 0.008924522437155247, "learning_rate": 2.324110887081812e-06, "loss": 0.0003, "step": 174080 }, { "epoch": 1.4236414932330212, "grad_norm": 0.022815179079771042, "learning_rate": 2.323508081435864e-06, "loss": 0.0009, "step": 174090 }, { "epoch": 1.4237232694116204, "grad_norm": 0.10715170949697495, "learning_rate": 2.322905330312232e-06, "loss": 0.0015, "step": 174100 }, { "epoch": 1.4238050455902196, "grad_norm": 0.004038266371935606, "learning_rate": 2.3223026337231945e-06, "loss": 0.0038, "step": 174110 }, { "epoch": 1.4238868217688188, "grad_norm": 0.027140561491250992, "learning_rate": 2.321699991681029e-06, "loss": 0.0007, "step": 174120 }, { "epoch": 1.423968597947418, "grad_norm": 0.01885317638516426, "learning_rate": 2.3210974041980117e-06, "loss": 0.0011, "step": 174130 }, { "epoch": 1.424050374126017, "grad_norm": 0.022945277392864227, "learning_rate": 2.3204948712864155e-06, "loss": 0.0004, "step": 174140 }, { "epoch": 1.4241321503046163, "grad_norm": 0.0010147017892450094, "learning_rate": 2.3198923929585194e-06, "loss": 0.0009, "step": 174150 }, { "epoch": 1.4242139264832154, "grad_norm": 0.0007171044126152992, "learning_rate": 2.3192899692265935e-06, "loss": 0.0006, "step": 174160 }, { "epoch": 1.4242957026618146, "grad_norm": 0.0024848002940416336, "learning_rate": 2.3186876001029097e-06, "loss": 0.0007, "step": 174170 }, { "epoch": 1.4243774788404138, "grad_norm": 0.007079349365085363, "learning_rate": 2.318085285599737e-06, "loss": 0.0005, "step": 174180 }, { "epoch": 1.424459255019013, "grad_norm": 0.0061391741037368774, "learning_rate": 2.317483025729349e-06, "loss": 0.0008, "step": 174190 }, { "epoch": 1.424541031197612, "grad_norm": 0.02330700494349003, "learning_rate": 2.3168808205040126e-06, "loss": 0.0007, "step": 174200 }, { "epoch": 1.4246228073762113, "grad_norm": 0.02163872681558132, "learning_rate": 2.3162786699359945e-06, "loss": 0.0014, "step": 174210 }, { "epoch": 1.4247045835548104, "grad_norm": 0.03992907330393791, "learning_rate": 2.3156765740375613e-06, "loss": 0.001, "step": 174220 }, { "epoch": 1.4247863597334096, "grad_norm": 0.02411758340895176, "learning_rate": 2.3150745328209782e-06, "loss": 0.0007, "step": 174230 }, { "epoch": 1.4248681359120088, "grad_norm": 0.0740966871380806, "learning_rate": 2.3144725462985094e-06, "loss": 0.0007, "step": 174240 }, { "epoch": 1.424949912090608, "grad_norm": 0.03362877294421196, "learning_rate": 2.313870614482418e-06, "loss": 0.0005, "step": 174250 }, { "epoch": 1.4250316882692071, "grad_norm": 0.014631923288106918, "learning_rate": 2.3132687373849633e-06, "loss": 0.0005, "step": 174260 }, { "epoch": 1.4251134644478063, "grad_norm": 0.03054649569094181, "learning_rate": 2.31266691501841e-06, "loss": 0.0006, "step": 174270 }, { "epoch": 1.4251952406264055, "grad_norm": 0.15364572405815125, "learning_rate": 2.312065147395016e-06, "loss": 0.0011, "step": 174280 }, { "epoch": 1.4252770168050046, "grad_norm": 0.011980689130723476, "learning_rate": 2.3114634345270404e-06, "loss": 0.001, "step": 174290 }, { "epoch": 1.4253587929836038, "grad_norm": 0.011370858177542686, "learning_rate": 2.310861776426738e-06, "loss": 0.0006, "step": 174300 }, { "epoch": 1.4254405691622032, "grad_norm": 0.06281467527151108, "learning_rate": 2.310260173106369e-06, "loss": 0.0006, "step": 174310 }, { "epoch": 1.4255223453408021, "grad_norm": 0.010141273960471153, "learning_rate": 2.309658624578186e-06, "loss": 0.0007, "step": 174320 }, { "epoch": 1.4256041215194015, "grad_norm": 0.03898659348487854, "learning_rate": 2.3090571308544442e-06, "loss": 0.0005, "step": 174330 }, { "epoch": 1.4256858976980005, "grad_norm": 0.0034051332622766495, "learning_rate": 2.3084556919473957e-06, "loss": 0.0005, "step": 174340 }, { "epoch": 1.4257676738765999, "grad_norm": 0.04986438527703285, "learning_rate": 2.307854307869293e-06, "loss": 0.001, "step": 174350 }, { "epoch": 1.4258494500551988, "grad_norm": 0.028128856793045998, "learning_rate": 2.3072529786323857e-06, "loss": 0.0008, "step": 174360 }, { "epoch": 1.4259312262337982, "grad_norm": 0.06587781757116318, "learning_rate": 2.306651704248924e-06, "loss": 0.0005, "step": 174370 }, { "epoch": 1.4260130024123971, "grad_norm": 0.01192626915872097, "learning_rate": 2.306050484731155e-06, "loss": 0.0006, "step": 174380 }, { "epoch": 1.4260947785909965, "grad_norm": 0.014890738762915134, "learning_rate": 2.3054493200913285e-06, "loss": 0.0008, "step": 174390 }, { "epoch": 1.4261765547695955, "grad_norm": 0.01704481802880764, "learning_rate": 2.3048482103416902e-06, "loss": 0.0004, "step": 174400 }, { "epoch": 1.4262583309481949, "grad_norm": 0.05956721305847168, "learning_rate": 2.3042471554944847e-06, "loss": 0.001, "step": 174410 }, { "epoch": 1.426340107126794, "grad_norm": 0.06481577455997467, "learning_rate": 2.3036461555619556e-06, "loss": 0.0013, "step": 174420 }, { "epoch": 1.4264218833053932, "grad_norm": 0.04454248398542404, "learning_rate": 2.303045210556344e-06, "loss": 0.001, "step": 174430 }, { "epoch": 1.4265036594839924, "grad_norm": 0.002785470336675644, "learning_rate": 2.3024443204898955e-06, "loss": 0.0014, "step": 174440 }, { "epoch": 1.4265854356625915, "grad_norm": 0.02695218101143837, "learning_rate": 2.3018434853748496e-06, "loss": 0.0007, "step": 174450 }, { "epoch": 1.4266672118411907, "grad_norm": 0.040504660457372665, "learning_rate": 2.3012427052234442e-06, "loss": 0.0005, "step": 174460 }, { "epoch": 1.4267489880197899, "grad_norm": 0.032962117344141006, "learning_rate": 2.300641980047919e-06, "loss": 0.0006, "step": 174470 }, { "epoch": 1.426830764198389, "grad_norm": 0.005241753533482552, "learning_rate": 2.3000413098605108e-06, "loss": 0.0004, "step": 174480 }, { "epoch": 1.4269125403769882, "grad_norm": 0.009279610589146614, "learning_rate": 2.2994406946734555e-06, "loss": 0.0007, "step": 174490 }, { "epoch": 1.4269943165555874, "grad_norm": 0.0036973205860704184, "learning_rate": 2.2988401344989884e-06, "loss": 0.0008, "step": 174500 }, { "epoch": 1.4270760927341866, "grad_norm": 0.01505902037024498, "learning_rate": 2.298239629349342e-06, "loss": 0.0015, "step": 174510 }, { "epoch": 1.4271578689127857, "grad_norm": 0.05679437145590782, "learning_rate": 2.297639179236752e-06, "loss": 0.0007, "step": 174520 }, { "epoch": 1.427239645091385, "grad_norm": 0.059123121201992035, "learning_rate": 2.297038784173448e-06, "loss": 0.0011, "step": 174530 }, { "epoch": 1.427321421269984, "grad_norm": 0.10037653893232346, "learning_rate": 2.296438444171662e-06, "loss": 0.0013, "step": 174540 }, { "epoch": 1.4274031974485832, "grad_norm": 0.05318750813603401, "learning_rate": 2.2958381592436195e-06, "loss": 0.0007, "step": 174550 }, { "epoch": 1.4274849736271824, "grad_norm": 0.08280809223651886, "learning_rate": 2.295237929401555e-06, "loss": 0.002, "step": 174560 }, { "epoch": 1.4275667498057816, "grad_norm": 0.03520424664020538, "learning_rate": 2.2946377546576914e-06, "loss": 0.0006, "step": 174570 }, { "epoch": 1.4276485259843807, "grad_norm": 0.10674724727869034, "learning_rate": 2.294037635024256e-06, "loss": 0.0006, "step": 174580 }, { "epoch": 1.42773030216298, "grad_norm": 0.004398984834551811, "learning_rate": 2.293437570513473e-06, "loss": 0.0006, "step": 174590 }, { "epoch": 1.427812078341579, "grad_norm": 0.06135862320661545, "learning_rate": 2.2928375611375676e-06, "loss": 0.0014, "step": 174600 }, { "epoch": 1.4278938545201783, "grad_norm": 0.024101538583636284, "learning_rate": 2.292237606908761e-06, "loss": 0.0009, "step": 174610 }, { "epoch": 1.4279756306987774, "grad_norm": 0.024171723052859306, "learning_rate": 2.2916377078392753e-06, "loss": 0.0012, "step": 174620 }, { "epoch": 1.4280574068773766, "grad_norm": 0.006588793825358152, "learning_rate": 2.2910378639413293e-06, "loss": 0.0003, "step": 174630 }, { "epoch": 1.4281391830559758, "grad_norm": 0.04166385531425476, "learning_rate": 2.290438075227146e-06, "loss": 0.0009, "step": 174640 }, { "epoch": 1.428220959234575, "grad_norm": 0.024962900206446648, "learning_rate": 2.2898383417089413e-06, "loss": 0.0007, "step": 174650 }, { "epoch": 1.428302735413174, "grad_norm": 0.1335175782442093, "learning_rate": 2.289238663398933e-06, "loss": 0.002, "step": 174660 }, { "epoch": 1.4283845115917733, "grad_norm": 0.07614168524742126, "learning_rate": 2.288639040309334e-06, "loss": 0.0004, "step": 174670 }, { "epoch": 1.4284662877703724, "grad_norm": 0.016376560553908348, "learning_rate": 2.2880394724523634e-06, "loss": 0.0009, "step": 174680 }, { "epoch": 1.4285480639489716, "grad_norm": 0.10281488299369812, "learning_rate": 2.2874399598402335e-06, "loss": 0.0005, "step": 174690 }, { "epoch": 1.4286298401275708, "grad_norm": 0.02876928634941578, "learning_rate": 2.2868405024851568e-06, "loss": 0.0007, "step": 174700 }, { "epoch": 1.42871161630617, "grad_norm": 0.04532277211546898, "learning_rate": 2.286241100399344e-06, "loss": 0.0013, "step": 174710 }, { "epoch": 1.428793392484769, "grad_norm": 0.003406137926504016, "learning_rate": 2.285641753595006e-06, "loss": 0.0005, "step": 174720 }, { "epoch": 1.4288751686633683, "grad_norm": 0.10673455148935318, "learning_rate": 2.285042462084352e-06, "loss": 0.0004, "step": 174730 }, { "epoch": 1.4289569448419677, "grad_norm": 0.03225692734122276, "learning_rate": 2.2844432258795896e-06, "loss": 0.0006, "step": 174740 }, { "epoch": 1.4290387210205666, "grad_norm": 0.0032584189902991056, "learning_rate": 2.2838440449929244e-06, "loss": 0.0017, "step": 174750 }, { "epoch": 1.429120497199166, "grad_norm": 0.08700167387723923, "learning_rate": 2.283244919436565e-06, "loss": 0.001, "step": 174760 }, { "epoch": 1.429202273377765, "grad_norm": 0.043430302292108536, "learning_rate": 2.282645849222716e-06, "loss": 0.0004, "step": 174770 }, { "epoch": 1.4292840495563643, "grad_norm": 0.0012190957786515355, "learning_rate": 2.282046834363579e-06, "loss": 0.0009, "step": 174780 }, { "epoch": 1.4293658257349633, "grad_norm": 0.005203730892390013, "learning_rate": 2.281447874871355e-06, "loss": 0.0007, "step": 174790 }, { "epoch": 1.4294476019135627, "grad_norm": 0.005695822648704052, "learning_rate": 2.2808489707582506e-06, "loss": 0.0012, "step": 174800 }, { "epoch": 1.4295293780921616, "grad_norm": 0.010672337375581264, "learning_rate": 2.280250122036462e-06, "loss": 0.0006, "step": 174810 }, { "epoch": 1.429611154270761, "grad_norm": 0.02579832263290882, "learning_rate": 2.27965132871819e-06, "loss": 0.0008, "step": 174820 }, { "epoch": 1.42969293044936, "grad_norm": 0.04434775933623314, "learning_rate": 2.279052590815631e-06, "loss": 0.0009, "step": 174830 }, { "epoch": 1.4297747066279594, "grad_norm": 0.005236952565610409, "learning_rate": 2.2784539083409824e-06, "loss": 0.0009, "step": 174840 }, { "epoch": 1.4298564828065585, "grad_norm": 0.221272274851799, "learning_rate": 2.2778552813064397e-06, "loss": 0.0014, "step": 174850 }, { "epoch": 1.4299382589851577, "grad_norm": 0.014871817082166672, "learning_rate": 2.277256709724198e-06, "loss": 0.0008, "step": 174860 }, { "epoch": 1.4300200351637569, "grad_norm": 0.021255426108837128, "learning_rate": 2.276658193606448e-06, "loss": 0.0005, "step": 174870 }, { "epoch": 1.430101811342356, "grad_norm": 0.020845923572778702, "learning_rate": 2.2760597329653865e-06, "loss": 0.0005, "step": 174880 }, { "epoch": 1.4301835875209552, "grad_norm": 0.022046968340873718, "learning_rate": 2.2754613278132025e-06, "loss": 0.0006, "step": 174890 }, { "epoch": 1.4302653636995544, "grad_norm": 0.05301737040281296, "learning_rate": 2.274862978162086e-06, "loss": 0.0014, "step": 174900 }, { "epoch": 1.4303471398781535, "grad_norm": 0.17464381456375122, "learning_rate": 2.2742646840242232e-06, "loss": 0.0014, "step": 174910 }, { "epoch": 1.4304289160567527, "grad_norm": 0.0817047730088234, "learning_rate": 2.2736664454118063e-06, "loss": 0.0019, "step": 174920 }, { "epoch": 1.4305106922353519, "grad_norm": 0.03521396592259407, "learning_rate": 2.27306826233702e-06, "loss": 0.0018, "step": 174930 }, { "epoch": 1.430592468413951, "grad_norm": 0.010086790658533573, "learning_rate": 2.27247013481205e-06, "loss": 0.0009, "step": 174940 }, { "epoch": 1.4306742445925502, "grad_norm": 0.05447093024849892, "learning_rate": 2.271872062849081e-06, "loss": 0.0007, "step": 174950 }, { "epoch": 1.4307560207711494, "grad_norm": 0.03312998265028, "learning_rate": 2.2712740464602945e-06, "loss": 0.0006, "step": 174960 }, { "epoch": 1.4308377969497486, "grad_norm": 0.07409726083278656, "learning_rate": 2.270676085657874e-06, "loss": 0.0008, "step": 174970 }, { "epoch": 1.4309195731283477, "grad_norm": 0.009424986317753792, "learning_rate": 2.2700781804540005e-06, "loss": 0.0007, "step": 174980 }, { "epoch": 1.431001349306947, "grad_norm": 0.05501477047801018, "learning_rate": 2.2694803308608515e-06, "loss": 0.0007, "step": 174990 }, { "epoch": 1.431083125485546, "grad_norm": 0.0036254271399229765, "learning_rate": 2.268882536890609e-06, "loss": 0.0006, "step": 175000 }, { "epoch": 1.4311649016641452, "grad_norm": 0.013455628417432308, "learning_rate": 2.26828479855545e-06, "loss": 0.0005, "step": 175010 }, { "epoch": 1.4312466778427444, "grad_norm": 0.032152559608221054, "learning_rate": 2.2676871158675496e-06, "loss": 0.0007, "step": 175020 }, { "epoch": 1.4313284540213436, "grad_norm": 0.05504346638917923, "learning_rate": 2.267089488839082e-06, "loss": 0.0009, "step": 175030 }, { "epoch": 1.4314102301999427, "grad_norm": 0.0019245913717895746, "learning_rate": 2.2664919174822248e-06, "loss": 0.0006, "step": 175040 }, { "epoch": 1.431492006378542, "grad_norm": 0.05072946101427078, "learning_rate": 2.265894401809149e-06, "loss": 0.0012, "step": 175050 }, { "epoch": 1.431573782557141, "grad_norm": 0.04908857122063637, "learning_rate": 2.2652969418320264e-06, "loss": 0.0005, "step": 175060 }, { "epoch": 1.4316555587357402, "grad_norm": 0.015651971101760864, "learning_rate": 2.2646995375630283e-06, "loss": 0.0006, "step": 175070 }, { "epoch": 1.4317373349143394, "grad_norm": 0.010118327103555202, "learning_rate": 2.264102189014324e-06, "loss": 0.0005, "step": 175080 }, { "epoch": 1.4318191110929386, "grad_norm": 0.005005374550819397, "learning_rate": 2.2635048961980816e-06, "loss": 0.0008, "step": 175090 }, { "epoch": 1.4319008872715377, "grad_norm": 0.008145181462168694, "learning_rate": 2.262907659126469e-06, "loss": 0.0007, "step": 175100 }, { "epoch": 1.431982663450137, "grad_norm": 0.023116955533623695, "learning_rate": 2.2623104778116504e-06, "loss": 0.0006, "step": 175110 }, { "epoch": 1.432064439628736, "grad_norm": 0.001976237166672945, "learning_rate": 2.2617133522657947e-06, "loss": 0.0006, "step": 175120 }, { "epoch": 1.4321462158073353, "grad_norm": 0.04317135736346245, "learning_rate": 2.2611162825010634e-06, "loss": 0.0005, "step": 175130 }, { "epoch": 1.4322279919859344, "grad_norm": 0.0855114758014679, "learning_rate": 2.2605192685296203e-06, "loss": 0.0007, "step": 175140 }, { "epoch": 1.4323097681645336, "grad_norm": 0.0012463986640796065, "learning_rate": 2.2599223103636236e-06, "loss": 0.0007, "step": 175150 }, { "epoch": 1.4323915443431328, "grad_norm": 0.020647846162319183, "learning_rate": 2.2593254080152394e-06, "loss": 0.0011, "step": 175160 }, { "epoch": 1.4324733205217322, "grad_norm": 0.12285365909337997, "learning_rate": 2.258728561496624e-06, "loss": 0.0012, "step": 175170 }, { "epoch": 1.432555096700331, "grad_norm": 0.13721247017383575, "learning_rate": 2.2581317708199362e-06, "loss": 0.0013, "step": 175180 }, { "epoch": 1.4326368728789305, "grad_norm": 0.03799499571323395, "learning_rate": 2.257535035997333e-06, "loss": 0.0032, "step": 175190 }, { "epoch": 1.4327186490575294, "grad_norm": 0.10606853663921356, "learning_rate": 2.25693835704097e-06, "loss": 0.0008, "step": 175200 }, { "epoch": 1.4328004252361288, "grad_norm": 0.02509034052491188, "learning_rate": 2.2563417339630027e-06, "loss": 0.0006, "step": 175210 }, { "epoch": 1.4328822014147278, "grad_norm": 0.012847734615206718, "learning_rate": 2.255745166775584e-06, "loss": 0.0006, "step": 175220 }, { "epoch": 1.4329639775933272, "grad_norm": 0.060193151235580444, "learning_rate": 2.255148655490866e-06, "loss": 0.0009, "step": 175230 }, { "epoch": 1.4330457537719261, "grad_norm": 0.1409347951412201, "learning_rate": 2.254552200121002e-06, "loss": 0.0004, "step": 175240 }, { "epoch": 1.4331275299505255, "grad_norm": 0.0008351401775144041, "learning_rate": 2.2539558006781415e-06, "loss": 0.0009, "step": 175250 }, { "epoch": 1.4332093061291244, "grad_norm": 0.042298849672079086, "learning_rate": 2.2533594571744334e-06, "loss": 0.0007, "step": 175260 }, { "epoch": 1.4332910823077238, "grad_norm": 0.004694643430411816, "learning_rate": 2.2527631696220237e-06, "loss": 0.0006, "step": 175270 }, { "epoch": 1.433372858486323, "grad_norm": 0.011829569935798645, "learning_rate": 2.2521669380330634e-06, "loss": 0.001, "step": 175280 }, { "epoch": 1.4334546346649222, "grad_norm": 0.017987681552767754, "learning_rate": 2.2515707624196957e-06, "loss": 0.001, "step": 175290 }, { "epoch": 1.4335364108435213, "grad_norm": 0.07898564636707306, "learning_rate": 2.250974642794066e-06, "loss": 0.0009, "step": 175300 }, { "epoch": 1.4336181870221205, "grad_norm": 0.034064456820487976, "learning_rate": 2.2503785791683173e-06, "loss": 0.0009, "step": 175310 }, { "epoch": 1.4336999632007197, "grad_norm": 0.06982946395874023, "learning_rate": 2.2497825715545923e-06, "loss": 0.0005, "step": 175320 }, { "epoch": 1.4337817393793189, "grad_norm": 0.0015860318671911955, "learning_rate": 2.2491866199650313e-06, "loss": 0.0002, "step": 175330 }, { "epoch": 1.433863515557918, "grad_norm": 0.04348995164036751, "learning_rate": 2.248590724411775e-06, "loss": 0.0012, "step": 175340 }, { "epoch": 1.4339452917365172, "grad_norm": 0.003441723296418786, "learning_rate": 2.2479948849069605e-06, "loss": 0.0005, "step": 175350 }, { "epoch": 1.4340270679151164, "grad_norm": 0.0032528885640203953, "learning_rate": 2.2473991014627285e-06, "loss": 0.0011, "step": 175360 }, { "epoch": 1.4341088440937155, "grad_norm": 0.036549557000398636, "learning_rate": 2.2468033740912144e-06, "loss": 0.0007, "step": 175370 }, { "epoch": 1.4341906202723147, "grad_norm": 0.028195088729262352, "learning_rate": 2.246207702804554e-06, "loss": 0.0011, "step": 175380 }, { "epoch": 1.4342723964509139, "grad_norm": 0.040529049932956696, "learning_rate": 2.2456120876148786e-06, "loss": 0.0005, "step": 175390 }, { "epoch": 1.434354172629513, "grad_norm": 0.015002386644482613, "learning_rate": 2.2450165285343257e-06, "loss": 0.0008, "step": 175400 }, { "epoch": 1.4344359488081122, "grad_norm": 0.004631529096513987, "learning_rate": 2.244421025575026e-06, "loss": 0.0007, "step": 175410 }, { "epoch": 1.4345177249867114, "grad_norm": 0.02340664155781269, "learning_rate": 2.2438255787491094e-06, "loss": 0.0013, "step": 175420 }, { "epoch": 1.4345995011653105, "grad_norm": 0.010946417227387428, "learning_rate": 2.2432301880687064e-06, "loss": 0.0011, "step": 175430 }, { "epoch": 1.4346812773439097, "grad_norm": 0.05596794933080673, "learning_rate": 2.2426348535459445e-06, "loss": 0.001, "step": 175440 }, { "epoch": 1.4347630535225089, "grad_norm": 0.05107608437538147, "learning_rate": 2.242039575192953e-06, "loss": 0.0007, "step": 175450 }, { "epoch": 1.434844829701108, "grad_norm": 0.050676364451646805, "learning_rate": 2.2414443530218565e-06, "loss": 0.0006, "step": 175460 }, { "epoch": 1.4349266058797072, "grad_norm": 0.022563621401786804, "learning_rate": 2.240849187044779e-06, "loss": 0.0008, "step": 175470 }, { "epoch": 1.4350083820583064, "grad_norm": 0.039358630776405334, "learning_rate": 2.2402540772738484e-06, "loss": 0.0004, "step": 175480 }, { "epoch": 1.4350901582369056, "grad_norm": 0.06603439152240753, "learning_rate": 2.239659023721186e-06, "loss": 0.0012, "step": 175490 }, { "epoch": 1.4351719344155047, "grad_norm": 0.03573505952954292, "learning_rate": 2.2390640263989115e-06, "loss": 0.0007, "step": 175500 }, { "epoch": 1.435253710594104, "grad_norm": 0.006290024146437645, "learning_rate": 2.2384690853191466e-06, "loss": 0.0019, "step": 175510 }, { "epoch": 1.435335486772703, "grad_norm": 0.016307266429066658, "learning_rate": 2.237874200494012e-06, "loss": 0.0005, "step": 175520 }, { "epoch": 1.4354172629513022, "grad_norm": 0.029927197843790054, "learning_rate": 2.237279371935626e-06, "loss": 0.0005, "step": 175530 }, { "epoch": 1.4354990391299014, "grad_norm": 0.015537469647824764, "learning_rate": 2.236684599656104e-06, "loss": 0.0009, "step": 175540 }, { "epoch": 1.4355808153085006, "grad_norm": 0.057834625244140625, "learning_rate": 2.236089883667563e-06, "loss": 0.0005, "step": 175550 }, { "epoch": 1.4356625914870997, "grad_norm": 0.01625809632241726, "learning_rate": 2.2354952239821184e-06, "loss": 0.0012, "step": 175560 }, { "epoch": 1.435744367665699, "grad_norm": 0.007064528297632933, "learning_rate": 2.2349006206118823e-06, "loss": 0.0013, "step": 175570 }, { "epoch": 1.435826143844298, "grad_norm": 0.042556047439575195, "learning_rate": 2.234306073568968e-06, "loss": 0.001, "step": 175580 }, { "epoch": 1.4359079200228972, "grad_norm": 0.004396958276629448, "learning_rate": 2.2337115828654857e-06, "loss": 0.0004, "step": 175590 }, { "epoch": 1.4359896962014966, "grad_norm": 0.05176689475774765, "learning_rate": 2.2331171485135495e-06, "loss": 0.0006, "step": 175600 }, { "epoch": 1.4360714723800956, "grad_norm": 0.007074086926877499, "learning_rate": 2.232522770525265e-06, "loss": 0.0006, "step": 175610 }, { "epoch": 1.436153248558695, "grad_norm": 0.04670470207929611, "learning_rate": 2.231928448912742e-06, "loss": 0.001, "step": 175620 }, { "epoch": 1.436235024737294, "grad_norm": 0.011348030529916286, "learning_rate": 2.2313341836880836e-06, "loss": 0.0008, "step": 175630 }, { "epoch": 1.4363168009158933, "grad_norm": 0.02733580395579338, "learning_rate": 2.230739974863401e-06, "loss": 0.001, "step": 175640 }, { "epoch": 1.4363985770944923, "grad_norm": 0.006826617289334536, "learning_rate": 2.230145822450796e-06, "loss": 0.0019, "step": 175650 }, { "epoch": 1.4364803532730916, "grad_norm": 0.010265973396599293, "learning_rate": 2.229551726462372e-06, "loss": 0.0005, "step": 175660 }, { "epoch": 1.4365621294516906, "grad_norm": 0.04364737868309021, "learning_rate": 2.2289576869102307e-06, "loss": 0.0005, "step": 175670 }, { "epoch": 1.43664390563029, "grad_norm": 0.03877769038081169, "learning_rate": 2.2283637038064747e-06, "loss": 0.0009, "step": 175680 }, { "epoch": 1.436725681808889, "grad_norm": 0.036078501492738724, "learning_rate": 2.227769777163202e-06, "loss": 0.0009, "step": 175690 }, { "epoch": 1.4368074579874883, "grad_norm": 0.01448856946080923, "learning_rate": 2.227175906992513e-06, "loss": 0.0015, "step": 175700 }, { "epoch": 1.4368892341660875, "grad_norm": 0.15278072655200958, "learning_rate": 2.226582093306503e-06, "loss": 0.0017, "step": 175710 }, { "epoch": 1.4369710103446867, "grad_norm": 0.039044201374053955, "learning_rate": 2.2259883361172713e-06, "loss": 0.0147, "step": 175720 }, { "epoch": 1.4370527865232858, "grad_norm": 0.022906018421053886, "learning_rate": 2.2253946354369123e-06, "loss": 0.0007, "step": 175730 }, { "epoch": 1.437134562701885, "grad_norm": 0.04928571730852127, "learning_rate": 2.22480099127752e-06, "loss": 0.0009, "step": 175740 }, { "epoch": 1.4372163388804842, "grad_norm": 0.047849372029304504, "learning_rate": 2.224207403651185e-06, "loss": 0.0007, "step": 175750 }, { "epoch": 1.4372981150590833, "grad_norm": 0.009532773867249489, "learning_rate": 2.223613872570004e-06, "loss": 0.0007, "step": 175760 }, { "epoch": 1.4373798912376825, "grad_norm": 0.021326037123799324, "learning_rate": 2.2230203980460644e-06, "loss": 0.0012, "step": 175770 }, { "epoch": 1.4374616674162817, "grad_norm": 0.04719824343919754, "learning_rate": 2.2224269800914573e-06, "loss": 0.0039, "step": 175780 }, { "epoch": 1.4375434435948808, "grad_norm": 0.001048174686729908, "learning_rate": 2.2218336187182698e-06, "loss": 0.0005, "step": 175790 }, { "epoch": 1.43762521977348, "grad_norm": 0.009677003137767315, "learning_rate": 2.22124031393859e-06, "loss": 0.0011, "step": 175800 }, { "epoch": 1.4377069959520792, "grad_norm": 0.03154530003666878, "learning_rate": 2.2206470657645035e-06, "loss": 0.0002, "step": 175810 }, { "epoch": 1.4377887721306783, "grad_norm": 0.03751775249838829, "learning_rate": 2.2200538742080956e-06, "loss": 0.0008, "step": 175820 }, { "epoch": 1.4378705483092775, "grad_norm": 0.0495762899518013, "learning_rate": 2.21946073928145e-06, "loss": 0.001, "step": 175830 }, { "epoch": 1.4379523244878767, "grad_norm": 0.02137824147939682, "learning_rate": 2.218867660996647e-06, "loss": 0.0018, "step": 175840 }, { "epoch": 1.4380341006664759, "grad_norm": 0.027100924402475357, "learning_rate": 2.218274639365773e-06, "loss": 0.0012, "step": 175850 }, { "epoch": 1.438115876845075, "grad_norm": 0.009071093052625656, "learning_rate": 2.217681674400905e-06, "loss": 0.0003, "step": 175860 }, { "epoch": 1.4381976530236742, "grad_norm": 0.05615229159593582, "learning_rate": 2.2170887661141233e-06, "loss": 0.0009, "step": 175870 }, { "epoch": 1.4382794292022734, "grad_norm": 0.016999738290905952, "learning_rate": 2.216495914517503e-06, "loss": 0.0005, "step": 175880 }, { "epoch": 1.4383612053808725, "grad_norm": 0.06066196784377098, "learning_rate": 2.2159031196231255e-06, "loss": 0.0017, "step": 175890 }, { "epoch": 1.4384429815594717, "grad_norm": 0.025416621938347816, "learning_rate": 2.215310381443067e-06, "loss": 0.0004, "step": 175900 }, { "epoch": 1.4385247577380709, "grad_norm": 0.061333779245615005, "learning_rate": 2.2147176999893973e-06, "loss": 0.0009, "step": 175910 }, { "epoch": 1.43860653391667, "grad_norm": 0.06815402954816818, "learning_rate": 2.2141250752741898e-06, "loss": 0.0007, "step": 175920 }, { "epoch": 1.4386883100952692, "grad_norm": 0.009154209867119789, "learning_rate": 2.2135325073095214e-06, "loss": 0.0005, "step": 175930 }, { "epoch": 1.4387700862738684, "grad_norm": 0.05831397697329521, "learning_rate": 2.2129399961074604e-06, "loss": 0.0011, "step": 175940 }, { "epoch": 1.4388518624524675, "grad_norm": 0.024112503975629807, "learning_rate": 2.2123475416800776e-06, "loss": 0.0007, "step": 175950 }, { "epoch": 1.4389336386310667, "grad_norm": 0.02538229152560234, "learning_rate": 2.211755144039439e-06, "loss": 0.0009, "step": 175960 }, { "epoch": 1.4390154148096659, "grad_norm": 0.05704252049326897, "learning_rate": 2.211162803197617e-06, "loss": 0.0004, "step": 175970 }, { "epoch": 1.439097190988265, "grad_norm": 0.04562309384346008, "learning_rate": 2.210570519166675e-06, "loss": 0.0005, "step": 175980 }, { "epoch": 1.4391789671668642, "grad_norm": 0.08435291051864624, "learning_rate": 2.209978291958679e-06, "loss": 0.0004, "step": 175990 }, { "epoch": 1.4392607433454634, "grad_norm": 0.005295808892697096, "learning_rate": 2.209386121585691e-06, "loss": 0.0016, "step": 176000 }, { "epoch": 1.4393425195240626, "grad_norm": 0.0037171640433371067, "learning_rate": 2.208794008059778e-06, "loss": 0.0006, "step": 176010 }, { "epoch": 1.4394242957026617, "grad_norm": 0.021452302113175392, "learning_rate": 2.2082019513930016e-06, "loss": 0.0005, "step": 176020 }, { "epoch": 1.4395060718812611, "grad_norm": 0.04769699648022652, "learning_rate": 2.207609951597419e-06, "loss": 0.0017, "step": 176030 }, { "epoch": 1.43958784805986, "grad_norm": 0.006216300651431084, "learning_rate": 2.2070180086850895e-06, "loss": 0.0007, "step": 176040 }, { "epoch": 1.4396696242384595, "grad_norm": 0.005633519496768713, "learning_rate": 2.2064261226680755e-06, "loss": 0.0006, "step": 176050 }, { "epoch": 1.4397514004170584, "grad_norm": 0.02499326877295971, "learning_rate": 2.205834293558432e-06, "loss": 0.0006, "step": 176060 }, { "epoch": 1.4398331765956578, "grad_norm": 0.04104236513376236, "learning_rate": 2.2052425213682154e-06, "loss": 0.0008, "step": 176070 }, { "epoch": 1.4399149527742567, "grad_norm": 0.044609252363443375, "learning_rate": 2.204650806109478e-06, "loss": 0.0007, "step": 176080 }, { "epoch": 1.4399967289528561, "grad_norm": 0.030758392065763474, "learning_rate": 2.204059147794278e-06, "loss": 0.0008, "step": 176090 }, { "epoch": 1.440078505131455, "grad_norm": 0.03323765844106674, "learning_rate": 2.203467546434666e-06, "loss": 0.0005, "step": 176100 }, { "epoch": 1.4401602813100545, "grad_norm": 0.026217086240649223, "learning_rate": 2.2028760020426927e-06, "loss": 0.0008, "step": 176110 }, { "epoch": 1.4402420574886536, "grad_norm": 0.009753229096531868, "learning_rate": 2.2022845146304072e-06, "loss": 0.0007, "step": 176120 }, { "epoch": 1.4403238336672528, "grad_norm": 0.018890149891376495, "learning_rate": 2.201693084209862e-06, "loss": 0.0008, "step": 176130 }, { "epoch": 1.440405609845852, "grad_norm": 0.06032015383243561, "learning_rate": 2.2011017107931054e-06, "loss": 0.001, "step": 176140 }, { "epoch": 1.4404873860244511, "grad_norm": 0.03570428863167763, "learning_rate": 2.2005103943921802e-06, "loss": 0.0007, "step": 176150 }, { "epoch": 1.4405691622030503, "grad_norm": 0.012106853537261486, "learning_rate": 2.199919135019132e-06, "loss": 0.001, "step": 176160 }, { "epoch": 1.4406509383816495, "grad_norm": 0.004116707015782595, "learning_rate": 2.1993279326860085e-06, "loss": 0.0004, "step": 176170 }, { "epoch": 1.4407327145602487, "grad_norm": 0.043184470385313034, "learning_rate": 2.1987367874048516e-06, "loss": 0.001, "step": 176180 }, { "epoch": 1.4408144907388478, "grad_norm": 0.036623649299144745, "learning_rate": 2.198145699187704e-06, "loss": 0.0007, "step": 176190 }, { "epoch": 1.440896266917447, "grad_norm": 0.03019389696419239, "learning_rate": 2.197554668046603e-06, "loss": 0.0013, "step": 176200 }, { "epoch": 1.4409780430960462, "grad_norm": 0.045444633811712265, "learning_rate": 2.196963693993594e-06, "loss": 0.0006, "step": 176210 }, { "epoch": 1.4410598192746453, "grad_norm": 0.008384882472455502, "learning_rate": 2.1963727770407127e-06, "loss": 0.0015, "step": 176220 }, { "epoch": 1.4411415954532445, "grad_norm": 0.09598574787378311, "learning_rate": 2.1957819171999965e-06, "loss": 0.0008, "step": 176230 }, { "epoch": 1.4412233716318437, "grad_norm": 0.04039141163229942, "learning_rate": 2.195191114483481e-06, "loss": 0.0009, "step": 176240 }, { "epoch": 1.4413051478104428, "grad_norm": 0.06115122511982918, "learning_rate": 2.1946003689032037e-06, "loss": 0.0017, "step": 176250 }, { "epoch": 1.441386923989042, "grad_norm": 0.041318129748106, "learning_rate": 2.194009680471199e-06, "loss": 0.0004, "step": 176260 }, { "epoch": 1.4414687001676412, "grad_norm": 0.035864293575286865, "learning_rate": 2.193419049199496e-06, "loss": 0.0011, "step": 176270 }, { "epoch": 1.4415504763462403, "grad_norm": 0.035360187292099, "learning_rate": 2.1928284751001265e-06, "loss": 0.0008, "step": 176280 }, { "epoch": 1.4416322525248395, "grad_norm": 0.004626816138625145, "learning_rate": 2.1922379581851248e-06, "loss": 0.0012, "step": 176290 }, { "epoch": 1.4417140287034387, "grad_norm": 0.17168790102005005, "learning_rate": 2.1916474984665183e-06, "loss": 0.0004, "step": 176300 }, { "epoch": 1.4417958048820378, "grad_norm": 0.017762834206223488, "learning_rate": 2.191057095956335e-06, "loss": 0.0014, "step": 176310 }, { "epoch": 1.441877581060637, "grad_norm": 0.006910145748406649, "learning_rate": 2.1904667506665996e-06, "loss": 0.0005, "step": 176320 }, { "epoch": 1.4419593572392362, "grad_norm": 0.06348259747028351, "learning_rate": 2.1898764626093423e-06, "loss": 0.0019, "step": 176330 }, { "epoch": 1.4420411334178354, "grad_norm": 0.020570721477270126, "learning_rate": 2.189286231796586e-06, "loss": 0.001, "step": 176340 }, { "epoch": 1.4421229095964345, "grad_norm": 0.1583591252565384, "learning_rate": 2.1886960582403534e-06, "loss": 0.001, "step": 176350 }, { "epoch": 1.4422046857750337, "grad_norm": 0.1152181401848793, "learning_rate": 2.1881059419526653e-06, "loss": 0.0011, "step": 176360 }, { "epoch": 1.4422864619536329, "grad_norm": 0.07683414220809937, "learning_rate": 2.1875158829455467e-06, "loss": 0.0011, "step": 176370 }, { "epoch": 1.442368238132232, "grad_norm": 0.04188907891511917, "learning_rate": 2.186925881231017e-06, "loss": 0.0006, "step": 176380 }, { "epoch": 1.4424500143108312, "grad_norm": 0.002093131421133876, "learning_rate": 2.186335936821093e-06, "loss": 0.0008, "step": 176390 }, { "epoch": 1.4425317904894304, "grad_norm": 0.03912007063627243, "learning_rate": 2.1857460497277904e-06, "loss": 0.0014, "step": 176400 }, { "epoch": 1.4426135666680295, "grad_norm": 0.05062735080718994, "learning_rate": 2.1851562199631306e-06, "loss": 0.0007, "step": 176410 }, { "epoch": 1.4426953428466287, "grad_norm": 0.059143293648958206, "learning_rate": 2.184566447539126e-06, "loss": 0.001, "step": 176420 }, { "epoch": 1.4427771190252279, "grad_norm": 0.08462466299533844, "learning_rate": 2.183976732467792e-06, "loss": 0.0013, "step": 176430 }, { "epoch": 1.4428588952038273, "grad_norm": 0.15474165976047516, "learning_rate": 2.1833870747611383e-06, "loss": 0.0008, "step": 176440 }, { "epoch": 1.4429406713824262, "grad_norm": 0.020408807322382927, "learning_rate": 2.182797474431182e-06, "loss": 0.001, "step": 176450 }, { "epoch": 1.4430224475610256, "grad_norm": 0.030045971274375916, "learning_rate": 2.1822079314899308e-06, "loss": 0.0009, "step": 176460 }, { "epoch": 1.4431042237396245, "grad_norm": 0.04372543469071388, "learning_rate": 2.1816184459493942e-06, "loss": 0.0006, "step": 176470 }, { "epoch": 1.443185999918224, "grad_norm": 0.014819507487118244, "learning_rate": 2.1810290178215793e-06, "loss": 0.0004, "step": 176480 }, { "epoch": 1.4432677760968229, "grad_norm": 0.009364007040858269, "learning_rate": 2.1804396471184964e-06, "loss": 0.0009, "step": 176490 }, { "epoch": 1.4433495522754223, "grad_norm": 0.03033747896552086, "learning_rate": 2.1798503338521514e-06, "loss": 0.0006, "step": 176500 }, { "epoch": 1.4434313284540212, "grad_norm": 0.007842788472771645, "learning_rate": 2.179261078034546e-06, "loss": 0.0008, "step": 176510 }, { "epoch": 1.4435131046326206, "grad_norm": 0.025364570319652557, "learning_rate": 2.178671879677683e-06, "loss": 0.0006, "step": 176520 }, { "epoch": 1.4435948808112196, "grad_norm": 0.04706142842769623, "learning_rate": 2.178082738793569e-06, "loss": 0.0005, "step": 176530 }, { "epoch": 1.443676656989819, "grad_norm": 0.01850898005068302, "learning_rate": 2.1774936553942035e-06, "loss": 0.0008, "step": 176540 }, { "epoch": 1.4437584331684181, "grad_norm": 0.01360732689499855, "learning_rate": 2.176904629491586e-06, "loss": 0.0006, "step": 176550 }, { "epoch": 1.4438402093470173, "grad_norm": 0.09182597696781158, "learning_rate": 2.1763156610977148e-06, "loss": 0.001, "step": 176560 }, { "epoch": 1.4439219855256165, "grad_norm": 0.009535972960293293, "learning_rate": 2.1757267502245904e-06, "loss": 0.0008, "step": 176570 }, { "epoch": 1.4440037617042156, "grad_norm": 0.011464639566838741, "learning_rate": 2.175137896884208e-06, "loss": 0.001, "step": 176580 }, { "epoch": 1.4440855378828148, "grad_norm": 0.05039876326918602, "learning_rate": 2.174549101088562e-06, "loss": 0.0009, "step": 176590 }, { "epoch": 1.444167314061414, "grad_norm": 0.009755718521773815, "learning_rate": 2.1739603628496463e-06, "loss": 0.0011, "step": 176600 }, { "epoch": 1.4442490902400131, "grad_norm": 0.019923511892557144, "learning_rate": 2.173371682179457e-06, "loss": 0.0006, "step": 176610 }, { "epoch": 1.4443308664186123, "grad_norm": 0.038807280361652374, "learning_rate": 2.172783059089986e-06, "loss": 0.0009, "step": 176620 }, { "epoch": 1.4444126425972115, "grad_norm": 0.08213290572166443, "learning_rate": 2.17219449359322e-06, "loss": 0.0008, "step": 176630 }, { "epoch": 1.4444944187758106, "grad_norm": 0.04323698952794075, "learning_rate": 2.171605985701149e-06, "loss": 0.0005, "step": 176640 }, { "epoch": 1.4445761949544098, "grad_norm": 0.026267897337675095, "learning_rate": 2.171017535425765e-06, "loss": 0.0009, "step": 176650 }, { "epoch": 1.444657971133009, "grad_norm": 0.017724666744470596, "learning_rate": 2.1704291427790535e-06, "loss": 0.0008, "step": 176660 }, { "epoch": 1.4447397473116081, "grad_norm": 0.03977000713348389, "learning_rate": 2.169840807773e-06, "loss": 0.0005, "step": 176670 }, { "epoch": 1.4448215234902073, "grad_norm": 0.001597634400241077, "learning_rate": 2.1692525304195885e-06, "loss": 0.001, "step": 176680 }, { "epoch": 1.4449032996688065, "grad_norm": 0.0470711886882782, "learning_rate": 2.168664310730805e-06, "loss": 0.0008, "step": 176690 }, { "epoch": 1.4449850758474057, "grad_norm": 0.03185565024614334, "learning_rate": 2.1680761487186307e-06, "loss": 0.0011, "step": 176700 }, { "epoch": 1.4450668520260048, "grad_norm": 0.011906910687685013, "learning_rate": 2.167488044395048e-06, "loss": 0.0004, "step": 176710 }, { "epoch": 1.445148628204604, "grad_norm": 0.0157797709107399, "learning_rate": 2.166899997772034e-06, "loss": 0.0007, "step": 176720 }, { "epoch": 1.4452304043832032, "grad_norm": 0.05572570115327835, "learning_rate": 2.1663120088615717e-06, "loss": 0.0007, "step": 176730 }, { "epoch": 1.4453121805618023, "grad_norm": 0.03086988627910614, "learning_rate": 2.1657240776756386e-06, "loss": 0.001, "step": 176740 }, { "epoch": 1.4453939567404015, "grad_norm": 0.019758474081754684, "learning_rate": 2.1651362042262085e-06, "loss": 0.0007, "step": 176750 }, { "epoch": 1.4454757329190007, "grad_norm": 0.0519479364156723, "learning_rate": 2.1645483885252565e-06, "loss": 0.0016, "step": 176760 }, { "epoch": 1.4455575090975998, "grad_norm": 0.009197331964969635, "learning_rate": 2.1639606305847603e-06, "loss": 0.0015, "step": 176770 }, { "epoch": 1.445639285276199, "grad_norm": 0.06560327112674713, "learning_rate": 2.1633729304166913e-06, "loss": 0.001, "step": 176780 }, { "epoch": 1.4457210614547982, "grad_norm": 0.012370815500617027, "learning_rate": 2.1627852880330214e-06, "loss": 0.0007, "step": 176790 }, { "epoch": 1.4458028376333973, "grad_norm": 0.20009131729602814, "learning_rate": 2.1621977034457194e-06, "loss": 0.0011, "step": 176800 }, { "epoch": 1.4458846138119965, "grad_norm": 0.007319778203964233, "learning_rate": 2.161610176666759e-06, "loss": 0.0004, "step": 176810 }, { "epoch": 1.4459663899905957, "grad_norm": 0.008532156236469746, "learning_rate": 2.1610227077081065e-06, "loss": 0.0006, "step": 176820 }, { "epoch": 1.4460481661691948, "grad_norm": 0.004185077268630266, "learning_rate": 2.160435296581729e-06, "loss": 0.0005, "step": 176830 }, { "epoch": 1.446129942347794, "grad_norm": 0.014183614403009415, "learning_rate": 2.15984794329959e-06, "loss": 0.0008, "step": 176840 }, { "epoch": 1.4462117185263932, "grad_norm": 0.07851988822221756, "learning_rate": 2.1592606478736593e-06, "loss": 0.0005, "step": 176850 }, { "epoch": 1.4462934947049924, "grad_norm": 0.0669412761926651, "learning_rate": 2.1586734103159006e-06, "loss": 0.0006, "step": 176860 }, { "epoch": 1.4463752708835917, "grad_norm": 0.0021529924124479294, "learning_rate": 2.1580862306382723e-06, "loss": 0.0008, "step": 176870 }, { "epoch": 1.4464570470621907, "grad_norm": 0.08722443878650665, "learning_rate": 2.1574991088527346e-06, "loss": 0.0006, "step": 176880 }, { "epoch": 1.44653882324079, "grad_norm": 0.020709658041596413, "learning_rate": 2.1569120449712534e-06, "loss": 0.0005, "step": 176890 }, { "epoch": 1.446620599419389, "grad_norm": 0.017765669152140617, "learning_rate": 2.1563250390057843e-06, "loss": 0.0009, "step": 176900 }, { "epoch": 1.4467023755979884, "grad_norm": 0.021973498165607452, "learning_rate": 2.1557380909682864e-06, "loss": 0.0013, "step": 176910 }, { "epoch": 1.4467841517765874, "grad_norm": 0.04821981489658356, "learning_rate": 2.1551512008707127e-06, "loss": 0.0011, "step": 176920 }, { "epoch": 1.4468659279551868, "grad_norm": 0.009196984581649303, "learning_rate": 2.1545643687250238e-06, "loss": 0.0004, "step": 176930 }, { "epoch": 1.4469477041337857, "grad_norm": 0.017320746555924416, "learning_rate": 2.153977594543171e-06, "loss": 0.0007, "step": 176940 }, { "epoch": 1.447029480312385, "grad_norm": 0.005729924887418747, "learning_rate": 2.1533908783371087e-06, "loss": 0.0004, "step": 176950 }, { "epoch": 1.447111256490984, "grad_norm": 0.031048698350787163, "learning_rate": 2.152804220118786e-06, "loss": 0.0013, "step": 176960 }, { "epoch": 1.4471930326695834, "grad_norm": 0.010216943919658661, "learning_rate": 2.1522176199001576e-06, "loss": 0.0009, "step": 176970 }, { "epoch": 1.4472748088481826, "grad_norm": 0.011028026230633259, "learning_rate": 2.1516310776931726e-06, "loss": 0.0008, "step": 176980 }, { "epoch": 1.4473565850267818, "grad_norm": 0.007213980425149202, "learning_rate": 2.1510445935097763e-06, "loss": 0.0047, "step": 176990 }, { "epoch": 1.447438361205381, "grad_norm": 0.05327902361750603, "learning_rate": 2.150458167361915e-06, "loss": 0.0013, "step": 177000 }, { "epoch": 1.44752013738398, "grad_norm": 0.0178892333060503, "learning_rate": 2.14987179926154e-06, "loss": 0.0007, "step": 177010 }, { "epoch": 1.4476019135625793, "grad_norm": 0.03689812496304512, "learning_rate": 2.149285489220592e-06, "loss": 0.0009, "step": 177020 }, { "epoch": 1.4476836897411784, "grad_norm": 0.0025415269192308187, "learning_rate": 2.148699237251016e-06, "loss": 0.0005, "step": 177030 }, { "epoch": 1.4477654659197776, "grad_norm": 0.05791689082980156, "learning_rate": 2.148113043364753e-06, "loss": 0.0008, "step": 177040 }, { "epoch": 1.4478472420983768, "grad_norm": 0.010639166459441185, "learning_rate": 2.1475269075737463e-06, "loss": 0.0005, "step": 177050 }, { "epoch": 1.447929018276976, "grad_norm": 0.03318720683455467, "learning_rate": 2.146940829889936e-06, "loss": 0.001, "step": 177060 }, { "epoch": 1.4480107944555751, "grad_norm": 0.05233681574463844, "learning_rate": 2.1463548103252595e-06, "loss": 0.0006, "step": 177070 }, { "epoch": 1.4480925706341743, "grad_norm": 0.0709778368473053, "learning_rate": 2.1457688488916533e-06, "loss": 0.0011, "step": 177080 }, { "epoch": 1.4481743468127735, "grad_norm": 0.033426906913518906, "learning_rate": 2.145182945601058e-06, "loss": 0.0005, "step": 177090 }, { "epoch": 1.4482561229913726, "grad_norm": 0.11102312058210373, "learning_rate": 2.144597100465408e-06, "loss": 0.0008, "step": 177100 }, { "epoch": 1.4483378991699718, "grad_norm": 0.01635204255580902, "learning_rate": 2.1440113134966345e-06, "loss": 0.0009, "step": 177110 }, { "epoch": 1.448419675348571, "grad_norm": 0.021244995296001434, "learning_rate": 2.14342558470667e-06, "loss": 0.001, "step": 177120 }, { "epoch": 1.4485014515271701, "grad_norm": 0.0390315018594265, "learning_rate": 2.1428399141074497e-06, "loss": 0.0009, "step": 177130 }, { "epoch": 1.4485832277057693, "grad_norm": 0.020898405462503433, "learning_rate": 2.1422543017109033e-06, "loss": 0.0028, "step": 177140 }, { "epoch": 1.4486650038843685, "grad_norm": 0.005534029565751553, "learning_rate": 2.1416687475289594e-06, "loss": 0.0006, "step": 177150 }, { "epoch": 1.4487467800629676, "grad_norm": 0.030827438458800316, "learning_rate": 2.141083251573545e-06, "loss": 0.0005, "step": 177160 }, { "epoch": 1.4488285562415668, "grad_norm": 0.047213099896907806, "learning_rate": 2.14049781385659e-06, "loss": 0.0003, "step": 177170 }, { "epoch": 1.448910332420166, "grad_norm": 0.003913012333214283, "learning_rate": 2.139912434390019e-06, "loss": 0.0007, "step": 177180 }, { "epoch": 1.4489921085987651, "grad_norm": 0.015208407305181026, "learning_rate": 2.139327113185757e-06, "loss": 0.0005, "step": 177190 }, { "epoch": 1.4490738847773643, "grad_norm": 0.004214025102555752, "learning_rate": 2.1387418502557243e-06, "loss": 0.001, "step": 177200 }, { "epoch": 1.4491556609559635, "grad_norm": 0.026716455817222595, "learning_rate": 2.1381566456118486e-06, "loss": 0.0008, "step": 177210 }, { "epoch": 1.4492374371345627, "grad_norm": 0.000477722380310297, "learning_rate": 2.1375714992660494e-06, "loss": 0.0006, "step": 177220 }, { "epoch": 1.4493192133131618, "grad_norm": 0.024391883984208107, "learning_rate": 2.1369864112302443e-06, "loss": 0.0014, "step": 177230 }, { "epoch": 1.449400989491761, "grad_norm": 0.07009455561637878, "learning_rate": 2.1364013815163503e-06, "loss": 0.0005, "step": 177240 }, { "epoch": 1.4494827656703602, "grad_norm": 0.03782191127538681, "learning_rate": 2.1358164101362905e-06, "loss": 0.0006, "step": 177250 }, { "epoch": 1.4495645418489593, "grad_norm": 0.019085031002759933, "learning_rate": 2.1352314971019782e-06, "loss": 0.0015, "step": 177260 }, { "epoch": 1.4496463180275585, "grad_norm": 0.0024430553894490004, "learning_rate": 2.1346466424253286e-06, "loss": 0.0011, "step": 177270 }, { "epoch": 1.4497280942061577, "grad_norm": 0.047670215368270874, "learning_rate": 2.1340618461182567e-06, "loss": 0.0016, "step": 177280 }, { "epoch": 1.4498098703847568, "grad_norm": 0.04143696650862694, "learning_rate": 2.1334771081926724e-06, "loss": 0.0006, "step": 177290 }, { "epoch": 1.4498916465633562, "grad_norm": 0.05075454339385033, "learning_rate": 2.1328924286604913e-06, "loss": 0.0009, "step": 177300 }, { "epoch": 1.4499734227419552, "grad_norm": 0.02224275842308998, "learning_rate": 2.132307807533622e-06, "loss": 0.0007, "step": 177310 }, { "epoch": 1.4500551989205546, "grad_norm": 0.039407163858413696, "learning_rate": 2.131723244823973e-06, "loss": 0.0009, "step": 177320 }, { "epoch": 1.4501369750991535, "grad_norm": 0.10931596159934998, "learning_rate": 2.131138740543452e-06, "loss": 0.0009, "step": 177330 }, { "epoch": 1.450218751277753, "grad_norm": 0.22913330793380737, "learning_rate": 2.13055429470397e-06, "loss": 0.0015, "step": 177340 }, { "epoch": 1.4503005274563519, "grad_norm": 0.026812445372343063, "learning_rate": 2.1299699073174274e-06, "loss": 0.0004, "step": 177350 }, { "epoch": 1.4503823036349512, "grad_norm": 0.100185826420784, "learning_rate": 2.129385578395731e-06, "loss": 0.0007, "step": 177360 }, { "epoch": 1.4504640798135502, "grad_norm": 0.04539640620350838, "learning_rate": 2.128801307950782e-06, "loss": 0.001, "step": 177370 }, { "epoch": 1.4505458559921496, "grad_norm": 0.08523879200220108, "learning_rate": 2.128217095994486e-06, "loss": 0.0009, "step": 177380 }, { "epoch": 1.4506276321707485, "grad_norm": 0.17401738464832306, "learning_rate": 2.1276329425387414e-06, "loss": 0.0011, "step": 177390 }, { "epoch": 1.450709408349348, "grad_norm": 0.005263380240648985, "learning_rate": 2.1270488475954493e-06, "loss": 0.0005, "step": 177400 }, { "epoch": 1.450791184527947, "grad_norm": 0.04450922831892967, "learning_rate": 2.126464811176505e-06, "loss": 0.0005, "step": 177410 }, { "epoch": 1.4508729607065463, "grad_norm": 0.0045977868139743805, "learning_rate": 2.125880833293811e-06, "loss": 0.0008, "step": 177420 }, { "epoch": 1.4509547368851454, "grad_norm": 0.09925176203250885, "learning_rate": 2.1252969139592592e-06, "loss": 0.0008, "step": 177430 }, { "epoch": 1.4510365130637446, "grad_norm": 0.07255363464355469, "learning_rate": 2.124713053184747e-06, "loss": 0.001, "step": 177440 }, { "epoch": 1.4511182892423438, "grad_norm": 0.017953971400856972, "learning_rate": 2.1241292509821653e-06, "loss": 0.0006, "step": 177450 }, { "epoch": 1.451200065420943, "grad_norm": 0.025747667998075485, "learning_rate": 2.1235455073634115e-06, "loss": 0.0011, "step": 177460 }, { "epoch": 1.451281841599542, "grad_norm": 0.02878410927951336, "learning_rate": 2.1229618223403718e-06, "loss": 0.001, "step": 177470 }, { "epoch": 1.4513636177781413, "grad_norm": 0.05559514835476875, "learning_rate": 2.122378195924939e-06, "loss": 0.001, "step": 177480 }, { "epoch": 1.4514453939567404, "grad_norm": 0.015935346484184265, "learning_rate": 2.121794628129e-06, "loss": 0.0007, "step": 177490 }, { "epoch": 1.4515271701353396, "grad_norm": 0.09807419776916504, "learning_rate": 2.121211118964445e-06, "loss": 0.0012, "step": 177500 }, { "epoch": 1.4516089463139388, "grad_norm": 0.008635856211185455, "learning_rate": 2.1206276684431592e-06, "loss": 0.0046, "step": 177510 }, { "epoch": 1.451690722492538, "grad_norm": 0.06933034956455231, "learning_rate": 2.1200442765770286e-06, "loss": 0.0014, "step": 177520 }, { "epoch": 1.4517724986711371, "grad_norm": 0.06660685688257217, "learning_rate": 2.119460943377935e-06, "loss": 0.0006, "step": 177530 }, { "epoch": 1.4518542748497363, "grad_norm": 0.04014239460229874, "learning_rate": 2.1188776688577657e-06, "loss": 0.0011, "step": 177540 }, { "epoch": 1.4519360510283355, "grad_norm": 0.021918615326285362, "learning_rate": 2.1182944530284e-06, "loss": 0.0004, "step": 177550 }, { "epoch": 1.4520178272069346, "grad_norm": 0.028786400333046913, "learning_rate": 2.1177112959017187e-06, "loss": 0.0009, "step": 177560 }, { "epoch": 1.4520996033855338, "grad_norm": 0.00824917946010828, "learning_rate": 2.117128197489599e-06, "loss": 0.0013, "step": 177570 }, { "epoch": 1.452181379564133, "grad_norm": 0.028306636959314346, "learning_rate": 2.1165451578039255e-06, "loss": 0.0006, "step": 177580 }, { "epoch": 1.4522631557427321, "grad_norm": 0.250501811504364, "learning_rate": 2.1159621768565686e-06, "loss": 0.0011, "step": 177590 }, { "epoch": 1.4523449319213313, "grad_norm": 0.028085503727197647, "learning_rate": 2.1153792546594067e-06, "loss": 0.0005, "step": 177600 }, { "epoch": 1.4524267080999305, "grad_norm": 0.0702732503414154, "learning_rate": 2.114796391224312e-06, "loss": 0.0008, "step": 177610 }, { "epoch": 1.4525084842785296, "grad_norm": 0.024528127163648605, "learning_rate": 2.114213586563162e-06, "loss": 0.0006, "step": 177620 }, { "epoch": 1.4525902604571288, "grad_norm": 0.03716829791665077, "learning_rate": 2.113630840687827e-06, "loss": 0.0007, "step": 177630 }, { "epoch": 1.452672036635728, "grad_norm": 0.07753729075193405, "learning_rate": 2.1130481536101776e-06, "loss": 0.001, "step": 177640 }, { "epoch": 1.4527538128143271, "grad_norm": 0.008864065632224083, "learning_rate": 2.112465525342083e-06, "loss": 0.0006, "step": 177650 }, { "epoch": 1.4528355889929263, "grad_norm": 0.021391721442341805, "learning_rate": 2.1118829558954135e-06, "loss": 0.0008, "step": 177660 }, { "epoch": 1.4529173651715255, "grad_norm": 0.04729577898979187, "learning_rate": 2.1113004452820364e-06, "loss": 0.0008, "step": 177670 }, { "epoch": 1.4529991413501246, "grad_norm": 0.05184321478009224, "learning_rate": 2.1107179935138173e-06, "loss": 0.0006, "step": 177680 }, { "epoch": 1.4530809175287238, "grad_norm": 0.01264361571520567, "learning_rate": 2.110135600602619e-06, "loss": 0.0004, "step": 177690 }, { "epoch": 1.453162693707323, "grad_norm": 0.11237183213233948, "learning_rate": 2.1095532665603112e-06, "loss": 0.0007, "step": 177700 }, { "epoch": 1.4532444698859222, "grad_norm": 0.0142899788916111, "learning_rate": 2.1089709913987517e-06, "loss": 0.0006, "step": 177710 }, { "epoch": 1.4533262460645213, "grad_norm": 0.04701566323637962, "learning_rate": 2.108388775129802e-06, "loss": 0.0006, "step": 177720 }, { "epoch": 1.4534080222431207, "grad_norm": 0.04224168509244919, "learning_rate": 2.1078066177653224e-06, "loss": 0.0006, "step": 177730 }, { "epoch": 1.4534897984217197, "grad_norm": 0.018525505438447, "learning_rate": 2.107224519317174e-06, "loss": 0.0005, "step": 177740 }, { "epoch": 1.453571574600319, "grad_norm": 0.05199211463332176, "learning_rate": 2.106642479797214e-06, "loss": 0.0008, "step": 177750 }, { "epoch": 1.453653350778918, "grad_norm": 0.055037032812833786, "learning_rate": 2.1060604992172983e-06, "loss": 0.0012, "step": 177760 }, { "epoch": 1.4537351269575174, "grad_norm": 0.006808844394981861, "learning_rate": 2.105478577589281e-06, "loss": 0.0003, "step": 177770 }, { "epoch": 1.4538169031361163, "grad_norm": 0.08923850953578949, "learning_rate": 2.10489671492502e-06, "loss": 0.001, "step": 177780 }, { "epoch": 1.4538986793147157, "grad_norm": 0.03678428381681442, "learning_rate": 2.1043149112363653e-06, "loss": 0.0008, "step": 177790 }, { "epoch": 1.4539804554933147, "grad_norm": 0.030084481462836266, "learning_rate": 2.10373316653517e-06, "loss": 0.0012, "step": 177800 }, { "epoch": 1.454062231671914, "grad_norm": 0.023832594975829124, "learning_rate": 2.1031514808332824e-06, "loss": 0.0006, "step": 177810 }, { "epoch": 1.454144007850513, "grad_norm": 0.06478412449359894, "learning_rate": 2.1025698541425575e-06, "loss": 0.0017, "step": 177820 }, { "epoch": 1.4542257840291124, "grad_norm": 0.006442249286919832, "learning_rate": 2.1019882864748382e-06, "loss": 0.0005, "step": 177830 }, { "epoch": 1.4543075602077116, "grad_norm": 0.05951585993170738, "learning_rate": 2.1014067778419726e-06, "loss": 0.0006, "step": 177840 }, { "epoch": 1.4543893363863107, "grad_norm": 0.0007155589992180467, "learning_rate": 2.100825328255806e-06, "loss": 0.001, "step": 177850 }, { "epoch": 1.45447111256491, "grad_norm": 0.05913671851158142, "learning_rate": 2.100243937728185e-06, "loss": 0.0011, "step": 177860 }, { "epoch": 1.454552888743509, "grad_norm": 0.015427942387759686, "learning_rate": 2.0996626062709524e-06, "loss": 0.0005, "step": 177870 }, { "epoch": 1.4546346649221082, "grad_norm": 0.09287354350090027, "learning_rate": 2.0990813338959503e-06, "loss": 0.001, "step": 177880 }, { "epoch": 1.4547164411007074, "grad_norm": 0.266719251871109, "learning_rate": 2.0985001206150174e-06, "loss": 0.0016, "step": 177890 }, { "epoch": 1.4547982172793066, "grad_norm": 0.13276922702789307, "learning_rate": 2.0979189664399974e-06, "loss": 0.0008, "step": 177900 }, { "epoch": 1.4548799934579058, "grad_norm": 0.11215504258871078, "learning_rate": 2.097337871382727e-06, "loss": 0.0021, "step": 177910 }, { "epoch": 1.454961769636505, "grad_norm": 0.3340308964252472, "learning_rate": 2.0967568354550432e-06, "loss": 0.001, "step": 177920 }, { "epoch": 1.455043545815104, "grad_norm": 0.0571126826107502, "learning_rate": 2.0961758586687806e-06, "loss": 0.0004, "step": 177930 }, { "epoch": 1.4551253219937033, "grad_norm": 0.02752930484712124, "learning_rate": 2.095594941035781e-06, "loss": 0.0009, "step": 177940 }, { "epoch": 1.4552070981723024, "grad_norm": 0.048942554742097855, "learning_rate": 2.09501408256787e-06, "loss": 0.001, "step": 177950 }, { "epoch": 1.4552888743509016, "grad_norm": 0.07778242230415344, "learning_rate": 2.0944332832768836e-06, "loss": 0.0009, "step": 177960 }, { "epoch": 1.4553706505295008, "grad_norm": 0.007722236681729555, "learning_rate": 2.093852543174652e-06, "loss": 0.0009, "step": 177970 }, { "epoch": 1.4554524267081, "grad_norm": 0.038333285599946976, "learning_rate": 2.0932718622730073e-06, "loss": 0.0005, "step": 177980 }, { "epoch": 1.455534202886699, "grad_norm": 0.05280397832393646, "learning_rate": 2.092691240583778e-06, "loss": 0.0013, "step": 177990 }, { "epoch": 1.4556159790652983, "grad_norm": 0.009909825399518013, "learning_rate": 2.092110678118791e-06, "loss": 0.0002, "step": 178000 }, { "epoch": 1.4556977552438974, "grad_norm": 0.028011279180645943, "learning_rate": 2.091530174889872e-06, "loss": 0.0011, "step": 178010 }, { "epoch": 1.4557795314224966, "grad_norm": 0.04702343791723251, "learning_rate": 2.0909497309088485e-06, "loss": 0.0008, "step": 178020 }, { "epoch": 1.4558613076010958, "grad_norm": 0.01703615114092827, "learning_rate": 2.090369346187544e-06, "loss": 0.0006, "step": 178030 }, { "epoch": 1.455943083779695, "grad_norm": 0.01227491069585085, "learning_rate": 2.089789020737781e-06, "loss": 0.0009, "step": 178040 }, { "epoch": 1.4560248599582941, "grad_norm": 0.041093841195106506, "learning_rate": 2.0892087545713792e-06, "loss": 0.0007, "step": 178050 }, { "epoch": 1.4561066361368933, "grad_norm": 0.05016108974814415, "learning_rate": 2.0886285477001643e-06, "loss": 0.0009, "step": 178060 }, { "epoch": 1.4561884123154925, "grad_norm": 0.01110057532787323, "learning_rate": 2.088048400135951e-06, "loss": 0.0007, "step": 178070 }, { "epoch": 1.4562701884940916, "grad_norm": 0.03640875220298767, "learning_rate": 2.087468311890559e-06, "loss": 0.0005, "step": 178080 }, { "epoch": 1.4563519646726908, "grad_norm": 0.018956219777464867, "learning_rate": 2.0868882829758026e-06, "loss": 0.0007, "step": 178090 }, { "epoch": 1.45643374085129, "grad_norm": 0.18109622597694397, "learning_rate": 2.0863083134035017e-06, "loss": 0.0012, "step": 178100 }, { "epoch": 1.4565155170298891, "grad_norm": 0.01564725860953331, "learning_rate": 2.0857284031854693e-06, "loss": 0.0006, "step": 178110 }, { "epoch": 1.4565972932084883, "grad_norm": 0.07620755583047867, "learning_rate": 2.0851485523335176e-06, "loss": 0.0013, "step": 178120 }, { "epoch": 1.4566790693870875, "grad_norm": 0.025202283635735512, "learning_rate": 2.0845687608594573e-06, "loss": 0.0004, "step": 178130 }, { "epoch": 1.4567608455656866, "grad_norm": 0.004323821514844894, "learning_rate": 2.083989028775103e-06, "loss": 0.0013, "step": 178140 }, { "epoch": 1.4568426217442858, "grad_norm": 0.01579691842198372, "learning_rate": 2.083409356092263e-06, "loss": 0.0006, "step": 178150 }, { "epoch": 1.4569243979228852, "grad_norm": 0.054997529834508896, "learning_rate": 2.0828297428227446e-06, "loss": 0.0012, "step": 178160 }, { "epoch": 1.4570061741014841, "grad_norm": 0.03711097314953804, "learning_rate": 2.082250188978354e-06, "loss": 0.001, "step": 178170 }, { "epoch": 1.4570879502800835, "grad_norm": 0.02829814702272415, "learning_rate": 2.081670694570902e-06, "loss": 0.0013, "step": 178180 }, { "epoch": 1.4571697264586825, "grad_norm": 0.013643131591379642, "learning_rate": 2.0810912596121884e-06, "loss": 0.0004, "step": 178190 }, { "epoch": 1.4572515026372819, "grad_norm": 0.016933126375079155, "learning_rate": 2.080511884114019e-06, "loss": 0.0005, "step": 178200 }, { "epoch": 1.4573332788158808, "grad_norm": 0.11565778404474258, "learning_rate": 2.0799325680881932e-06, "loss": 0.001, "step": 178210 }, { "epoch": 1.4574150549944802, "grad_norm": 0.04648562893271446, "learning_rate": 2.0793533115465163e-06, "loss": 0.0014, "step": 178220 }, { "epoch": 1.4574968311730792, "grad_norm": 0.013392760418355465, "learning_rate": 2.078774114500787e-06, "loss": 0.0007, "step": 178230 }, { "epoch": 1.4575786073516785, "grad_norm": 0.009106943383812904, "learning_rate": 2.078194976962803e-06, "loss": 0.0006, "step": 178240 }, { "epoch": 1.4576603835302775, "grad_norm": 0.048421118408441544, "learning_rate": 2.0776158989443605e-06, "loss": 0.0013, "step": 178250 }, { "epoch": 1.4577421597088769, "grad_norm": 0.026906892657279968, "learning_rate": 2.07703688045726e-06, "loss": 0.0008, "step": 178260 }, { "epoch": 1.457823935887476, "grad_norm": 0.04119503125548363, "learning_rate": 2.0764579215132936e-06, "loss": 0.0014, "step": 178270 }, { "epoch": 1.4579057120660752, "grad_norm": 0.07873017340898514, "learning_rate": 2.0758790221242553e-06, "loss": 0.0008, "step": 178280 }, { "epoch": 1.4579874882446744, "grad_norm": 0.006350157782435417, "learning_rate": 2.075300182301937e-06, "loss": 0.0005, "step": 178290 }, { "epoch": 1.4580692644232736, "grad_norm": 0.052915312349796295, "learning_rate": 2.0747214020581346e-06, "loss": 0.0003, "step": 178300 }, { "epoch": 1.4581510406018727, "grad_norm": 0.015566078945994377, "learning_rate": 2.0741426814046335e-06, "loss": 0.0013, "step": 178310 }, { "epoch": 1.458232816780472, "grad_norm": 0.0013218761887401342, "learning_rate": 2.073564020353224e-06, "loss": 0.0006, "step": 178320 }, { "epoch": 1.458314592959071, "grad_norm": 0.046518724411726, "learning_rate": 2.0729854189156927e-06, "loss": 0.0009, "step": 178330 }, { "epoch": 1.4583963691376702, "grad_norm": 0.09065542370080948, "learning_rate": 2.072406877103829e-06, "loss": 0.0007, "step": 178340 }, { "epoch": 1.4584781453162694, "grad_norm": 0.010362569242715836, "learning_rate": 2.071828394929417e-06, "loss": 0.0012, "step": 178350 }, { "epoch": 1.4585599214948686, "grad_norm": 0.038797326385974884, "learning_rate": 2.0712499724042407e-06, "loss": 0.0008, "step": 178360 }, { "epoch": 1.4586416976734677, "grad_norm": 0.0025303177535533905, "learning_rate": 2.0706716095400815e-06, "loss": 0.0006, "step": 178370 }, { "epoch": 1.458723473852067, "grad_norm": 0.004372646100819111, "learning_rate": 2.0700933063487243e-06, "loss": 0.0007, "step": 178380 }, { "epoch": 1.458805250030666, "grad_norm": 0.01121906191110611, "learning_rate": 2.069515062841948e-06, "loss": 0.0008, "step": 178390 }, { "epoch": 1.4588870262092652, "grad_norm": 0.08619780838489532, "learning_rate": 2.068936879031532e-06, "loss": 0.0009, "step": 178400 }, { "epoch": 1.4589688023878644, "grad_norm": 0.06503278762102127, "learning_rate": 2.0683587549292524e-06, "loss": 0.0008, "step": 178410 }, { "epoch": 1.4590505785664636, "grad_norm": 0.019191520288586617, "learning_rate": 2.067780690546891e-06, "loss": 0.001, "step": 178420 }, { "epoch": 1.4591323547450628, "grad_norm": 0.002350771566852927, "learning_rate": 2.067202685896219e-06, "loss": 0.0006, "step": 178430 }, { "epoch": 1.459214130923662, "grad_norm": 0.019534971565008163, "learning_rate": 2.066624740989013e-06, "loss": 0.0008, "step": 178440 }, { "epoch": 1.459295907102261, "grad_norm": 0.03539024665951729, "learning_rate": 2.0660468558370422e-06, "loss": 0.0004, "step": 178450 }, { "epoch": 1.4593776832808603, "grad_norm": 0.07208043336868286, "learning_rate": 2.065469030452085e-06, "loss": 0.0051, "step": 178460 }, { "epoch": 1.4594594594594594, "grad_norm": 0.008338652551174164, "learning_rate": 2.0648912648459072e-06, "loss": 0.0008, "step": 178470 }, { "epoch": 1.4595412356380586, "grad_norm": 0.012393102049827576, "learning_rate": 2.0643135590302815e-06, "loss": 0.0003, "step": 178480 }, { "epoch": 1.4596230118166578, "grad_norm": 0.02032463811337948, "learning_rate": 2.063735913016973e-06, "loss": 0.0008, "step": 178490 }, { "epoch": 1.459704787995257, "grad_norm": 0.054314449429512024, "learning_rate": 2.0631583268177517e-06, "loss": 0.0004, "step": 178500 }, { "epoch": 1.459786564173856, "grad_norm": 0.02664851024746895, "learning_rate": 2.0625808004443833e-06, "loss": 0.0007, "step": 178510 }, { "epoch": 1.4598683403524553, "grad_norm": 0.015471058897674084, "learning_rate": 2.062003333908631e-06, "loss": 0.0006, "step": 178520 }, { "epoch": 1.4599501165310544, "grad_norm": 0.026927437633275986, "learning_rate": 2.0614259272222593e-06, "loss": 0.0016, "step": 178530 }, { "epoch": 1.4600318927096536, "grad_norm": 0.06057068705558777, "learning_rate": 2.0608485803970296e-06, "loss": 0.0006, "step": 178540 }, { "epoch": 1.4601136688882528, "grad_norm": 0.0011735496809706092, "learning_rate": 2.0602712934447032e-06, "loss": 0.0008, "step": 178550 }, { "epoch": 1.460195445066852, "grad_norm": 0.008131521753966808, "learning_rate": 2.059694066377041e-06, "loss": 0.0007, "step": 178560 }, { "epoch": 1.4602772212454511, "grad_norm": 0.027106840163469315, "learning_rate": 2.0591168992057984e-06, "loss": 0.0006, "step": 178570 }, { "epoch": 1.4603589974240503, "grad_norm": 0.029414664953947067, "learning_rate": 2.058539791942737e-06, "loss": 0.0009, "step": 178580 }, { "epoch": 1.4604407736026497, "grad_norm": 0.043473903089761734, "learning_rate": 2.057962744599611e-06, "loss": 0.0006, "step": 178590 }, { "epoch": 1.4605225497812486, "grad_norm": 0.01926630176603794, "learning_rate": 2.057385757188175e-06, "loss": 0.0005, "step": 178600 }, { "epoch": 1.460604325959848, "grad_norm": 0.016606474295258522, "learning_rate": 2.056808829720181e-06, "loss": 0.0005, "step": 178610 }, { "epoch": 1.460686102138447, "grad_norm": 0.03193145617842674, "learning_rate": 2.0562319622073854e-06, "loss": 0.0009, "step": 178620 }, { "epoch": 1.4607678783170464, "grad_norm": 0.042607974261045456, "learning_rate": 2.0556551546615385e-06, "loss": 0.0017, "step": 178630 }, { "epoch": 1.4608496544956453, "grad_norm": 0.03665228188037872, "learning_rate": 2.055078407094388e-06, "loss": 0.0019, "step": 178640 }, { "epoch": 1.4609314306742447, "grad_norm": 0.07191649079322815, "learning_rate": 2.054501719517685e-06, "loss": 0.001, "step": 178650 }, { "epoch": 1.4610132068528436, "grad_norm": 0.001355157233774662, "learning_rate": 2.053925091943176e-06, "loss": 0.0007, "step": 178660 }, { "epoch": 1.461094983031443, "grad_norm": 0.03532791510224342, "learning_rate": 2.0533485243826075e-06, "loss": 0.0005, "step": 178670 }, { "epoch": 1.4611767592100422, "grad_norm": 0.07755880057811737, "learning_rate": 2.0527720168477243e-06, "loss": 0.0008, "step": 178680 }, { "epoch": 1.4612585353886414, "grad_norm": 0.023482536897063255, "learning_rate": 2.0521955693502697e-06, "loss": 0.0004, "step": 178690 }, { "epoch": 1.4613403115672405, "grad_norm": 0.036109745502471924, "learning_rate": 2.051619181901989e-06, "loss": 0.0051, "step": 178700 }, { "epoch": 1.4614220877458397, "grad_norm": 0.03512550890445709, "learning_rate": 2.0510428545146222e-06, "loss": 0.0009, "step": 178710 }, { "epoch": 1.4615038639244389, "grad_norm": 0.01009793858975172, "learning_rate": 2.0504665871999092e-06, "loss": 0.0009, "step": 178720 }, { "epoch": 1.461585640103038, "grad_norm": 0.03474077954888344, "learning_rate": 2.0498903799695897e-06, "loss": 0.0003, "step": 178730 }, { "epoch": 1.4616674162816372, "grad_norm": 0.011107773520052433, "learning_rate": 2.049314232835399e-06, "loss": 0.0006, "step": 178740 }, { "epoch": 1.4617491924602364, "grad_norm": 0.02303696982562542, "learning_rate": 2.0487381458090776e-06, "loss": 0.001, "step": 178750 }, { "epoch": 1.4618309686388355, "grad_norm": 0.039096228778362274, "learning_rate": 2.048162118902359e-06, "loss": 0.0005, "step": 178760 }, { "epoch": 1.4619127448174347, "grad_norm": 0.050717033445835114, "learning_rate": 2.0475861521269773e-06, "loss": 0.0015, "step": 178770 }, { "epoch": 1.4619945209960339, "grad_norm": 0.022316046059131622, "learning_rate": 2.0470102454946662e-06, "loss": 0.0006, "step": 178780 }, { "epoch": 1.462076297174633, "grad_norm": 0.07903318107128143, "learning_rate": 2.0464343990171566e-06, "loss": 0.0008, "step": 178790 }, { "epoch": 1.4621580733532322, "grad_norm": 0.02488907240331173, "learning_rate": 2.0458586127061785e-06, "loss": 0.0005, "step": 178800 }, { "epoch": 1.4622398495318314, "grad_norm": 0.011498317122459412, "learning_rate": 2.045282886573462e-06, "loss": 0.0007, "step": 178810 }, { "epoch": 1.4623216257104306, "grad_norm": 0.032677508890628815, "learning_rate": 2.044707220630733e-06, "loss": 0.001, "step": 178820 }, { "epoch": 1.4624034018890297, "grad_norm": 0.010981692932546139, "learning_rate": 2.0441316148897223e-06, "loss": 0.0007, "step": 178830 }, { "epoch": 1.462485178067629, "grad_norm": 0.0010514612076804042, "learning_rate": 2.0435560693621527e-06, "loss": 0.0008, "step": 178840 }, { "epoch": 1.462566954246228, "grad_norm": 0.07981090992689133, "learning_rate": 2.042980584059749e-06, "loss": 0.0016, "step": 178850 }, { "epoch": 1.4626487304248272, "grad_norm": 0.024114053696393967, "learning_rate": 2.042405158994233e-06, "loss": 0.001, "step": 178860 }, { "epoch": 1.4627305066034264, "grad_norm": 0.08525056391954422, "learning_rate": 2.0418297941773297e-06, "loss": 0.0011, "step": 178870 }, { "epoch": 1.4628122827820256, "grad_norm": 0.06002894416451454, "learning_rate": 2.0412544896207576e-06, "loss": 0.0012, "step": 178880 }, { "epoch": 1.4628940589606247, "grad_norm": 0.020848168060183525, "learning_rate": 2.040679245336237e-06, "loss": 0.0007, "step": 178890 }, { "epoch": 1.462975835139224, "grad_norm": 0.03050186298787594, "learning_rate": 2.0401040613354856e-06, "loss": 0.0006, "step": 178900 }, { "epoch": 1.463057611317823, "grad_norm": 0.031613390892744064, "learning_rate": 2.03952893763022e-06, "loss": 0.0012, "step": 178910 }, { "epoch": 1.4631393874964223, "grad_norm": 0.0021864266600459814, "learning_rate": 2.038953874232157e-06, "loss": 0.001, "step": 178920 }, { "epoch": 1.4632211636750214, "grad_norm": 0.08451563864946365, "learning_rate": 2.03837887115301e-06, "loss": 0.0007, "step": 178930 }, { "epoch": 1.4633029398536206, "grad_norm": 0.028822224587202072, "learning_rate": 2.037803928404491e-06, "loss": 0.0007, "step": 178940 }, { "epoch": 1.4633847160322198, "grad_norm": 0.14361754059791565, "learning_rate": 2.0372290459983155e-06, "loss": 0.0006, "step": 178950 }, { "epoch": 1.463466492210819, "grad_norm": 0.05319804698228836, "learning_rate": 2.036654223946193e-06, "loss": 0.0009, "step": 178960 }, { "epoch": 1.463548268389418, "grad_norm": 0.038025129586458206, "learning_rate": 2.0360794622598317e-06, "loss": 0.0006, "step": 178970 }, { "epoch": 1.4636300445680173, "grad_norm": 0.05373983085155487, "learning_rate": 2.03550476095094e-06, "loss": 0.001, "step": 178980 }, { "epoch": 1.4637118207466164, "grad_norm": 0.049227844923734665, "learning_rate": 2.0349301200312278e-06, "loss": 0.0016, "step": 178990 }, { "epoch": 1.4637935969252158, "grad_norm": 0.054730214178562164, "learning_rate": 2.0343555395123987e-06, "loss": 0.0006, "step": 179000 }, { "epoch": 1.4638753731038148, "grad_norm": 0.003998321481049061, "learning_rate": 2.033781019406158e-06, "loss": 0.0007, "step": 179010 }, { "epoch": 1.4639571492824142, "grad_norm": 0.01927364617586136, "learning_rate": 2.0332065597242094e-06, "loss": 0.0007, "step": 179020 }, { "epoch": 1.464038925461013, "grad_norm": 0.017692867666482925, "learning_rate": 2.0326321604782547e-06, "loss": 0.0015, "step": 179030 }, { "epoch": 1.4641207016396125, "grad_norm": 0.06655579060316086, "learning_rate": 2.032057821679994e-06, "loss": 0.0013, "step": 179040 }, { "epoch": 1.4642024778182114, "grad_norm": 0.017830075696110725, "learning_rate": 2.031483543341129e-06, "loss": 0.0003, "step": 179050 }, { "epoch": 1.4642842539968108, "grad_norm": 0.07422275096178055, "learning_rate": 2.0309093254733548e-06, "loss": 0.0005, "step": 179060 }, { "epoch": 1.4643660301754098, "grad_norm": 0.10433060675859451, "learning_rate": 2.030335168088373e-06, "loss": 0.0009, "step": 179070 }, { "epoch": 1.4644478063540092, "grad_norm": 0.008248862810432911, "learning_rate": 2.029761071197878e-06, "loss": 0.0006, "step": 179080 }, { "epoch": 1.4645295825326081, "grad_norm": 0.025347594171762466, "learning_rate": 2.0291870348135635e-06, "loss": 0.0009, "step": 179090 }, { "epoch": 1.4646113587112075, "grad_norm": 0.012529928237199783, "learning_rate": 2.0286130589471227e-06, "loss": 0.0006, "step": 179100 }, { "epoch": 1.4646931348898067, "grad_norm": 0.0803443118929863, "learning_rate": 2.028039143610251e-06, "loss": 0.0008, "step": 179110 }, { "epoch": 1.4647749110684058, "grad_norm": 0.04819302260875702, "learning_rate": 2.0274652888146373e-06, "loss": 0.0007, "step": 179120 }, { "epoch": 1.464856687247005, "grad_norm": 0.04446108639240265, "learning_rate": 2.026891494571972e-06, "loss": 0.0008, "step": 179130 }, { "epoch": 1.4649384634256042, "grad_norm": 0.026888061314821243, "learning_rate": 2.0263177608939434e-06, "loss": 0.0009, "step": 179140 }, { "epoch": 1.4650202396042034, "grad_norm": 0.19386349618434906, "learning_rate": 2.0257440877922395e-06, "loss": 0.001, "step": 179150 }, { "epoch": 1.4651020157828025, "grad_norm": 0.00891188532114029, "learning_rate": 2.025170475278546e-06, "loss": 0.0007, "step": 179160 }, { "epoch": 1.4651837919614017, "grad_norm": 0.0716763287782669, "learning_rate": 2.0245969233645478e-06, "loss": 0.0015, "step": 179170 }, { "epoch": 1.4652655681400009, "grad_norm": 0.0533444881439209, "learning_rate": 2.0240234320619273e-06, "loss": 0.0011, "step": 179180 }, { "epoch": 1.4653473443186, "grad_norm": 0.03390990197658539, "learning_rate": 2.0234500013823706e-06, "loss": 0.0006, "step": 179190 }, { "epoch": 1.4654291204971992, "grad_norm": 0.026512647047638893, "learning_rate": 2.0228766313375564e-06, "loss": 0.0007, "step": 179200 }, { "epoch": 1.4655108966757984, "grad_norm": 0.039471324533224106, "learning_rate": 2.0223033219391653e-06, "loss": 0.0009, "step": 179210 }, { "epoch": 1.4655926728543975, "grad_norm": 0.036104753613471985, "learning_rate": 2.021730073198874e-06, "loss": 0.0004, "step": 179220 }, { "epoch": 1.4656744490329967, "grad_norm": 0.006152261048555374, "learning_rate": 2.0211568851283636e-06, "loss": 0.0004, "step": 179230 }, { "epoch": 1.4657562252115959, "grad_norm": 0.0013751528458669782, "learning_rate": 2.020583757739309e-06, "loss": 0.0008, "step": 179240 }, { "epoch": 1.465838001390195, "grad_norm": 0.0070394654758274555, "learning_rate": 2.0200106910433847e-06, "loss": 0.0019, "step": 179250 }, { "epoch": 1.4659197775687942, "grad_norm": 0.03742170333862305, "learning_rate": 2.0194376850522652e-06, "loss": 0.0004, "step": 179260 }, { "epoch": 1.4660015537473934, "grad_norm": 0.01643395982682705, "learning_rate": 2.0188647397776228e-06, "loss": 0.0002, "step": 179270 }, { "epoch": 1.4660833299259926, "grad_norm": 0.020818298682570457, "learning_rate": 2.0182918552311286e-06, "loss": 0.0005, "step": 179280 }, { "epoch": 1.4661651061045917, "grad_norm": 0.005722143221646547, "learning_rate": 2.017719031424453e-06, "loss": 0.0016, "step": 179290 }, { "epoch": 1.4662468822831909, "grad_norm": 0.009522538632154465, "learning_rate": 2.017146268369264e-06, "loss": 0.0007, "step": 179300 }, { "epoch": 1.46632865846179, "grad_norm": 0.04007738456130028, "learning_rate": 2.0165735660772307e-06, "loss": 0.0008, "step": 179310 }, { "epoch": 1.4664104346403892, "grad_norm": 0.02146676369011402, "learning_rate": 2.0160009245600194e-06, "loss": 0.0013, "step": 179320 }, { "epoch": 1.4664922108189884, "grad_norm": 0.007236375007778406, "learning_rate": 2.015428343829295e-06, "loss": 0.0008, "step": 179330 }, { "epoch": 1.4665739869975876, "grad_norm": 0.05264320969581604, "learning_rate": 2.0148558238967185e-06, "loss": 0.0012, "step": 179340 }, { "epoch": 1.4666557631761867, "grad_norm": 0.04255085065960884, "learning_rate": 2.014283364773958e-06, "loss": 0.0005, "step": 179350 }, { "epoch": 1.466737539354786, "grad_norm": 0.04007347300648689, "learning_rate": 2.0137109664726724e-06, "loss": 0.0011, "step": 179360 }, { "epoch": 1.466819315533385, "grad_norm": 0.03801590949296951, "learning_rate": 2.013138629004521e-06, "loss": 0.0008, "step": 179370 }, { "epoch": 1.4669010917119842, "grad_norm": 0.00790300965309143, "learning_rate": 2.012566352381164e-06, "loss": 0.0009, "step": 179380 }, { "epoch": 1.4669828678905834, "grad_norm": 0.047995563596487045, "learning_rate": 2.011994136614259e-06, "loss": 0.0013, "step": 179390 }, { "epoch": 1.4670646440691826, "grad_norm": 0.019924411550164223, "learning_rate": 2.011421981715462e-06, "loss": 0.0007, "step": 179400 }, { "epoch": 1.4671464202477817, "grad_norm": 0.005630883853882551, "learning_rate": 2.0108498876964285e-06, "loss": 0.0011, "step": 179410 }, { "epoch": 1.467228196426381, "grad_norm": 0.018351823091506958, "learning_rate": 2.01027785456881e-06, "loss": 0.0012, "step": 179420 }, { "epoch": 1.4673099726049803, "grad_norm": 0.060761358588933945, "learning_rate": 2.0097058823442646e-06, "loss": 0.0009, "step": 179430 }, { "epoch": 1.4673917487835793, "grad_norm": 0.01995290070772171, "learning_rate": 2.0091339710344404e-06, "loss": 0.0002, "step": 179440 }, { "epoch": 1.4674735249621786, "grad_norm": 0.10137447714805603, "learning_rate": 2.008562120650988e-06, "loss": 0.0006, "step": 179450 }, { "epoch": 1.4675553011407776, "grad_norm": 0.021176977083086967, "learning_rate": 2.0079903312055553e-06, "loss": 0.0008, "step": 179460 }, { "epoch": 1.467637077319377, "grad_norm": 0.029054639860987663, "learning_rate": 2.0074186027097937e-06, "loss": 0.0005, "step": 179470 }, { "epoch": 1.467718853497976, "grad_norm": 0.043800078332424164, "learning_rate": 2.0068469351753478e-06, "loss": 0.0008, "step": 179480 }, { "epoch": 1.4678006296765753, "grad_norm": 0.04587850347161293, "learning_rate": 2.006275328613862e-06, "loss": 0.0009, "step": 179490 }, { "epoch": 1.4678824058551743, "grad_norm": 0.023928537964820862, "learning_rate": 2.005703783036982e-06, "loss": 0.0008, "step": 179500 }, { "epoch": 1.4679641820337737, "grad_norm": 0.07568521052598953, "learning_rate": 2.0051322984563493e-06, "loss": 0.0011, "step": 179510 }, { "epoch": 1.4680459582123726, "grad_norm": 0.024458855390548706, "learning_rate": 2.004560874883606e-06, "loss": 0.0008, "step": 179520 }, { "epoch": 1.468127734390972, "grad_norm": 0.05133489891886711, "learning_rate": 2.0039895123303927e-06, "loss": 0.0006, "step": 179530 }, { "epoch": 1.4682095105695712, "grad_norm": 0.0012706865090876818, "learning_rate": 2.0034182108083468e-06, "loss": 0.0005, "step": 179540 }, { "epoch": 1.4682912867481703, "grad_norm": 0.03783021122217178, "learning_rate": 2.002846970329109e-06, "loss": 0.001, "step": 179550 }, { "epoch": 1.4683730629267695, "grad_norm": 0.04610443487763405, "learning_rate": 2.002275790904315e-06, "loss": 0.0011, "step": 179560 }, { "epoch": 1.4684548391053687, "grad_norm": 0.10834778845310211, "learning_rate": 2.0017046725455996e-06, "loss": 0.0005, "step": 179570 }, { "epoch": 1.4685366152839678, "grad_norm": 0.025529565289616585, "learning_rate": 2.0011336152645947e-06, "loss": 0.0005, "step": 179580 }, { "epoch": 1.468618391462567, "grad_norm": 0.21456566452980042, "learning_rate": 2.000562619072939e-06, "loss": 0.0014, "step": 179590 }, { "epoch": 1.4687001676411662, "grad_norm": 0.02145436964929104, "learning_rate": 1.9999916839822596e-06, "loss": 0.0004, "step": 179600 }, { "epoch": 1.4687819438197653, "grad_norm": 0.08872636407613754, "learning_rate": 1.9994208100041883e-06, "loss": 0.0012, "step": 179610 }, { "epoch": 1.4688637199983645, "grad_norm": 0.022635789588093758, "learning_rate": 1.9988499971503545e-06, "loss": 0.0007, "step": 179620 }, { "epoch": 1.4689454961769637, "grad_norm": 0.09740176051855087, "learning_rate": 1.9982792454323853e-06, "loss": 0.0013, "step": 179630 }, { "epoch": 1.4690272723555629, "grad_norm": 0.024302253499627113, "learning_rate": 1.997708554861908e-06, "loss": 0.0017, "step": 179640 }, { "epoch": 1.469109048534162, "grad_norm": 0.029668325558304787, "learning_rate": 1.997137925450548e-06, "loss": 0.0005, "step": 179650 }, { "epoch": 1.4691908247127612, "grad_norm": 0.03632226958870888, "learning_rate": 1.996567357209927e-06, "loss": 0.0016, "step": 179660 }, { "epoch": 1.4692726008913604, "grad_norm": 0.059274446219205856, "learning_rate": 1.9959968501516725e-06, "loss": 0.001, "step": 179670 }, { "epoch": 1.4693543770699595, "grad_norm": 0.09938807785511017, "learning_rate": 1.995426404287404e-06, "loss": 0.0012, "step": 179680 }, { "epoch": 1.4694361532485587, "grad_norm": 0.05146017670631409, "learning_rate": 1.9948560196287415e-06, "loss": 0.0011, "step": 179690 }, { "epoch": 1.4695179294271579, "grad_norm": 0.039962828159332275, "learning_rate": 1.994285696187303e-06, "loss": 0.0004, "step": 179700 }, { "epoch": 1.469599705605757, "grad_norm": 0.039376016706228256, "learning_rate": 1.9937154339747097e-06, "loss": 0.0005, "step": 179710 }, { "epoch": 1.4696814817843562, "grad_norm": 0.07206204533576965, "learning_rate": 1.9931452330025768e-06, "loss": 0.0008, "step": 179720 }, { "epoch": 1.4697632579629554, "grad_norm": 0.13648150861263275, "learning_rate": 1.9925750932825193e-06, "loss": 0.0013, "step": 179730 }, { "epoch": 1.4698450341415545, "grad_norm": 0.07634638249874115, "learning_rate": 1.9920050148261522e-06, "loss": 0.001, "step": 179740 }, { "epoch": 1.4699268103201537, "grad_norm": 0.016044441610574722, "learning_rate": 1.9914349976450875e-06, "loss": 0.0008, "step": 179750 }, { "epoch": 1.4700085864987529, "grad_norm": 0.025246115401387215, "learning_rate": 1.990865041750938e-06, "loss": 0.0008, "step": 179760 }, { "epoch": 1.470090362677352, "grad_norm": 0.04078979790210724, "learning_rate": 1.990295147155313e-06, "loss": 0.0006, "step": 179770 }, { "epoch": 1.4701721388559512, "grad_norm": 0.045634493231773376, "learning_rate": 1.9897253138698203e-06, "loss": 0.0006, "step": 179780 }, { "epoch": 1.4702539150345504, "grad_norm": 0.015171573497354984, "learning_rate": 1.989155541906072e-06, "loss": 0.0009, "step": 179790 }, { "epoch": 1.4703356912131496, "grad_norm": 0.010361392982304096, "learning_rate": 1.9885858312756727e-06, "loss": 0.0006, "step": 179800 }, { "epoch": 1.4704174673917487, "grad_norm": 0.026427680626511574, "learning_rate": 1.9880161819902277e-06, "loss": 0.0004, "step": 179810 }, { "epoch": 1.470499243570348, "grad_norm": 0.030049912631511688, "learning_rate": 1.9874465940613396e-06, "loss": 0.0013, "step": 179820 }, { "epoch": 1.470581019748947, "grad_norm": 0.00585783738642931, "learning_rate": 1.986877067500615e-06, "loss": 0.0008, "step": 179830 }, { "epoch": 1.4706627959275462, "grad_norm": 0.043155793100595474, "learning_rate": 1.986307602319653e-06, "loss": 0.0009, "step": 179840 }, { "epoch": 1.4707445721061454, "grad_norm": 0.02455012872815132, "learning_rate": 1.9857381985300556e-06, "loss": 0.0007, "step": 179850 }, { "epoch": 1.4708263482847448, "grad_norm": 0.0017164043383672833, "learning_rate": 1.985168856143421e-06, "loss": 0.0011, "step": 179860 }, { "epoch": 1.4709081244633437, "grad_norm": 0.024465316906571388, "learning_rate": 1.9845995751713474e-06, "loss": 0.0004, "step": 179870 }, { "epoch": 1.4709899006419431, "grad_norm": 0.05243249237537384, "learning_rate": 1.9840303556254314e-06, "loss": 0.0007, "step": 179880 }, { "epoch": 1.471071676820542, "grad_norm": 0.006932642310857773, "learning_rate": 1.9834611975172685e-06, "loss": 0.0005, "step": 179890 }, { "epoch": 1.4711534529991415, "grad_norm": 0.06110415980219841, "learning_rate": 1.982892100858451e-06, "loss": 0.0011, "step": 179900 }, { "epoch": 1.4712352291777404, "grad_norm": 0.06498704850673676, "learning_rate": 1.982323065660576e-06, "loss": 0.0006, "step": 179910 }, { "epoch": 1.4713170053563398, "grad_norm": 0.014385380782186985, "learning_rate": 1.9817540919352335e-06, "loss": 0.0004, "step": 179920 }, { "epoch": 1.4713987815349387, "grad_norm": 0.03716503083705902, "learning_rate": 1.981185179694013e-06, "loss": 0.0009, "step": 179930 }, { "epoch": 1.4714805577135381, "grad_norm": 0.010564117692410946, "learning_rate": 1.980616328948502e-06, "loss": 0.0009, "step": 179940 }, { "epoch": 1.471562333892137, "grad_norm": 0.01767727918922901, "learning_rate": 1.980047539710293e-06, "loss": 0.0004, "step": 179950 }, { "epoch": 1.4716441100707365, "grad_norm": 0.03145278990268707, "learning_rate": 1.9794788119909703e-06, "loss": 0.0007, "step": 179960 }, { "epoch": 1.4717258862493356, "grad_norm": 0.05927637591958046, "learning_rate": 1.978910145802119e-06, "loss": 0.0011, "step": 179970 }, { "epoch": 1.4718076624279348, "grad_norm": 0.0030025169253349304, "learning_rate": 1.9783415411553242e-06, "loss": 0.0008, "step": 179980 }, { "epoch": 1.471889438606534, "grad_norm": 0.01413833536207676, "learning_rate": 1.977772998062168e-06, "loss": 0.0007, "step": 179990 }, { "epoch": 1.4719712147851332, "grad_norm": 0.054575156420469284, "learning_rate": 1.9772045165342323e-06, "loss": 0.0006, "step": 180000 }, { "epoch": 1.4720529909637323, "grad_norm": 0.030336527153849602, "learning_rate": 1.9766360965830982e-06, "loss": 0.0006, "step": 180010 }, { "epoch": 1.4721347671423315, "grad_norm": 0.06641804426908493, "learning_rate": 1.9760677382203417e-06, "loss": 0.0009, "step": 180020 }, { "epoch": 1.4722165433209307, "grad_norm": 0.05642154812812805, "learning_rate": 1.9754994414575464e-06, "loss": 0.0011, "step": 180030 }, { "epoch": 1.4722983194995298, "grad_norm": 0.05025848001241684, "learning_rate": 1.974931206306285e-06, "loss": 0.0008, "step": 180040 }, { "epoch": 1.472380095678129, "grad_norm": 0.06738093495368958, "learning_rate": 1.974363032778134e-06, "loss": 0.001, "step": 180050 }, { "epoch": 1.4724618718567282, "grad_norm": 0.0150653887540102, "learning_rate": 1.9737949208846656e-06, "loss": 0.0008, "step": 180060 }, { "epoch": 1.4725436480353273, "grad_norm": 0.03323080390691757, "learning_rate": 1.9732268706374565e-06, "loss": 0.001, "step": 180070 }, { "epoch": 1.4726254242139265, "grad_norm": 0.009062068536877632, "learning_rate": 1.972658882048076e-06, "loss": 0.0004, "step": 180080 }, { "epoch": 1.4727072003925257, "grad_norm": 0.030968276783823967, "learning_rate": 1.9720909551280947e-06, "loss": 0.0006, "step": 180090 }, { "epoch": 1.4727889765711248, "grad_norm": 0.00783612858504057, "learning_rate": 1.971523089889082e-06, "loss": 0.0007, "step": 180100 }, { "epoch": 1.472870752749724, "grad_norm": 0.07078605145215988, "learning_rate": 1.970955286342606e-06, "loss": 0.0007, "step": 180110 }, { "epoch": 1.4729525289283232, "grad_norm": 0.021077200770378113, "learning_rate": 1.9703875445002326e-06, "loss": 0.0008, "step": 180120 }, { "epoch": 1.4730343051069223, "grad_norm": 0.023800063878297806, "learning_rate": 1.9698198643735274e-06, "loss": 0.0016, "step": 180130 }, { "epoch": 1.4731160812855215, "grad_norm": 0.009192800149321556, "learning_rate": 1.9692522459740547e-06, "loss": 0.001, "step": 180140 }, { "epoch": 1.4731978574641207, "grad_norm": 0.09705979377031326, "learning_rate": 1.9686846893133752e-06, "loss": 0.001, "step": 180150 }, { "epoch": 1.4732796336427199, "grad_norm": 0.009618593379855156, "learning_rate": 1.9681171944030553e-06, "loss": 0.0011, "step": 180160 }, { "epoch": 1.473361409821319, "grad_norm": 0.01511518657207489, "learning_rate": 1.9675497612546518e-06, "loss": 0.0009, "step": 180170 }, { "epoch": 1.4734431859999182, "grad_norm": 0.0020407806150615215, "learning_rate": 1.9669823898797245e-06, "loss": 0.0012, "step": 180180 }, { "epoch": 1.4735249621785174, "grad_norm": 0.015307106077671051, "learning_rate": 1.96641508028983e-06, "loss": 0.0012, "step": 180190 }, { "epoch": 1.4736067383571165, "grad_norm": 0.07663743197917938, "learning_rate": 1.9658478324965275e-06, "loss": 0.0018, "step": 180200 }, { "epoch": 1.4736885145357157, "grad_norm": 0.019364675506949425, "learning_rate": 1.965280646511371e-06, "loss": 0.0009, "step": 180210 }, { "epoch": 1.4737702907143149, "grad_norm": 0.00239185499958694, "learning_rate": 1.9647135223459144e-06, "loss": 0.0012, "step": 180220 }, { "epoch": 1.473852066892914, "grad_norm": 0.012691175565123558, "learning_rate": 1.964146460011711e-06, "loss": 0.0007, "step": 180230 }, { "epoch": 1.4739338430715132, "grad_norm": 0.012394707649946213, "learning_rate": 1.963579459520312e-06, "loss": 0.0006, "step": 180240 }, { "epoch": 1.4740156192501124, "grad_norm": 0.08093531429767609, "learning_rate": 1.9630125208832667e-06, "loss": 0.0029, "step": 180250 }, { "epoch": 1.4740973954287115, "grad_norm": 0.029216108843684196, "learning_rate": 1.962445644112126e-06, "loss": 0.0008, "step": 180260 }, { "epoch": 1.4741791716073107, "grad_norm": 0.04431867599487305, "learning_rate": 1.9618788292184337e-06, "loss": 0.0032, "step": 180270 }, { "epoch": 1.4742609477859099, "grad_norm": 0.02509218454360962, "learning_rate": 1.9613120762137415e-06, "loss": 0.0006, "step": 180280 }, { "epoch": 1.4743427239645093, "grad_norm": 0.04640202596783638, "learning_rate": 1.960745385109592e-06, "loss": 0.001, "step": 180290 }, { "epoch": 1.4744245001431082, "grad_norm": 0.021091846749186516, "learning_rate": 1.9601787559175302e-06, "loss": 0.0005, "step": 180300 }, { "epoch": 1.4745062763217076, "grad_norm": 0.01726652681827545, "learning_rate": 1.9596121886490953e-06, "loss": 0.0012, "step": 180310 }, { "epoch": 1.4745880525003066, "grad_norm": 0.03402133285999298, "learning_rate": 1.959045683315834e-06, "loss": 0.0015, "step": 180320 }, { "epoch": 1.474669828678906, "grad_norm": 0.027861548587679863, "learning_rate": 1.9584792399292834e-06, "loss": 0.0006, "step": 180330 }, { "epoch": 1.474751604857505, "grad_norm": 0.05045469477772713, "learning_rate": 1.9579128585009833e-06, "loss": 0.0009, "step": 180340 }, { "epoch": 1.4748333810361043, "grad_norm": 0.03128376603126526, "learning_rate": 1.9573465390424705e-06, "loss": 0.0006, "step": 180350 }, { "epoch": 1.4749151572147032, "grad_norm": 0.007826426066458225, "learning_rate": 1.9567802815652827e-06, "loss": 0.0006, "step": 180360 }, { "epoch": 1.4749969333933026, "grad_norm": 0.00477486290037632, "learning_rate": 1.956214086080953e-06, "loss": 0.0005, "step": 180370 }, { "epoch": 1.4750787095719016, "grad_norm": 0.04161277785897255, "learning_rate": 1.9556479526010176e-06, "loss": 0.0009, "step": 180380 }, { "epoch": 1.475160485750501, "grad_norm": 0.012562921270728111, "learning_rate": 1.955081881137006e-06, "loss": 0.0004, "step": 180390 }, { "epoch": 1.4752422619291001, "grad_norm": 0.015158756636083126, "learning_rate": 1.9545158717004532e-06, "loss": 0.0004, "step": 180400 }, { "epoch": 1.4753240381076993, "grad_norm": 0.020400395616889, "learning_rate": 1.953949924302887e-06, "loss": 0.001, "step": 180410 }, { "epoch": 1.4754058142862985, "grad_norm": 0.0607115812599659, "learning_rate": 1.953384038955838e-06, "loss": 0.0016, "step": 180420 }, { "epoch": 1.4754875904648976, "grad_norm": 0.018397117033600807, "learning_rate": 1.9528182156708296e-06, "loss": 0.0008, "step": 180430 }, { "epoch": 1.4755693666434968, "grad_norm": 0.0012564602075144649, "learning_rate": 1.9522524544593934e-06, "loss": 0.0006, "step": 180440 }, { "epoch": 1.475651142822096, "grad_norm": 0.0026101081166416407, "learning_rate": 1.951686755333052e-06, "loss": 0.0004, "step": 180450 }, { "epoch": 1.4757329190006951, "grad_norm": 0.0751427486538887, "learning_rate": 1.9511211183033293e-06, "loss": 0.0008, "step": 180460 }, { "epoch": 1.4758146951792943, "grad_norm": 0.01221329253166914, "learning_rate": 1.950555543381747e-06, "loss": 0.0008, "step": 180470 }, { "epoch": 1.4758964713578935, "grad_norm": 0.008121766149997711, "learning_rate": 1.9499900305798275e-06, "loss": 0.0002, "step": 180480 }, { "epoch": 1.4759782475364926, "grad_norm": 0.0718352422118187, "learning_rate": 1.9494245799090906e-06, "loss": 0.0007, "step": 180490 }, { "epoch": 1.4760600237150918, "grad_norm": 0.05260147526860237, "learning_rate": 1.948859191381054e-06, "loss": 0.0013, "step": 180500 }, { "epoch": 1.476141799893691, "grad_norm": 0.014127420261502266, "learning_rate": 1.9482938650072342e-06, "loss": 0.0012, "step": 180510 }, { "epoch": 1.4762235760722902, "grad_norm": 0.03787235915660858, "learning_rate": 1.947728600799151e-06, "loss": 0.0006, "step": 180520 }, { "epoch": 1.4763053522508893, "grad_norm": 0.0534881092607975, "learning_rate": 1.947163398768317e-06, "loss": 0.0014, "step": 180530 }, { "epoch": 1.4763871284294885, "grad_norm": 0.0736735537648201, "learning_rate": 1.9465982589262457e-06, "loss": 0.0008, "step": 180540 }, { "epoch": 1.4764689046080877, "grad_norm": 0.08130503445863724, "learning_rate": 1.9460331812844486e-06, "loss": 0.0005, "step": 180550 }, { "epoch": 1.4765506807866868, "grad_norm": 0.008804216980934143, "learning_rate": 1.9454681658544393e-06, "loss": 0.0007, "step": 180560 }, { "epoch": 1.476632456965286, "grad_norm": 0.04022819176316261, "learning_rate": 1.944903212647726e-06, "loss": 0.0009, "step": 180570 }, { "epoch": 1.4767142331438852, "grad_norm": 0.010150180198252201, "learning_rate": 1.944338321675818e-06, "loss": 0.0006, "step": 180580 }, { "epoch": 1.4767960093224843, "grad_norm": 0.0069660316221416, "learning_rate": 1.9437734929502226e-06, "loss": 0.0004, "step": 180590 }, { "epoch": 1.4768777855010835, "grad_norm": 0.001770766219124198, "learning_rate": 1.943208726482445e-06, "loss": 0.0005, "step": 180600 }, { "epoch": 1.4769595616796827, "grad_norm": 0.0539659783244133, "learning_rate": 1.9426440222839903e-06, "loss": 0.0018, "step": 180610 }, { "epoch": 1.4770413378582818, "grad_norm": 0.0010104867396876216, "learning_rate": 1.942079380366362e-06, "loss": 0.0014, "step": 180620 }, { "epoch": 1.477123114036881, "grad_norm": 0.03328808769583702, "learning_rate": 1.9415148007410605e-06, "loss": 0.0004, "step": 180630 }, { "epoch": 1.4772048902154802, "grad_norm": 0.06418897956609726, "learning_rate": 1.9409502834195904e-06, "loss": 0.001, "step": 180640 }, { "epoch": 1.4772866663940794, "grad_norm": 0.040441058576107025, "learning_rate": 1.94038582841345e-06, "loss": 0.0018, "step": 180650 }, { "epoch": 1.4773684425726785, "grad_norm": 0.05435875803232193, "learning_rate": 1.9398214357341366e-06, "loss": 0.001, "step": 180660 }, { "epoch": 1.4774502187512777, "grad_norm": 0.21105146408081055, "learning_rate": 1.9392571053931465e-06, "loss": 0.0011, "step": 180670 }, { "epoch": 1.4775319949298769, "grad_norm": 0.03435980901122093, "learning_rate": 1.9386928374019785e-06, "loss": 0.0003, "step": 180680 }, { "epoch": 1.477613771108476, "grad_norm": 0.030074309557676315, "learning_rate": 1.938128631772126e-06, "loss": 0.0009, "step": 180690 }, { "epoch": 1.4776955472870752, "grad_norm": 0.03170453757047653, "learning_rate": 1.9375644885150822e-06, "loss": 0.0005, "step": 180700 }, { "epoch": 1.4777773234656744, "grad_norm": 0.01931672915816307, "learning_rate": 1.9370004076423387e-06, "loss": 0.0008, "step": 180710 }, { "epoch": 1.4778590996442738, "grad_norm": 0.004354394041001797, "learning_rate": 1.936436389165387e-06, "loss": 0.0048, "step": 180720 }, { "epoch": 1.4779408758228727, "grad_norm": 0.0011047597508877516, "learning_rate": 1.9358724330957162e-06, "loss": 0.0008, "step": 180730 }, { "epoch": 1.478022652001472, "grad_norm": 0.01595664955675602, "learning_rate": 1.9353085394448144e-06, "loss": 0.0009, "step": 180740 }, { "epoch": 1.478104428180071, "grad_norm": 0.00495305098593235, "learning_rate": 1.934744708224167e-06, "loss": 0.0005, "step": 180750 }, { "epoch": 1.4781862043586704, "grad_norm": 0.014369129203259945, "learning_rate": 1.9341809394452637e-06, "loss": 0.0006, "step": 180760 }, { "epoch": 1.4782679805372694, "grad_norm": 0.0362078920006752, "learning_rate": 1.9336172331195864e-06, "loss": 0.0005, "step": 180770 }, { "epoch": 1.4783497567158688, "grad_norm": 0.03569822013378143, "learning_rate": 1.9330535892586185e-06, "loss": 0.0008, "step": 180780 }, { "epoch": 1.4784315328944677, "grad_norm": 0.02799110673367977, "learning_rate": 1.9324900078738403e-06, "loss": 0.0008, "step": 180790 }, { "epoch": 1.478513309073067, "grad_norm": 0.05435076728463173, "learning_rate": 1.9319264889767353e-06, "loss": 0.0007, "step": 180800 }, { "epoch": 1.478595085251666, "grad_norm": 0.024212054908275604, "learning_rate": 1.931363032578782e-06, "loss": 0.0014, "step": 180810 }, { "epoch": 1.4786768614302654, "grad_norm": 0.021614689379930496, "learning_rate": 1.930799638691458e-06, "loss": 0.0007, "step": 180820 }, { "epoch": 1.4787586376088646, "grad_norm": 0.037638284265995026, "learning_rate": 1.9302363073262397e-06, "loss": 0.0006, "step": 180830 }, { "epoch": 1.4788404137874638, "grad_norm": 0.058916594833135605, "learning_rate": 1.929673038494604e-06, "loss": 0.001, "step": 180840 }, { "epoch": 1.478922189966063, "grad_norm": 0.035042088478803635, "learning_rate": 1.9291098322080232e-06, "loss": 0.001, "step": 180850 }, { "epoch": 1.4790039661446621, "grad_norm": 0.03262965753674507, "learning_rate": 1.928546688477972e-06, "loss": 0.0045, "step": 180860 }, { "epoch": 1.4790857423232613, "grad_norm": 0.04023243114352226, "learning_rate": 1.927983607315919e-06, "loss": 0.0006, "step": 180870 }, { "epoch": 1.4791675185018605, "grad_norm": 0.030097221955657005, "learning_rate": 1.927420588733339e-06, "loss": 0.0008, "step": 180880 }, { "epoch": 1.4792492946804596, "grad_norm": 0.012995404191315174, "learning_rate": 1.9268576327416995e-06, "loss": 0.0004, "step": 180890 }, { "epoch": 1.4793310708590588, "grad_norm": 0.014655244536697865, "learning_rate": 1.926294739352467e-06, "loss": 0.0007, "step": 180900 }, { "epoch": 1.479412847037658, "grad_norm": 0.08123323321342468, "learning_rate": 1.9257319085771083e-06, "loss": 0.0009, "step": 180910 }, { "epoch": 1.4794946232162571, "grad_norm": 0.014339839108288288, "learning_rate": 1.925169140427091e-06, "loss": 0.0005, "step": 180920 }, { "epoch": 1.4795763993948563, "grad_norm": 0.007398185785859823, "learning_rate": 1.924606434913878e-06, "loss": 0.0007, "step": 180930 }, { "epoch": 1.4796581755734555, "grad_norm": 0.037928659468889236, "learning_rate": 1.9240437920489307e-06, "loss": 0.0009, "step": 180940 }, { "epoch": 1.4797399517520546, "grad_norm": 0.020399440079927444, "learning_rate": 1.9234812118437126e-06, "loss": 0.001, "step": 180950 }, { "epoch": 1.4798217279306538, "grad_norm": 0.0012640106724575162, "learning_rate": 1.922918694309682e-06, "loss": 0.0012, "step": 180960 }, { "epoch": 1.479903504109253, "grad_norm": 0.0643884465098381, "learning_rate": 1.9223562394582996e-06, "loss": 0.0009, "step": 180970 }, { "epoch": 1.4799852802878521, "grad_norm": 0.041645970195531845, "learning_rate": 1.921793847301022e-06, "loss": 0.0008, "step": 180980 }, { "epoch": 1.4800670564664513, "grad_norm": 0.07832631468772888, "learning_rate": 1.921231517849304e-06, "loss": 0.0008, "step": 180990 }, { "epoch": 1.4801488326450505, "grad_norm": 0.041165199130773544, "learning_rate": 1.920669251114605e-06, "loss": 0.0009, "step": 181000 }, { "epoch": 1.4802306088236497, "grad_norm": 0.02626189962029457, "learning_rate": 1.9201070471083756e-06, "loss": 0.0009, "step": 181010 }, { "epoch": 1.4803123850022488, "grad_norm": 0.06725756824016571, "learning_rate": 1.919544905842069e-06, "loss": 0.001, "step": 181020 }, { "epoch": 1.480394161180848, "grad_norm": 0.012542400509119034, "learning_rate": 1.918982827327135e-06, "loss": 0.0011, "step": 181030 }, { "epoch": 1.4804759373594472, "grad_norm": 0.06054394692182541, "learning_rate": 1.918420811575027e-06, "loss": 0.0003, "step": 181040 }, { "epoch": 1.4805577135380463, "grad_norm": 0.028196245431900024, "learning_rate": 1.917858858597192e-06, "loss": 0.0008, "step": 181050 }, { "epoch": 1.4806394897166455, "grad_norm": 0.03400453180074692, "learning_rate": 1.917296968405077e-06, "loss": 0.0006, "step": 181060 }, { "epoch": 1.4807212658952447, "grad_norm": 0.004112069495022297, "learning_rate": 1.9167351410101285e-06, "loss": 0.001, "step": 181070 }, { "epoch": 1.4808030420738438, "grad_norm": 0.03860284760594368, "learning_rate": 1.9161733764237917e-06, "loss": 0.0002, "step": 181080 }, { "epoch": 1.480884818252443, "grad_norm": 0.055637940764427185, "learning_rate": 1.91561167465751e-06, "loss": 0.0007, "step": 181090 }, { "epoch": 1.4809665944310422, "grad_norm": 0.04236804321408272, "learning_rate": 1.915050035722725e-06, "loss": 0.0006, "step": 181100 }, { "epoch": 1.4810483706096413, "grad_norm": 0.04089377447962761, "learning_rate": 1.914488459630877e-06, "loss": 0.0013, "step": 181110 }, { "epoch": 1.4811301467882405, "grad_norm": 0.03249938786029816, "learning_rate": 1.9139269463934092e-06, "loss": 0.0005, "step": 181120 }, { "epoch": 1.4812119229668397, "grad_norm": 0.010487350635230541, "learning_rate": 1.913365496021758e-06, "loss": 0.0007, "step": 181130 }, { "epoch": 1.4812936991454388, "grad_norm": 0.06479301303625107, "learning_rate": 1.9128041085273604e-06, "loss": 0.0008, "step": 181140 }, { "epoch": 1.4813754753240382, "grad_norm": 0.055172357708215714, "learning_rate": 1.91224278392165e-06, "loss": 0.0006, "step": 181150 }, { "epoch": 1.4814572515026372, "grad_norm": 0.03366876393556595, "learning_rate": 1.911681522216066e-06, "loss": 0.0006, "step": 181160 }, { "epoch": 1.4815390276812366, "grad_norm": 0.0027320871595293283, "learning_rate": 1.9111203234220404e-06, "loss": 0.0003, "step": 181170 }, { "epoch": 1.4816208038598355, "grad_norm": 0.08000157028436661, "learning_rate": 1.9105591875510045e-06, "loss": 0.0007, "step": 181180 }, { "epoch": 1.481702580038435, "grad_norm": 0.017344564199447632, "learning_rate": 1.9099981146143886e-06, "loss": 0.0004, "step": 181190 }, { "epoch": 1.4817843562170339, "grad_norm": 0.0021461965516209602, "learning_rate": 1.909437104623623e-06, "loss": 0.0006, "step": 181200 }, { "epoch": 1.4818661323956333, "grad_norm": 0.03955535963177681, "learning_rate": 1.908876157590136e-06, "loss": 0.0007, "step": 181210 }, { "epoch": 1.4819479085742322, "grad_norm": 0.0387914702296257, "learning_rate": 1.9083152735253534e-06, "loss": 0.0002, "step": 181220 }, { "epoch": 1.4820296847528316, "grad_norm": 0.07943332195281982, "learning_rate": 1.907754452440701e-06, "loss": 0.0012, "step": 181230 }, { "epoch": 1.4821114609314305, "grad_norm": 0.05259007215499878, "learning_rate": 1.9071936943476055e-06, "loss": 0.0014, "step": 181240 }, { "epoch": 1.48219323711003, "grad_norm": 0.046372395008802414, "learning_rate": 1.906632999257488e-06, "loss": 0.0007, "step": 181250 }, { "epoch": 1.482275013288629, "grad_norm": 0.049395717680454254, "learning_rate": 1.906072367181771e-06, "loss": 0.0004, "step": 181260 }, { "epoch": 1.4823567894672283, "grad_norm": 0.03240002691745758, "learning_rate": 1.9055117981318727e-06, "loss": 0.0007, "step": 181270 }, { "epoch": 1.4824385656458274, "grad_norm": 0.02239489182829857, "learning_rate": 1.9049512921192164e-06, "loss": 0.0005, "step": 181280 }, { "epoch": 1.4825203418244266, "grad_norm": 0.45285946130752563, "learning_rate": 1.9043908491552176e-06, "loss": 0.0009, "step": 181290 }, { "epoch": 1.4826021180030258, "grad_norm": 0.04082941636443138, "learning_rate": 1.9038304692512943e-06, "loss": 0.0009, "step": 181300 }, { "epoch": 1.482683894181625, "grad_norm": 0.039160627871751785, "learning_rate": 1.9032701524188601e-06, "loss": 0.0008, "step": 181310 }, { "epoch": 1.482765670360224, "grad_norm": 0.020814936608076096, "learning_rate": 1.902709898669331e-06, "loss": 0.0006, "step": 181320 }, { "epoch": 1.4828474465388233, "grad_norm": 0.004609975963830948, "learning_rate": 1.9021497080141182e-06, "loss": 0.0011, "step": 181330 }, { "epoch": 1.4829292227174224, "grad_norm": 0.06388761103153229, "learning_rate": 1.901589580464634e-06, "loss": 0.0006, "step": 181340 }, { "epoch": 1.4830109988960216, "grad_norm": 0.028687799349427223, "learning_rate": 1.9010295160322868e-06, "loss": 0.0008, "step": 181350 }, { "epoch": 1.4830927750746208, "grad_norm": 0.05489783361554146, "learning_rate": 1.900469514728489e-06, "loss": 0.0006, "step": 181360 }, { "epoch": 1.48317455125322, "grad_norm": 0.0889345109462738, "learning_rate": 1.899909576564647e-06, "loss": 0.0004, "step": 181370 }, { "epoch": 1.4832563274318191, "grad_norm": 0.006689048372209072, "learning_rate": 1.8993497015521667e-06, "loss": 0.0007, "step": 181380 }, { "epoch": 1.4833381036104183, "grad_norm": 0.03838898614048958, "learning_rate": 1.8987898897024514e-06, "loss": 0.0009, "step": 181390 }, { "epoch": 1.4834198797890175, "grad_norm": 0.012042590416967869, "learning_rate": 1.8982301410269088e-06, "loss": 0.0004, "step": 181400 }, { "epoch": 1.4835016559676166, "grad_norm": 0.05390692502260208, "learning_rate": 1.897670455536939e-06, "loss": 0.0009, "step": 181410 }, { "epoch": 1.4835834321462158, "grad_norm": 0.02242436446249485, "learning_rate": 1.8971108332439442e-06, "loss": 0.0018, "step": 181420 }, { "epoch": 1.483665208324815, "grad_norm": 0.11986155807971954, "learning_rate": 1.896551274159324e-06, "loss": 0.0011, "step": 181430 }, { "epoch": 1.4837469845034141, "grad_norm": 0.006683305371552706, "learning_rate": 1.8959917782944765e-06, "loss": 0.0019, "step": 181440 }, { "epoch": 1.4838287606820133, "grad_norm": 0.027063827961683273, "learning_rate": 1.8954323456607998e-06, "loss": 0.0006, "step": 181450 }, { "epoch": 1.4839105368606125, "grad_norm": 0.05390042066574097, "learning_rate": 1.8948729762696894e-06, "loss": 0.0007, "step": 181460 }, { "epoch": 1.4839923130392116, "grad_norm": 0.06834112852811813, "learning_rate": 1.8943136701325393e-06, "loss": 0.0011, "step": 181470 }, { "epoch": 1.4840740892178108, "grad_norm": 0.0506744347512722, "learning_rate": 1.8937544272607455e-06, "loss": 0.0007, "step": 181480 }, { "epoch": 1.48415586539641, "grad_norm": 0.09977398812770844, "learning_rate": 1.893195247665699e-06, "loss": 0.001, "step": 181490 }, { "epoch": 1.4842376415750091, "grad_norm": 0.033423878252506256, "learning_rate": 1.8926361313587904e-06, "loss": 0.0004, "step": 181500 }, { "epoch": 1.4843194177536083, "grad_norm": 0.013363287784159184, "learning_rate": 1.892077078351408e-06, "loss": 0.0003, "step": 181510 }, { "epoch": 1.4844011939322075, "grad_norm": 0.014040389098227024, "learning_rate": 1.8915180886549439e-06, "loss": 0.0012, "step": 181520 }, { "epoch": 1.4844829701108067, "grad_norm": 0.02068440243601799, "learning_rate": 1.8909591622807826e-06, "loss": 0.0004, "step": 181530 }, { "epoch": 1.4845647462894058, "grad_norm": 0.017548754811286926, "learning_rate": 1.8904002992403103e-06, "loss": 0.0038, "step": 181540 }, { "epoch": 1.484646522468005, "grad_norm": 0.10670926421880722, "learning_rate": 1.8898414995449115e-06, "loss": 0.0012, "step": 181550 }, { "epoch": 1.4847282986466042, "grad_norm": 0.06264382600784302, "learning_rate": 1.8892827632059695e-06, "loss": 0.0006, "step": 181560 }, { "epoch": 1.4848100748252033, "grad_norm": 0.012538732029497623, "learning_rate": 1.8887240902348663e-06, "loss": 0.0007, "step": 181570 }, { "epoch": 1.4848918510038027, "grad_norm": 0.029262790456414223, "learning_rate": 1.8881654806429822e-06, "loss": 0.0003, "step": 181580 }, { "epoch": 1.4849736271824017, "grad_norm": 0.042714834213256836, "learning_rate": 1.8876069344416969e-06, "loss": 0.0006, "step": 181590 }, { "epoch": 1.485055403361001, "grad_norm": 0.0013302586739882827, "learning_rate": 1.8870484516423864e-06, "loss": 0.0008, "step": 181600 }, { "epoch": 1.4851371795396, "grad_norm": 0.022041285410523415, "learning_rate": 1.8864900322564307e-06, "loss": 0.0011, "step": 181610 }, { "epoch": 1.4852189557181994, "grad_norm": 0.020840955898165703, "learning_rate": 1.885931676295204e-06, "loss": 0.0014, "step": 181620 }, { "epoch": 1.4853007318967983, "grad_norm": 0.007655221503227949, "learning_rate": 1.8853733837700805e-06, "loss": 0.0012, "step": 181630 }, { "epoch": 1.4853825080753977, "grad_norm": 0.028145235031843185, "learning_rate": 1.8848151546924309e-06, "loss": 0.0011, "step": 181640 }, { "epoch": 1.4854642842539967, "grad_norm": 0.023051397874951363, "learning_rate": 1.8842569890736307e-06, "loss": 0.0008, "step": 181650 }, { "epoch": 1.485546060432596, "grad_norm": 0.021979117766022682, "learning_rate": 1.8836988869250483e-06, "loss": 0.0008, "step": 181660 }, { "epoch": 1.4856278366111952, "grad_norm": 0.05123911798000336, "learning_rate": 1.8831408482580522e-06, "loss": 0.0012, "step": 181670 }, { "epoch": 1.4857096127897944, "grad_norm": 0.03848384693264961, "learning_rate": 1.8825828730840106e-06, "loss": 0.0007, "step": 181680 }, { "epoch": 1.4857913889683936, "grad_norm": 0.005164536647498608, "learning_rate": 1.8820249614142905e-06, "loss": 0.0015, "step": 181690 }, { "epoch": 1.4858731651469927, "grad_norm": 0.05920030549168587, "learning_rate": 1.8814671132602557e-06, "loss": 0.0004, "step": 181700 }, { "epoch": 1.485954941325592, "grad_norm": 0.06211826205253601, "learning_rate": 1.8809093286332709e-06, "loss": 0.0007, "step": 181710 }, { "epoch": 1.486036717504191, "grad_norm": 0.012128804810345173, "learning_rate": 1.8803516075446965e-06, "loss": 0.0013, "step": 181720 }, { "epoch": 1.4861184936827903, "grad_norm": 0.017173035070300102, "learning_rate": 1.8797939500058975e-06, "loss": 0.0007, "step": 181730 }, { "epoch": 1.4862002698613894, "grad_norm": 0.11585664004087448, "learning_rate": 1.8792363560282317e-06, "loss": 0.0013, "step": 181740 }, { "epoch": 1.4862820460399886, "grad_norm": 0.04596333205699921, "learning_rate": 1.8786788256230587e-06, "loss": 0.0006, "step": 181750 }, { "epoch": 1.4863638222185878, "grad_norm": 0.023546753451228142, "learning_rate": 1.8781213588017328e-06, "loss": 0.0004, "step": 181760 }, { "epoch": 1.486445598397187, "grad_norm": 0.7036563158035278, "learning_rate": 1.8775639555756142e-06, "loss": 0.0006, "step": 181770 }, { "epoch": 1.486527374575786, "grad_norm": 0.01891758106648922, "learning_rate": 1.8770066159560562e-06, "loss": 0.0008, "step": 181780 }, { "epoch": 1.4866091507543853, "grad_norm": 0.00205161701887846, "learning_rate": 1.8764493399544114e-06, "loss": 0.0006, "step": 181790 }, { "epoch": 1.4866909269329844, "grad_norm": 0.005042733624577522, "learning_rate": 1.875892127582033e-06, "loss": 0.0004, "step": 181800 }, { "epoch": 1.4867727031115836, "grad_norm": 0.17512138187885284, "learning_rate": 1.8753349788502713e-06, "loss": 0.0018, "step": 181810 }, { "epoch": 1.4868544792901828, "grad_norm": 0.12408863753080368, "learning_rate": 1.8747778937704758e-06, "loss": 0.0004, "step": 181820 }, { "epoch": 1.486936255468782, "grad_norm": 0.03725435957312584, "learning_rate": 1.874220872353995e-06, "loss": 0.0008, "step": 181830 }, { "epoch": 1.4870180316473811, "grad_norm": 0.05603145435452461, "learning_rate": 1.8736639146121737e-06, "loss": 0.0003, "step": 181840 }, { "epoch": 1.4870998078259803, "grad_norm": 0.060375966131687164, "learning_rate": 1.8731070205563617e-06, "loss": 0.0006, "step": 181850 }, { "epoch": 1.4871815840045794, "grad_norm": 0.059323426336050034, "learning_rate": 1.8725501901979014e-06, "loss": 0.0011, "step": 181860 }, { "epoch": 1.4872633601831786, "grad_norm": 0.08043941110372543, "learning_rate": 1.8719934235481358e-06, "loss": 0.0006, "step": 181870 }, { "epoch": 1.4873451363617778, "grad_norm": 0.001871237880550325, "learning_rate": 1.8714367206184054e-06, "loss": 0.0007, "step": 181880 }, { "epoch": 1.487426912540377, "grad_norm": 0.01494559459388256, "learning_rate": 1.8708800814200534e-06, "loss": 0.001, "step": 181890 }, { "epoch": 1.4875086887189761, "grad_norm": 0.002460663905367255, "learning_rate": 1.8703235059644177e-06, "loss": 0.0003, "step": 181900 }, { "epoch": 1.4875904648975753, "grad_norm": 0.02773606963455677, "learning_rate": 1.8697669942628354e-06, "loss": 0.0006, "step": 181910 }, { "epoch": 1.4876722410761745, "grad_norm": 0.034083783626556396, "learning_rate": 1.8692105463266447e-06, "loss": 0.0006, "step": 181920 }, { "epoch": 1.4877540172547736, "grad_norm": 0.05066516622900963, "learning_rate": 1.8686541621671795e-06, "loss": 0.0086, "step": 181930 }, { "epoch": 1.4878357934333728, "grad_norm": 0.02449648827314377, "learning_rate": 1.8680978417957746e-06, "loss": 0.0007, "step": 181940 }, { "epoch": 1.487917569611972, "grad_norm": 0.05834018811583519, "learning_rate": 1.8675415852237623e-06, "loss": 0.0004, "step": 181950 }, { "epoch": 1.4879993457905711, "grad_norm": 0.004767377860844135, "learning_rate": 1.8669853924624726e-06, "loss": 0.0006, "step": 181960 }, { "epoch": 1.4880811219691703, "grad_norm": 0.0009115736465901136, "learning_rate": 1.8664292635232383e-06, "loss": 0.0009, "step": 181970 }, { "epoch": 1.4881628981477695, "grad_norm": 0.019105058163404465, "learning_rate": 1.8658731984173877e-06, "loss": 0.0004, "step": 181980 }, { "epoch": 1.4882446743263689, "grad_norm": 0.008933051489293575, "learning_rate": 1.8653171971562473e-06, "loss": 0.0007, "step": 181990 }, { "epoch": 1.4883264505049678, "grad_norm": 0.1352205127477646, "learning_rate": 1.8647612597511417e-06, "loss": 0.0015, "step": 182000 }, { "epoch": 1.4884082266835672, "grad_norm": 0.02578427642583847, "learning_rate": 1.864205386213399e-06, "loss": 0.0006, "step": 182010 }, { "epoch": 1.4884900028621662, "grad_norm": 0.0018817443633452058, "learning_rate": 1.8636495765543423e-06, "loss": 0.0004, "step": 182020 }, { "epoch": 1.4885717790407655, "grad_norm": 0.08745077252388, "learning_rate": 1.8630938307852926e-06, "loss": 0.0011, "step": 182030 }, { "epoch": 1.4886535552193645, "grad_norm": 0.07714468240737915, "learning_rate": 1.8625381489175714e-06, "loss": 0.0009, "step": 182040 }, { "epoch": 1.4887353313979639, "grad_norm": 0.01810932531952858, "learning_rate": 1.8619825309624989e-06, "loss": 0.0004, "step": 182050 }, { "epoch": 1.4888171075765628, "grad_norm": 0.020963115617632866, "learning_rate": 1.8614269769313925e-06, "loss": 0.0003, "step": 182060 }, { "epoch": 1.4888988837551622, "grad_norm": 0.10149040073156357, "learning_rate": 1.8608714868355698e-06, "loss": 0.0007, "step": 182070 }, { "epoch": 1.4889806599337612, "grad_norm": 0.06431647390127182, "learning_rate": 1.8603160606863447e-06, "loss": 0.0007, "step": 182080 }, { "epoch": 1.4890624361123606, "grad_norm": 0.04758984223008156, "learning_rate": 1.859760698495035e-06, "loss": 0.0009, "step": 182090 }, { "epoch": 1.4891442122909597, "grad_norm": 0.02765068970620632, "learning_rate": 1.8592054002729532e-06, "loss": 0.0003, "step": 182100 }, { "epoch": 1.489225988469559, "grad_norm": 0.029489049687981606, "learning_rate": 1.8586501660314098e-06, "loss": 0.0004, "step": 182110 }, { "epoch": 1.489307764648158, "grad_norm": 0.0053917220793664455, "learning_rate": 1.858094995781714e-06, "loss": 0.001, "step": 182120 }, { "epoch": 1.4893895408267572, "grad_norm": 0.09946756064891815, "learning_rate": 1.8575398895351793e-06, "loss": 0.0011, "step": 182130 }, { "epoch": 1.4894713170053564, "grad_norm": 0.006554062478244305, "learning_rate": 1.8569848473031111e-06, "loss": 0.0007, "step": 182140 }, { "epoch": 1.4895530931839556, "grad_norm": 0.0392480343580246, "learning_rate": 1.8564298690968162e-06, "loss": 0.0011, "step": 182150 }, { "epoch": 1.4896348693625547, "grad_norm": 0.025474801659584045, "learning_rate": 1.8558749549276e-06, "loss": 0.0005, "step": 182160 }, { "epoch": 1.489716645541154, "grad_norm": 0.020517054945230484, "learning_rate": 1.855320104806767e-06, "loss": 0.0012, "step": 182170 }, { "epoch": 1.489798421719753, "grad_norm": 0.030027523636817932, "learning_rate": 1.8547653187456194e-06, "loss": 0.0006, "step": 182180 }, { "epoch": 1.4898801978983522, "grad_norm": 0.012525011785328388, "learning_rate": 1.854210596755459e-06, "loss": 0.0004, "step": 182190 }, { "epoch": 1.4899619740769514, "grad_norm": 0.06333248317241669, "learning_rate": 1.853655938847584e-06, "loss": 0.0005, "step": 182200 }, { "epoch": 1.4900437502555506, "grad_norm": 0.004784271586686373, "learning_rate": 1.8531013450332962e-06, "loss": 0.0015, "step": 182210 }, { "epoch": 1.4901255264341497, "grad_norm": 0.01927177421748638, "learning_rate": 1.8525468153238924e-06, "loss": 0.0009, "step": 182220 }, { "epoch": 1.490207302612749, "grad_norm": 0.029005056247115135, "learning_rate": 1.8519923497306685e-06, "loss": 0.0006, "step": 182230 }, { "epoch": 1.490289078791348, "grad_norm": 0.012637262232601643, "learning_rate": 1.8514379482649176e-06, "loss": 0.0004, "step": 182240 }, { "epoch": 1.4903708549699473, "grad_norm": 0.03145270049571991, "learning_rate": 1.8508836109379363e-06, "loss": 0.0004, "step": 182250 }, { "epoch": 1.4904526311485464, "grad_norm": 0.04177377000451088, "learning_rate": 1.8503293377610154e-06, "loss": 0.0011, "step": 182260 }, { "epoch": 1.4905344073271456, "grad_norm": 0.01389202568680048, "learning_rate": 1.8497751287454468e-06, "loss": 0.0008, "step": 182270 }, { "epoch": 1.4906161835057448, "grad_norm": 0.004635800141841173, "learning_rate": 1.849220983902519e-06, "loss": 0.0007, "step": 182280 }, { "epoch": 1.490697959684344, "grad_norm": 0.01959260366857052, "learning_rate": 1.8486669032435206e-06, "loss": 0.0009, "step": 182290 }, { "epoch": 1.490779735862943, "grad_norm": 0.08424559235572815, "learning_rate": 1.8481128867797392e-06, "loss": 0.002, "step": 182300 }, { "epoch": 1.4908615120415423, "grad_norm": 0.027334246784448624, "learning_rate": 1.8475589345224603e-06, "loss": 0.0005, "step": 182310 }, { "epoch": 1.4909432882201414, "grad_norm": 0.05904683098196983, "learning_rate": 1.847005046482967e-06, "loss": 0.0007, "step": 182320 }, { "epoch": 1.4910250643987406, "grad_norm": 0.13832223415374756, "learning_rate": 1.8464512226725451e-06, "loss": 0.0008, "step": 182330 }, { "epoch": 1.4911068405773398, "grad_norm": 0.02624036744236946, "learning_rate": 1.845897463102475e-06, "loss": 0.0008, "step": 182340 }, { "epoch": 1.491188616755939, "grad_norm": 0.06402940303087234, "learning_rate": 1.8453437677840374e-06, "loss": 0.0006, "step": 182350 }, { "epoch": 1.4912703929345381, "grad_norm": 0.10399922728538513, "learning_rate": 1.8447901367285099e-06, "loss": 0.0007, "step": 182360 }, { "epoch": 1.4913521691131373, "grad_norm": 0.036581579595804214, "learning_rate": 1.8442365699471737e-06, "loss": 0.0014, "step": 182370 }, { "epoch": 1.4914339452917365, "grad_norm": 0.044374726712703705, "learning_rate": 1.843683067451304e-06, "loss": 0.0009, "step": 182380 }, { "epoch": 1.4915157214703356, "grad_norm": 0.03206150606274605, "learning_rate": 1.8431296292521755e-06, "loss": 0.0005, "step": 182390 }, { "epoch": 1.4915974976489348, "grad_norm": 0.057545993477106094, "learning_rate": 1.842576255361062e-06, "loss": 0.0007, "step": 182400 }, { "epoch": 1.491679273827534, "grad_norm": 0.010114694945514202, "learning_rate": 1.8420229457892374e-06, "loss": 0.0012, "step": 182410 }, { "epoch": 1.4917610500061333, "grad_norm": 0.0003631108265835792, "learning_rate": 1.841469700547972e-06, "loss": 0.0011, "step": 182420 }, { "epoch": 1.4918428261847323, "grad_norm": 0.016307484358549118, "learning_rate": 1.8409165196485362e-06, "loss": 0.0011, "step": 182430 }, { "epoch": 1.4919246023633317, "grad_norm": 0.015544313006103039, "learning_rate": 1.8403634031021972e-06, "loss": 0.0007, "step": 182440 }, { "epoch": 1.4920063785419306, "grad_norm": 0.005060509778559208, "learning_rate": 1.8398103509202253e-06, "loss": 0.0008, "step": 182450 }, { "epoch": 1.49208815472053, "grad_norm": 0.03759071230888367, "learning_rate": 1.839257363113886e-06, "loss": 0.0009, "step": 182460 }, { "epoch": 1.492169930899129, "grad_norm": 0.05447433143854141, "learning_rate": 1.8387044396944425e-06, "loss": 0.0006, "step": 182470 }, { "epoch": 1.4922517070777284, "grad_norm": 0.026219692081212997, "learning_rate": 1.8381515806731582e-06, "loss": 0.0007, "step": 182480 }, { "epoch": 1.4923334832563273, "grad_norm": 0.04056498408317566, "learning_rate": 1.8375987860612977e-06, "loss": 0.0019, "step": 182490 }, { "epoch": 1.4924152594349267, "grad_norm": 0.02983940951526165, "learning_rate": 1.8370460558701204e-06, "loss": 0.0009, "step": 182500 }, { "epoch": 1.4924970356135256, "grad_norm": 0.023680435493588448, "learning_rate": 1.8364933901108862e-06, "loss": 0.0012, "step": 182510 }, { "epoch": 1.492578811792125, "grad_norm": 0.034387361258268356, "learning_rate": 1.8359407887948528e-06, "loss": 0.0006, "step": 182520 }, { "epoch": 1.4926605879707242, "grad_norm": 0.23740944266319275, "learning_rate": 1.835388251933277e-06, "loss": 0.0007, "step": 182530 }, { "epoch": 1.4927423641493234, "grad_norm": 0.01506861113011837, "learning_rate": 1.8348357795374156e-06, "loss": 0.0007, "step": 182540 }, { "epoch": 1.4928241403279225, "grad_norm": 0.030327996239066124, "learning_rate": 1.8342833716185215e-06, "loss": 0.0006, "step": 182550 }, { "epoch": 1.4929059165065217, "grad_norm": 0.0010926035465672612, "learning_rate": 1.8337310281878474e-06, "loss": 0.0005, "step": 182560 }, { "epoch": 1.4929876926851209, "grad_norm": 0.029774809256196022, "learning_rate": 1.8331787492566471e-06, "loss": 0.0004, "step": 182570 }, { "epoch": 1.49306946886372, "grad_norm": 0.010987241752445698, "learning_rate": 1.83262653483617e-06, "loss": 0.0005, "step": 182580 }, { "epoch": 1.4931512450423192, "grad_norm": 0.13953040540218353, "learning_rate": 1.8320743849376649e-06, "loss": 0.0008, "step": 182590 }, { "epoch": 1.4932330212209184, "grad_norm": 0.022020498290657997, "learning_rate": 1.8315222995723775e-06, "loss": 0.0002, "step": 182600 }, { "epoch": 1.4933147973995176, "grad_norm": 0.026724105700850487, "learning_rate": 1.8309702787515587e-06, "loss": 0.0004, "step": 182610 }, { "epoch": 1.4933965735781167, "grad_norm": 0.05593493953347206, "learning_rate": 1.8304183224864509e-06, "loss": 0.0008, "step": 182620 }, { "epoch": 1.493478349756716, "grad_norm": 0.00048618181608617306, "learning_rate": 1.829866430788298e-06, "loss": 0.0008, "step": 182630 }, { "epoch": 1.493560125935315, "grad_norm": 0.002284191083163023, "learning_rate": 1.8293146036683428e-06, "loss": 0.0007, "step": 182640 }, { "epoch": 1.4936419021139142, "grad_norm": 0.016468938440084457, "learning_rate": 1.8287628411378266e-06, "loss": 0.0016, "step": 182650 }, { "epoch": 1.4937236782925134, "grad_norm": 0.04329365864396095, "learning_rate": 1.8282111432079892e-06, "loss": 0.0008, "step": 182660 }, { "epoch": 1.4938054544711126, "grad_norm": 0.048457615077495575, "learning_rate": 1.827659509890069e-06, "loss": 0.0005, "step": 182670 }, { "epoch": 1.4938872306497117, "grad_norm": 0.003589946310967207, "learning_rate": 1.8271079411953014e-06, "loss": 0.0005, "step": 182680 }, { "epoch": 1.493969006828311, "grad_norm": 0.001189389848150313, "learning_rate": 1.8265564371349254e-06, "loss": 0.0014, "step": 182690 }, { "epoch": 1.49405078300691, "grad_norm": 0.013450942002236843, "learning_rate": 1.8260049977201743e-06, "loss": 0.0006, "step": 182700 }, { "epoch": 1.4941325591855092, "grad_norm": 0.06544581055641174, "learning_rate": 1.8254536229622816e-06, "loss": 0.0004, "step": 182710 }, { "epoch": 1.4942143353641084, "grad_norm": 0.02158975973725319, "learning_rate": 1.8249023128724774e-06, "loss": 0.0005, "step": 182720 }, { "epoch": 1.4942961115427076, "grad_norm": 0.07831799238920212, "learning_rate": 1.8243510674619958e-06, "loss": 0.0008, "step": 182730 }, { "epoch": 1.4943778877213068, "grad_norm": 0.0054074861109256744, "learning_rate": 1.8237998867420636e-06, "loss": 0.0005, "step": 182740 }, { "epoch": 1.494459663899906, "grad_norm": 0.04454100504517555, "learning_rate": 1.82324877072391e-06, "loss": 0.0008, "step": 182750 }, { "epoch": 1.494541440078505, "grad_norm": 0.03822268545627594, "learning_rate": 1.8226977194187613e-06, "loss": 0.0006, "step": 182760 }, { "epoch": 1.4946232162571043, "grad_norm": 0.03204920142889023, "learning_rate": 1.8221467328378422e-06, "loss": 0.0004, "step": 182770 }, { "epoch": 1.4947049924357034, "grad_norm": 0.031055184081196785, "learning_rate": 1.8215958109923776e-06, "loss": 0.0008, "step": 182780 }, { "epoch": 1.4947867686143026, "grad_norm": 0.05791543051600456, "learning_rate": 1.8210449538935892e-06, "loss": 0.0005, "step": 182790 }, { "epoch": 1.4948685447929018, "grad_norm": 0.001074124244041741, "learning_rate": 1.8204941615526984e-06, "loss": 0.0006, "step": 182800 }, { "epoch": 1.494950320971501, "grad_norm": 0.020249199122190475, "learning_rate": 1.8199434339809269e-06, "loss": 0.001, "step": 182810 }, { "epoch": 1.4950320971501, "grad_norm": 0.0032369447872042656, "learning_rate": 1.8193927711894927e-06, "loss": 0.0007, "step": 182820 }, { "epoch": 1.4951138733286993, "grad_norm": 0.07342348247766495, "learning_rate": 1.8188421731896132e-06, "loss": 0.0012, "step": 182830 }, { "epoch": 1.4951956495072984, "grad_norm": 0.06636518239974976, "learning_rate": 1.8182916399925027e-06, "loss": 0.0009, "step": 182840 }, { "epoch": 1.4952774256858978, "grad_norm": 0.0024436060339212418, "learning_rate": 1.817741171609379e-06, "loss": 0.0007, "step": 182850 }, { "epoch": 1.4953592018644968, "grad_norm": 0.07417865842580795, "learning_rate": 1.817190768051455e-06, "loss": 0.0004, "step": 182860 }, { "epoch": 1.4954409780430962, "grad_norm": 0.051654063165187836, "learning_rate": 1.8166404293299417e-06, "loss": 0.0009, "step": 182870 }, { "epoch": 1.4955227542216951, "grad_norm": 0.0008899992099031806, "learning_rate": 1.8160901554560506e-06, "loss": 0.0007, "step": 182880 }, { "epoch": 1.4956045304002945, "grad_norm": 0.07068132609128952, "learning_rate": 1.8155399464409906e-06, "loss": 0.0004, "step": 182890 }, { "epoch": 1.4956863065788935, "grad_norm": 0.022117748856544495, "learning_rate": 1.8149898022959712e-06, "loss": 0.0006, "step": 182900 }, { "epoch": 1.4957680827574928, "grad_norm": 0.057095274329185486, "learning_rate": 1.8144397230321976e-06, "loss": 0.0015, "step": 182910 }, { "epoch": 1.4958498589360918, "grad_norm": 0.030191367492079735, "learning_rate": 1.8138897086608749e-06, "loss": 0.0006, "step": 182920 }, { "epoch": 1.4959316351146912, "grad_norm": 0.024213630706071854, "learning_rate": 1.8133397591932105e-06, "loss": 0.0008, "step": 182930 }, { "epoch": 1.4960134112932901, "grad_norm": 0.01525038480758667, "learning_rate": 1.8127898746404055e-06, "loss": 0.0006, "step": 182940 }, { "epoch": 1.4960951874718895, "grad_norm": 0.000635004136711359, "learning_rate": 1.8122400550136609e-06, "loss": 0.001, "step": 182950 }, { "epoch": 1.4961769636504887, "grad_norm": 0.01891525462269783, "learning_rate": 1.811690300324176e-06, "loss": 0.0007, "step": 182960 }, { "epoch": 1.4962587398290879, "grad_norm": 0.09744429588317871, "learning_rate": 1.811140610583153e-06, "loss": 0.0007, "step": 182970 }, { "epoch": 1.496340516007687, "grad_norm": 0.046442922204732895, "learning_rate": 1.8105909858017884e-06, "loss": 0.0008, "step": 182980 }, { "epoch": 1.4964222921862862, "grad_norm": 0.042183563113212585, "learning_rate": 1.8100414259912768e-06, "loss": 0.001, "step": 182990 }, { "epoch": 1.4965040683648854, "grad_norm": 0.06318921595811844, "learning_rate": 1.809491931162815e-06, "loss": 0.0007, "step": 183000 }, { "epoch": 1.4965040683648854, "eval_loss": 0.0011413536267355084, "eval_runtime": 5.3629, "eval_samples_per_second": 37.294, "eval_steps_per_second": 9.323, "step": 183000 }, { "epoch": 1.4965858445434845, "grad_norm": 0.008911876939237118, "learning_rate": 1.8089425013275962e-06, "loss": 0.0006, "step": 183010 }, { "epoch": 1.4966676207220837, "grad_norm": 0.03267114236950874, "learning_rate": 1.8083931364968121e-06, "loss": 0.0007, "step": 183020 }, { "epoch": 1.4967493969006829, "grad_norm": 0.03434832766652107, "learning_rate": 1.8078438366816536e-06, "loss": 0.0003, "step": 183030 }, { "epoch": 1.496831173079282, "grad_norm": 0.011938631534576416, "learning_rate": 1.807294601893312e-06, "loss": 0.0022, "step": 183040 }, { "epoch": 1.4969129492578812, "grad_norm": 0.0010292927036061883, "learning_rate": 1.8067454321429718e-06, "loss": 0.0008, "step": 183050 }, { "epoch": 1.4969947254364804, "grad_norm": 0.03852524980902672, "learning_rate": 1.8061963274418248e-06, "loss": 0.0006, "step": 183060 }, { "epoch": 1.4970765016150795, "grad_norm": 0.021562805399298668, "learning_rate": 1.8056472878010545e-06, "loss": 0.0007, "step": 183070 }, { "epoch": 1.4971582777936787, "grad_norm": 0.015971707180142403, "learning_rate": 1.8050983132318456e-06, "loss": 0.0014, "step": 183080 }, { "epoch": 1.4972400539722779, "grad_norm": 0.03691720962524414, "learning_rate": 1.8045494037453799e-06, "loss": 0.0006, "step": 183090 }, { "epoch": 1.497321830150877, "grad_norm": 0.006345076020807028, "learning_rate": 1.804000559352841e-06, "loss": 0.0014, "step": 183100 }, { "epoch": 1.4974036063294762, "grad_norm": 0.03819368779659271, "learning_rate": 1.803451780065409e-06, "loss": 0.0007, "step": 183110 }, { "epoch": 1.4974853825080754, "grad_norm": 0.004217206034809351, "learning_rate": 1.8029030658942626e-06, "loss": 0.0007, "step": 183120 }, { "epoch": 1.4975671586866746, "grad_norm": 0.019766390323638916, "learning_rate": 1.8023544168505792e-06, "loss": 0.0006, "step": 183130 }, { "epoch": 1.4976489348652737, "grad_norm": 0.00595796387642622, "learning_rate": 1.8018058329455357e-06, "loss": 0.0004, "step": 183140 }, { "epoch": 1.497730711043873, "grad_norm": 0.015325040556490421, "learning_rate": 1.8012573141903067e-06, "loss": 0.0007, "step": 183150 }, { "epoch": 1.497812487222472, "grad_norm": 0.023718448355793953, "learning_rate": 1.8007088605960665e-06, "loss": 0.0004, "step": 183160 }, { "epoch": 1.4978942634010712, "grad_norm": 0.06279647350311279, "learning_rate": 1.8001604721739858e-06, "loss": 0.001, "step": 183170 }, { "epoch": 1.4979760395796704, "grad_norm": 0.038314007222652435, "learning_rate": 1.7996121489352386e-06, "loss": 0.0004, "step": 183180 }, { "epoch": 1.4980578157582696, "grad_norm": 0.01667398400604725, "learning_rate": 1.7990638908909935e-06, "loss": 0.0003, "step": 183190 }, { "epoch": 1.4981395919368687, "grad_norm": 0.09068311750888824, "learning_rate": 1.7985156980524188e-06, "loss": 0.0015, "step": 183200 }, { "epoch": 1.498221368115468, "grad_norm": 0.025606950744986534, "learning_rate": 1.79796757043068e-06, "loss": 0.0004, "step": 183210 }, { "epoch": 1.498303144294067, "grad_norm": 0.030926400795578957, "learning_rate": 1.797419508036946e-06, "loss": 0.0006, "step": 183220 }, { "epoch": 1.4983849204726662, "grad_norm": 0.1864180713891983, "learning_rate": 1.7968715108823804e-06, "loss": 0.0017, "step": 183230 }, { "epoch": 1.4984666966512654, "grad_norm": 0.020141853019595146, "learning_rate": 1.7963235789781452e-06, "loss": 0.0011, "step": 183240 }, { "epoch": 1.4985484728298646, "grad_norm": 0.06638729572296143, "learning_rate": 1.7957757123354024e-06, "loss": 0.0011, "step": 183250 }, { "epoch": 1.4986302490084638, "grad_norm": 0.038597848266363144, "learning_rate": 1.7952279109653137e-06, "loss": 0.0009, "step": 183260 }, { "epoch": 1.498712025187063, "grad_norm": 0.028283601626753807, "learning_rate": 1.794680174879037e-06, "loss": 0.0009, "step": 183270 }, { "epoch": 1.4987938013656623, "grad_norm": 0.10293304175138474, "learning_rate": 1.7941325040877304e-06, "loss": 0.0009, "step": 183280 }, { "epoch": 1.4988755775442613, "grad_norm": 0.08923016488552094, "learning_rate": 1.7935848986025495e-06, "loss": 0.0012, "step": 183290 }, { "epoch": 1.4989573537228607, "grad_norm": 0.02454518899321556, "learning_rate": 1.7930373584346516e-06, "loss": 0.0009, "step": 183300 }, { "epoch": 1.4990391299014596, "grad_norm": 0.009227560833096504, "learning_rate": 1.7924898835951898e-06, "loss": 0.0004, "step": 183310 }, { "epoch": 1.499120906080059, "grad_norm": 0.047291576862335205, "learning_rate": 1.7919424740953163e-06, "loss": 0.0004, "step": 183320 }, { "epoch": 1.499202682258658, "grad_norm": 0.0031648066360503435, "learning_rate": 1.7913951299461802e-06, "loss": 0.0007, "step": 183330 }, { "epoch": 1.4992844584372573, "grad_norm": 0.016117608174681664, "learning_rate": 1.7908478511589355e-06, "loss": 0.0006, "step": 183340 }, { "epoch": 1.4993662346158563, "grad_norm": 0.042394913733005524, "learning_rate": 1.7903006377447284e-06, "loss": 0.0004, "step": 183350 }, { "epoch": 1.4994480107944557, "grad_norm": 0.03193483129143715, "learning_rate": 1.7897534897147062e-06, "loss": 0.0006, "step": 183360 }, { "epoch": 1.4995297869730546, "grad_norm": 0.01168892439454794, "learning_rate": 1.7892064070800148e-06, "loss": 0.001, "step": 183370 }, { "epoch": 1.499611563151654, "grad_norm": 0.033877015113830566, "learning_rate": 1.788659389851799e-06, "loss": 0.0011, "step": 183380 }, { "epoch": 1.4996933393302532, "grad_norm": 0.014396500773727894, "learning_rate": 1.7881124380412013e-06, "loss": 0.0006, "step": 183390 }, { "epoch": 1.4997751155088523, "grad_norm": 0.024014044553041458, "learning_rate": 1.7875655516593642e-06, "loss": 0.0006, "step": 183400 }, { "epoch": 1.4998568916874515, "grad_norm": 0.08639080077409744, "learning_rate": 1.787018730717427e-06, "loss": 0.0011, "step": 183410 }, { "epoch": 1.4999386678660507, "grad_norm": 0.024539390578866005, "learning_rate": 1.7864719752265308e-06, "loss": 0.0007, "step": 183420 }, { "epoch": 1.5000204440446498, "grad_norm": 0.006146550644189119, "learning_rate": 1.7859252851978125e-06, "loss": 0.0004, "step": 183430 }, { "epoch": 1.500102220223249, "grad_norm": 0.08048414438962936, "learning_rate": 1.785378660642409e-06, "loss": 0.0013, "step": 183440 }, { "epoch": 1.5001839964018482, "grad_norm": 0.02970348671078682, "learning_rate": 1.7848321015714532e-06, "loss": 0.0008, "step": 183450 }, { "epoch": 1.5002657725804474, "grad_norm": 0.02341189980506897, "learning_rate": 1.784285607996083e-06, "loss": 0.0012, "step": 183460 }, { "epoch": 1.5003475487590465, "grad_norm": 0.01871390827000141, "learning_rate": 1.7837391799274283e-06, "loss": 0.0006, "step": 183470 }, { "epoch": 1.5004293249376457, "grad_norm": 0.05232931673526764, "learning_rate": 1.7831928173766216e-06, "loss": 0.0007, "step": 183480 }, { "epoch": 1.5005111011162449, "grad_norm": 0.013314379379153252, "learning_rate": 1.7826465203547917e-06, "loss": 0.0005, "step": 183490 }, { "epoch": 1.500592877294844, "grad_norm": 0.021195514127612114, "learning_rate": 1.782100288873067e-06, "loss": 0.0004, "step": 183500 }, { "epoch": 1.5006746534734432, "grad_norm": 0.09155825525522232, "learning_rate": 1.7815541229425747e-06, "loss": 0.0007, "step": 183510 }, { "epoch": 1.5007564296520424, "grad_norm": 0.008503442630171776, "learning_rate": 1.781008022574442e-06, "loss": 0.0005, "step": 183520 }, { "epoch": 1.5008382058306415, "grad_norm": 0.05421938747167587, "learning_rate": 1.7804619877797902e-06, "loss": 0.0006, "step": 183530 }, { "epoch": 1.5009199820092407, "grad_norm": 0.02062121592462063, "learning_rate": 1.779916018569746e-06, "loss": 0.0003, "step": 183540 }, { "epoch": 1.5010017581878399, "grad_norm": 0.05872134864330292, "learning_rate": 1.7793701149554304e-06, "loss": 0.0006, "step": 183550 }, { "epoch": 1.501083534366439, "grad_norm": 0.056345053017139435, "learning_rate": 1.7788242769479635e-06, "loss": 0.0012, "step": 183560 }, { "epoch": 1.5011653105450382, "grad_norm": 0.023248761892318726, "learning_rate": 1.7782785045584627e-06, "loss": 0.0004, "step": 183570 }, { "epoch": 1.5012470867236374, "grad_norm": 0.01025824248790741, "learning_rate": 1.777732797798049e-06, "loss": 0.0004, "step": 183580 }, { "epoch": 1.5013288629022365, "grad_norm": 0.07436371594667435, "learning_rate": 1.7771871566778376e-06, "loss": 0.0006, "step": 183590 }, { "epoch": 1.5014106390808357, "grad_norm": 0.07640202343463898, "learning_rate": 1.7766415812089434e-06, "loss": 0.0008, "step": 183600 }, { "epoch": 1.5014924152594349, "grad_norm": 0.0008486757287755609, "learning_rate": 1.77609607140248e-06, "loss": 0.0009, "step": 183610 }, { "epoch": 1.501574191438034, "grad_norm": 0.08236705511808395, "learning_rate": 1.77555062726956e-06, "loss": 0.0011, "step": 183620 }, { "epoch": 1.5016559676166332, "grad_norm": 0.1774170994758606, "learning_rate": 1.7750052488212954e-06, "loss": 0.0007, "step": 183630 }, { "epoch": 1.5017377437952324, "grad_norm": 0.053497180342674255, "learning_rate": 1.7744599360687948e-06, "loss": 0.0003, "step": 183640 }, { "epoch": 1.5018195199738318, "grad_norm": 0.08248715102672577, "learning_rate": 1.7739146890231656e-06, "loss": 0.001, "step": 183650 }, { "epoch": 1.5019012961524307, "grad_norm": 0.03347419574856758, "learning_rate": 1.773369507695518e-06, "loss": 0.001, "step": 183660 }, { "epoch": 1.5019830723310301, "grad_norm": 0.03473806753754616, "learning_rate": 1.7728243920969562e-06, "loss": 0.0006, "step": 183670 }, { "epoch": 1.502064848509629, "grad_norm": 0.019342688843607903, "learning_rate": 1.7722793422385852e-06, "loss": 0.0009, "step": 183680 }, { "epoch": 1.5021466246882285, "grad_norm": 0.004183486569672823, "learning_rate": 1.7717343581315055e-06, "loss": 0.0005, "step": 183690 }, { "epoch": 1.5022284008668274, "grad_norm": 0.048362042754888535, "learning_rate": 1.7711894397868229e-06, "loss": 0.0008, "step": 183700 }, { "epoch": 1.5023101770454268, "grad_norm": 0.0941733866930008, "learning_rate": 1.7706445872156358e-06, "loss": 0.0006, "step": 183710 }, { "epoch": 1.5023919532240257, "grad_norm": 0.011270287446677685, "learning_rate": 1.7700998004290433e-06, "loss": 0.0006, "step": 183720 }, { "epoch": 1.5024737294026251, "grad_norm": 0.044674117118120193, "learning_rate": 1.7695550794381432e-06, "loss": 0.0007, "step": 183730 }, { "epoch": 1.502555505581224, "grad_norm": 0.03605952113866806, "learning_rate": 1.7690104242540324e-06, "loss": 0.0009, "step": 183740 }, { "epoch": 1.5026372817598235, "grad_norm": 0.034780725836753845, "learning_rate": 1.7684658348878053e-06, "loss": 0.0009, "step": 183750 }, { "epoch": 1.5027190579384224, "grad_norm": 0.04102523997426033, "learning_rate": 1.7679213113505562e-06, "loss": 0.0008, "step": 183760 }, { "epoch": 1.5028008341170218, "grad_norm": 0.014219241216778755, "learning_rate": 1.7673768536533748e-06, "loss": 0.0007, "step": 183770 }, { "epoch": 1.5028826102956208, "grad_norm": 0.0011922781122848392, "learning_rate": 1.7668324618073567e-06, "loss": 0.0016, "step": 183780 }, { "epoch": 1.5029643864742201, "grad_norm": 0.024766327813267708, "learning_rate": 1.766288135823589e-06, "loss": 0.0008, "step": 183790 }, { "epoch": 1.503046162652819, "grad_norm": 0.05732009932398796, "learning_rate": 1.7657438757131606e-06, "loss": 0.0006, "step": 183800 }, { "epoch": 1.5031279388314185, "grad_norm": 0.022417791187763214, "learning_rate": 1.7651996814871569e-06, "loss": 0.0004, "step": 183810 }, { "epoch": 1.5032097150100174, "grad_norm": 0.2316051721572876, "learning_rate": 1.7646555531566666e-06, "loss": 0.0008, "step": 183820 }, { "epoch": 1.5032914911886168, "grad_norm": 0.01811377890408039, "learning_rate": 1.7641114907327728e-06, "loss": 0.0008, "step": 183830 }, { "epoch": 1.5033732673672158, "grad_norm": 0.042545318603515625, "learning_rate": 1.763567494226558e-06, "loss": 0.0006, "step": 183840 }, { "epoch": 1.5034550435458152, "grad_norm": 0.03203791007399559, "learning_rate": 1.763023563649104e-06, "loss": 0.0019, "step": 183850 }, { "epoch": 1.5035368197244143, "grad_norm": 0.005785784684121609, "learning_rate": 1.7624796990114913e-06, "loss": 0.0005, "step": 183860 }, { "epoch": 1.5036185959030135, "grad_norm": 0.03349996358156204, "learning_rate": 1.7619359003247982e-06, "loss": 0.0007, "step": 183870 }, { "epoch": 1.5037003720816127, "grad_norm": 0.04617258161306381, "learning_rate": 1.7613921676001034e-06, "loss": 0.0011, "step": 183880 }, { "epoch": 1.5037821482602118, "grad_norm": 0.016704341396689415, "learning_rate": 1.760848500848481e-06, "loss": 0.0009, "step": 183890 }, { "epoch": 1.503863924438811, "grad_norm": 0.008407221175730228, "learning_rate": 1.760304900081009e-06, "loss": 0.0002, "step": 183900 }, { "epoch": 1.5039457006174102, "grad_norm": 0.05922861769795418, "learning_rate": 1.7597613653087592e-06, "loss": 0.0009, "step": 183910 }, { "epoch": 1.5040274767960093, "grad_norm": 0.08741476386785507, "learning_rate": 1.7592178965428047e-06, "loss": 0.001, "step": 183920 }, { "epoch": 1.5041092529746085, "grad_norm": 0.0009523407206870615, "learning_rate": 1.758674493794214e-06, "loss": 0.0009, "step": 183930 }, { "epoch": 1.5041910291532077, "grad_norm": 0.0065383887849748135, "learning_rate": 1.7581311570740601e-06, "loss": 0.0009, "step": 183940 }, { "epoch": 1.5042728053318069, "grad_norm": 0.02442840300500393, "learning_rate": 1.7575878863934098e-06, "loss": 0.0005, "step": 183950 }, { "epoch": 1.504354581510406, "grad_norm": 0.04511357843875885, "learning_rate": 1.7570446817633303e-06, "loss": 0.0007, "step": 183960 }, { "epoch": 1.5044363576890052, "grad_norm": 0.02205030247569084, "learning_rate": 1.7565015431948856e-06, "loss": 0.0006, "step": 183970 }, { "epoch": 1.5045181338676044, "grad_norm": 0.040649816393852234, "learning_rate": 1.7559584706991418e-06, "loss": 0.0018, "step": 183980 }, { "epoch": 1.5045999100462035, "grad_norm": 0.03367052972316742, "learning_rate": 1.7554154642871606e-06, "loss": 0.0005, "step": 183990 }, { "epoch": 1.5046816862248027, "grad_norm": 0.03437207639217377, "learning_rate": 1.7548725239700038e-06, "loss": 0.0016, "step": 184000 }, { "epoch": 1.5047634624034019, "grad_norm": 0.04718339443206787, "learning_rate": 1.7543296497587293e-06, "loss": 0.0006, "step": 184010 }, { "epoch": 1.504845238582001, "grad_norm": 0.1302158236503601, "learning_rate": 1.7537868416644e-06, "loss": 0.0008, "step": 184020 }, { "epoch": 1.5049270147606002, "grad_norm": 0.02529960870742798, "learning_rate": 1.7532440996980716e-06, "loss": 0.0005, "step": 184030 }, { "epoch": 1.5050087909391994, "grad_norm": 0.0705246552824974, "learning_rate": 1.7527014238707996e-06, "loss": 0.0033, "step": 184040 }, { "epoch": 1.5050905671177985, "grad_norm": 0.03627485781908035, "learning_rate": 1.752158814193638e-06, "loss": 0.0006, "step": 184050 }, { "epoch": 1.5051723432963977, "grad_norm": 0.049728140234947205, "learning_rate": 1.7516162706776425e-06, "loss": 0.0006, "step": 184060 }, { "epoch": 1.5052541194749969, "grad_norm": 0.010663148947060108, "learning_rate": 1.7510737933338646e-06, "loss": 0.0007, "step": 184070 }, { "epoch": 1.5053358956535963, "grad_norm": 0.03882284834980965, "learning_rate": 1.750531382173354e-06, "loss": 0.0004, "step": 184080 }, { "epoch": 1.5054176718321952, "grad_norm": 0.027221325784921646, "learning_rate": 1.7499890372071598e-06, "loss": 0.0009, "step": 184090 }, { "epoch": 1.5054994480107946, "grad_norm": 0.10772006213665009, "learning_rate": 1.7494467584463314e-06, "loss": 0.001, "step": 184100 }, { "epoch": 1.5055812241893936, "grad_norm": 0.004945714492350817, "learning_rate": 1.748904545901915e-06, "loss": 0.0008, "step": 184110 }, { "epoch": 1.505663000367993, "grad_norm": 0.12270472943782806, "learning_rate": 1.7483623995849552e-06, "loss": 0.0006, "step": 184120 }, { "epoch": 1.505744776546592, "grad_norm": 0.004770905710756779, "learning_rate": 1.7478203195064947e-06, "loss": 0.0006, "step": 184130 }, { "epoch": 1.5058265527251913, "grad_norm": 0.01853184960782528, "learning_rate": 1.7472783056775793e-06, "loss": 0.0007, "step": 184140 }, { "epoch": 1.5059083289037902, "grad_norm": 0.0008471600594930351, "learning_rate": 1.746736358109249e-06, "loss": 0.0009, "step": 184150 }, { "epoch": 1.5059901050823896, "grad_norm": 0.04530706629157066, "learning_rate": 1.7461944768125432e-06, "loss": 0.0008, "step": 184160 }, { "epoch": 1.5060718812609886, "grad_norm": 0.02507835067808628, "learning_rate": 1.7456526617984992e-06, "loss": 0.0006, "step": 184170 }, { "epoch": 1.506153657439588, "grad_norm": 0.09379368275403976, "learning_rate": 1.745110913078158e-06, "loss": 0.0011, "step": 184180 }, { "epoch": 1.506235433618187, "grad_norm": 0.05882143974304199, "learning_rate": 1.7445692306625523e-06, "loss": 0.0007, "step": 184190 }, { "epoch": 1.5063172097967863, "grad_norm": 0.11237916350364685, "learning_rate": 1.7440276145627182e-06, "loss": 0.0011, "step": 184200 }, { "epoch": 1.5063989859753852, "grad_norm": 0.02198765240609646, "learning_rate": 1.743486064789688e-06, "loss": 0.0004, "step": 184210 }, { "epoch": 1.5064807621539846, "grad_norm": 0.04116526618599892, "learning_rate": 1.7429445813544931e-06, "loss": 0.0006, "step": 184220 }, { "epoch": 1.5065625383325836, "grad_norm": 0.026508048176765442, "learning_rate": 1.7424031642681654e-06, "loss": 0.0011, "step": 184230 }, { "epoch": 1.506644314511183, "grad_norm": 0.06688788533210754, "learning_rate": 1.7418618135417325e-06, "loss": 0.0007, "step": 184240 }, { "epoch": 1.506726090689782, "grad_norm": 0.01893206126987934, "learning_rate": 1.7413205291862217e-06, "loss": 0.0008, "step": 184250 }, { "epoch": 1.5068078668683813, "grad_norm": 0.06579852849245071, "learning_rate": 1.740779311212662e-06, "loss": 0.0007, "step": 184260 }, { "epoch": 1.5068896430469803, "grad_norm": 0.014983650296926498, "learning_rate": 1.7402381596320766e-06, "loss": 0.0008, "step": 184270 }, { "epoch": 1.5069714192255796, "grad_norm": 0.01880721189081669, "learning_rate": 1.7396970744554897e-06, "loss": 0.001, "step": 184280 }, { "epoch": 1.5070531954041788, "grad_norm": 0.008573578670620918, "learning_rate": 1.7391560556939224e-06, "loss": 0.001, "step": 184290 }, { "epoch": 1.507134971582778, "grad_norm": 0.05084190517663956, "learning_rate": 1.738615103358398e-06, "loss": 0.0006, "step": 184300 }, { "epoch": 1.5072167477613772, "grad_norm": 0.05031690001487732, "learning_rate": 1.7380742174599348e-06, "loss": 0.0004, "step": 184310 }, { "epoch": 1.5072985239399763, "grad_norm": 0.019213516265153885, "learning_rate": 1.7375333980095515e-06, "loss": 0.0011, "step": 184320 }, { "epoch": 1.5073803001185755, "grad_norm": 0.0073264604434370995, "learning_rate": 1.7369926450182645e-06, "loss": 0.0008, "step": 184330 }, { "epoch": 1.5074620762971747, "grad_norm": 0.025081932544708252, "learning_rate": 1.7364519584970896e-06, "loss": 0.0008, "step": 184340 }, { "epoch": 1.5075438524757738, "grad_norm": 0.04877509921789169, "learning_rate": 1.7359113384570414e-06, "loss": 0.0006, "step": 184350 }, { "epoch": 1.507625628654373, "grad_norm": 0.04404069483280182, "learning_rate": 1.7353707849091322e-06, "loss": 0.0006, "step": 184360 }, { "epoch": 1.5077074048329722, "grad_norm": 0.007124683354049921, "learning_rate": 1.7348302978643721e-06, "loss": 0.0006, "step": 184370 }, { "epoch": 1.5077891810115713, "grad_norm": 0.09531894326210022, "learning_rate": 1.7342898773337747e-06, "loss": 0.0014, "step": 184380 }, { "epoch": 1.5078709571901705, "grad_norm": 0.019420631229877472, "learning_rate": 1.7337495233283468e-06, "loss": 0.0023, "step": 184390 }, { "epoch": 1.5079527333687697, "grad_norm": 0.07288296520709991, "learning_rate": 1.7332092358590957e-06, "loss": 0.001, "step": 184400 }, { "epoch": 1.5080345095473688, "grad_norm": 0.004739420488476753, "learning_rate": 1.7326690149370268e-06, "loss": 0.0009, "step": 184410 }, { "epoch": 1.508116285725968, "grad_norm": 0.0013557728379964828, "learning_rate": 1.7321288605731474e-06, "loss": 0.0006, "step": 184420 }, { "epoch": 1.5081980619045672, "grad_norm": 0.07963279634714127, "learning_rate": 1.731588772778459e-06, "loss": 0.001, "step": 184430 }, { "epoch": 1.5082798380831663, "grad_norm": 0.008593035861849785, "learning_rate": 1.7310487515639646e-06, "loss": 0.0012, "step": 184440 }, { "epoch": 1.5083616142617655, "grad_norm": 0.021873246878385544, "learning_rate": 1.730508796940664e-06, "loss": 0.0003, "step": 184450 }, { "epoch": 1.5084433904403647, "grad_norm": 0.04505562409758568, "learning_rate": 1.7299689089195566e-06, "loss": 0.0005, "step": 184460 }, { "epoch": 1.5085251666189639, "grad_norm": 0.07678275555372238, "learning_rate": 1.729429087511641e-06, "loss": 0.0007, "step": 184470 }, { "epoch": 1.508606942797563, "grad_norm": 0.08999436348676682, "learning_rate": 1.7288893327279127e-06, "loss": 0.0008, "step": 184480 }, { "epoch": 1.5086887189761622, "grad_norm": 0.017082376405596733, "learning_rate": 1.728349644579368e-06, "loss": 0.0011, "step": 184490 }, { "epoch": 1.5087704951547614, "grad_norm": 0.06241469457745552, "learning_rate": 1.7278100230769985e-06, "loss": 0.001, "step": 184500 }, { "epoch": 1.5088522713333608, "grad_norm": 0.11183585226535797, "learning_rate": 1.7272704682318003e-06, "loss": 0.001, "step": 184510 }, { "epoch": 1.5089340475119597, "grad_norm": 0.06394306570291519, "learning_rate": 1.7267309800547633e-06, "loss": 0.0009, "step": 184520 }, { "epoch": 1.509015823690559, "grad_norm": 0.028889058157801628, "learning_rate": 1.7261915585568767e-06, "loss": 0.0005, "step": 184530 }, { "epoch": 1.509097599869158, "grad_norm": 0.05409662798047066, "learning_rate": 1.725652203749127e-06, "loss": 0.0005, "step": 184540 }, { "epoch": 1.5091793760477574, "grad_norm": 0.036076322197914124, "learning_rate": 1.7251129156425056e-06, "loss": 0.0008, "step": 184550 }, { "epoch": 1.5092611522263564, "grad_norm": 0.05024419724941254, "learning_rate": 1.7245736942479956e-06, "loss": 0.0013, "step": 184560 }, { "epoch": 1.5093429284049558, "grad_norm": 0.0547344870865345, "learning_rate": 1.7240345395765823e-06, "loss": 0.0007, "step": 184570 }, { "epoch": 1.5094247045835547, "grad_norm": 0.05401349440217018, "learning_rate": 1.7234954516392482e-06, "loss": 0.001, "step": 184580 }, { "epoch": 1.509506480762154, "grad_norm": 0.07618999481201172, "learning_rate": 1.7229564304469748e-06, "loss": 0.0004, "step": 184590 }, { "epoch": 1.509588256940753, "grad_norm": 0.014178861863911152, "learning_rate": 1.7224174760107426e-06, "loss": 0.0005, "step": 184600 }, { "epoch": 1.5096700331193524, "grad_norm": 0.03567527234554291, "learning_rate": 1.7218785883415311e-06, "loss": 0.0009, "step": 184610 }, { "epoch": 1.5097518092979514, "grad_norm": 0.02203233726322651, "learning_rate": 1.7213397674503152e-06, "loss": 0.0003, "step": 184620 }, { "epoch": 1.5098335854765508, "grad_norm": 0.08432188630104065, "learning_rate": 1.7208010133480751e-06, "loss": 0.0012, "step": 184630 }, { "epoch": 1.5099153616551497, "grad_norm": 0.047200169414281845, "learning_rate": 1.7202623260457834e-06, "loss": 0.0006, "step": 184640 }, { "epoch": 1.5099971378337491, "grad_norm": 0.01395800244063139, "learning_rate": 1.7197237055544148e-06, "loss": 0.0004, "step": 184650 }, { "epoch": 1.510078914012348, "grad_norm": 0.04161818325519562, "learning_rate": 1.7191851518849383e-06, "loss": 0.0006, "step": 184660 }, { "epoch": 1.5101606901909475, "grad_norm": 0.015490862540900707, "learning_rate": 1.718646665048329e-06, "loss": 0.0007, "step": 184670 }, { "epoch": 1.5102424663695464, "grad_norm": 0.006532155442982912, "learning_rate": 1.7181082450555548e-06, "loss": 0.0006, "step": 184680 }, { "epoch": 1.5103242425481458, "grad_norm": 0.0034598680213093758, "learning_rate": 1.7175698919175826e-06, "loss": 0.0004, "step": 184690 }, { "epoch": 1.5104060187267447, "grad_norm": 0.018083687871694565, "learning_rate": 1.71703160564538e-06, "loss": 0.0006, "step": 184700 }, { "epoch": 1.5104877949053441, "grad_norm": 0.10305973142385483, "learning_rate": 1.7164933862499123e-06, "loss": 0.0012, "step": 184710 }, { "epoch": 1.5105695710839433, "grad_norm": 0.022218357771635056, "learning_rate": 1.715955233742143e-06, "loss": 0.0004, "step": 184720 }, { "epoch": 1.5106513472625425, "grad_norm": 0.05202765390276909, "learning_rate": 1.715417148133035e-06, "loss": 0.0004, "step": 184730 }, { "epoch": 1.5107331234411416, "grad_norm": 0.005235665012151003, "learning_rate": 1.7148791294335482e-06, "loss": 0.0005, "step": 184740 }, { "epoch": 1.5108148996197408, "grad_norm": 0.14386233687400818, "learning_rate": 1.7143411776546453e-06, "loss": 0.0008, "step": 184750 }, { "epoch": 1.51089667579834, "grad_norm": 0.04503313824534416, "learning_rate": 1.7138032928072835e-06, "loss": 0.0013, "step": 184760 }, { "epoch": 1.5109784519769391, "grad_norm": 0.03822072595357895, "learning_rate": 1.7132654749024197e-06, "loss": 0.0017, "step": 184770 }, { "epoch": 1.5110602281555383, "grad_norm": 0.03749831020832062, "learning_rate": 1.7127277239510075e-06, "loss": 0.0007, "step": 184780 }, { "epoch": 1.5111420043341375, "grad_norm": 0.01202330645173788, "learning_rate": 1.7121900399640061e-06, "loss": 0.0003, "step": 184790 }, { "epoch": 1.5112237805127366, "grad_norm": 0.005531275644898415, "learning_rate": 1.7116524229523656e-06, "loss": 0.0002, "step": 184800 }, { "epoch": 1.5113055566913358, "grad_norm": 0.015492511913180351, "learning_rate": 1.7111148729270384e-06, "loss": 0.0005, "step": 184810 }, { "epoch": 1.511387332869935, "grad_norm": 0.034167394042015076, "learning_rate": 1.710577389898974e-06, "loss": 0.0005, "step": 184820 }, { "epoch": 1.5114691090485342, "grad_norm": 0.036509715020656586, "learning_rate": 1.710039973879123e-06, "loss": 0.0016, "step": 184830 }, { "epoch": 1.5115508852271333, "grad_norm": 0.08326688408851624, "learning_rate": 1.7095026248784313e-06, "loss": 0.0004, "step": 184840 }, { "epoch": 1.5116326614057325, "grad_norm": 0.01823810115456581, "learning_rate": 1.7089653429078456e-06, "loss": 0.0006, "step": 184850 }, { "epoch": 1.5117144375843317, "grad_norm": 0.008623752743005753, "learning_rate": 1.7084281279783094e-06, "loss": 0.0005, "step": 184860 }, { "epoch": 1.5117962137629308, "grad_norm": 0.04350056126713753, "learning_rate": 1.70789098010077e-06, "loss": 0.0012, "step": 184870 }, { "epoch": 1.51187798994153, "grad_norm": 0.06518877297639847, "learning_rate": 1.7073538992861666e-06, "loss": 0.0006, "step": 184880 }, { "epoch": 1.5119597661201292, "grad_norm": 0.0122404545545578, "learning_rate": 1.7068168855454408e-06, "loss": 0.0005, "step": 184890 }, { "epoch": 1.5120415422987283, "grad_norm": 0.059276387095451355, "learning_rate": 1.7062799388895302e-06, "loss": 0.0009, "step": 184900 }, { "epoch": 1.5121233184773275, "grad_norm": 0.01617681048810482, "learning_rate": 1.7057430593293767e-06, "loss": 0.0005, "step": 184910 }, { "epoch": 1.5122050946559267, "grad_norm": 0.05617115646600723, "learning_rate": 1.705206246875914e-06, "loss": 0.0007, "step": 184920 }, { "epoch": 1.5122868708345258, "grad_norm": 0.06274430453777313, "learning_rate": 1.7046695015400788e-06, "loss": 0.0015, "step": 184930 }, { "epoch": 1.5123686470131252, "grad_norm": 0.049726132303476334, "learning_rate": 1.7041328233328042e-06, "loss": 0.0006, "step": 184940 }, { "epoch": 1.5124504231917242, "grad_norm": 0.012341246008872986, "learning_rate": 1.703596212265023e-06, "loss": 0.0007, "step": 184950 }, { "epoch": 1.5125321993703236, "grad_norm": 0.10230688750743866, "learning_rate": 1.7030596683476664e-06, "loss": 0.0005, "step": 184960 }, { "epoch": 1.5126139755489225, "grad_norm": 0.06387882679700851, "learning_rate": 1.7025231915916641e-06, "loss": 0.0012, "step": 184970 }, { "epoch": 1.512695751727522, "grad_norm": 0.056098420172929764, "learning_rate": 1.7019867820079434e-06, "loss": 0.0006, "step": 184980 }, { "epoch": 1.5127775279061209, "grad_norm": 0.006719261407852173, "learning_rate": 1.7014504396074345e-06, "loss": 0.0019, "step": 184990 }, { "epoch": 1.5128593040847202, "grad_norm": 0.037166908383369446, "learning_rate": 1.7009141644010608e-06, "loss": 0.0007, "step": 185000 }, { "epoch": 1.5129410802633192, "grad_norm": 0.012158488854765892, "learning_rate": 1.7003779563997474e-06, "loss": 0.0009, "step": 185010 }, { "epoch": 1.5130228564419186, "grad_norm": 0.03974398225545883, "learning_rate": 1.6998418156144152e-06, "loss": 0.0004, "step": 185020 }, { "epoch": 1.5131046326205175, "grad_norm": 0.038716405630111694, "learning_rate": 1.69930574205599e-06, "loss": 0.0008, "step": 185030 }, { "epoch": 1.513186408799117, "grad_norm": 0.04440208896994591, "learning_rate": 1.6987697357353893e-06, "loss": 0.0011, "step": 185040 }, { "epoch": 1.5132681849777159, "grad_norm": 0.0323457196354866, "learning_rate": 1.6982337966635327e-06, "loss": 0.0006, "step": 185050 }, { "epoch": 1.5133499611563153, "grad_norm": 0.04160480201244354, "learning_rate": 1.697697924851337e-06, "loss": 0.0014, "step": 185060 }, { "epoch": 1.5134317373349142, "grad_norm": 0.06235140934586525, "learning_rate": 1.6971621203097193e-06, "loss": 0.001, "step": 185070 }, { "epoch": 1.5135135135135136, "grad_norm": 0.05401712656021118, "learning_rate": 1.6966263830495939e-06, "loss": 0.0007, "step": 185080 }, { "epoch": 1.5135952896921125, "grad_norm": 0.011005518026649952, "learning_rate": 1.6960907130818738e-06, "loss": 0.0006, "step": 185090 }, { "epoch": 1.513677065870712, "grad_norm": 0.028205037117004395, "learning_rate": 1.69555511041747e-06, "loss": 0.0009, "step": 185100 }, { "epoch": 1.5137588420493109, "grad_norm": 0.024324243888258934, "learning_rate": 1.6950195750672966e-06, "loss": 0.0007, "step": 185110 }, { "epoch": 1.5138406182279103, "grad_norm": 0.22465358674526215, "learning_rate": 1.6944841070422603e-06, "loss": 0.001, "step": 185120 }, { "epoch": 1.5139223944065092, "grad_norm": 0.01765746809542179, "learning_rate": 1.69394870635327e-06, "loss": 0.001, "step": 185130 }, { "epoch": 1.5140041705851086, "grad_norm": 0.0400674045085907, "learning_rate": 1.69341337301123e-06, "loss": 0.0007, "step": 185140 }, { "epoch": 1.5140859467637078, "grad_norm": 0.03573862090706825, "learning_rate": 1.692878107027049e-06, "loss": 0.0018, "step": 185150 }, { "epoch": 1.514167722942307, "grad_norm": 0.0033251794520765543, "learning_rate": 1.6923429084116293e-06, "loss": 0.0004, "step": 185160 }, { "epoch": 1.5142494991209061, "grad_norm": 0.08282390981912613, "learning_rate": 1.6918077771758728e-06, "loss": 0.0013, "step": 185170 }, { "epoch": 1.5143312752995053, "grad_norm": 0.05241723731160164, "learning_rate": 1.691272713330681e-06, "loss": 0.0007, "step": 185180 }, { "epoch": 1.5144130514781045, "grad_norm": 0.018873177468776703, "learning_rate": 1.6907377168869538e-06, "loss": 0.0007, "step": 185190 }, { "epoch": 1.5144948276567036, "grad_norm": 0.09373915940523148, "learning_rate": 1.6902027878555888e-06, "loss": 0.0006, "step": 185200 }, { "epoch": 1.5145766038353028, "grad_norm": 0.062172889709472656, "learning_rate": 1.6896679262474836e-06, "loss": 0.001, "step": 185210 }, { "epoch": 1.514658380013902, "grad_norm": 0.008686983957886696, "learning_rate": 1.6891331320735322e-06, "loss": 0.0009, "step": 185220 }, { "epoch": 1.5147401561925011, "grad_norm": 0.12380179762840271, "learning_rate": 1.6885984053446313e-06, "loss": 0.0017, "step": 185230 }, { "epoch": 1.5148219323711003, "grad_norm": 0.0058767409063875675, "learning_rate": 1.6880637460716725e-06, "loss": 0.0005, "step": 185240 }, { "epoch": 1.5149037085496995, "grad_norm": 0.03042459860444069, "learning_rate": 1.6875291542655476e-06, "loss": 0.0011, "step": 185250 }, { "epoch": 1.5149854847282986, "grad_norm": 0.011819673702120781, "learning_rate": 1.6869946299371443e-06, "loss": 0.0009, "step": 185260 }, { "epoch": 1.5150672609068978, "grad_norm": 0.0207271259278059, "learning_rate": 1.6864601730973551e-06, "loss": 0.0004, "step": 185270 }, { "epoch": 1.515149037085497, "grad_norm": 0.029247744008898735, "learning_rate": 1.6859257837570658e-06, "loss": 0.0005, "step": 185280 }, { "epoch": 1.5152308132640961, "grad_norm": 0.007467799354344606, "learning_rate": 1.6853914619271617e-06, "loss": 0.0004, "step": 185290 }, { "epoch": 1.5153125894426953, "grad_norm": 0.06859049201011658, "learning_rate": 1.6848572076185283e-06, "loss": 0.0009, "step": 185300 }, { "epoch": 1.5153943656212945, "grad_norm": 0.010349943302571774, "learning_rate": 1.684323020842048e-06, "loss": 0.0008, "step": 185310 }, { "epoch": 1.5154761417998937, "grad_norm": 0.03460633382201195, "learning_rate": 1.6837889016086029e-06, "loss": 0.0007, "step": 185320 }, { "epoch": 1.5155579179784928, "grad_norm": 0.016906268894672394, "learning_rate": 1.683254849929073e-06, "loss": 0.0004, "step": 185330 }, { "epoch": 1.515639694157092, "grad_norm": 0.0013006784720346332, "learning_rate": 1.6827208658143368e-06, "loss": 0.0004, "step": 185340 }, { "epoch": 1.5157214703356914, "grad_norm": 0.016882622614502907, "learning_rate": 1.682186949275274e-06, "loss": 0.0008, "step": 185350 }, { "epoch": 1.5158032465142903, "grad_norm": 0.08924420177936554, "learning_rate": 1.6816531003227605e-06, "loss": 0.0006, "step": 185360 }, { "epoch": 1.5158850226928897, "grad_norm": 0.015425479039549828, "learning_rate": 1.68111931896767e-06, "loss": 0.0007, "step": 185370 }, { "epoch": 1.5159667988714887, "grad_norm": 0.006023996975272894, "learning_rate": 1.6805856052208757e-06, "loss": 0.0004, "step": 185380 }, { "epoch": 1.516048575050088, "grad_norm": 0.05118532478809357, "learning_rate": 1.6800519590932519e-06, "loss": 0.0012, "step": 185390 }, { "epoch": 1.516130351228687, "grad_norm": 0.29009586572647095, "learning_rate": 1.6795183805956689e-06, "loss": 0.0006, "step": 185400 }, { "epoch": 1.5162121274072864, "grad_norm": 0.0016901586204767227, "learning_rate": 1.678984869738995e-06, "loss": 0.0007, "step": 185410 }, { "epoch": 1.5162939035858853, "grad_norm": 0.027742106467485428, "learning_rate": 1.678451426534099e-06, "loss": 0.0005, "step": 185420 }, { "epoch": 1.5163756797644847, "grad_norm": 0.029316317290067673, "learning_rate": 1.6779180509918469e-06, "loss": 0.0006, "step": 185430 }, { "epoch": 1.5164574559430837, "grad_norm": 0.010422827675938606, "learning_rate": 1.6773847431231049e-06, "loss": 0.0005, "step": 185440 }, { "epoch": 1.516539232121683, "grad_norm": 0.004684056621044874, "learning_rate": 1.6768515029387362e-06, "loss": 0.0004, "step": 185450 }, { "epoch": 1.516621008300282, "grad_norm": 0.06171329692006111, "learning_rate": 1.6763183304496023e-06, "loss": 0.0006, "step": 185460 }, { "epoch": 1.5167027844788814, "grad_norm": 0.020753320306539536, "learning_rate": 1.6757852256665668e-06, "loss": 0.0003, "step": 185470 }, { "epoch": 1.5167845606574804, "grad_norm": 0.08719120919704437, "learning_rate": 1.6752521886004886e-06, "loss": 0.0016, "step": 185480 }, { "epoch": 1.5168663368360797, "grad_norm": 0.00715012289583683, "learning_rate": 1.6747192192622257e-06, "loss": 0.0004, "step": 185490 }, { "epoch": 1.5169481130146787, "grad_norm": 0.013392087072134018, "learning_rate": 1.6741863176626339e-06, "loss": 0.0004, "step": 185500 }, { "epoch": 1.517029889193278, "grad_norm": 0.003084846306592226, "learning_rate": 1.6736534838125713e-06, "loss": 0.0005, "step": 185510 }, { "epoch": 1.517111665371877, "grad_norm": 0.05174360051751137, "learning_rate": 1.6731207177228914e-06, "loss": 0.0005, "step": 185520 }, { "epoch": 1.5171934415504764, "grad_norm": 0.09904047101736069, "learning_rate": 1.6725880194044463e-06, "loss": 0.0016, "step": 185530 }, { "epoch": 1.5172752177290754, "grad_norm": 0.022195523604750633, "learning_rate": 1.6720553888680885e-06, "loss": 0.0004, "step": 185540 }, { "epoch": 1.5173569939076748, "grad_norm": 0.15180231630802155, "learning_rate": 1.671522826124667e-06, "loss": 0.0012, "step": 185550 }, { "epoch": 1.5174387700862737, "grad_norm": 0.04884571582078934, "learning_rate": 1.6709903311850311e-06, "loss": 0.001, "step": 185560 }, { "epoch": 1.517520546264873, "grad_norm": 0.005977874156087637, "learning_rate": 1.6704579040600283e-06, "loss": 0.0017, "step": 185570 }, { "epoch": 1.5176023224434723, "grad_norm": 0.020085040479898453, "learning_rate": 1.6699255447605028e-06, "loss": 0.0006, "step": 185580 }, { "epoch": 1.5176840986220714, "grad_norm": 0.022730302065610886, "learning_rate": 1.6693932532973023e-06, "loss": 0.0005, "step": 185590 }, { "epoch": 1.5177658748006706, "grad_norm": 0.06826391071081161, "learning_rate": 1.6688610296812686e-06, "loss": 0.0007, "step": 185600 }, { "epoch": 1.5178476509792698, "grad_norm": 0.05881383642554283, "learning_rate": 1.6683288739232428e-06, "loss": 0.001, "step": 185610 }, { "epoch": 1.517929427157869, "grad_norm": 0.07354933768510818, "learning_rate": 1.6677967860340648e-06, "loss": 0.0012, "step": 185620 }, { "epoch": 1.518011203336468, "grad_norm": 0.09546561539173126, "learning_rate": 1.6672647660245767e-06, "loss": 0.0006, "step": 185630 }, { "epoch": 1.5180929795150673, "grad_norm": 0.020239723846316338, "learning_rate": 1.6667328139056137e-06, "loss": 0.0007, "step": 185640 }, { "epoch": 1.5181747556936664, "grad_norm": 0.01149137131869793, "learning_rate": 1.6662009296880132e-06, "loss": 0.0009, "step": 185650 }, { "epoch": 1.5182565318722656, "grad_norm": 0.01380008365958929, "learning_rate": 1.6656691133826097e-06, "loss": 0.0016, "step": 185660 }, { "epoch": 1.5183383080508648, "grad_norm": 0.14491897821426392, "learning_rate": 1.6651373650002362e-06, "loss": 0.0021, "step": 185670 }, { "epoch": 1.518420084229464, "grad_norm": 0.0035031232982873917, "learning_rate": 1.6646056845517255e-06, "loss": 0.0006, "step": 185680 }, { "epoch": 1.5185018604080631, "grad_norm": 0.021151969209313393, "learning_rate": 1.6640740720479082e-06, "loss": 0.0007, "step": 185690 }, { "epoch": 1.5185836365866623, "grad_norm": 0.01847071945667267, "learning_rate": 1.663542527499612e-06, "loss": 0.0005, "step": 185700 }, { "epoch": 1.5186654127652615, "grad_norm": 0.04268769174814224, "learning_rate": 1.6630110509176684e-06, "loss": 0.0007, "step": 185710 }, { "epoch": 1.5187471889438606, "grad_norm": 0.008563362061977386, "learning_rate": 1.662479642312902e-06, "loss": 0.0005, "step": 185720 }, { "epoch": 1.5188289651224598, "grad_norm": 0.16785147786140442, "learning_rate": 1.6619483016961373e-06, "loss": 0.0006, "step": 185730 }, { "epoch": 1.518910741301059, "grad_norm": 0.0030422783456742764, "learning_rate": 1.6614170290781984e-06, "loss": 0.0005, "step": 185740 }, { "epoch": 1.5189925174796581, "grad_norm": 0.040212277323007584, "learning_rate": 1.6608858244699094e-06, "loss": 0.0007, "step": 185750 }, { "epoch": 1.5190742936582573, "grad_norm": 0.015593310818076134, "learning_rate": 1.6603546878820909e-06, "loss": 0.0004, "step": 185760 }, { "epoch": 1.5191560698368565, "grad_norm": 0.009548280388116837, "learning_rate": 1.6598236193255618e-06, "loss": 0.0025, "step": 185770 }, { "epoch": 1.5192378460154559, "grad_norm": 0.02196805737912655, "learning_rate": 1.6592926188111402e-06, "loss": 0.0005, "step": 185780 }, { "epoch": 1.5193196221940548, "grad_norm": 0.13360165059566498, "learning_rate": 1.6587616863496436e-06, "loss": 0.0005, "step": 185790 }, { "epoch": 1.5194013983726542, "grad_norm": 0.019320880994200706, "learning_rate": 1.6582308219518877e-06, "loss": 0.0009, "step": 185800 }, { "epoch": 1.5194831745512531, "grad_norm": 0.009290454909205437, "learning_rate": 1.6577000256286863e-06, "loss": 0.0003, "step": 185810 }, { "epoch": 1.5195649507298525, "grad_norm": 0.052036263048648834, "learning_rate": 1.65716929739085e-06, "loss": 0.0004, "step": 185820 }, { "epoch": 1.5196467269084515, "grad_norm": 0.04099855199456215, "learning_rate": 1.6566386372491944e-06, "loss": 0.0003, "step": 185830 }, { "epoch": 1.5197285030870509, "grad_norm": 0.06091681867837906, "learning_rate": 1.6561080452145267e-06, "loss": 0.0008, "step": 185840 }, { "epoch": 1.5198102792656498, "grad_norm": 0.008848688565194607, "learning_rate": 1.655577521297657e-06, "loss": 0.0006, "step": 185850 }, { "epoch": 1.5198920554442492, "grad_norm": 0.005968377459794283, "learning_rate": 1.6550470655093914e-06, "loss": 0.0011, "step": 185860 }, { "epoch": 1.5199738316228482, "grad_norm": 0.020572692155838013, "learning_rate": 1.6545166778605343e-06, "loss": 0.0004, "step": 185870 }, { "epoch": 1.5200556078014476, "grad_norm": 0.13249555230140686, "learning_rate": 1.6539863583618932e-06, "loss": 0.0008, "step": 185880 }, { "epoch": 1.5201373839800465, "grad_norm": 0.20397138595581055, "learning_rate": 1.6534561070242699e-06, "loss": 0.0006, "step": 185890 }, { "epoch": 1.5202191601586459, "grad_norm": 0.006621815729886293, "learning_rate": 1.6529259238584677e-06, "loss": 0.0008, "step": 185900 }, { "epoch": 1.5203009363372448, "grad_norm": 0.002578981453552842, "learning_rate": 1.6523958088752818e-06, "loss": 0.0009, "step": 185910 }, { "epoch": 1.5203827125158442, "grad_norm": 0.06959745287895203, "learning_rate": 1.6518657620855161e-06, "loss": 0.0011, "step": 185920 }, { "epoch": 1.5204644886944432, "grad_norm": 0.02378574013710022, "learning_rate": 1.6513357834999661e-06, "loss": 0.0004, "step": 185930 }, { "epoch": 1.5205462648730426, "grad_norm": 0.013961870223283768, "learning_rate": 1.650805873129428e-06, "loss": 0.0004, "step": 185940 }, { "epoch": 1.5206280410516415, "grad_norm": 0.01757911778986454, "learning_rate": 1.6502760309846948e-06, "loss": 0.0004, "step": 185950 }, { "epoch": 1.520709817230241, "grad_norm": 0.07655833661556244, "learning_rate": 1.6497462570765638e-06, "loss": 0.0034, "step": 185960 }, { "epoch": 1.5207915934088398, "grad_norm": 0.0039039093535393476, "learning_rate": 1.6492165514158248e-06, "loss": 0.0008, "step": 185970 }, { "epoch": 1.5208733695874392, "grad_norm": 0.0005295852315612137, "learning_rate": 1.6486869140132677e-06, "loss": 0.0004, "step": 185980 }, { "epoch": 1.5209551457660384, "grad_norm": 0.08378075063228607, "learning_rate": 1.6481573448796812e-06, "loss": 0.0009, "step": 185990 }, { "epoch": 1.5210369219446376, "grad_norm": 0.00229622726328671, "learning_rate": 1.6476278440258554e-06, "loss": 0.0005, "step": 186000 }, { "epoch": 1.5211186981232367, "grad_norm": 0.04487936943769455, "learning_rate": 1.6470984114625754e-06, "loss": 0.0007, "step": 186010 }, { "epoch": 1.521200474301836, "grad_norm": 0.0476042814552784, "learning_rate": 1.646569047200628e-06, "loss": 0.0006, "step": 186020 }, { "epoch": 1.521282250480435, "grad_norm": 0.04025009647011757, "learning_rate": 1.6460397512507915e-06, "loss": 0.0007, "step": 186030 }, { "epoch": 1.5213640266590343, "grad_norm": 0.02116931416094303, "learning_rate": 1.6455105236238532e-06, "loss": 0.0012, "step": 186040 }, { "epoch": 1.5214458028376334, "grad_norm": 0.013102657161653042, "learning_rate": 1.6449813643305924e-06, "loss": 0.0007, "step": 186050 }, { "epoch": 1.5215275790162326, "grad_norm": 0.016302181407809258, "learning_rate": 1.6444522733817881e-06, "loss": 0.0009, "step": 186060 }, { "epoch": 1.5216093551948318, "grad_norm": 0.053684744983911514, "learning_rate": 1.6439232507882164e-06, "loss": 0.0004, "step": 186070 }, { "epoch": 1.521691131373431, "grad_norm": 0.010981273837387562, "learning_rate": 1.6433942965606582e-06, "loss": 0.0004, "step": 186080 }, { "epoch": 1.52177290755203, "grad_norm": 0.09210364520549774, "learning_rate": 1.6428654107098863e-06, "loss": 0.0008, "step": 186090 }, { "epoch": 1.5218546837306293, "grad_norm": 0.003270102897658944, "learning_rate": 1.642336593246675e-06, "loss": 0.0005, "step": 186100 }, { "epoch": 1.5219364599092284, "grad_norm": 0.03718237206339836, "learning_rate": 1.6418078441817953e-06, "loss": 0.0018, "step": 186110 }, { "epoch": 1.5220182360878276, "grad_norm": 0.026500128209590912, "learning_rate": 1.6412791635260206e-06, "loss": 0.0008, "step": 186120 }, { "epoch": 1.5221000122664268, "grad_norm": 0.01631002686917782, "learning_rate": 1.6407505512901195e-06, "loss": 0.0007, "step": 186130 }, { "epoch": 1.522181788445026, "grad_norm": 0.03053312934935093, "learning_rate": 1.6402220074848619e-06, "loss": 0.0006, "step": 186140 }, { "epoch": 1.522263564623625, "grad_norm": 0.02435567043721676, "learning_rate": 1.6396935321210094e-06, "loss": 0.0007, "step": 186150 }, { "epoch": 1.5223453408022243, "grad_norm": 0.023776041343808174, "learning_rate": 1.6391651252093333e-06, "loss": 0.0009, "step": 186160 }, { "epoch": 1.5224271169808234, "grad_norm": 0.011917916126549244, "learning_rate": 1.638636786760595e-06, "loss": 0.0005, "step": 186170 }, { "epoch": 1.5225088931594226, "grad_norm": 0.0339520089328289, "learning_rate": 1.6381085167855576e-06, "loss": 0.0006, "step": 186180 }, { "epoch": 1.5225906693380218, "grad_norm": 0.05742098391056061, "learning_rate": 1.637580315294981e-06, "loss": 0.0015, "step": 186190 }, { "epoch": 1.522672445516621, "grad_norm": 0.061325397342443466, "learning_rate": 1.6370521822996277e-06, "loss": 0.0014, "step": 186200 }, { "epoch": 1.5227542216952203, "grad_norm": 0.06395076960325241, "learning_rate": 1.6365241178102553e-06, "loss": 0.001, "step": 186210 }, { "epoch": 1.5228359978738193, "grad_norm": 0.030496150255203247, "learning_rate": 1.6359961218376208e-06, "loss": 0.0008, "step": 186220 }, { "epoch": 1.5229177740524187, "grad_norm": 0.010752087458968163, "learning_rate": 1.6354681943924777e-06, "loss": 0.0019, "step": 186230 }, { "epoch": 1.5229995502310176, "grad_norm": 0.056386057287454605, "learning_rate": 1.6349403354855842e-06, "loss": 0.001, "step": 186240 }, { "epoch": 1.523081326409617, "grad_norm": 0.03916407749056816, "learning_rate": 1.634412545127691e-06, "loss": 0.0016, "step": 186250 }, { "epoch": 1.523163102588216, "grad_norm": 0.03666365146636963, "learning_rate": 1.6338848233295518e-06, "loss": 0.0007, "step": 186260 }, { "epoch": 1.5232448787668154, "grad_norm": 0.0410715788602829, "learning_rate": 1.6333571701019118e-06, "loss": 0.0007, "step": 186270 }, { "epoch": 1.5233266549454143, "grad_norm": 0.03925050422549248, "learning_rate": 1.6328295854555243e-06, "loss": 0.0009, "step": 186280 }, { "epoch": 1.5234084311240137, "grad_norm": 0.09523411840200424, "learning_rate": 1.632302069401135e-06, "loss": 0.0011, "step": 186290 }, { "epoch": 1.5234902073026126, "grad_norm": 0.055201176553964615, "learning_rate": 1.6317746219494896e-06, "loss": 0.0004, "step": 186300 }, { "epoch": 1.523571983481212, "grad_norm": 0.05026618391275406, "learning_rate": 1.6312472431113318e-06, "loss": 0.0012, "step": 186310 }, { "epoch": 1.523653759659811, "grad_norm": 0.0381406731903553, "learning_rate": 1.6307199328974077e-06, "loss": 0.0005, "step": 186320 }, { "epoch": 1.5237355358384104, "grad_norm": 0.10825133323669434, "learning_rate": 1.6301926913184568e-06, "loss": 0.0006, "step": 186330 }, { "epoch": 1.5238173120170093, "grad_norm": 0.019385971128940582, "learning_rate": 1.62966551838522e-06, "loss": 0.0009, "step": 186340 }, { "epoch": 1.5238990881956087, "grad_norm": 0.015511817298829556, "learning_rate": 1.6291384141084348e-06, "loss": 0.0005, "step": 186350 }, { "epoch": 1.5239808643742077, "grad_norm": 0.031046204268932343, "learning_rate": 1.6286113784988416e-06, "loss": 0.0006, "step": 186360 }, { "epoch": 1.524062640552807, "grad_norm": 0.05736013129353523, "learning_rate": 1.6280844115671752e-06, "loss": 0.0007, "step": 186370 }, { "epoch": 1.524144416731406, "grad_norm": 0.034662824124097824, "learning_rate": 1.6275575133241712e-06, "loss": 0.0019, "step": 186380 }, { "epoch": 1.5242261929100054, "grad_norm": 0.019241953268647194, "learning_rate": 1.627030683780559e-06, "loss": 0.0006, "step": 186390 }, { "epoch": 1.5243079690886043, "grad_norm": 0.0231538787484169, "learning_rate": 1.6265039229470752e-06, "loss": 0.0008, "step": 186400 }, { "epoch": 1.5243897452672037, "grad_norm": 0.09998126327991486, "learning_rate": 1.6259772308344485e-06, "loss": 0.0004, "step": 186410 }, { "epoch": 1.524471521445803, "grad_norm": 0.03230046480894089, "learning_rate": 1.6254506074534083e-06, "loss": 0.0004, "step": 186420 }, { "epoch": 1.524553297624402, "grad_norm": 0.0591449998319149, "learning_rate": 1.6249240528146804e-06, "loss": 0.0006, "step": 186430 }, { "epoch": 1.5246350738030012, "grad_norm": 0.08506140112876892, "learning_rate": 1.6243975669289947e-06, "loss": 0.0005, "step": 186440 }, { "epoch": 1.5247168499816004, "grad_norm": 0.037727270275354385, "learning_rate": 1.6238711498070743e-06, "loss": 0.0006, "step": 186450 }, { "epoch": 1.5247986261601996, "grad_norm": 0.0483291856944561, "learning_rate": 1.6233448014596426e-06, "loss": 0.0007, "step": 186460 }, { "epoch": 1.5248804023387987, "grad_norm": 0.07212687283754349, "learning_rate": 1.6228185218974207e-06, "loss": 0.0012, "step": 186470 }, { "epoch": 1.524962178517398, "grad_norm": 0.007681636605411768, "learning_rate": 1.6222923111311318e-06, "loss": 0.0008, "step": 186480 }, { "epoch": 1.525043954695997, "grad_norm": 0.0037666228599846363, "learning_rate": 1.6217661691714948e-06, "loss": 0.0013, "step": 186490 }, { "epoch": 1.5251257308745962, "grad_norm": 0.003905410412698984, "learning_rate": 1.621240096029228e-06, "loss": 0.0006, "step": 186500 }, { "epoch": 1.5252075070531954, "grad_norm": 0.010732808150351048, "learning_rate": 1.6207140917150438e-06, "loss": 0.0004, "step": 186510 }, { "epoch": 1.5252892832317946, "grad_norm": 0.01964840106666088, "learning_rate": 1.6201881562396616e-06, "loss": 0.0002, "step": 186520 }, { "epoch": 1.5253710594103937, "grad_norm": 0.03205639123916626, "learning_rate": 1.6196622896137942e-06, "loss": 0.0013, "step": 186530 }, { "epoch": 1.525452835588993, "grad_norm": 0.00720197195187211, "learning_rate": 1.619136491848154e-06, "loss": 0.0006, "step": 186540 }, { "epoch": 1.525534611767592, "grad_norm": 0.06223490461707115, "learning_rate": 1.6186107629534492e-06, "loss": 0.0007, "step": 186550 }, { "epoch": 1.5256163879461913, "grad_norm": 0.04725328087806702, "learning_rate": 1.6180851029403937e-06, "loss": 0.0004, "step": 186560 }, { "epoch": 1.5256981641247904, "grad_norm": 0.014179227873682976, "learning_rate": 1.6175595118196934e-06, "loss": 0.0015, "step": 186570 }, { "epoch": 1.5257799403033896, "grad_norm": 0.09057459235191345, "learning_rate": 1.6170339896020559e-06, "loss": 0.0009, "step": 186580 }, { "epoch": 1.5258617164819888, "grad_norm": 0.09872151911258698, "learning_rate": 1.6165085362981837e-06, "loss": 0.0017, "step": 186590 }, { "epoch": 1.525943492660588, "grad_norm": 0.033941254019737244, "learning_rate": 1.6159831519187847e-06, "loss": 0.0006, "step": 186600 }, { "epoch": 1.526025268839187, "grad_norm": 0.01756308227777481, "learning_rate": 1.6154578364745593e-06, "loss": 0.0004, "step": 186610 }, { "epoch": 1.5261070450177863, "grad_norm": 0.03802100941538811, "learning_rate": 1.6149325899762103e-06, "loss": 0.0004, "step": 186620 }, { "epoch": 1.5261888211963854, "grad_norm": 0.019968491047620773, "learning_rate": 1.6144074124344334e-06, "loss": 0.0005, "step": 186630 }, { "epoch": 1.5262705973749848, "grad_norm": 0.042173873633146286, "learning_rate": 1.6138823038599305e-06, "loss": 0.0006, "step": 186640 }, { "epoch": 1.5263523735535838, "grad_norm": 0.0009369438048452139, "learning_rate": 1.6133572642633977e-06, "loss": 0.0008, "step": 186650 }, { "epoch": 1.5264341497321832, "grad_norm": 0.06751690059900284, "learning_rate": 1.6128322936555302e-06, "loss": 0.0007, "step": 186660 }, { "epoch": 1.5265159259107821, "grad_norm": 0.03283753618597984, "learning_rate": 1.61230739204702e-06, "loss": 0.0012, "step": 186670 }, { "epoch": 1.5265977020893815, "grad_norm": 0.04695171117782593, "learning_rate": 1.6117825594485642e-06, "loss": 0.0006, "step": 186680 }, { "epoch": 1.5266794782679804, "grad_norm": 0.0005603982135653496, "learning_rate": 1.6112577958708513e-06, "loss": 0.0011, "step": 186690 }, { "epoch": 1.5267612544465798, "grad_norm": 0.03630990535020828, "learning_rate": 1.610733101324572e-06, "loss": 0.0004, "step": 186700 }, { "epoch": 1.5268430306251788, "grad_norm": 0.016678273677825928, "learning_rate": 1.6102084758204124e-06, "loss": 0.0012, "step": 186710 }, { "epoch": 1.5269248068037782, "grad_norm": 0.06286413222551346, "learning_rate": 1.6096839193690633e-06, "loss": 0.0004, "step": 186720 }, { "epoch": 1.5270065829823771, "grad_norm": 0.0019149095751345158, "learning_rate": 1.6091594319812088e-06, "loss": 0.0006, "step": 186730 }, { "epoch": 1.5270883591609765, "grad_norm": 0.021671470254659653, "learning_rate": 1.6086350136675337e-06, "loss": 0.0016, "step": 186740 }, { "epoch": 1.5271701353395755, "grad_norm": 0.011418698355555534, "learning_rate": 1.6081106644387178e-06, "loss": 0.0006, "step": 186750 }, { "epoch": 1.5272519115181749, "grad_norm": 0.0018466669134795666, "learning_rate": 1.6075863843054456e-06, "loss": 0.0002, "step": 186760 }, { "epoch": 1.5273336876967738, "grad_norm": 0.03913303464651108, "learning_rate": 1.6070621732783964e-06, "loss": 0.0008, "step": 186770 }, { "epoch": 1.5274154638753732, "grad_norm": 0.04335268959403038, "learning_rate": 1.6065380313682488e-06, "loss": 0.0007, "step": 186780 }, { "epoch": 1.5274972400539721, "grad_norm": 0.01422620564699173, "learning_rate": 1.6060139585856782e-06, "loss": 0.0012, "step": 186790 }, { "epoch": 1.5275790162325715, "grad_norm": 0.05528361350297928, "learning_rate": 1.605489954941364e-06, "loss": 0.0007, "step": 186800 }, { "epoch": 1.5276607924111705, "grad_norm": 0.04962291195988655, "learning_rate": 1.6049660204459777e-06, "loss": 0.0007, "step": 186810 }, { "epoch": 1.5277425685897699, "grad_norm": 0.05685748904943466, "learning_rate": 1.604442155110194e-06, "loss": 0.0009, "step": 186820 }, { "epoch": 1.5278243447683688, "grad_norm": 0.0036041929852217436, "learning_rate": 1.6039183589446816e-06, "loss": 0.0003, "step": 186830 }, { "epoch": 1.5279061209469682, "grad_norm": 0.0011160121066495776, "learning_rate": 1.6033946319601147e-06, "loss": 0.0015, "step": 186840 }, { "epoch": 1.5279878971255674, "grad_norm": 0.04722178727388382, "learning_rate": 1.60287097416716e-06, "loss": 0.0013, "step": 186850 }, { "epoch": 1.5280696733041665, "grad_norm": 0.10626774281263351, "learning_rate": 1.602347385576486e-06, "loss": 0.0018, "step": 186860 }, { "epoch": 1.5281514494827657, "grad_norm": 0.0011454373598098755, "learning_rate": 1.6018238661987546e-06, "loss": 0.0007, "step": 186870 }, { "epoch": 1.5282332256613649, "grad_norm": 0.018554454669356346, "learning_rate": 1.6013004160446349e-06, "loss": 0.0006, "step": 186880 }, { "epoch": 1.528315001839964, "grad_norm": 0.09643663465976715, "learning_rate": 1.600777035124788e-06, "loss": 0.0005, "step": 186890 }, { "epoch": 1.5283967780185632, "grad_norm": 0.034975238144397736, "learning_rate": 1.6002537234498767e-06, "loss": 0.0007, "step": 186900 }, { "epoch": 1.5284785541971624, "grad_norm": 0.02397186867892742, "learning_rate": 1.599730481030558e-06, "loss": 0.0009, "step": 186910 }, { "epoch": 1.5285603303757616, "grad_norm": 0.024708829820156097, "learning_rate": 1.5992073078774956e-06, "loss": 0.0007, "step": 186920 }, { "epoch": 1.5286421065543607, "grad_norm": 0.09381581842899323, "learning_rate": 1.598684204001344e-06, "loss": 0.0004, "step": 186930 }, { "epoch": 1.52872388273296, "grad_norm": 0.06385006010532379, "learning_rate": 1.5981611694127603e-06, "loss": 0.0006, "step": 186940 }, { "epoch": 1.528805658911559, "grad_norm": 0.08274242281913757, "learning_rate": 1.597638204122397e-06, "loss": 0.0004, "step": 186950 }, { "epoch": 1.5288874350901582, "grad_norm": 0.004730492830276489, "learning_rate": 1.5971153081409107e-06, "loss": 0.0004, "step": 186960 }, { "epoch": 1.5289692112687574, "grad_norm": 0.06687452644109726, "learning_rate": 1.5965924814789518e-06, "loss": 0.0008, "step": 186970 }, { "epoch": 1.5290509874473566, "grad_norm": 0.05043366178870201, "learning_rate": 1.5960697241471717e-06, "loss": 0.0004, "step": 186980 }, { "epoch": 1.5291327636259557, "grad_norm": 0.07030444592237473, "learning_rate": 1.5955470361562153e-06, "loss": 0.0007, "step": 186990 }, { "epoch": 1.529214539804555, "grad_norm": 0.025726353749632835, "learning_rate": 1.5950244175167345e-06, "loss": 0.0009, "step": 187000 }, { "epoch": 1.529296315983154, "grad_norm": 0.02725544013082981, "learning_rate": 1.5945018682393738e-06, "loss": 0.0006, "step": 187010 }, { "epoch": 1.5293780921617532, "grad_norm": 0.0179204810410738, "learning_rate": 1.5939793883347782e-06, "loss": 0.001, "step": 187020 }, { "epoch": 1.5294598683403524, "grad_norm": 0.0020230812951922417, "learning_rate": 1.59345697781359e-06, "loss": 0.001, "step": 187030 }, { "epoch": 1.5295416445189516, "grad_norm": 0.04562128707766533, "learning_rate": 1.592934636686453e-06, "loss": 0.0004, "step": 187040 }, { "epoch": 1.5296234206975508, "grad_norm": 0.01888224296271801, "learning_rate": 1.5924123649640073e-06, "loss": 0.0013, "step": 187050 }, { "epoch": 1.52970519687615, "grad_norm": 0.04222158342599869, "learning_rate": 1.5918901626568917e-06, "loss": 0.001, "step": 187060 }, { "epoch": 1.5297869730547493, "grad_norm": 0.1279335618019104, "learning_rate": 1.5913680297757417e-06, "loss": 0.0018, "step": 187070 }, { "epoch": 1.5298687492333483, "grad_norm": 0.015085358172655106, "learning_rate": 1.590845966331197e-06, "loss": 0.0006, "step": 187080 }, { "epoch": 1.5299505254119476, "grad_norm": 0.03528163582086563, "learning_rate": 1.5903239723338932e-06, "loss": 0.0007, "step": 187090 }, { "epoch": 1.5300323015905466, "grad_norm": 0.052134931087493896, "learning_rate": 1.5898020477944599e-06, "loss": 0.0008, "step": 187100 }, { "epoch": 1.530114077769146, "grad_norm": 0.06497630476951599, "learning_rate": 1.589280192723529e-06, "loss": 0.0024, "step": 187110 }, { "epoch": 1.530195853947745, "grad_norm": 0.05329587683081627, "learning_rate": 1.5887584071317352e-06, "loss": 0.0013, "step": 187120 }, { "epoch": 1.5302776301263443, "grad_norm": 0.013990549370646477, "learning_rate": 1.5882366910297047e-06, "loss": 0.0005, "step": 187130 }, { "epoch": 1.5303594063049433, "grad_norm": 0.07081533223390579, "learning_rate": 1.5877150444280665e-06, "loss": 0.0009, "step": 187140 }, { "epoch": 1.5304411824835427, "grad_norm": 0.054020803421735764, "learning_rate": 1.5871934673374445e-06, "loss": 0.001, "step": 187150 }, { "epoch": 1.5305229586621416, "grad_norm": 0.0075996676459908485, "learning_rate": 1.5866719597684677e-06, "loss": 0.0007, "step": 187160 }, { "epoch": 1.530604734840741, "grad_norm": 0.062415145337581635, "learning_rate": 1.5861505217317568e-06, "loss": 0.0009, "step": 187170 }, { "epoch": 1.53068651101934, "grad_norm": 0.012789767235517502, "learning_rate": 1.5856291532379353e-06, "loss": 0.0006, "step": 187180 }, { "epoch": 1.5307682871979393, "grad_norm": 0.037427615374326706, "learning_rate": 1.5851078542976217e-06, "loss": 0.0006, "step": 187190 }, { "epoch": 1.5308500633765383, "grad_norm": 0.06158365681767464, "learning_rate": 1.5845866249214387e-06, "loss": 0.0021, "step": 187200 }, { "epoch": 1.5309318395551377, "grad_norm": 0.06054794043302536, "learning_rate": 1.5840654651200032e-06, "loss": 0.0005, "step": 187210 }, { "epoch": 1.5310136157337366, "grad_norm": 0.055039551109075546, "learning_rate": 1.5835443749039298e-06, "loss": 0.0015, "step": 187220 }, { "epoch": 1.531095391912336, "grad_norm": 0.05834510922431946, "learning_rate": 1.5830233542838324e-06, "loss": 0.0004, "step": 187230 }, { "epoch": 1.531177168090935, "grad_norm": 0.020635653287172318, "learning_rate": 1.5825024032703291e-06, "loss": 0.0007, "step": 187240 }, { "epoch": 1.5312589442695344, "grad_norm": 0.13659757375717163, "learning_rate": 1.5819815218740292e-06, "loss": 0.0016, "step": 187250 }, { "epoch": 1.5313407204481333, "grad_norm": 0.050386909395456314, "learning_rate": 1.5814607101055441e-06, "loss": 0.0006, "step": 187260 }, { "epoch": 1.5314224966267327, "grad_norm": 0.07373514026403427, "learning_rate": 1.580939967975481e-06, "loss": 0.001, "step": 187270 }, { "epoch": 1.5315042728053319, "grad_norm": 0.07918055355548859, "learning_rate": 1.580419295494452e-06, "loss": 0.0004, "step": 187280 }, { "epoch": 1.531586048983931, "grad_norm": 0.06633102893829346, "learning_rate": 1.5798986926730613e-06, "loss": 0.0004, "step": 187290 }, { "epoch": 1.5316678251625302, "grad_norm": 0.0634494498372078, "learning_rate": 1.579378159521915e-06, "loss": 0.0008, "step": 187300 }, { "epoch": 1.5317496013411294, "grad_norm": 0.0236953217536211, "learning_rate": 1.578857696051615e-06, "loss": 0.0011, "step": 187310 }, { "epoch": 1.5318313775197285, "grad_norm": 0.03558504581451416, "learning_rate": 1.5783373022727632e-06, "loss": 0.0012, "step": 187320 }, { "epoch": 1.5319131536983277, "grad_norm": 0.03815910592675209, "learning_rate": 1.5778169781959657e-06, "loss": 0.0007, "step": 187330 }, { "epoch": 1.5319949298769269, "grad_norm": 0.20382185280323029, "learning_rate": 1.577296723831816e-06, "loss": 0.0014, "step": 187340 }, { "epoch": 1.532076706055526, "grad_norm": 0.02362171746790409, "learning_rate": 1.576776539190914e-06, "loss": 0.0023, "step": 187350 }, { "epoch": 1.5321584822341252, "grad_norm": 0.01923149824142456, "learning_rate": 1.5762564242838547e-06, "loss": 0.0013, "step": 187360 }, { "epoch": 1.5322402584127244, "grad_norm": 0.020082516595721245, "learning_rate": 1.575736379121237e-06, "loss": 0.0002, "step": 187370 }, { "epoch": 1.5323220345913235, "grad_norm": 0.04310327023267746, "learning_rate": 1.5752164037136524e-06, "loss": 0.0008, "step": 187380 }, { "epoch": 1.5324038107699227, "grad_norm": 0.010503468103706837, "learning_rate": 1.5746964980716934e-06, "loss": 0.0006, "step": 187390 }, { "epoch": 1.5324855869485219, "grad_norm": 0.03389820456504822, "learning_rate": 1.5741766622059496e-06, "loss": 0.0008, "step": 187400 }, { "epoch": 1.532567363127121, "grad_norm": 0.030116407200694084, "learning_rate": 1.5736568961270132e-06, "loss": 0.0013, "step": 187410 }, { "epoch": 1.5326491393057202, "grad_norm": 0.005587225314229727, "learning_rate": 1.5731371998454708e-06, "loss": 0.0005, "step": 187420 }, { "epoch": 1.5327309154843194, "grad_norm": 0.001733651151880622, "learning_rate": 1.5726175733719096e-06, "loss": 0.0005, "step": 187430 }, { "epoch": 1.5328126916629186, "grad_norm": 0.054564788937568665, "learning_rate": 1.5720980167169125e-06, "loss": 0.0004, "step": 187440 }, { "epoch": 1.5328944678415177, "grad_norm": 0.010825459845364094, "learning_rate": 1.5715785298910686e-06, "loss": 0.0005, "step": 187450 }, { "epoch": 1.532976244020117, "grad_norm": 0.015590977855026722, "learning_rate": 1.5710591129049545e-06, "loss": 0.0008, "step": 187460 }, { "epoch": 1.533058020198716, "grad_norm": 0.12942425906658173, "learning_rate": 1.570539765769154e-06, "loss": 0.0006, "step": 187470 }, { "epoch": 1.5331397963773152, "grad_norm": 0.015130317769944668, "learning_rate": 1.5700204884942444e-06, "loss": 0.0009, "step": 187480 }, { "epoch": 1.5332215725559144, "grad_norm": 0.02776774764060974, "learning_rate": 1.5695012810908073e-06, "loss": 0.0008, "step": 187490 }, { "epoch": 1.5333033487345138, "grad_norm": 0.02957765758037567, "learning_rate": 1.5689821435694175e-06, "loss": 0.0019, "step": 187500 }, { "epoch": 1.5333851249131127, "grad_norm": 0.09911149740219116, "learning_rate": 1.5684630759406499e-06, "loss": 0.0003, "step": 187510 }, { "epoch": 1.5334669010917121, "grad_norm": 0.0011153819505125284, "learning_rate": 1.5679440782150774e-06, "loss": 0.0003, "step": 187520 }, { "epoch": 1.533548677270311, "grad_norm": 0.016420556232333183, "learning_rate": 1.5674251504032751e-06, "loss": 0.0008, "step": 187530 }, { "epoch": 1.5336304534489105, "grad_norm": 0.10928328335285187, "learning_rate": 1.566906292515813e-06, "loss": 0.0013, "step": 187540 }, { "epoch": 1.5337122296275094, "grad_norm": 0.01843942515552044, "learning_rate": 1.56638750456326e-06, "loss": 0.0004, "step": 187550 }, { "epoch": 1.5337940058061088, "grad_norm": 0.002650334034115076, "learning_rate": 1.5658687865561828e-06, "loss": 0.0006, "step": 187560 }, { "epoch": 1.5338757819847078, "grad_norm": 0.0141365397721529, "learning_rate": 1.5653501385051533e-06, "loss": 0.0009, "step": 187570 }, { "epoch": 1.5339575581633071, "grad_norm": 0.03175235539674759, "learning_rate": 1.564831560420732e-06, "loss": 0.0009, "step": 187580 }, { "epoch": 1.534039334341906, "grad_norm": 0.03275575861334801, "learning_rate": 1.5643130523134836e-06, "loss": 0.0007, "step": 187590 }, { "epoch": 1.5341211105205055, "grad_norm": 0.009767771698534489, "learning_rate": 1.5637946141939692e-06, "loss": 0.0005, "step": 187600 }, { "epoch": 1.5342028866991044, "grad_norm": 0.04974803701043129, "learning_rate": 1.5632762460727535e-06, "loss": 0.0009, "step": 187610 }, { "epoch": 1.5342846628777038, "grad_norm": 0.04207516461610794, "learning_rate": 1.5627579479603945e-06, "loss": 0.0007, "step": 187620 }, { "epoch": 1.5343664390563028, "grad_norm": 0.019774561747908592, "learning_rate": 1.5622397198674494e-06, "loss": 0.0007, "step": 187630 }, { "epoch": 1.5344482152349022, "grad_norm": 0.015669316053390503, "learning_rate": 1.5617215618044746e-06, "loss": 0.0008, "step": 187640 }, { "epoch": 1.534529991413501, "grad_norm": 0.10388699173927307, "learning_rate": 1.5612034737820276e-06, "loss": 0.0006, "step": 187650 }, { "epoch": 1.5346117675921005, "grad_norm": 0.03659123182296753, "learning_rate": 1.560685455810661e-06, "loss": 0.0008, "step": 187660 }, { "epoch": 1.5346935437706994, "grad_norm": 0.048789139837026596, "learning_rate": 1.560167507900927e-06, "loss": 0.0008, "step": 187670 }, { "epoch": 1.5347753199492988, "grad_norm": 0.040424324572086334, "learning_rate": 1.5596496300633757e-06, "loss": 0.0007, "step": 187680 }, { "epoch": 1.5348570961278978, "grad_norm": 0.08531320840120316, "learning_rate": 1.559131822308561e-06, "loss": 0.0006, "step": 187690 }, { "epoch": 1.5349388723064972, "grad_norm": 0.019203759729862213, "learning_rate": 1.5586140846470265e-06, "loss": 0.001, "step": 187700 }, { "epoch": 1.5350206484850963, "grad_norm": 0.01071918848901987, "learning_rate": 1.5580964170893205e-06, "loss": 0.001, "step": 187710 }, { "epoch": 1.5351024246636955, "grad_norm": 0.04166724532842636, "learning_rate": 1.5575788196459861e-06, "loss": 0.0008, "step": 187720 }, { "epoch": 1.5351842008422947, "grad_norm": 0.009649761021137238, "learning_rate": 1.5570612923275712e-06, "loss": 0.0006, "step": 187730 }, { "epoch": 1.5352659770208938, "grad_norm": 0.0023670815862715244, "learning_rate": 1.5565438351446166e-06, "loss": 0.0008, "step": 187740 }, { "epoch": 1.535347753199493, "grad_norm": 0.0319046787917614, "learning_rate": 1.5560264481076626e-06, "loss": 0.0005, "step": 187750 }, { "epoch": 1.5354295293780922, "grad_norm": 0.025301123037934303, "learning_rate": 1.555509131227248e-06, "loss": 0.0006, "step": 187760 }, { "epoch": 1.5355113055566914, "grad_norm": 0.11972256749868393, "learning_rate": 1.5549918845139135e-06, "loss": 0.0006, "step": 187770 }, { "epoch": 1.5355930817352905, "grad_norm": 0.0037013015244156122, "learning_rate": 1.5544747079781947e-06, "loss": 0.0006, "step": 187780 }, { "epoch": 1.5356748579138897, "grad_norm": 0.002749501494690776, "learning_rate": 1.5539576016306267e-06, "loss": 0.0006, "step": 187790 }, { "epoch": 1.5357566340924889, "grad_norm": 0.012517513707280159, "learning_rate": 1.5534405654817418e-06, "loss": 0.0008, "step": 187800 }, { "epoch": 1.535838410271088, "grad_norm": 0.05363868176937103, "learning_rate": 1.5529235995420777e-06, "loss": 0.0006, "step": 187810 }, { "epoch": 1.5359201864496872, "grad_norm": 0.08147616684436798, "learning_rate": 1.5524067038221596e-06, "loss": 0.0005, "step": 187820 }, { "epoch": 1.5360019626282864, "grad_norm": 0.03907839208841324, "learning_rate": 1.5518898783325198e-06, "loss": 0.0008, "step": 187830 }, { "epoch": 1.5360837388068855, "grad_norm": 0.023326922208070755, "learning_rate": 1.5513731230836838e-06, "loss": 0.0006, "step": 187840 }, { "epoch": 1.5361655149854847, "grad_norm": 0.023034285753965378, "learning_rate": 1.5508564380861824e-06, "loss": 0.0006, "step": 187850 }, { "epoch": 1.5362472911640839, "grad_norm": 0.018541986122727394, "learning_rate": 1.550339823350539e-06, "loss": 0.0005, "step": 187860 }, { "epoch": 1.536329067342683, "grad_norm": 0.04187488928437233, "learning_rate": 1.5498232788872775e-06, "loss": 0.0007, "step": 187870 }, { "epoch": 1.5364108435212822, "grad_norm": 0.030807998031377792, "learning_rate": 1.5493068047069183e-06, "loss": 0.002, "step": 187880 }, { "epoch": 1.5364926196998814, "grad_norm": 0.13340149819850922, "learning_rate": 1.5487904008199861e-06, "loss": 0.0005, "step": 187890 }, { "epoch": 1.5365743958784805, "grad_norm": 0.03361033275723457, "learning_rate": 1.5482740672369989e-06, "loss": 0.001, "step": 187900 }, { "epoch": 1.53665617205708, "grad_norm": 0.11667678505182266, "learning_rate": 1.5477578039684748e-06, "loss": 0.0009, "step": 187910 }, { "epoch": 1.5367379482356789, "grad_norm": 0.08814093470573425, "learning_rate": 1.5472416110249282e-06, "loss": 0.0004, "step": 187920 }, { "epoch": 1.5368197244142783, "grad_norm": 0.0032588448375463486, "learning_rate": 1.5467254884168803e-06, "loss": 0.0008, "step": 187930 }, { "epoch": 1.5369015005928772, "grad_norm": 0.06251631677150726, "learning_rate": 1.5462094361548392e-06, "loss": 0.001, "step": 187940 }, { "epoch": 1.5369832767714766, "grad_norm": 0.052266404032707214, "learning_rate": 1.5456934542493195e-06, "loss": 0.0012, "step": 187950 }, { "epoch": 1.5370650529500756, "grad_norm": 0.04006539657711983, "learning_rate": 1.5451775427108302e-06, "loss": 0.0008, "step": 187960 }, { "epoch": 1.537146829128675, "grad_norm": 0.005844434257596731, "learning_rate": 1.5446617015498844e-06, "loss": 0.0005, "step": 187970 }, { "epoch": 1.537228605307274, "grad_norm": 0.016850270330905914, "learning_rate": 1.544145930776988e-06, "loss": 0.0007, "step": 187980 }, { "epoch": 1.5373103814858733, "grad_norm": 0.03397456184029579, "learning_rate": 1.543630230402648e-06, "loss": 0.0007, "step": 187990 }, { "epoch": 1.5373921576644722, "grad_norm": 0.08264993131160736, "learning_rate": 1.5431146004373681e-06, "loss": 0.0004, "step": 188000 }, { "epoch": 1.5374739338430716, "grad_norm": 0.033148061484098434, "learning_rate": 1.5425990408916559e-06, "loss": 0.0009, "step": 188010 }, { "epoch": 1.5375557100216706, "grad_norm": 0.004111429210752249, "learning_rate": 1.5420835517760107e-06, "loss": 0.001, "step": 188020 }, { "epoch": 1.53763748620027, "grad_norm": 0.0016396481078118086, "learning_rate": 1.5415681331009353e-06, "loss": 0.0007, "step": 188030 }, { "epoch": 1.537719262378869, "grad_norm": 0.01151891890913248, "learning_rate": 1.5410527848769258e-06, "loss": 0.0025, "step": 188040 }, { "epoch": 1.5378010385574683, "grad_norm": 0.0021600776817649603, "learning_rate": 1.5405375071144861e-06, "loss": 0.0005, "step": 188050 }, { "epoch": 1.5378828147360672, "grad_norm": 0.12105178833007812, "learning_rate": 1.5400222998241078e-06, "loss": 0.0006, "step": 188060 }, { "epoch": 1.5379645909146666, "grad_norm": 0.06069689244031906, "learning_rate": 1.5395071630162873e-06, "loss": 0.0011, "step": 188070 }, { "epoch": 1.5380463670932656, "grad_norm": 0.032791946083307266, "learning_rate": 1.5389920967015177e-06, "loss": 0.0006, "step": 188080 }, { "epoch": 1.538128143271865, "grad_norm": 0.10737232863903046, "learning_rate": 1.5384771008902938e-06, "loss": 0.0007, "step": 188090 }, { "epoch": 1.538209919450464, "grad_norm": 0.01613524742424488, "learning_rate": 1.5379621755931046e-06, "loss": 0.0009, "step": 188100 }, { "epoch": 1.5382916956290633, "grad_norm": 0.045770276337862015, "learning_rate": 1.5374473208204404e-06, "loss": 0.0007, "step": 188110 }, { "epoch": 1.5383734718076623, "grad_norm": 0.0982421413064003, "learning_rate": 1.536932536582787e-06, "loss": 0.001, "step": 188120 }, { "epoch": 1.5384552479862617, "grad_norm": 0.0026268698275089264, "learning_rate": 1.536417822890634e-06, "loss": 0.0007, "step": 188130 }, { "epoch": 1.5385370241648608, "grad_norm": 0.025224635377526283, "learning_rate": 1.535903179754466e-06, "loss": 0.0004, "step": 188140 }, { "epoch": 1.53861880034346, "grad_norm": 0.17436878383159637, "learning_rate": 1.5353886071847656e-06, "loss": 0.0007, "step": 188150 }, { "epoch": 1.5387005765220592, "grad_norm": 0.0706794410943985, "learning_rate": 1.534874105192014e-06, "loss": 0.0008, "step": 188160 }, { "epoch": 1.5387823527006583, "grad_norm": 0.01269934605807066, "learning_rate": 1.5343596737866967e-06, "loss": 0.0005, "step": 188170 }, { "epoch": 1.5388641288792575, "grad_norm": 0.01794479973614216, "learning_rate": 1.533845312979288e-06, "loss": 0.0008, "step": 188180 }, { "epoch": 1.5389459050578567, "grad_norm": 0.006838272325694561, "learning_rate": 1.5333310227802678e-06, "loss": 0.0002, "step": 188190 }, { "epoch": 1.5390276812364558, "grad_norm": 0.021061450242996216, "learning_rate": 1.5328168032001111e-06, "loss": 0.0005, "step": 188200 }, { "epoch": 1.539109457415055, "grad_norm": 0.1109335646033287, "learning_rate": 1.5323026542492958e-06, "loss": 0.0004, "step": 188210 }, { "epoch": 1.5391912335936542, "grad_norm": 0.031460538506507874, "learning_rate": 1.5317885759382938e-06, "loss": 0.0015, "step": 188220 }, { "epoch": 1.5392730097722533, "grad_norm": 0.03411824628710747, "learning_rate": 1.5312745682775781e-06, "loss": 0.0005, "step": 188230 }, { "epoch": 1.5393547859508525, "grad_norm": 0.06569148600101471, "learning_rate": 1.5307606312776174e-06, "loss": 0.0006, "step": 188240 }, { "epoch": 1.5394365621294517, "grad_norm": 0.037843506783246994, "learning_rate": 1.530246764948884e-06, "loss": 0.0005, "step": 188250 }, { "epoch": 1.5395183383080508, "grad_norm": 0.0392940379679203, "learning_rate": 1.5297329693018442e-06, "loss": 0.0005, "step": 188260 }, { "epoch": 1.53960011448665, "grad_norm": 0.04609063267707825, "learning_rate": 1.529219244346965e-06, "loss": 0.0008, "step": 188270 }, { "epoch": 1.5396818906652492, "grad_norm": 0.05917833745479584, "learning_rate": 1.528705590094709e-06, "loss": 0.0008, "step": 188280 }, { "epoch": 1.5397636668438484, "grad_norm": 0.009286326356232166, "learning_rate": 1.528192006555545e-06, "loss": 0.0003, "step": 188290 }, { "epoch": 1.5398454430224475, "grad_norm": 0.058517396450042725, "learning_rate": 1.5276784937399307e-06, "loss": 0.0009, "step": 188300 }, { "epoch": 1.5399272192010467, "grad_norm": 0.016187472268939018, "learning_rate": 1.5271650516583275e-06, "loss": 0.0007, "step": 188310 }, { "epoch": 1.5400089953796459, "grad_norm": 0.022787682712078094, "learning_rate": 1.5266516803211934e-06, "loss": 0.0006, "step": 188320 }, { "epoch": 1.540090771558245, "grad_norm": 0.020508531481027603, "learning_rate": 1.5261383797389895e-06, "loss": 0.0006, "step": 188330 }, { "epoch": 1.5401725477368444, "grad_norm": 0.002838543616235256, "learning_rate": 1.5256251499221708e-06, "loss": 0.0016, "step": 188340 }, { "epoch": 1.5402543239154434, "grad_norm": 0.050907202064991, "learning_rate": 1.5251119908811918e-06, "loss": 0.0007, "step": 188350 }, { "epoch": 1.5403361000940428, "grad_norm": 0.001274166745133698, "learning_rate": 1.5245989026265046e-06, "loss": 0.0021, "step": 188360 }, { "epoch": 1.5404178762726417, "grad_norm": 0.0018352303886786103, "learning_rate": 1.5240858851685637e-06, "loss": 0.0004, "step": 188370 }, { "epoch": 1.540499652451241, "grad_norm": 0.03098689578473568, "learning_rate": 1.5235729385178194e-06, "loss": 0.0009, "step": 188380 }, { "epoch": 1.54058142862984, "grad_norm": 0.054129038006067276, "learning_rate": 1.5230600626847197e-06, "loss": 0.0006, "step": 188390 }, { "epoch": 1.5406632048084394, "grad_norm": 0.002366614295169711, "learning_rate": 1.5225472576797112e-06, "loss": 0.0006, "step": 188400 }, { "epoch": 1.5407449809870384, "grad_norm": 0.04940374195575714, "learning_rate": 1.522034523513245e-06, "loss": 0.001, "step": 188410 }, { "epoch": 1.5408267571656378, "grad_norm": 0.03587056323885918, "learning_rate": 1.5215218601957604e-06, "loss": 0.0007, "step": 188420 }, { "epoch": 1.5409085333442367, "grad_norm": 0.07453221827745438, "learning_rate": 1.5210092677377036e-06, "loss": 0.0007, "step": 188430 }, { "epoch": 1.540990309522836, "grad_norm": 0.07278699427843094, "learning_rate": 1.520496746149514e-06, "loss": 0.0008, "step": 188440 }, { "epoch": 1.541072085701435, "grad_norm": 0.11576641350984573, "learning_rate": 1.5199842954416361e-06, "loss": 0.0007, "step": 188450 }, { "epoch": 1.5411538618800344, "grad_norm": 0.04087776690721512, "learning_rate": 1.5194719156245063e-06, "loss": 0.0008, "step": 188460 }, { "epoch": 1.5412356380586334, "grad_norm": 0.026423737406730652, "learning_rate": 1.5189596067085627e-06, "loss": 0.0017, "step": 188470 }, { "epoch": 1.5413174142372328, "grad_norm": 0.03421437367796898, "learning_rate": 1.51844736870424e-06, "loss": 0.0007, "step": 188480 }, { "epoch": 1.5413991904158317, "grad_norm": 0.12856893241405487, "learning_rate": 1.5179352016219757e-06, "loss": 0.0012, "step": 188490 }, { "epoch": 1.5414809665944311, "grad_norm": 0.03997478261590004, "learning_rate": 1.5174231054722022e-06, "loss": 0.0015, "step": 188500 }, { "epoch": 1.54156274277303, "grad_norm": 0.0028768640477210283, "learning_rate": 1.5169110802653502e-06, "loss": 0.0002, "step": 188510 }, { "epoch": 1.5416445189516295, "grad_norm": 0.014403062872588634, "learning_rate": 1.5163991260118494e-06, "loss": 0.0007, "step": 188520 }, { "epoch": 1.5417262951302284, "grad_norm": 0.01944119669497013, "learning_rate": 1.5158872427221333e-06, "loss": 0.0005, "step": 188530 }, { "epoch": 1.5418080713088278, "grad_norm": 0.05336301401257515, "learning_rate": 1.5153754304066243e-06, "loss": 0.0011, "step": 188540 }, { "epoch": 1.541889847487427, "grad_norm": 0.020818838849663734, "learning_rate": 1.5148636890757506e-06, "loss": 0.0007, "step": 188550 }, { "epoch": 1.5419716236660261, "grad_norm": 0.0016378891887143254, "learning_rate": 1.5143520187399346e-06, "loss": 0.0005, "step": 188560 }, { "epoch": 1.5420533998446253, "grad_norm": 0.013265073299407959, "learning_rate": 1.5138404194096029e-06, "loss": 0.0012, "step": 188570 }, { "epoch": 1.5421351760232245, "grad_norm": 0.0886072888970375, "learning_rate": 1.5133288910951754e-06, "loss": 0.0009, "step": 188580 }, { "epoch": 1.5422169522018236, "grad_norm": 0.009596883319318295, "learning_rate": 1.5128174338070722e-06, "loss": 0.0004, "step": 188590 }, { "epoch": 1.5422987283804228, "grad_norm": 0.03721000254154205, "learning_rate": 1.5123060475557116e-06, "loss": 0.0012, "step": 188600 }, { "epoch": 1.542380504559022, "grad_norm": 0.02173680067062378, "learning_rate": 1.5117947323515126e-06, "loss": 0.0004, "step": 188610 }, { "epoch": 1.5424622807376211, "grad_norm": 0.028414417058229446, "learning_rate": 1.5112834882048906e-06, "loss": 0.0009, "step": 188620 }, { "epoch": 1.5425440569162203, "grad_norm": 0.005495938938111067, "learning_rate": 1.5107723151262598e-06, "loss": 0.003, "step": 188630 }, { "epoch": 1.5426258330948195, "grad_norm": 0.017490269616246223, "learning_rate": 1.5102612131260313e-06, "loss": 0.0006, "step": 188640 }, { "epoch": 1.5427076092734187, "grad_norm": 0.005904392804950476, "learning_rate": 1.5097501822146221e-06, "loss": 0.0005, "step": 188650 }, { "epoch": 1.5427893854520178, "grad_norm": 0.05107475072145462, "learning_rate": 1.5092392224024366e-06, "loss": 0.0011, "step": 188660 }, { "epoch": 1.542871161630617, "grad_norm": 0.007383829448372126, "learning_rate": 1.508728333699886e-06, "loss": 0.0008, "step": 188670 }, { "epoch": 1.5429529378092162, "grad_norm": 0.022949643433094025, "learning_rate": 1.508217516117375e-06, "loss": 0.0007, "step": 188680 }, { "epoch": 1.5430347139878153, "grad_norm": 0.024881716817617416, "learning_rate": 1.5077067696653135e-06, "loss": 0.0005, "step": 188690 }, { "epoch": 1.5431164901664145, "grad_norm": 0.03789599984884262, "learning_rate": 1.5071960943541036e-06, "loss": 0.0005, "step": 188700 }, { "epoch": 1.5431982663450137, "grad_norm": 0.004222205840051174, "learning_rate": 1.506685490194148e-06, "loss": 0.0005, "step": 188710 }, { "epoch": 1.5432800425236128, "grad_norm": 0.042994074523448944, "learning_rate": 1.506174957195849e-06, "loss": 0.0007, "step": 188720 }, { "epoch": 1.543361818702212, "grad_norm": 0.047553543001413345, "learning_rate": 1.5056644953696042e-06, "loss": 0.0007, "step": 188730 }, { "epoch": 1.5434435948808112, "grad_norm": 0.055731747299432755, "learning_rate": 1.5051541047258161e-06, "loss": 0.0004, "step": 188740 }, { "epoch": 1.5435253710594103, "grad_norm": 0.012775130569934845, "learning_rate": 1.504643785274879e-06, "loss": 0.0004, "step": 188750 }, { "epoch": 1.5436071472380095, "grad_norm": 0.04472856968641281, "learning_rate": 1.5041335370271893e-06, "loss": 0.0009, "step": 188760 }, { "epoch": 1.543688923416609, "grad_norm": 0.04761165380477905, "learning_rate": 1.503623359993141e-06, "loss": 0.0005, "step": 188770 }, { "epoch": 1.5437706995952079, "grad_norm": 0.013592214323580265, "learning_rate": 1.5031132541831273e-06, "loss": 0.0012, "step": 188780 }, { "epoch": 1.5438524757738072, "grad_norm": 0.045076772570610046, "learning_rate": 1.5026032196075386e-06, "loss": 0.0011, "step": 188790 }, { "epoch": 1.5439342519524062, "grad_norm": 0.04436032101511955, "learning_rate": 1.502093256276765e-06, "loss": 0.0005, "step": 188800 }, { "epoch": 1.5440160281310056, "grad_norm": 0.020875127986073494, "learning_rate": 1.5015833642011934e-06, "loss": 0.0004, "step": 188810 }, { "epoch": 1.5440978043096045, "grad_norm": 0.08194471150636673, "learning_rate": 1.5010735433912144e-06, "loss": 0.0006, "step": 188820 }, { "epoch": 1.544179580488204, "grad_norm": 0.01933235488831997, "learning_rate": 1.500563793857211e-06, "loss": 0.0005, "step": 188830 }, { "epoch": 1.5442613566668029, "grad_norm": 0.11732885986566544, "learning_rate": 1.5000541156095676e-06, "loss": 0.0016, "step": 188840 }, { "epoch": 1.5443431328454023, "grad_norm": 0.053463567048311234, "learning_rate": 1.4995445086586658e-06, "loss": 0.0008, "step": 188850 }, { "epoch": 1.5444249090240012, "grad_norm": 0.03861883655190468, "learning_rate": 1.4990349730148884e-06, "loss": 0.0011, "step": 188860 }, { "epoch": 1.5445066852026006, "grad_norm": 0.044463180005550385, "learning_rate": 1.4985255086886153e-06, "loss": 0.0006, "step": 188870 }, { "epoch": 1.5445884613811995, "grad_norm": 0.018156109377741814, "learning_rate": 1.4980161156902228e-06, "loss": 0.0006, "step": 188880 }, { "epoch": 1.544670237559799, "grad_norm": 0.06907843053340912, "learning_rate": 1.497506794030089e-06, "loss": 0.0015, "step": 188890 }, { "epoch": 1.5447520137383979, "grad_norm": 0.009230022318661213, "learning_rate": 1.4969975437185886e-06, "loss": 0.0005, "step": 188900 }, { "epoch": 1.5448337899169973, "grad_norm": 0.02361799217760563, "learning_rate": 1.4964883647660961e-06, "loss": 0.0005, "step": 188910 }, { "epoch": 1.5449155660955962, "grad_norm": 0.026199346408247948, "learning_rate": 1.4959792571829828e-06, "loss": 0.0005, "step": 188920 }, { "epoch": 1.5449973422741956, "grad_norm": 0.028061777353286743, "learning_rate": 1.4954702209796191e-06, "loss": 0.0005, "step": 188930 }, { "epoch": 1.5450791184527946, "grad_norm": 0.026664914563298225, "learning_rate": 1.4949612561663767e-06, "loss": 0.0008, "step": 188940 }, { "epoch": 1.545160894631394, "grad_norm": 0.022902077063918114, "learning_rate": 1.4944523627536228e-06, "loss": 0.0008, "step": 188950 }, { "epoch": 1.545242670809993, "grad_norm": 0.0018528072396293283, "learning_rate": 1.493943540751724e-06, "loss": 0.0003, "step": 188960 }, { "epoch": 1.5453244469885923, "grad_norm": 0.12674738466739655, "learning_rate": 1.4934347901710432e-06, "loss": 0.001, "step": 188970 }, { "epoch": 1.5454062231671915, "grad_norm": 0.021414978429675102, "learning_rate": 1.4929261110219478e-06, "loss": 0.0005, "step": 188980 }, { "epoch": 1.5454879993457906, "grad_norm": 0.031500231474637985, "learning_rate": 1.492417503314798e-06, "loss": 0.0008, "step": 188990 }, { "epoch": 1.5455697755243898, "grad_norm": 0.06187313050031662, "learning_rate": 1.4919089670599552e-06, "loss": 0.0007, "step": 189000 }, { "epoch": 1.545651551702989, "grad_norm": 0.035924363881349564, "learning_rate": 1.4914005022677773e-06, "loss": 0.0011, "step": 189010 }, { "epoch": 1.5457333278815881, "grad_norm": 0.037685003131628036, "learning_rate": 1.490892108948624e-06, "loss": 0.0006, "step": 189020 }, { "epoch": 1.5458151040601873, "grad_norm": 0.01115280482918024, "learning_rate": 1.4903837871128507e-06, "loss": 0.0007, "step": 189030 }, { "epoch": 1.5458968802387865, "grad_norm": 0.08489689975976944, "learning_rate": 1.4898755367708124e-06, "loss": 0.0023, "step": 189040 }, { "epoch": 1.5459786564173856, "grad_norm": 0.07566455006599426, "learning_rate": 1.4893673579328604e-06, "loss": 0.0005, "step": 189050 }, { "epoch": 1.5460604325959848, "grad_norm": 0.032600581645965576, "learning_rate": 1.4888592506093513e-06, "loss": 0.0008, "step": 189060 }, { "epoch": 1.546142208774584, "grad_norm": 0.047939497977495193, "learning_rate": 1.4883512148106326e-06, "loss": 0.0005, "step": 189070 }, { "epoch": 1.5462239849531831, "grad_norm": 0.003210347844287753, "learning_rate": 1.4878432505470547e-06, "loss": 0.0006, "step": 189080 }, { "epoch": 1.5463057611317823, "grad_norm": 0.036070603877305984, "learning_rate": 1.487335357828963e-06, "loss": 0.0006, "step": 189090 }, { "epoch": 1.5463875373103815, "grad_norm": 0.035947613418102264, "learning_rate": 1.4868275366667062e-06, "loss": 0.0008, "step": 189100 }, { "epoch": 1.5464693134889806, "grad_norm": 0.022498155012726784, "learning_rate": 1.4863197870706285e-06, "loss": 0.0003, "step": 189110 }, { "epoch": 1.5465510896675798, "grad_norm": 0.036287449300289154, "learning_rate": 1.485812109051073e-06, "loss": 0.0007, "step": 189120 }, { "epoch": 1.546632865846179, "grad_norm": 0.024981433525681496, "learning_rate": 1.485304502618381e-06, "loss": 0.0007, "step": 189130 }, { "epoch": 1.5467146420247782, "grad_norm": 0.027417544275522232, "learning_rate": 1.4847969677828934e-06, "loss": 0.0005, "step": 189140 }, { "epoch": 1.5467964182033773, "grad_norm": 0.05041864514350891, "learning_rate": 1.4842895045549488e-06, "loss": 0.0003, "step": 189150 }, { "epoch": 1.5468781943819765, "grad_norm": 0.02027156762778759, "learning_rate": 1.4837821129448847e-06, "loss": 0.0005, "step": 189160 }, { "epoch": 1.5469599705605757, "grad_norm": 0.022107992321252823, "learning_rate": 1.4832747929630353e-06, "loss": 0.0015, "step": 189170 }, { "epoch": 1.5470417467391748, "grad_norm": 0.008065606467425823, "learning_rate": 1.4827675446197387e-06, "loss": 0.0013, "step": 189180 }, { "epoch": 1.547123522917774, "grad_norm": 0.06801832467317581, "learning_rate": 1.4822603679253262e-06, "loss": 0.0004, "step": 189190 }, { "epoch": 1.5472052990963734, "grad_norm": 0.02946067973971367, "learning_rate": 1.4817532628901294e-06, "loss": 0.0006, "step": 189200 }, { "epoch": 1.5472870752749723, "grad_norm": 0.05714823305606842, "learning_rate": 1.4812462295244766e-06, "loss": 0.0008, "step": 189210 }, { "epoch": 1.5473688514535717, "grad_norm": 0.030093679204583168, "learning_rate": 1.4807392678387e-06, "loss": 0.0016, "step": 189220 }, { "epoch": 1.5474506276321707, "grad_norm": 0.0026315958239138126, "learning_rate": 1.4802323778431254e-06, "loss": 0.0004, "step": 189230 }, { "epoch": 1.54753240381077, "grad_norm": 0.030314192175865173, "learning_rate": 1.479725559548078e-06, "loss": 0.0006, "step": 189240 }, { "epoch": 1.547614179989369, "grad_norm": 0.005938421469181776, "learning_rate": 1.4792188129638824e-06, "loss": 0.0011, "step": 189250 }, { "epoch": 1.5476959561679684, "grad_norm": 0.2164323478937149, "learning_rate": 1.4787121381008612e-06, "loss": 0.0006, "step": 189260 }, { "epoch": 1.5477777323465673, "grad_norm": 0.0357903353869915, "learning_rate": 1.4782055349693363e-06, "loss": 0.0006, "step": 189270 }, { "epoch": 1.5478595085251667, "grad_norm": 0.03758878633379936, "learning_rate": 1.477699003579627e-06, "loss": 0.0006, "step": 189280 }, { "epoch": 1.5479412847037657, "grad_norm": 0.02279398962855339, "learning_rate": 1.477192543942051e-06, "loss": 0.0006, "step": 189290 }, { "epoch": 1.548023060882365, "grad_norm": 0.07310453802347183, "learning_rate": 1.4766861560669272e-06, "loss": 0.0004, "step": 189300 }, { "epoch": 1.548104837060964, "grad_norm": 0.0008224420598708093, "learning_rate": 1.476179839964571e-06, "loss": 0.0009, "step": 189310 }, { "epoch": 1.5481866132395634, "grad_norm": 0.03962711617350578, "learning_rate": 1.4756735956452955e-06, "loss": 0.001, "step": 189320 }, { "epoch": 1.5482683894181624, "grad_norm": 0.015107509680092335, "learning_rate": 1.475167423119412e-06, "loss": 0.0012, "step": 189330 }, { "epoch": 1.5483501655967618, "grad_norm": 0.011046893894672394, "learning_rate": 1.4746613223972344e-06, "loss": 0.0006, "step": 189340 }, { "epoch": 1.5484319417753607, "grad_norm": 0.010444123297929764, "learning_rate": 1.4741552934890719e-06, "loss": 0.0004, "step": 189350 }, { "epoch": 1.54851371795396, "grad_norm": 0.03878447413444519, "learning_rate": 1.4736493364052318e-06, "loss": 0.0007, "step": 189360 }, { "epoch": 1.548595494132559, "grad_norm": 0.043235260993242264, "learning_rate": 1.473143451156021e-06, "loss": 0.0005, "step": 189370 }, { "epoch": 1.5486772703111584, "grad_norm": 0.03817184269428253, "learning_rate": 1.4726376377517449e-06, "loss": 0.0011, "step": 189380 }, { "epoch": 1.5487590464897574, "grad_norm": 0.013119553215801716, "learning_rate": 1.472131896202707e-06, "loss": 0.0012, "step": 189390 }, { "epoch": 1.5488408226683568, "grad_norm": 0.07626577466726303, "learning_rate": 1.4716262265192105e-06, "loss": 0.0003, "step": 189400 }, { "epoch": 1.548922598846956, "grad_norm": 0.08989464491605759, "learning_rate": 1.4711206287115537e-06, "loss": 0.0009, "step": 189410 }, { "epoch": 1.549004375025555, "grad_norm": 0.043258387595415115, "learning_rate": 1.47061510279004e-06, "loss": 0.001, "step": 189420 }, { "epoch": 1.5490861512041543, "grad_norm": 0.03344954177737236, "learning_rate": 1.4701096487649652e-06, "loss": 0.0007, "step": 189430 }, { "epoch": 1.5491679273827534, "grad_norm": 0.02409488335251808, "learning_rate": 1.4696042666466265e-06, "loss": 0.0004, "step": 189440 }, { "epoch": 1.5492497035613526, "grad_norm": 0.051546402275562286, "learning_rate": 1.4690989564453168e-06, "loss": 0.0006, "step": 189450 }, { "epoch": 1.5493314797399518, "grad_norm": 0.03416420891880989, "learning_rate": 1.4685937181713328e-06, "loss": 0.0009, "step": 189460 }, { "epoch": 1.549413255918551, "grad_norm": 0.00653121480718255, "learning_rate": 1.4680885518349653e-06, "loss": 0.0006, "step": 189470 }, { "epoch": 1.5494950320971501, "grad_norm": 0.018010331317782402, "learning_rate": 1.4675834574465053e-06, "loss": 0.0004, "step": 189480 }, { "epoch": 1.5495768082757493, "grad_norm": 0.07407990097999573, "learning_rate": 1.467078435016241e-06, "loss": 0.0008, "step": 189490 }, { "epoch": 1.5496585844543485, "grad_norm": 0.03631261736154556, "learning_rate": 1.466573484554461e-06, "loss": 0.001, "step": 189500 }, { "epoch": 1.5497403606329476, "grad_norm": 0.0579809695482254, "learning_rate": 1.4660686060714513e-06, "loss": 0.001, "step": 189510 }, { "epoch": 1.5498221368115468, "grad_norm": 0.020067350938916206, "learning_rate": 1.4655637995774963e-06, "loss": 0.0006, "step": 189520 }, { "epoch": 1.549903912990146, "grad_norm": 0.02875574491918087, "learning_rate": 1.4650590650828784e-06, "loss": 0.0008, "step": 189530 }, { "epoch": 1.5499856891687451, "grad_norm": 0.09173568338155746, "learning_rate": 1.464554402597882e-06, "loss": 0.0006, "step": 189540 }, { "epoch": 1.5500674653473443, "grad_norm": 0.005919177550822496, "learning_rate": 1.4640498121327861e-06, "loss": 0.0006, "step": 189550 }, { "epoch": 1.5501492415259435, "grad_norm": 0.03996127471327782, "learning_rate": 1.46354529369787e-06, "loss": 0.0006, "step": 189560 }, { "epoch": 1.5502310177045426, "grad_norm": 0.008271632716059685, "learning_rate": 1.4630408473034092e-06, "loss": 0.0012, "step": 189570 }, { "epoch": 1.5503127938831418, "grad_norm": 0.05592312663793564, "learning_rate": 1.4625364729596825e-06, "loss": 0.0006, "step": 189580 }, { "epoch": 1.550394570061741, "grad_norm": 0.06406192481517792, "learning_rate": 1.4620321706769635e-06, "loss": 0.0007, "step": 189590 }, { "epoch": 1.5504763462403401, "grad_norm": 0.03127354755997658, "learning_rate": 1.4615279404655253e-06, "loss": 0.0005, "step": 189600 }, { "epoch": 1.5505581224189393, "grad_norm": 0.05224846675992012, "learning_rate": 1.4610237823356389e-06, "loss": 0.001, "step": 189610 }, { "epoch": 1.5506398985975385, "grad_norm": 0.005715933628380299, "learning_rate": 1.4605196962975749e-06, "loss": 0.0009, "step": 189620 }, { "epoch": 1.5507216747761379, "grad_norm": 0.0022014251444488764, "learning_rate": 1.4600156823616013e-06, "loss": 0.0004, "step": 189630 }, { "epoch": 1.5508034509547368, "grad_norm": 0.02155567891895771, "learning_rate": 1.459511740537986e-06, "loss": 0.0007, "step": 189640 }, { "epoch": 1.5508852271333362, "grad_norm": 0.0025026118382811546, "learning_rate": 1.4590078708369925e-06, "loss": 0.0004, "step": 189650 }, { "epoch": 1.5509670033119352, "grad_norm": 0.01051743421703577, "learning_rate": 1.4585040732688889e-06, "loss": 0.0005, "step": 189660 }, { "epoch": 1.5510487794905345, "grad_norm": 0.07147514820098877, "learning_rate": 1.458000347843936e-06, "loss": 0.0007, "step": 189670 }, { "epoch": 1.5511305556691335, "grad_norm": 0.01624240353703499, "learning_rate": 1.4574966945723945e-06, "loss": 0.0008, "step": 189680 }, { "epoch": 1.5512123318477329, "grad_norm": 0.08457507193088531, "learning_rate": 1.4569931134645238e-06, "loss": 0.0005, "step": 189690 }, { "epoch": 1.5512941080263318, "grad_norm": 0.023590099066495895, "learning_rate": 1.4564896045305848e-06, "loss": 0.0006, "step": 189700 }, { "epoch": 1.5513758842049312, "grad_norm": 0.01258749421685934, "learning_rate": 1.455986167780833e-06, "loss": 0.0006, "step": 189710 }, { "epoch": 1.5514576603835302, "grad_norm": 0.040384694933891296, "learning_rate": 1.4554828032255236e-06, "loss": 0.0007, "step": 189720 }, { "epoch": 1.5515394365621296, "grad_norm": 0.041932884603738785, "learning_rate": 1.4549795108749103e-06, "loss": 0.0008, "step": 189730 }, { "epoch": 1.5516212127407285, "grad_norm": 0.06934134662151337, "learning_rate": 1.4544762907392463e-06, "loss": 0.0006, "step": 189740 }, { "epoch": 1.551702988919328, "grad_norm": 0.03545065596699715, "learning_rate": 1.4539731428287817e-06, "loss": 0.0004, "step": 189750 }, { "epoch": 1.5517847650979268, "grad_norm": 0.0047428058460354805, "learning_rate": 1.453470067153767e-06, "loss": 0.0005, "step": 189760 }, { "epoch": 1.5518665412765262, "grad_norm": 0.03866306692361832, "learning_rate": 1.452967063724448e-06, "loss": 0.0009, "step": 189770 }, { "epoch": 1.5519483174551252, "grad_norm": 0.0019214639905840158, "learning_rate": 1.4524641325510752e-06, "loss": 0.001, "step": 189780 }, { "epoch": 1.5520300936337246, "grad_norm": 0.13993333280086517, "learning_rate": 1.451961273643891e-06, "loss": 0.0038, "step": 189790 }, { "epoch": 1.5521118698123235, "grad_norm": 0.040697406977415085, "learning_rate": 1.4514584870131399e-06, "loss": 0.0004, "step": 189800 }, { "epoch": 1.552193645990923, "grad_norm": 0.04360903054475784, "learning_rate": 1.450955772669062e-06, "loss": 0.0009, "step": 189810 }, { "epoch": 1.5522754221695219, "grad_norm": 0.027698587626218796, "learning_rate": 1.4504531306219017e-06, "loss": 0.0005, "step": 189820 }, { "epoch": 1.5523571983481212, "grad_norm": 0.003210904076695442, "learning_rate": 1.4499505608818959e-06, "loss": 0.0012, "step": 189830 }, { "epoch": 1.5524389745267204, "grad_norm": 0.09905202686786652, "learning_rate": 1.4494480634592833e-06, "loss": 0.0011, "step": 189840 }, { "epoch": 1.5525207507053196, "grad_norm": 0.028540659695863724, "learning_rate": 1.448945638364299e-06, "loss": 0.001, "step": 189850 }, { "epoch": 1.5526025268839188, "grad_norm": 0.009411954320967197, "learning_rate": 1.4484432856071794e-06, "loss": 0.0005, "step": 189860 }, { "epoch": 1.552684303062518, "grad_norm": 0.014179540798068047, "learning_rate": 1.4479410051981563e-06, "loss": 0.0009, "step": 189870 }, { "epoch": 1.552766079241117, "grad_norm": 0.03698158264160156, "learning_rate": 1.4474387971474624e-06, "loss": 0.0006, "step": 189880 }, { "epoch": 1.5528478554197163, "grad_norm": 0.03322747349739075, "learning_rate": 1.4469366614653262e-06, "loss": 0.0004, "step": 189890 }, { "epoch": 1.5529296315983154, "grad_norm": 0.023623937740921974, "learning_rate": 1.4464345981619798e-06, "loss": 0.0008, "step": 189900 }, { "epoch": 1.5530114077769146, "grad_norm": 0.07219207286834717, "learning_rate": 1.4459326072476494e-06, "loss": 0.0007, "step": 189910 }, { "epoch": 1.5530931839555138, "grad_norm": 0.099906325340271, "learning_rate": 1.4454306887325604e-06, "loss": 0.0009, "step": 189920 }, { "epoch": 1.553174960134113, "grad_norm": 0.03335368633270264, "learning_rate": 1.4449288426269365e-06, "loss": 0.0013, "step": 189930 }, { "epoch": 1.553256736312712, "grad_norm": 0.033111199736595154, "learning_rate": 1.4444270689410028e-06, "loss": 0.0005, "step": 189940 }, { "epoch": 1.5533385124913113, "grad_norm": 0.2111949920654297, "learning_rate": 1.4439253676849802e-06, "loss": 0.0007, "step": 189950 }, { "epoch": 1.5534202886699104, "grad_norm": 0.019076375290751457, "learning_rate": 1.4434237388690881e-06, "loss": 0.0004, "step": 189960 }, { "epoch": 1.5535020648485096, "grad_norm": 0.0951160192489624, "learning_rate": 1.4429221825035456e-06, "loss": 0.0011, "step": 189970 }, { "epoch": 1.5535838410271088, "grad_norm": 0.024485236033797264, "learning_rate": 1.442420698598569e-06, "loss": 0.0003, "step": 189980 }, { "epoch": 1.553665617205708, "grad_norm": 0.02254617214202881, "learning_rate": 1.441919287164375e-06, "loss": 0.0007, "step": 189990 }, { "epoch": 1.5537473933843071, "grad_norm": 0.0007368324440903962, "learning_rate": 1.4414179482111774e-06, "loss": 0.0006, "step": 190000 }, { "epoch": 1.5538291695629063, "grad_norm": 0.015285850502550602, "learning_rate": 1.4409166817491872e-06, "loss": 0.0009, "step": 190010 }, { "epoch": 1.5539109457415055, "grad_norm": 0.060101646929979324, "learning_rate": 1.4404154877886183e-06, "loss": 0.001, "step": 190020 }, { "epoch": 1.5539927219201046, "grad_norm": 0.1423477679491043, "learning_rate": 1.4399143663396793e-06, "loss": 0.0013, "step": 190030 }, { "epoch": 1.5540744980987038, "grad_norm": 0.0498436763882637, "learning_rate": 1.439413317412579e-06, "loss": 0.001, "step": 190040 }, { "epoch": 1.554156274277303, "grad_norm": 0.027508176863193512, "learning_rate": 1.4389123410175216e-06, "loss": 0.0028, "step": 190050 }, { "epoch": 1.5542380504559024, "grad_norm": 0.027186907827854156, "learning_rate": 1.4384114371647163e-06, "loss": 0.0004, "step": 190060 }, { "epoch": 1.5543198266345013, "grad_norm": 0.03903381526470184, "learning_rate": 1.4379106058643644e-06, "loss": 0.001, "step": 190070 }, { "epoch": 1.5544016028131007, "grad_norm": 0.03108966164290905, "learning_rate": 1.4374098471266696e-06, "loss": 0.0005, "step": 190080 }, { "epoch": 1.5544833789916996, "grad_norm": 0.04763299971818924, "learning_rate": 1.4369091609618313e-06, "loss": 0.0008, "step": 190090 }, { "epoch": 1.554565155170299, "grad_norm": 0.06815217435359955, "learning_rate": 1.4364085473800505e-06, "loss": 0.0006, "step": 190100 }, { "epoch": 1.554646931348898, "grad_norm": 0.14399923384189606, "learning_rate": 1.4359080063915237e-06, "loss": 0.0014, "step": 190110 }, { "epoch": 1.5547287075274974, "grad_norm": 0.059871282428503036, "learning_rate": 1.4354075380064475e-06, "loss": 0.0012, "step": 190120 }, { "epoch": 1.5548104837060963, "grad_norm": 0.02956586331129074, "learning_rate": 1.4349071422350164e-06, "loss": 0.0005, "step": 190130 }, { "epoch": 1.5548922598846957, "grad_norm": 0.008576491847634315, "learning_rate": 1.4344068190874255e-06, "loss": 0.0014, "step": 190140 }, { "epoch": 1.5549740360632947, "grad_norm": 0.047411076724529266, "learning_rate": 1.4339065685738668e-06, "loss": 0.0005, "step": 190150 }, { "epoch": 1.555055812241894, "grad_norm": 0.05488986149430275, "learning_rate": 1.4334063907045292e-06, "loss": 0.0017, "step": 190160 }, { "epoch": 1.555137588420493, "grad_norm": 0.06073569506406784, "learning_rate": 1.4329062854896032e-06, "loss": 0.0007, "step": 190170 }, { "epoch": 1.5552193645990924, "grad_norm": 0.0026402848307043314, "learning_rate": 1.4324062529392734e-06, "loss": 0.0008, "step": 190180 }, { "epoch": 1.5553011407776913, "grad_norm": 0.13858146965503693, "learning_rate": 1.43190629306373e-06, "loss": 0.0005, "step": 190190 }, { "epoch": 1.5553829169562907, "grad_norm": 0.059507593512535095, "learning_rate": 1.4314064058731559e-06, "loss": 0.0008, "step": 190200 }, { "epoch": 1.5554646931348897, "grad_norm": 0.001566179795190692, "learning_rate": 1.4309065913777337e-06, "loss": 0.0007, "step": 190210 }, { "epoch": 1.555546469313489, "grad_norm": 0.02384587563574314, "learning_rate": 1.430406849587645e-06, "loss": 0.0004, "step": 190220 }, { "epoch": 1.555628245492088, "grad_norm": 0.007010379806160927, "learning_rate": 1.429907180513071e-06, "loss": 0.0003, "step": 190230 }, { "epoch": 1.5557100216706874, "grad_norm": 0.010296386666595936, "learning_rate": 1.429407584164189e-06, "loss": 0.0019, "step": 190240 }, { "epoch": 1.5557917978492863, "grad_norm": 0.0013608806766569614, "learning_rate": 1.4289080605511775e-06, "loss": 0.0015, "step": 190250 }, { "epoch": 1.5558735740278857, "grad_norm": 0.021957430988550186, "learning_rate": 1.4284086096842099e-06, "loss": 0.0014, "step": 190260 }, { "epoch": 1.555955350206485, "grad_norm": 0.009536050260066986, "learning_rate": 1.4279092315734634e-06, "loss": 0.0004, "step": 190270 }, { "epoch": 1.556037126385084, "grad_norm": 0.002463081618770957, "learning_rate": 1.4274099262291092e-06, "loss": 0.0004, "step": 190280 }, { "epoch": 1.5561189025636832, "grad_norm": 0.1706496924161911, "learning_rate": 1.4269106936613186e-06, "loss": 0.0008, "step": 190290 }, { "epoch": 1.5562006787422824, "grad_norm": 0.030653871595859528, "learning_rate": 1.4264115338802604e-06, "loss": 0.0013, "step": 190300 }, { "epoch": 1.5562824549208816, "grad_norm": 0.02604496106505394, "learning_rate": 1.4259124468961056e-06, "loss": 0.0005, "step": 190310 }, { "epoch": 1.5563642310994807, "grad_norm": 0.053497135639190674, "learning_rate": 1.425413432719019e-06, "loss": 0.0007, "step": 190320 }, { "epoch": 1.55644600727808, "grad_norm": 0.027849124744534492, "learning_rate": 1.4249144913591668e-06, "loss": 0.0006, "step": 190330 }, { "epoch": 1.556527783456679, "grad_norm": 0.035361334681510925, "learning_rate": 1.424415622826712e-06, "loss": 0.0007, "step": 190340 }, { "epoch": 1.5566095596352783, "grad_norm": 0.03626831993460655, "learning_rate": 1.4239168271318176e-06, "loss": 0.0009, "step": 190350 }, { "epoch": 1.5566913358138774, "grad_norm": 0.008154178038239479, "learning_rate": 1.423418104284644e-06, "loss": 0.0006, "step": 190360 }, { "epoch": 1.5567731119924766, "grad_norm": 0.05269763618707657, "learning_rate": 1.422919454295351e-06, "loss": 0.0007, "step": 190370 }, { "epoch": 1.5568548881710758, "grad_norm": 0.006949962116777897, "learning_rate": 1.4224208771740949e-06, "loss": 0.0005, "step": 190380 }, { "epoch": 1.556936664349675, "grad_norm": 0.023137349635362625, "learning_rate": 1.421922372931035e-06, "loss": 0.0005, "step": 190390 }, { "epoch": 1.557018440528274, "grad_norm": 0.03271130099892616, "learning_rate": 1.4214239415763248e-06, "loss": 0.0006, "step": 190400 }, { "epoch": 1.5571002167068733, "grad_norm": 0.002077654469758272, "learning_rate": 1.4209255831201175e-06, "loss": 0.0008, "step": 190410 }, { "epoch": 1.5571819928854724, "grad_norm": 0.038836486637592316, "learning_rate": 1.4204272975725642e-06, "loss": 0.0007, "step": 190420 }, { "epoch": 1.5572637690640716, "grad_norm": 0.07562915235757828, "learning_rate": 1.4199290849438175e-06, "loss": 0.0008, "step": 190430 }, { "epoch": 1.5573455452426708, "grad_norm": 0.11603247374296188, "learning_rate": 1.4194309452440251e-06, "loss": 0.0014, "step": 190440 }, { "epoch": 1.55742732142127, "grad_norm": 0.003861566772684455, "learning_rate": 1.4189328784833356e-06, "loss": 0.0013, "step": 190450 }, { "epoch": 1.557509097599869, "grad_norm": 0.03392762318253517, "learning_rate": 1.4184348846718938e-06, "loss": 0.0008, "step": 190460 }, { "epoch": 1.5575908737784683, "grad_norm": 0.03239808231592178, "learning_rate": 1.4179369638198448e-06, "loss": 0.0014, "step": 190470 }, { "epoch": 1.5576726499570674, "grad_norm": 0.0025351853109896183, "learning_rate": 1.4174391159373312e-06, "loss": 0.0006, "step": 190480 }, { "epoch": 1.5577544261356668, "grad_norm": 0.01908976398408413, "learning_rate": 1.416941341034495e-06, "loss": 0.0005, "step": 190490 }, { "epoch": 1.5578362023142658, "grad_norm": 0.06903546303510666, "learning_rate": 1.4164436391214748e-06, "loss": 0.001, "step": 190500 }, { "epoch": 1.5579179784928652, "grad_norm": 0.044233668595552444, "learning_rate": 1.4159460102084115e-06, "loss": 0.0005, "step": 190510 }, { "epoch": 1.5579997546714641, "grad_norm": 0.025483403354883194, "learning_rate": 1.415448454305442e-06, "loss": 0.0007, "step": 190520 }, { "epoch": 1.5580815308500635, "grad_norm": 0.0008263870258815587, "learning_rate": 1.4149509714227005e-06, "loss": 0.0006, "step": 190530 }, { "epoch": 1.5581633070286625, "grad_norm": 0.013335129246115685, "learning_rate": 1.4144535615703208e-06, "loss": 0.0004, "step": 190540 }, { "epoch": 1.5582450832072618, "grad_norm": 0.0019923816435039043, "learning_rate": 1.4139562247584377e-06, "loss": 0.0003, "step": 190550 }, { "epoch": 1.5583268593858608, "grad_norm": 0.0193956196308136, "learning_rate": 1.4134589609971817e-06, "loss": 0.0008, "step": 190560 }, { "epoch": 1.5584086355644602, "grad_norm": 0.0038626142777502537, "learning_rate": 1.4129617702966814e-06, "loss": 0.0008, "step": 190570 }, { "epoch": 1.5584904117430591, "grad_norm": 0.046982601284980774, "learning_rate": 1.4124646526670655e-06, "loss": 0.0009, "step": 190580 }, { "epoch": 1.5585721879216585, "grad_norm": 0.050537336617708206, "learning_rate": 1.4119676081184607e-06, "loss": 0.0008, "step": 190590 }, { "epoch": 1.5586539641002575, "grad_norm": 0.01766463741660118, "learning_rate": 1.4114706366609927e-06, "loss": 0.0027, "step": 190600 }, { "epoch": 1.5587357402788569, "grad_norm": 0.00850734580308199, "learning_rate": 1.4109737383047844e-06, "loss": 0.0009, "step": 190610 }, { "epoch": 1.5588175164574558, "grad_norm": 0.019611189141869545, "learning_rate": 1.4104769130599571e-06, "loss": 0.001, "step": 190620 }, { "epoch": 1.5588992926360552, "grad_norm": 0.040732577443122864, "learning_rate": 1.4099801609366337e-06, "loss": 0.0008, "step": 190630 }, { "epoch": 1.5589810688146541, "grad_norm": 0.03904407098889351, "learning_rate": 1.4094834819449331e-06, "loss": 0.0007, "step": 190640 }, { "epoch": 1.5590628449932535, "grad_norm": 0.00984843261539936, "learning_rate": 1.4089868760949727e-06, "loss": 0.0006, "step": 190650 }, { "epoch": 1.5591446211718525, "grad_norm": 0.022229956462979317, "learning_rate": 1.408490343396866e-06, "loss": 0.0008, "step": 190660 }, { "epoch": 1.5592263973504519, "grad_norm": 0.08358798176050186, "learning_rate": 1.4079938838607327e-06, "loss": 0.0007, "step": 190670 }, { "epoch": 1.5593081735290508, "grad_norm": 0.018673937767744064, "learning_rate": 1.4074974974966838e-06, "loss": 0.0005, "step": 190680 }, { "epoch": 1.5593899497076502, "grad_norm": 0.06430108100175858, "learning_rate": 1.4070011843148306e-06, "loss": 0.0007, "step": 190690 }, { "epoch": 1.5594717258862494, "grad_norm": 0.18868881464004517, "learning_rate": 1.4065049443252843e-06, "loss": 0.0008, "step": 190700 }, { "epoch": 1.5595535020648486, "grad_norm": 0.011100475676357746, "learning_rate": 1.4060087775381526e-06, "loss": 0.001, "step": 190710 }, { "epoch": 1.5596352782434477, "grad_norm": 0.05773460865020752, "learning_rate": 1.4055126839635435e-06, "loss": 0.0007, "step": 190720 }, { "epoch": 1.559717054422047, "grad_norm": 0.04581780359148979, "learning_rate": 1.4050166636115632e-06, "loss": 0.0007, "step": 190730 }, { "epoch": 1.559798830600646, "grad_norm": 0.016218313947319984, "learning_rate": 1.4045207164923142e-06, "loss": 0.0007, "step": 190740 }, { "epoch": 1.5598806067792452, "grad_norm": 0.008218925446271896, "learning_rate": 1.4040248426159015e-06, "loss": 0.0009, "step": 190750 }, { "epoch": 1.5599623829578444, "grad_norm": 0.002892784308642149, "learning_rate": 1.4035290419924263e-06, "loss": 0.0008, "step": 190760 }, { "epoch": 1.5600441591364436, "grad_norm": 0.03475116565823555, "learning_rate": 1.4030333146319879e-06, "loss": 0.0005, "step": 190770 }, { "epoch": 1.5601259353150427, "grad_norm": 0.008496041409671307, "learning_rate": 1.4025376605446828e-06, "loss": 0.0019, "step": 190780 }, { "epoch": 1.560207711493642, "grad_norm": 0.030437281355261803, "learning_rate": 1.4020420797406115e-06, "loss": 0.0004, "step": 190790 }, { "epoch": 1.560289487672241, "grad_norm": 0.08397120237350464, "learning_rate": 1.4015465722298676e-06, "loss": 0.0019, "step": 190800 }, { "epoch": 1.5603712638508402, "grad_norm": 0.01696220226585865, "learning_rate": 1.401051138022545e-06, "loss": 0.0007, "step": 190810 }, { "epoch": 1.5604530400294394, "grad_norm": 0.03190282732248306, "learning_rate": 1.4005557771287358e-06, "loss": 0.0004, "step": 190820 }, { "epoch": 1.5605348162080386, "grad_norm": 0.02122025564312935, "learning_rate": 1.4000604895585317e-06, "loss": 0.0009, "step": 190830 }, { "epoch": 1.5606165923866377, "grad_norm": 0.01564371958374977, "learning_rate": 1.3995652753220212e-06, "loss": 0.0003, "step": 190840 }, { "epoch": 1.560698368565237, "grad_norm": 0.07526000589132309, "learning_rate": 1.3990701344292928e-06, "loss": 0.0011, "step": 190850 }, { "epoch": 1.560780144743836, "grad_norm": 0.041569195687770844, "learning_rate": 1.3985750668904307e-06, "loss": 0.0004, "step": 190860 }, { "epoch": 1.5608619209224353, "grad_norm": 0.021039264276623726, "learning_rate": 1.3980800727155242e-06, "loss": 0.0005, "step": 190870 }, { "epoch": 1.5609436971010344, "grad_norm": 0.06806622445583344, "learning_rate": 1.397585151914654e-06, "loss": 0.0004, "step": 190880 }, { "epoch": 1.5610254732796336, "grad_norm": 0.039717335253953934, "learning_rate": 1.3970903044979028e-06, "loss": 0.0006, "step": 190890 }, { "epoch": 1.561107249458233, "grad_norm": 0.037849925458431244, "learning_rate": 1.3965955304753487e-06, "loss": 0.0029, "step": 190900 }, { "epoch": 1.561189025636832, "grad_norm": 0.006738642696291208, "learning_rate": 1.3961008298570743e-06, "loss": 0.0028, "step": 190910 }, { "epoch": 1.5612708018154313, "grad_norm": 0.04777653142809868, "learning_rate": 1.3956062026531552e-06, "loss": 0.0012, "step": 190920 }, { "epoch": 1.5613525779940303, "grad_norm": 0.11066825687885284, "learning_rate": 1.3951116488736682e-06, "loss": 0.0009, "step": 190930 }, { "epoch": 1.5614343541726297, "grad_norm": 0.0010975911282002926, "learning_rate": 1.3946171685286864e-06, "loss": 0.0004, "step": 190940 }, { "epoch": 1.5615161303512286, "grad_norm": 0.046528562903404236, "learning_rate": 1.3941227616282837e-06, "loss": 0.0004, "step": 190950 }, { "epoch": 1.561597906529828, "grad_norm": 0.0718337744474411, "learning_rate": 1.3936284281825319e-06, "loss": 0.0008, "step": 190960 }, { "epoch": 1.561679682708427, "grad_norm": 0.051458463072776794, "learning_rate": 1.3931341682014997e-06, "loss": 0.0012, "step": 190970 }, { "epoch": 1.5617614588870263, "grad_norm": 0.08551739156246185, "learning_rate": 1.3926399816952551e-06, "loss": 0.0007, "step": 190980 }, { "epoch": 1.5618432350656253, "grad_norm": 0.02970510721206665, "learning_rate": 1.3921458686738676e-06, "loss": 0.0007, "step": 190990 }, { "epoch": 1.5619250112442247, "grad_norm": 0.05503355711698532, "learning_rate": 1.3916518291474013e-06, "loss": 0.0006, "step": 191000 }, { "epoch": 1.5620067874228236, "grad_norm": 0.004954277537763119, "learning_rate": 1.3911578631259204e-06, "loss": 0.0009, "step": 191010 }, { "epoch": 1.562088563601423, "grad_norm": 0.05231337994337082, "learning_rate": 1.3906639706194858e-06, "loss": 0.0007, "step": 191020 }, { "epoch": 1.562170339780022, "grad_norm": 0.013239230029284954, "learning_rate": 1.3901701516381617e-06, "loss": 0.0005, "step": 191030 }, { "epoch": 1.5622521159586213, "grad_norm": 0.03998851031064987, "learning_rate": 1.3896764061920054e-06, "loss": 0.0011, "step": 191040 }, { "epoch": 1.5623338921372203, "grad_norm": 0.0040771616622805595, "learning_rate": 1.3891827342910757e-06, "loss": 0.0003, "step": 191050 }, { "epoch": 1.5624156683158197, "grad_norm": 0.03145737200975418, "learning_rate": 1.3886891359454285e-06, "loss": 0.0007, "step": 191060 }, { "epoch": 1.5624974444944186, "grad_norm": 0.006960420403629541, "learning_rate": 1.388195611165119e-06, "loss": 0.0004, "step": 191070 }, { "epoch": 1.562579220673018, "grad_norm": 0.06318672001361847, "learning_rate": 1.387702159960201e-06, "loss": 0.001, "step": 191080 }, { "epoch": 1.562660996851617, "grad_norm": 0.019634636119008064, "learning_rate": 1.3872087823407255e-06, "loss": 0.0007, "step": 191090 }, { "epoch": 1.5627427730302164, "grad_norm": 0.06967238336801529, "learning_rate": 1.3867154783167431e-06, "loss": 0.0007, "step": 191100 }, { "epoch": 1.5628245492088153, "grad_norm": 0.031155308708548546, "learning_rate": 1.3862222478983046e-06, "loss": 0.0004, "step": 191110 }, { "epoch": 1.5629063253874147, "grad_norm": 0.052326444536447525, "learning_rate": 1.3857290910954556e-06, "loss": 0.0007, "step": 191120 }, { "epoch": 1.5629881015660139, "grad_norm": 0.0654444620013237, "learning_rate": 1.3852360079182436e-06, "loss": 0.0007, "step": 191130 }, { "epoch": 1.563069877744613, "grad_norm": 0.070399209856987, "learning_rate": 1.3847429983767113e-06, "loss": 0.0008, "step": 191140 }, { "epoch": 1.5631516539232122, "grad_norm": 0.04191803187131882, "learning_rate": 1.3842500624809035e-06, "loss": 0.0006, "step": 191150 }, { "epoch": 1.5632334301018114, "grad_norm": 0.018490714952349663, "learning_rate": 1.3837572002408612e-06, "loss": 0.0007, "step": 191160 }, { "epoch": 1.5633152062804105, "grad_norm": 0.0551627092063427, "learning_rate": 1.3832644116666237e-06, "loss": 0.0007, "step": 191170 }, { "epoch": 1.5633969824590097, "grad_norm": 0.03842324763536453, "learning_rate": 1.3827716967682309e-06, "loss": 0.0008, "step": 191180 }, { "epoch": 1.5634787586376089, "grad_norm": 0.01506836898624897, "learning_rate": 1.382279055555718e-06, "loss": 0.0008, "step": 191190 }, { "epoch": 1.563560534816208, "grad_norm": 0.048275742679834366, "learning_rate": 1.3817864880391219e-06, "loss": 0.0005, "step": 191200 }, { "epoch": 1.5636423109948072, "grad_norm": 0.16530238091945648, "learning_rate": 1.381293994228476e-06, "loss": 0.0013, "step": 191210 }, { "epoch": 1.5637240871734064, "grad_norm": 0.020948704332113266, "learning_rate": 1.3808015741338115e-06, "loss": 0.0006, "step": 191220 }, { "epoch": 1.5638058633520056, "grad_norm": 0.1682453155517578, "learning_rate": 1.380309227765162e-06, "loss": 0.0018, "step": 191230 }, { "epoch": 1.5638876395306047, "grad_norm": 0.07173191010951996, "learning_rate": 1.3798169551325557e-06, "loss": 0.0004, "step": 191240 }, { "epoch": 1.563969415709204, "grad_norm": 0.009548394940793514, "learning_rate": 1.3793247562460204e-06, "loss": 0.0006, "step": 191250 }, { "epoch": 1.564051191887803, "grad_norm": 0.017597898840904236, "learning_rate": 1.378832631115582e-06, "loss": 0.0005, "step": 191260 }, { "epoch": 1.5641329680664022, "grad_norm": 0.02089167758822441, "learning_rate": 1.3783405797512677e-06, "loss": 0.0005, "step": 191270 }, { "epoch": 1.5642147442450014, "grad_norm": 0.0007704994059167802, "learning_rate": 1.3778486021630992e-06, "loss": 0.0004, "step": 191280 }, { "epoch": 1.5642965204236006, "grad_norm": 0.049220163375139236, "learning_rate": 1.3773566983610992e-06, "loss": 0.0009, "step": 191290 }, { "epoch": 1.5643782966021997, "grad_norm": 0.09209834784269333, "learning_rate": 1.3768648683552881e-06, "loss": 0.001, "step": 191300 }, { "epoch": 1.564460072780799, "grad_norm": 0.044107917696237564, "learning_rate": 1.3763731121556844e-06, "loss": 0.0008, "step": 191310 }, { "epoch": 1.564541848959398, "grad_norm": 0.007938672788441181, "learning_rate": 1.3758814297723061e-06, "loss": 0.0007, "step": 191320 }, { "epoch": 1.5646236251379975, "grad_norm": 0.03214129060506821, "learning_rate": 1.3753898212151684e-06, "loss": 0.0009, "step": 191330 }, { "epoch": 1.5647054013165964, "grad_norm": 0.023030003532767296, "learning_rate": 1.3748982864942851e-06, "loss": 0.0007, "step": 191340 }, { "epoch": 1.5647871774951958, "grad_norm": 0.02757852151989937, "learning_rate": 1.374406825619672e-06, "loss": 0.0009, "step": 191350 }, { "epoch": 1.5648689536737947, "grad_norm": 0.0352846197783947, "learning_rate": 1.3739154386013387e-06, "loss": 0.0005, "step": 191360 }, { "epoch": 1.5649507298523941, "grad_norm": 0.10442736744880676, "learning_rate": 1.3734241254492953e-06, "loss": 0.0012, "step": 191370 }, { "epoch": 1.565032506030993, "grad_norm": 0.008802160620689392, "learning_rate": 1.3729328861735487e-06, "loss": 0.0005, "step": 191380 }, { "epoch": 1.5651142822095925, "grad_norm": 0.030770568177103996, "learning_rate": 1.372441720784109e-06, "loss": 0.0008, "step": 191390 }, { "epoch": 1.5651960583881914, "grad_norm": 0.043506037443876266, "learning_rate": 1.37195062929098e-06, "loss": 0.0007, "step": 191400 }, { "epoch": 1.5652778345667908, "grad_norm": 0.005339224822819233, "learning_rate": 1.3714596117041663e-06, "loss": 0.0005, "step": 191410 }, { "epoch": 1.5653596107453898, "grad_norm": 0.044423334300518036, "learning_rate": 1.3709686680336686e-06, "loss": 0.0008, "step": 191420 }, { "epoch": 1.5654413869239892, "grad_norm": 0.019480712711811066, "learning_rate": 1.3704777982894902e-06, "loss": 0.0012, "step": 191430 }, { "epoch": 1.565523163102588, "grad_norm": 0.008901688270270824, "learning_rate": 1.3699870024816287e-06, "loss": 0.0006, "step": 191440 }, { "epoch": 1.5656049392811875, "grad_norm": 0.016130032017827034, "learning_rate": 1.3694962806200824e-06, "loss": 0.0009, "step": 191450 }, { "epoch": 1.5656867154597864, "grad_norm": 0.035223882645368576, "learning_rate": 1.3690056327148465e-06, "loss": 0.0036, "step": 191460 }, { "epoch": 1.5657684916383858, "grad_norm": 0.007789603900164366, "learning_rate": 1.368515058775919e-06, "loss": 0.0009, "step": 191470 }, { "epoch": 1.5658502678169848, "grad_norm": 0.0012476291740313172, "learning_rate": 1.3680245588132912e-06, "loss": 0.0008, "step": 191480 }, { "epoch": 1.5659320439955842, "grad_norm": 0.01804317533969879, "learning_rate": 1.3675341328369558e-06, "loss": 0.0004, "step": 191490 }, { "epoch": 1.5660138201741831, "grad_norm": 0.1014007106423378, "learning_rate": 1.3670437808569009e-06, "loss": 0.0007, "step": 191500 }, { "epoch": 1.5660955963527825, "grad_norm": 0.025509823113679886, "learning_rate": 1.366553502883119e-06, "loss": 0.0006, "step": 191510 }, { "epoch": 1.5661773725313815, "grad_norm": 0.03150462731719017, "learning_rate": 1.3660632989255957e-06, "loss": 0.0008, "step": 191520 }, { "epoch": 1.5662591487099808, "grad_norm": 0.022293608635663986, "learning_rate": 1.3655731689943163e-06, "loss": 0.0008, "step": 191530 }, { "epoch": 1.56634092488858, "grad_norm": 0.008286881260573864, "learning_rate": 1.3650831130992658e-06, "loss": 0.0005, "step": 191540 }, { "epoch": 1.5664227010671792, "grad_norm": 0.0009431802318431437, "learning_rate": 1.3645931312504274e-06, "loss": 0.0005, "step": 191550 }, { "epoch": 1.5665044772457783, "grad_norm": 0.011666922830045223, "learning_rate": 1.3641032234577812e-06, "loss": 0.0009, "step": 191560 }, { "epoch": 1.5665862534243775, "grad_norm": 0.07368394732475281, "learning_rate": 1.363613389731308e-06, "loss": 0.0004, "step": 191570 }, { "epoch": 1.5666680296029767, "grad_norm": 0.016752582043409348, "learning_rate": 1.3631236300809847e-06, "loss": 0.0002, "step": 191580 }, { "epoch": 1.5667498057815759, "grad_norm": 0.06264328211545944, "learning_rate": 1.3626339445167907e-06, "loss": 0.0008, "step": 191590 }, { "epoch": 1.566831581960175, "grad_norm": 0.00987380649894476, "learning_rate": 1.3621443330486994e-06, "loss": 0.0005, "step": 191600 }, { "epoch": 1.5669133581387742, "grad_norm": 0.061141420155763626, "learning_rate": 1.3616547956866854e-06, "loss": 0.0007, "step": 191610 }, { "epoch": 1.5669951343173734, "grad_norm": 0.00513525540009141, "learning_rate": 1.3611653324407204e-06, "loss": 0.0003, "step": 191620 }, { "epoch": 1.5670769104959725, "grad_norm": 0.13698767125606537, "learning_rate": 1.360675943320774e-06, "loss": 0.0017, "step": 191630 }, { "epoch": 1.5671586866745717, "grad_norm": 0.06601373106241226, "learning_rate": 1.3601866283368187e-06, "loss": 0.0009, "step": 191640 }, { "epoch": 1.5672404628531709, "grad_norm": 0.020397663116455078, "learning_rate": 1.35969738749882e-06, "loss": 0.0008, "step": 191650 }, { "epoch": 1.56732223903177, "grad_norm": 0.032111022621393204, "learning_rate": 1.3592082208167445e-06, "loss": 0.0007, "step": 191660 }, { "epoch": 1.5674040152103692, "grad_norm": 0.007772575132548809, "learning_rate": 1.3587191283005569e-06, "loss": 0.0009, "step": 191670 }, { "epoch": 1.5674857913889684, "grad_norm": 0.04329730570316315, "learning_rate": 1.3582301099602203e-06, "loss": 0.0012, "step": 191680 }, { "epoch": 1.5675675675675675, "grad_norm": 0.020440198481082916, "learning_rate": 1.3577411658056965e-06, "loss": 0.0006, "step": 191690 }, { "epoch": 1.5676493437461667, "grad_norm": 0.04941706731915474, "learning_rate": 1.3572522958469463e-06, "loss": 0.0005, "step": 191700 }, { "epoch": 1.5677311199247659, "grad_norm": 0.0430326834321022, "learning_rate": 1.3567635000939256e-06, "loss": 0.0006, "step": 191710 }, { "epoch": 1.567812896103365, "grad_norm": 0.042651548981666565, "learning_rate": 1.356274778556596e-06, "loss": 0.0058, "step": 191720 }, { "epoch": 1.5678946722819642, "grad_norm": 0.07199046015739441, "learning_rate": 1.3557861312449105e-06, "loss": 0.0009, "step": 191730 }, { "epoch": 1.5679764484605634, "grad_norm": 0.04241235554218292, "learning_rate": 1.3552975581688238e-06, "loss": 0.0006, "step": 191740 }, { "epoch": 1.5680582246391626, "grad_norm": 0.027118127793073654, "learning_rate": 1.3548090593382868e-06, "loss": 0.0007, "step": 191750 }, { "epoch": 1.568140000817762, "grad_norm": 0.05809414014220238, "learning_rate": 1.3543206347632538e-06, "loss": 0.0007, "step": 191760 }, { "epoch": 1.568221776996361, "grad_norm": 0.03154633194208145, "learning_rate": 1.3538322844536728e-06, "loss": 0.0005, "step": 191770 }, { "epoch": 1.5683035531749603, "grad_norm": 0.021968379616737366, "learning_rate": 1.3533440084194916e-06, "loss": 0.0005, "step": 191780 }, { "epoch": 1.5683853293535592, "grad_norm": 0.09369953721761703, "learning_rate": 1.3528558066706576e-06, "loss": 0.0006, "step": 191790 }, { "epoch": 1.5684671055321586, "grad_norm": 0.14585356414318085, "learning_rate": 1.3523676792171148e-06, "loss": 0.0005, "step": 191800 }, { "epoch": 1.5685488817107576, "grad_norm": 0.00679636700078845, "learning_rate": 1.3518796260688078e-06, "loss": 0.0007, "step": 191810 }, { "epoch": 1.568630657889357, "grad_norm": 0.04220271855592728, "learning_rate": 1.351391647235678e-06, "loss": 0.0006, "step": 191820 }, { "epoch": 1.568712434067956, "grad_norm": 0.05383584648370743, "learning_rate": 1.3509037427276645e-06, "loss": 0.0008, "step": 191830 }, { "epoch": 1.5687942102465553, "grad_norm": 0.008410741575062275, "learning_rate": 1.3504159125547095e-06, "loss": 0.0007, "step": 191840 }, { "epoch": 1.5688759864251542, "grad_norm": 0.005331855732947588, "learning_rate": 1.3499281567267492e-06, "loss": 0.0005, "step": 191850 }, { "epoch": 1.5689577626037536, "grad_norm": 0.004375876393169165, "learning_rate": 1.3494404752537189e-06, "loss": 0.0007, "step": 191860 }, { "epoch": 1.5690395387823526, "grad_norm": 0.01145283691585064, "learning_rate": 1.348952868145552e-06, "loss": 0.0004, "step": 191870 }, { "epoch": 1.569121314960952, "grad_norm": 0.01813618279993534, "learning_rate": 1.3484653354121847e-06, "loss": 0.0005, "step": 191880 }, { "epoch": 1.569203091139551, "grad_norm": 0.02305932156741619, "learning_rate": 1.347977877063546e-06, "loss": 0.0006, "step": 191890 }, { "epoch": 1.5692848673181503, "grad_norm": 0.0023647588677704334, "learning_rate": 1.3474904931095666e-06, "loss": 0.0007, "step": 191900 }, { "epoch": 1.5693666434967493, "grad_norm": 0.009342714212834835, "learning_rate": 1.3470031835601755e-06, "loss": 0.0005, "step": 191910 }, { "epoch": 1.5694484196753486, "grad_norm": 0.053731679916381836, "learning_rate": 1.3465159484252982e-06, "loss": 0.0008, "step": 191920 }, { "epoch": 1.5695301958539476, "grad_norm": 0.03138231486082077, "learning_rate": 1.346028787714861e-06, "loss": 0.001, "step": 191930 }, { "epoch": 1.569611972032547, "grad_norm": 0.02219357341527939, "learning_rate": 1.3455417014387873e-06, "loss": 0.0005, "step": 191940 }, { "epoch": 1.569693748211146, "grad_norm": 0.029531974345445633, "learning_rate": 1.3450546896069983e-06, "loss": 0.0017, "step": 191950 }, { "epoch": 1.5697755243897453, "grad_norm": 0.026091238483786583, "learning_rate": 1.3445677522294175e-06, "loss": 0.0006, "step": 191960 }, { "epoch": 1.5698573005683445, "grad_norm": 0.00953267328441143, "learning_rate": 1.344080889315963e-06, "loss": 0.0005, "step": 191970 }, { "epoch": 1.5699390767469437, "grad_norm": 0.05448524281382561, "learning_rate": 1.3435941008765524e-06, "loss": 0.0048, "step": 191980 }, { "epoch": 1.5700208529255428, "grad_norm": 0.05373414605855942, "learning_rate": 1.3431073869211003e-06, "loss": 0.0005, "step": 191990 }, { "epoch": 1.570102629104142, "grad_norm": 0.058880891650915146, "learning_rate": 1.342620747459525e-06, "loss": 0.0008, "step": 192000 }, { "epoch": 1.5701844052827412, "grad_norm": 0.05207888036966324, "learning_rate": 1.342134182501738e-06, "loss": 0.0008, "step": 192010 }, { "epoch": 1.5702661814613403, "grad_norm": 0.04924997314810753, "learning_rate": 1.3416476920576505e-06, "loss": 0.0011, "step": 192020 }, { "epoch": 1.5703479576399395, "grad_norm": 0.040897976607084274, "learning_rate": 1.341161276137174e-06, "loss": 0.0022, "step": 192030 }, { "epoch": 1.5704297338185387, "grad_norm": 0.006107228808104992, "learning_rate": 1.3406749347502157e-06, "loss": 0.0007, "step": 192040 }, { "epoch": 1.5705115099971378, "grad_norm": 0.007349095307290554, "learning_rate": 1.3401886679066833e-06, "loss": 0.0003, "step": 192050 }, { "epoch": 1.570593286175737, "grad_norm": 0.0101857865229249, "learning_rate": 1.339702475616483e-06, "loss": 0.0005, "step": 192060 }, { "epoch": 1.5706750623543362, "grad_norm": 0.05347028002142906, "learning_rate": 1.3392163578895167e-06, "loss": 0.001, "step": 192070 }, { "epoch": 1.5707568385329354, "grad_norm": 0.03700863942503929, "learning_rate": 1.3387303147356905e-06, "loss": 0.0005, "step": 192080 }, { "epoch": 1.5708386147115345, "grad_norm": 0.02789207175374031, "learning_rate": 1.3382443461649043e-06, "loss": 0.0009, "step": 192090 }, { "epoch": 1.5709203908901337, "grad_norm": 0.014559954404830933, "learning_rate": 1.3377584521870568e-06, "loss": 0.0006, "step": 192100 }, { "epoch": 1.5710021670687329, "grad_norm": 0.009042487479746342, "learning_rate": 1.3372726328120445e-06, "loss": 0.0009, "step": 192110 }, { "epoch": 1.571083943247332, "grad_norm": 0.06272020936012268, "learning_rate": 1.336786888049768e-06, "loss": 0.0016, "step": 192120 }, { "epoch": 1.5711657194259312, "grad_norm": 0.0271486584097147, "learning_rate": 1.3363012179101198e-06, "loss": 0.0009, "step": 192130 }, { "epoch": 1.5712474956045304, "grad_norm": 0.0421210415661335, "learning_rate": 1.3358156224029938e-06, "loss": 0.0011, "step": 192140 }, { "epoch": 1.5713292717831295, "grad_norm": 0.007233431097120047, "learning_rate": 1.3353301015382824e-06, "loss": 0.0005, "step": 192150 }, { "epoch": 1.5714110479617287, "grad_norm": 0.07262155413627625, "learning_rate": 1.3348446553258754e-06, "loss": 0.0006, "step": 192160 }, { "epoch": 1.5714928241403279, "grad_norm": 0.0832936242222786, "learning_rate": 1.3343592837756618e-06, "loss": 0.0009, "step": 192170 }, { "epoch": 1.571574600318927, "grad_norm": 0.037186261266469955, "learning_rate": 1.3338739868975292e-06, "loss": 0.0005, "step": 192180 }, { "epoch": 1.5716563764975264, "grad_norm": 0.0017258820589631796, "learning_rate": 1.333388764701362e-06, "loss": 0.0006, "step": 192190 }, { "epoch": 1.5717381526761254, "grad_norm": 0.0484427809715271, "learning_rate": 1.3329036171970477e-06, "loss": 0.001, "step": 192200 }, { "epoch": 1.5718199288547248, "grad_norm": 0.03300211951136589, "learning_rate": 1.3324185443944676e-06, "loss": 0.0013, "step": 192210 }, { "epoch": 1.5719017050333237, "grad_norm": 0.049476899206638336, "learning_rate": 1.3319335463035027e-06, "loss": 0.0008, "step": 192220 }, { "epoch": 1.571983481211923, "grad_norm": 0.059746153652668, "learning_rate": 1.3314486229340311e-06, "loss": 0.0009, "step": 192230 }, { "epoch": 1.572065257390522, "grad_norm": 0.027745887637138367, "learning_rate": 1.3309637742959352e-06, "loss": 0.0015, "step": 192240 }, { "epoch": 1.5721470335691214, "grad_norm": 0.16511355340480804, "learning_rate": 1.3304790003990892e-06, "loss": 0.001, "step": 192250 }, { "epoch": 1.5722288097477204, "grad_norm": 0.18499119579792023, "learning_rate": 1.3299943012533689e-06, "loss": 0.0004, "step": 192260 }, { "epoch": 1.5723105859263198, "grad_norm": 0.08268316090106964, "learning_rate": 1.3295096768686478e-06, "loss": 0.0008, "step": 192270 }, { "epoch": 1.5723923621049187, "grad_norm": 0.0015358921373263001, "learning_rate": 1.3290251272547978e-06, "loss": 0.0004, "step": 192280 }, { "epoch": 1.5724741382835181, "grad_norm": 0.011707169003784657, "learning_rate": 1.3285406524216898e-06, "loss": 0.0007, "step": 192290 }, { "epoch": 1.572555914462117, "grad_norm": 0.028847921639680862, "learning_rate": 1.3280562523791935e-06, "loss": 0.0004, "step": 192300 }, { "epoch": 1.5726376906407165, "grad_norm": 0.01583501696586609, "learning_rate": 1.3275719271371746e-06, "loss": 0.0006, "step": 192310 }, { "epoch": 1.5727194668193154, "grad_norm": 0.01328524574637413, "learning_rate": 1.327087676705502e-06, "loss": 0.0006, "step": 192320 }, { "epoch": 1.5728012429979148, "grad_norm": 0.004613661207258701, "learning_rate": 1.3266035010940387e-06, "loss": 0.0004, "step": 192330 }, { "epoch": 1.5728830191765137, "grad_norm": 0.011269313283264637, "learning_rate": 1.3261194003126477e-06, "loss": 0.0004, "step": 192340 }, { "epoch": 1.5729647953551131, "grad_norm": 0.049833644181489944, "learning_rate": 1.3256353743711898e-06, "loss": 0.0006, "step": 192350 }, { "epoch": 1.573046571533712, "grad_norm": 0.003833168186247349, "learning_rate": 1.3251514232795271e-06, "loss": 0.0002, "step": 192360 }, { "epoch": 1.5731283477123115, "grad_norm": 0.14572753012180328, "learning_rate": 1.3246675470475174e-06, "loss": 0.0009, "step": 192370 }, { "epoch": 1.5732101238909104, "grad_norm": 0.04985867813229561, "learning_rate": 1.3241837456850171e-06, "loss": 0.0037, "step": 192380 }, { "epoch": 1.5732919000695098, "grad_norm": 0.027752932161092758, "learning_rate": 1.3237000192018817e-06, "loss": 0.0003, "step": 192390 }, { "epoch": 1.573373676248109, "grad_norm": 0.025166871026158333, "learning_rate": 1.3232163676079652e-06, "loss": 0.0005, "step": 192400 }, { "epoch": 1.5734554524267081, "grad_norm": 0.012949229218065739, "learning_rate": 1.32273279091312e-06, "loss": 0.0003, "step": 192410 }, { "epoch": 1.5735372286053073, "grad_norm": 0.04301813617348671, "learning_rate": 1.3222492891271966e-06, "loss": 0.0005, "step": 192420 }, { "epoch": 1.5736190047839065, "grad_norm": 0.029667578637599945, "learning_rate": 1.3217658622600432e-06, "loss": 0.0005, "step": 192430 }, { "epoch": 1.5737007809625057, "grad_norm": 0.07538062334060669, "learning_rate": 1.321282510321511e-06, "loss": 0.0011, "step": 192440 }, { "epoch": 1.5737825571411048, "grad_norm": 0.0300455279648304, "learning_rate": 1.3207992333214437e-06, "loss": 0.0007, "step": 192450 }, { "epoch": 1.573864333319704, "grad_norm": 0.028896013274788857, "learning_rate": 1.3203160312696867e-06, "loss": 0.0006, "step": 192460 }, { "epoch": 1.5739461094983032, "grad_norm": 0.06058526411652565, "learning_rate": 1.3198329041760816e-06, "loss": 0.0005, "step": 192470 }, { "epoch": 1.5740278856769023, "grad_norm": 0.0320824459195137, "learning_rate": 1.3193498520504732e-06, "loss": 0.0008, "step": 192480 }, { "epoch": 1.5741096618555015, "grad_norm": 0.006906568538397551, "learning_rate": 1.3188668749027007e-06, "loss": 0.0006, "step": 192490 }, { "epoch": 1.5741914380341007, "grad_norm": 0.01249239407479763, "learning_rate": 1.3183839727426012e-06, "loss": 0.0004, "step": 192500 }, { "epoch": 1.5742732142126998, "grad_norm": 0.02755262702703476, "learning_rate": 1.3179011455800134e-06, "loss": 0.0008, "step": 192510 }, { "epoch": 1.574354990391299, "grad_norm": 0.017971711233258247, "learning_rate": 1.317418393424772e-06, "loss": 0.0003, "step": 192520 }, { "epoch": 1.5744367665698982, "grad_norm": 0.03615283593535423, "learning_rate": 1.3169357162867113e-06, "loss": 0.0023, "step": 192530 }, { "epoch": 1.5745185427484973, "grad_norm": 0.01562521606683731, "learning_rate": 1.3164531141756636e-06, "loss": 0.0003, "step": 192540 }, { "epoch": 1.5746003189270965, "grad_norm": 0.03514732047915459, "learning_rate": 1.3159705871014588e-06, "loss": 0.0006, "step": 192550 }, { "epoch": 1.5746820951056957, "grad_norm": 0.0056765032932162285, "learning_rate": 1.315488135073929e-06, "loss": 0.0005, "step": 192560 }, { "epoch": 1.5747638712842948, "grad_norm": 0.011906662955880165, "learning_rate": 1.3150057581029014e-06, "loss": 0.0006, "step": 192570 }, { "epoch": 1.574845647462894, "grad_norm": 0.018034569919109344, "learning_rate": 1.3145234561982013e-06, "loss": 0.0007, "step": 192580 }, { "epoch": 1.5749274236414932, "grad_norm": 0.09087994694709778, "learning_rate": 1.3140412293696526e-06, "loss": 0.0008, "step": 192590 }, { "epoch": 1.5750091998200924, "grad_norm": 0.01768934726715088, "learning_rate": 1.3135590776270813e-06, "loss": 0.001, "step": 192600 }, { "epoch": 1.5750909759986915, "grad_norm": 0.08160173892974854, "learning_rate": 1.3130770009803085e-06, "loss": 0.001, "step": 192610 }, { "epoch": 1.575172752177291, "grad_norm": 0.0330202579498291, "learning_rate": 1.312594999439154e-06, "loss": 0.0006, "step": 192620 }, { "epoch": 1.5752545283558899, "grad_norm": 0.0071667032316327095, "learning_rate": 1.312113073013437e-06, "loss": 0.0005, "step": 192630 }, { "epoch": 1.5753363045344893, "grad_norm": 0.0760873332619667, "learning_rate": 1.311631221712974e-06, "loss": 0.0007, "step": 192640 }, { "epoch": 1.5754180807130882, "grad_norm": 0.04827617108821869, "learning_rate": 1.3111494455475815e-06, "loss": 0.0005, "step": 192650 }, { "epoch": 1.5754998568916876, "grad_norm": 0.04761308804154396, "learning_rate": 1.3106677445270727e-06, "loss": 0.0006, "step": 192660 }, { "epoch": 1.5755816330702865, "grad_norm": 0.013405398465692997, "learning_rate": 1.3101861186612596e-06, "loss": 0.0008, "step": 192670 }, { "epoch": 1.575663409248886, "grad_norm": 0.059473756700754166, "learning_rate": 1.3097045679599557e-06, "loss": 0.0006, "step": 192680 }, { "epoch": 1.5757451854274849, "grad_norm": 0.06208401545882225, "learning_rate": 1.3092230924329696e-06, "loss": 0.0017, "step": 192690 }, { "epoch": 1.5758269616060843, "grad_norm": 0.0019959593191742897, "learning_rate": 1.3087416920901092e-06, "loss": 0.0004, "step": 192700 }, { "epoch": 1.5759087377846832, "grad_norm": 0.05961696058511734, "learning_rate": 1.3082603669411792e-06, "loss": 0.0005, "step": 192710 }, { "epoch": 1.5759905139632826, "grad_norm": 0.03624262288212776, "learning_rate": 1.307779116995988e-06, "loss": 0.0008, "step": 192720 }, { "epoch": 1.5760722901418815, "grad_norm": 0.028216762468218803, "learning_rate": 1.3072979422643373e-06, "loss": 0.0007, "step": 192730 }, { "epoch": 1.576154066320481, "grad_norm": 0.029370781034231186, "learning_rate": 1.306816842756029e-06, "loss": 0.0008, "step": 192740 }, { "epoch": 1.5762358424990799, "grad_norm": 0.18494850397109985, "learning_rate": 1.3063358184808634e-06, "loss": 0.0014, "step": 192750 }, { "epoch": 1.5763176186776793, "grad_norm": 0.023529009893536568, "learning_rate": 1.30585486944864e-06, "loss": 0.0007, "step": 192760 }, { "epoch": 1.5763993948562782, "grad_norm": 0.07187625020742416, "learning_rate": 1.3053739956691552e-06, "loss": 0.0008, "step": 192770 }, { "epoch": 1.5764811710348776, "grad_norm": 0.009972739964723587, "learning_rate": 1.304893197152206e-06, "loss": 0.0005, "step": 192780 }, { "epoch": 1.5765629472134766, "grad_norm": 0.008570695295929909, "learning_rate": 1.3044124739075837e-06, "loss": 0.0007, "step": 192790 }, { "epoch": 1.576644723392076, "grad_norm": 0.02711866796016693, "learning_rate": 1.3039318259450851e-06, "loss": 0.0005, "step": 192800 }, { "epoch": 1.576726499570675, "grad_norm": 0.029164154082536697, "learning_rate": 1.3034512532744992e-06, "loss": 0.0014, "step": 192810 }, { "epoch": 1.5768082757492743, "grad_norm": 0.08780976384878159, "learning_rate": 1.3029707559056165e-06, "loss": 0.0008, "step": 192820 }, { "epoch": 1.5768900519278735, "grad_norm": 0.07360215485095978, "learning_rate": 1.3024903338482226e-06, "loss": 0.0009, "step": 192830 }, { "epoch": 1.5769718281064726, "grad_norm": 0.04505220800638199, "learning_rate": 1.3020099871121079e-06, "loss": 0.0024, "step": 192840 }, { "epoch": 1.5770536042850718, "grad_norm": 0.12382271885871887, "learning_rate": 1.3015297157070555e-06, "loss": 0.001, "step": 192850 }, { "epoch": 1.577135380463671, "grad_norm": 0.003513574367389083, "learning_rate": 1.3010495196428492e-06, "loss": 0.0022, "step": 192860 }, { "epoch": 1.5772171566422701, "grad_norm": 0.03185568004846573, "learning_rate": 1.3005693989292706e-06, "loss": 0.0004, "step": 192870 }, { "epoch": 1.5772989328208693, "grad_norm": 0.02753658965229988, "learning_rate": 1.3000893535761006e-06, "loss": 0.0009, "step": 192880 }, { "epoch": 1.5773807089994685, "grad_norm": 0.07964406162500381, "learning_rate": 1.2996093835931173e-06, "loss": 0.0006, "step": 192890 }, { "epoch": 1.5774624851780676, "grad_norm": 0.03876407817006111, "learning_rate": 1.2991294889900996e-06, "loss": 0.0008, "step": 192900 }, { "epoch": 1.5775442613566668, "grad_norm": 0.024703513830900192, "learning_rate": 1.2986496697768203e-06, "loss": 0.0011, "step": 192910 }, { "epoch": 1.577626037535266, "grad_norm": 0.05989428237080574, "learning_rate": 1.2981699259630576e-06, "loss": 0.0009, "step": 192920 }, { "epoch": 1.5777078137138651, "grad_norm": 0.015238279476761818, "learning_rate": 1.2976902575585827e-06, "loss": 0.0006, "step": 192930 }, { "epoch": 1.5777895898924643, "grad_norm": 0.05956219509243965, "learning_rate": 1.2972106645731664e-06, "loss": 0.0007, "step": 192940 }, { "epoch": 1.5778713660710635, "grad_norm": 0.08605942130088806, "learning_rate": 1.2967311470165767e-06, "loss": 0.0007, "step": 192950 }, { "epoch": 1.5779531422496627, "grad_norm": 0.014526943676173687, "learning_rate": 1.2962517048985856e-06, "loss": 0.0005, "step": 192960 }, { "epoch": 1.5780349184282618, "grad_norm": 0.02322615310549736, "learning_rate": 1.2957723382289583e-06, "loss": 0.0008, "step": 192970 }, { "epoch": 1.578116694606861, "grad_norm": 0.01186592597514391, "learning_rate": 1.295293047017459e-06, "loss": 0.0004, "step": 192980 }, { "epoch": 1.5781984707854602, "grad_norm": 0.06442289054393768, "learning_rate": 1.2948138312738516e-06, "loss": 0.0006, "step": 192990 }, { "epoch": 1.5782802469640593, "grad_norm": 0.0046586343087255955, "learning_rate": 1.294334691007898e-06, "loss": 0.0009, "step": 193000 }, { "epoch": 1.5783620231426585, "grad_norm": 0.061738189309835434, "learning_rate": 1.2938556262293594e-06, "loss": 0.0011, "step": 193010 }, { "epoch": 1.5784437993212577, "grad_norm": 0.012763876467943192, "learning_rate": 1.293376636947994e-06, "loss": 0.0005, "step": 193020 }, { "epoch": 1.5785255754998568, "grad_norm": 0.009883525781333447, "learning_rate": 1.292897723173559e-06, "loss": 0.0006, "step": 193030 }, { "epoch": 1.578607351678456, "grad_norm": 0.030942397192120552, "learning_rate": 1.2924188849158097e-06, "loss": 0.0004, "step": 193040 }, { "epoch": 1.5786891278570554, "grad_norm": 0.03931178152561188, "learning_rate": 1.2919401221845023e-06, "loss": 0.0006, "step": 193050 }, { "epoch": 1.5787709040356543, "grad_norm": 0.049352824687957764, "learning_rate": 1.2914614349893894e-06, "loss": 0.0008, "step": 193060 }, { "epoch": 1.5788526802142537, "grad_norm": 0.0041369060054421425, "learning_rate": 1.290982823340221e-06, "loss": 0.0004, "step": 193070 }, { "epoch": 1.5789344563928527, "grad_norm": 0.012836298905313015, "learning_rate": 1.290504287246746e-06, "loss": 0.0013, "step": 193080 }, { "epoch": 1.579016232571452, "grad_norm": 0.03654778376221657, "learning_rate": 1.2900258267187155e-06, "loss": 0.0004, "step": 193090 }, { "epoch": 1.579098008750051, "grad_norm": 0.05133949592709541, "learning_rate": 1.2895474417658744e-06, "loss": 0.0005, "step": 193100 }, { "epoch": 1.5791797849286504, "grad_norm": 0.027454841881990433, "learning_rate": 1.2890691323979675e-06, "loss": 0.0006, "step": 193110 }, { "epoch": 1.5792615611072494, "grad_norm": 0.027971213683485985, "learning_rate": 1.2885908986247398e-06, "loss": 0.0009, "step": 193120 }, { "epoch": 1.5793433372858487, "grad_norm": 0.048213209956884384, "learning_rate": 1.2881127404559312e-06, "loss": 0.0009, "step": 193130 }, { "epoch": 1.5794251134644477, "grad_norm": 0.010022646747529507, "learning_rate": 1.287634657901284e-06, "loss": 0.0004, "step": 193140 }, { "epoch": 1.579506889643047, "grad_norm": 0.0023617753759026527, "learning_rate": 1.2871566509705363e-06, "loss": 0.0002, "step": 193150 }, { "epoch": 1.579588665821646, "grad_norm": 0.1384577751159668, "learning_rate": 1.2866787196734237e-06, "loss": 0.0009, "step": 193160 }, { "epoch": 1.5796704420002454, "grad_norm": 0.08785630762577057, "learning_rate": 1.2862008640196855e-06, "loss": 0.0006, "step": 193170 }, { "epoch": 1.5797522181788444, "grad_norm": 0.03748420998454094, "learning_rate": 1.285723084019055e-06, "loss": 0.0012, "step": 193180 }, { "epoch": 1.5798339943574438, "grad_norm": 0.008639461360871792, "learning_rate": 1.2852453796812637e-06, "loss": 0.0006, "step": 193190 }, { "epoch": 1.5799157705360427, "grad_norm": 0.013621400110423565, "learning_rate": 1.2847677510160421e-06, "loss": 0.0007, "step": 193200 }, { "epoch": 1.579997546714642, "grad_norm": 0.01552892941981554, "learning_rate": 1.284290198033123e-06, "loss": 0.0004, "step": 193210 }, { "epoch": 1.580079322893241, "grad_norm": 0.03541601821780205, "learning_rate": 1.283812720742233e-06, "loss": 0.0004, "step": 193220 }, { "epoch": 1.5801610990718404, "grad_norm": 0.04525335878133774, "learning_rate": 1.2833353191530984e-06, "loss": 0.0009, "step": 193230 }, { "epoch": 1.5802428752504394, "grad_norm": 0.04266373813152313, "learning_rate": 1.2828579932754443e-06, "loss": 0.0007, "step": 193240 }, { "epoch": 1.5803246514290388, "grad_norm": 0.06776116043329239, "learning_rate": 1.2823807431189945e-06, "loss": 0.0013, "step": 193250 }, { "epoch": 1.580406427607638, "grad_norm": 0.012687175534665585, "learning_rate": 1.2819035686934705e-06, "loss": 0.0003, "step": 193260 }, { "epoch": 1.5804882037862371, "grad_norm": 0.01173610519617796, "learning_rate": 1.2814264700085932e-06, "loss": 0.0005, "step": 193270 }, { "epoch": 1.5805699799648363, "grad_norm": 0.041226230561733246, "learning_rate": 1.2809494470740796e-06, "loss": 0.0008, "step": 193280 }, { "epoch": 1.5806517561434354, "grad_norm": 0.035312555730342865, "learning_rate": 1.2804724998996504e-06, "loss": 0.0028, "step": 193290 }, { "epoch": 1.5807335323220346, "grad_norm": 0.0021282287780195475, "learning_rate": 1.2799956284950199e-06, "loss": 0.0004, "step": 193300 }, { "epoch": 1.5808153085006338, "grad_norm": 0.145380899310112, "learning_rate": 1.279518832869902e-06, "loss": 0.0008, "step": 193310 }, { "epoch": 1.580897084679233, "grad_norm": 0.030946211889386177, "learning_rate": 1.2790421130340085e-06, "loss": 0.0003, "step": 193320 }, { "epoch": 1.5809788608578321, "grad_norm": 0.023085886612534523, "learning_rate": 1.2785654689970533e-06, "loss": 0.0009, "step": 193330 }, { "epoch": 1.5810606370364313, "grad_norm": 0.014347950927913189, "learning_rate": 1.278088900768744e-06, "loss": 0.0017, "step": 193340 }, { "epoch": 1.5811424132150305, "grad_norm": 0.017186053097248077, "learning_rate": 1.2776124083587894e-06, "loss": 0.0012, "step": 193350 }, { "epoch": 1.5812241893936296, "grad_norm": 0.0206650011241436, "learning_rate": 1.2771359917768955e-06, "loss": 0.0004, "step": 193360 }, { "epoch": 1.5813059655722288, "grad_norm": 0.06771743297576904, "learning_rate": 1.2766596510327673e-06, "loss": 0.0007, "step": 193370 }, { "epoch": 1.581387741750828, "grad_norm": 0.10066717863082886, "learning_rate": 1.276183386136109e-06, "loss": 0.0006, "step": 193380 }, { "epoch": 1.5814695179294271, "grad_norm": 0.02319360338151455, "learning_rate": 1.2757071970966218e-06, "loss": 0.0008, "step": 193390 }, { "epoch": 1.5815512941080263, "grad_norm": 0.03425377979874611, "learning_rate": 1.275231083924005e-06, "loss": 0.001, "step": 193400 }, { "epoch": 1.5816330702866255, "grad_norm": 0.042427122592926025, "learning_rate": 1.2747550466279596e-06, "loss": 0.0005, "step": 193410 }, { "epoch": 1.5817148464652246, "grad_norm": 0.028539640828967094, "learning_rate": 1.2742790852181824e-06, "loss": 0.0004, "step": 193420 }, { "epoch": 1.5817966226438238, "grad_norm": 0.012339978478848934, "learning_rate": 1.2738031997043687e-06, "loss": 0.0005, "step": 193430 }, { "epoch": 1.581878398822423, "grad_norm": 0.0702698603272438, "learning_rate": 1.2733273900962107e-06, "loss": 0.001, "step": 193440 }, { "epoch": 1.5819601750010222, "grad_norm": 0.08350405097007751, "learning_rate": 1.2728516564034048e-06, "loss": 0.001, "step": 193450 }, { "epoch": 1.5820419511796215, "grad_norm": 0.050335198640823364, "learning_rate": 1.2723759986356405e-06, "loss": 0.001, "step": 193460 }, { "epoch": 1.5821237273582205, "grad_norm": 0.03156053274869919, "learning_rate": 1.2719004168026066e-06, "loss": 0.0012, "step": 193470 }, { "epoch": 1.5822055035368199, "grad_norm": 0.027402719482779503, "learning_rate": 1.2714249109139915e-06, "loss": 0.0003, "step": 193480 }, { "epoch": 1.5822872797154188, "grad_norm": 0.07150629907846451, "learning_rate": 1.2709494809794821e-06, "loss": 0.0012, "step": 193490 }, { "epoch": 1.5823690558940182, "grad_norm": 0.01281234435737133, "learning_rate": 1.2704741270087624e-06, "loss": 0.0005, "step": 193500 }, { "epoch": 1.5824508320726172, "grad_norm": 0.05206339433789253, "learning_rate": 1.2699988490115168e-06, "loss": 0.0012, "step": 193510 }, { "epoch": 1.5825326082512166, "grad_norm": 0.014107361435890198, "learning_rate": 1.2695236469974248e-06, "loss": 0.0016, "step": 193520 }, { "epoch": 1.5826143844298155, "grad_norm": 0.024251068010926247, "learning_rate": 1.26904852097617e-06, "loss": 0.0008, "step": 193530 }, { "epoch": 1.582696160608415, "grad_norm": 0.018698444589972496, "learning_rate": 1.2685734709574288e-06, "loss": 0.0005, "step": 193540 }, { "epoch": 1.5827779367870138, "grad_norm": 0.009173620492219925, "learning_rate": 1.2680984969508796e-06, "loss": 0.0008, "step": 193550 }, { "epoch": 1.5828597129656132, "grad_norm": 0.00658327667042613, "learning_rate": 1.267623598966196e-06, "loss": 0.0007, "step": 193560 }, { "epoch": 1.5829414891442122, "grad_norm": 0.017347082495689392, "learning_rate": 1.2671487770130552e-06, "loss": 0.0003, "step": 193570 }, { "epoch": 1.5830232653228116, "grad_norm": 0.030833156779408455, "learning_rate": 1.2666740311011277e-06, "loss": 0.0009, "step": 193580 }, { "epoch": 1.5831050415014105, "grad_norm": 0.0887472853064537, "learning_rate": 1.2661993612400853e-06, "loss": 0.0009, "step": 193590 }, { "epoch": 1.58318681768001, "grad_norm": 0.040541842579841614, "learning_rate": 1.265724767439596e-06, "loss": 0.0004, "step": 193600 }, { "epoch": 1.5832685938586089, "grad_norm": 0.175015389919281, "learning_rate": 1.2652502497093294e-06, "loss": 0.0031, "step": 193610 }, { "epoch": 1.5833503700372082, "grad_norm": 0.04239708185195923, "learning_rate": 1.2647758080589506e-06, "loss": 0.0008, "step": 193620 }, { "epoch": 1.5834321462158072, "grad_norm": 0.006392046809196472, "learning_rate": 1.2643014424981249e-06, "loss": 0.0003, "step": 193630 }, { "epoch": 1.5835139223944066, "grad_norm": 0.0753028392791748, "learning_rate": 1.2638271530365137e-06, "loss": 0.0013, "step": 193640 }, { "epoch": 1.5835956985730055, "grad_norm": 0.009545305743813515, "learning_rate": 1.2633529396837819e-06, "loss": 0.0002, "step": 193650 }, { "epoch": 1.583677474751605, "grad_norm": 0.017341315746307373, "learning_rate": 1.2628788024495881e-06, "loss": 0.0008, "step": 193660 }, { "epoch": 1.5837592509302039, "grad_norm": 0.07332340627908707, "learning_rate": 1.2624047413435904e-06, "loss": 0.0006, "step": 193670 }, { "epoch": 1.5838410271088033, "grad_norm": 0.05606650188565254, "learning_rate": 1.2619307563754445e-06, "loss": 0.0008, "step": 193680 }, { "epoch": 1.5839228032874024, "grad_norm": 0.012081182561814785, "learning_rate": 1.26145684755481e-06, "loss": 0.0006, "step": 193690 }, { "epoch": 1.5840045794660016, "grad_norm": 0.006518787704408169, "learning_rate": 1.2609830148913377e-06, "loss": 0.0003, "step": 193700 }, { "epoch": 1.5840863556446008, "grad_norm": 0.0806197077035904, "learning_rate": 1.2605092583946805e-06, "loss": 0.0006, "step": 193710 }, { "epoch": 1.5841681318232, "grad_norm": 0.00623546727001667, "learning_rate": 1.2600355780744899e-06, "loss": 0.0004, "step": 193720 }, { "epoch": 1.584249908001799, "grad_norm": 0.022103069350123405, "learning_rate": 1.259561973940414e-06, "loss": 0.0013, "step": 193730 }, { "epoch": 1.5843316841803983, "grad_norm": 0.046651385724544525, "learning_rate": 1.2590884460021014e-06, "loss": 0.0006, "step": 193740 }, { "epoch": 1.5844134603589974, "grad_norm": 0.04868347942829132, "learning_rate": 1.2586149942691977e-06, "loss": 0.0005, "step": 193750 }, { "epoch": 1.5844952365375966, "grad_norm": 0.04866403713822365, "learning_rate": 1.2581416187513462e-06, "loss": 0.002, "step": 193760 }, { "epoch": 1.5845770127161958, "grad_norm": 0.0009981165640056133, "learning_rate": 1.2576683194581934e-06, "loss": 0.0006, "step": 193770 }, { "epoch": 1.584658788894795, "grad_norm": 0.0696474015712738, "learning_rate": 1.2571950963993784e-06, "loss": 0.0006, "step": 193780 }, { "epoch": 1.5847405650733941, "grad_norm": 0.004479976836591959, "learning_rate": 1.2567219495845417e-06, "loss": 0.0004, "step": 193790 }, { "epoch": 1.5848223412519933, "grad_norm": 0.006565170828253031, "learning_rate": 1.2562488790233202e-06, "loss": 0.0009, "step": 193800 }, { "epoch": 1.5849041174305925, "grad_norm": 0.046170104295015335, "learning_rate": 1.2557758847253538e-06, "loss": 0.0008, "step": 193810 }, { "epoch": 1.5849858936091916, "grad_norm": 0.10528727620840073, "learning_rate": 1.2553029667002752e-06, "loss": 0.0006, "step": 193820 }, { "epoch": 1.5850676697877908, "grad_norm": 0.041234858334064484, "learning_rate": 1.2548301249577199e-06, "loss": 0.0013, "step": 193830 }, { "epoch": 1.58514944596639, "grad_norm": 0.043951407074928284, "learning_rate": 1.2543573595073188e-06, "loss": 0.0005, "step": 193840 }, { "epoch": 1.5852312221449891, "grad_norm": 0.005605996586382389, "learning_rate": 1.2538846703587033e-06, "loss": 0.0005, "step": 193850 }, { "epoch": 1.5853129983235883, "grad_norm": 0.03229634091258049, "learning_rate": 1.2534120575215014e-06, "loss": 0.001, "step": 193860 }, { "epoch": 1.5853947745021875, "grad_norm": 0.07832571119070053, "learning_rate": 1.2529395210053418e-06, "loss": 0.0004, "step": 193870 }, { "epoch": 1.5854765506807866, "grad_norm": 0.0006028432981111109, "learning_rate": 1.252467060819848e-06, "loss": 0.0008, "step": 193880 }, { "epoch": 1.585558326859386, "grad_norm": 0.059084612876176834, "learning_rate": 1.2519946769746482e-06, "loss": 0.001, "step": 193890 }, { "epoch": 1.585640103037985, "grad_norm": 0.06645803153514862, "learning_rate": 1.2515223694793632e-06, "loss": 0.0014, "step": 193900 }, { "epoch": 1.5857218792165844, "grad_norm": 0.028129523620009422, "learning_rate": 1.2510501383436147e-06, "loss": 0.0008, "step": 193910 }, { "epoch": 1.5858036553951833, "grad_norm": 0.003797308076173067, "learning_rate": 1.2505779835770205e-06, "loss": 0.0011, "step": 193920 }, { "epoch": 1.5858854315737827, "grad_norm": 0.017163028940558434, "learning_rate": 1.2501059051892018e-06, "loss": 0.0011, "step": 193930 }, { "epoch": 1.5859672077523816, "grad_norm": 0.0009462696616537869, "learning_rate": 1.2496339031897743e-06, "loss": 0.0009, "step": 193940 }, { "epoch": 1.586048983930981, "grad_norm": 0.1653587818145752, "learning_rate": 1.2491619775883523e-06, "loss": 0.0025, "step": 193950 }, { "epoch": 1.58613076010958, "grad_norm": 0.00498212268576026, "learning_rate": 1.2486901283945496e-06, "loss": 0.0003, "step": 193960 }, { "epoch": 1.5862125362881794, "grad_norm": 0.05441892519593239, "learning_rate": 1.2482183556179782e-06, "loss": 0.0007, "step": 193970 }, { "epoch": 1.5862943124667783, "grad_norm": 0.011183592490851879, "learning_rate": 1.2477466592682487e-06, "loss": 0.0015, "step": 193980 }, { "epoch": 1.5863760886453777, "grad_norm": 0.11561395227909088, "learning_rate": 1.2472750393549694e-06, "loss": 0.0015, "step": 193990 }, { "epoch": 1.5864578648239767, "grad_norm": 0.026922721415758133, "learning_rate": 1.2468034958877468e-06, "loss": 0.0005, "step": 194000 }, { "epoch": 1.586539641002576, "grad_norm": 0.03787185251712799, "learning_rate": 1.2463320288761893e-06, "loss": 0.0018, "step": 194010 }, { "epoch": 1.586621417181175, "grad_norm": 0.03319334238767624, "learning_rate": 1.2458606383298994e-06, "loss": 0.0006, "step": 194020 }, { "epoch": 1.5867031933597744, "grad_norm": 0.06017055734992027, "learning_rate": 1.245389324258479e-06, "loss": 0.0005, "step": 194030 }, { "epoch": 1.5867849695383733, "grad_norm": 0.029132120311260223, "learning_rate": 1.2449180866715293e-06, "loss": 0.0006, "step": 194040 }, { "epoch": 1.5868667457169727, "grad_norm": 0.014088554307818413, "learning_rate": 1.2444469255786522e-06, "loss": 0.0013, "step": 194050 }, { "epoch": 1.5869485218955717, "grad_norm": 0.006922123022377491, "learning_rate": 1.2439758409894436e-06, "loss": 0.0011, "step": 194060 }, { "epoch": 1.587030298074171, "grad_norm": 0.009004207327961922, "learning_rate": 1.2435048329135002e-06, "loss": 0.0007, "step": 194070 }, { "epoch": 1.58711207425277, "grad_norm": 0.02455839328467846, "learning_rate": 1.2430339013604164e-06, "loss": 0.0008, "step": 194080 }, { "epoch": 1.5871938504313694, "grad_norm": 0.027472851797938347, "learning_rate": 1.2425630463397864e-06, "loss": 0.0004, "step": 194090 }, { "epoch": 1.5872756266099686, "grad_norm": 0.04380129650235176, "learning_rate": 1.2420922678612008e-06, "loss": 0.0003, "step": 194100 }, { "epoch": 1.5873574027885677, "grad_norm": 0.017780452966690063, "learning_rate": 1.2416215659342507e-06, "loss": 0.0006, "step": 194110 }, { "epoch": 1.587439178967167, "grad_norm": 0.023480426520109177, "learning_rate": 1.2411509405685224e-06, "loss": 0.0007, "step": 194120 }, { "epoch": 1.587520955145766, "grad_norm": 0.07401400804519653, "learning_rate": 1.2406803917736066e-06, "loss": 0.0009, "step": 194130 }, { "epoch": 1.5876027313243652, "grad_norm": 0.024369070306420326, "learning_rate": 1.2402099195590872e-06, "loss": 0.0005, "step": 194140 }, { "epoch": 1.5876845075029644, "grad_norm": 0.013560710474848747, "learning_rate": 1.2397395239345472e-06, "loss": 0.0005, "step": 194150 }, { "epoch": 1.5877662836815636, "grad_norm": 0.01125369779765606, "learning_rate": 1.2392692049095684e-06, "loss": 0.0005, "step": 194160 }, { "epoch": 1.5878480598601628, "grad_norm": 0.0736989676952362, "learning_rate": 1.2387989624937346e-06, "loss": 0.0009, "step": 194170 }, { "epoch": 1.587929836038762, "grad_norm": 0.019214702770113945, "learning_rate": 1.238328796696623e-06, "loss": 0.0005, "step": 194180 }, { "epoch": 1.588011612217361, "grad_norm": 0.06167539954185486, "learning_rate": 1.2378587075278109e-06, "loss": 0.0012, "step": 194190 }, { "epoch": 1.5880933883959603, "grad_norm": 0.058724626898765564, "learning_rate": 1.2373886949968755e-06, "loss": 0.0005, "step": 194200 }, { "epoch": 1.5881751645745594, "grad_norm": 0.06530068814754486, "learning_rate": 1.2369187591133908e-06, "loss": 0.001, "step": 194210 }, { "epoch": 1.5882569407531586, "grad_norm": 0.021530400961637497, "learning_rate": 1.2364488998869294e-06, "loss": 0.0008, "step": 194220 }, { "epoch": 1.5883387169317578, "grad_norm": 0.03432035446166992, "learning_rate": 1.2359791173270636e-06, "loss": 0.0003, "step": 194230 }, { "epoch": 1.588420493110357, "grad_norm": 0.0015128732193261385, "learning_rate": 1.2355094114433608e-06, "loss": 0.0007, "step": 194240 }, { "epoch": 1.588502269288956, "grad_norm": 0.04616761952638626, "learning_rate": 1.235039782245393e-06, "loss": 0.0009, "step": 194250 }, { "epoch": 1.5885840454675553, "grad_norm": 0.03091409057378769, "learning_rate": 1.2345702297427248e-06, "loss": 0.0003, "step": 194260 }, { "epoch": 1.5886658216461544, "grad_norm": 0.03996610268950462, "learning_rate": 1.2341007539449218e-06, "loss": 0.0007, "step": 194270 }, { "epoch": 1.5887475978247536, "grad_norm": 0.0014401577645912766, "learning_rate": 1.2336313548615464e-06, "loss": 0.0004, "step": 194280 }, { "epoch": 1.5888293740033528, "grad_norm": 0.001628122990950942, "learning_rate": 1.233162032502163e-06, "loss": 0.0008, "step": 194290 }, { "epoch": 1.588911150181952, "grad_norm": 0.038812652230262756, "learning_rate": 1.2326927868763305e-06, "loss": 0.0006, "step": 194300 }, { "epoch": 1.5889929263605511, "grad_norm": 0.01255191769450903, "learning_rate": 1.2322236179936087e-06, "loss": 0.0008, "step": 194310 }, { "epoch": 1.5890747025391505, "grad_norm": 0.0842236578464508, "learning_rate": 1.2317545258635544e-06, "loss": 0.0006, "step": 194320 }, { "epoch": 1.5891564787177495, "grad_norm": 0.04590094834566116, "learning_rate": 1.2312855104957228e-06, "loss": 0.0005, "step": 194330 }, { "epoch": 1.5892382548963488, "grad_norm": 0.07707451283931732, "learning_rate": 1.2308165718996695e-06, "loss": 0.0006, "step": 194340 }, { "epoch": 1.5893200310749478, "grad_norm": 0.010802778415381908, "learning_rate": 1.230347710084946e-06, "loss": 0.0006, "step": 194350 }, { "epoch": 1.5894018072535472, "grad_norm": 0.04829841107130051, "learning_rate": 1.2298789250611022e-06, "loss": 0.0009, "step": 194360 }, { "epoch": 1.5894835834321461, "grad_norm": 0.03535714000463486, "learning_rate": 1.2294102168376908e-06, "loss": 0.001, "step": 194370 }, { "epoch": 1.5895653596107455, "grad_norm": 0.055983688682317734, "learning_rate": 1.228941585424258e-06, "loss": 0.0008, "step": 194380 }, { "epoch": 1.5896471357893445, "grad_norm": 0.014036320149898529, "learning_rate": 1.2284730308303506e-06, "loss": 0.0032, "step": 194390 }, { "epoch": 1.5897289119679439, "grad_norm": 0.04070178419351578, "learning_rate": 1.2280045530655117e-06, "loss": 0.0005, "step": 194400 }, { "epoch": 1.5898106881465428, "grad_norm": 0.04714592546224594, "learning_rate": 1.2275361521392875e-06, "loss": 0.0006, "step": 194410 }, { "epoch": 1.5898924643251422, "grad_norm": 0.00779324397444725, "learning_rate": 1.227067828061218e-06, "loss": 0.0009, "step": 194420 }, { "epoch": 1.5899742405037411, "grad_norm": 0.02945418655872345, "learning_rate": 1.2265995808408443e-06, "loss": 0.0007, "step": 194430 }, { "epoch": 1.5900560166823405, "grad_norm": 0.10745484381914139, "learning_rate": 1.2261314104877036e-06, "loss": 0.0008, "step": 194440 }, { "epoch": 1.5901377928609395, "grad_norm": 0.03677290305495262, "learning_rate": 1.2256633170113341e-06, "loss": 0.0015, "step": 194450 }, { "epoch": 1.5902195690395389, "grad_norm": 0.013902835547924042, "learning_rate": 1.2251953004212708e-06, "loss": 0.0005, "step": 194460 }, { "epoch": 1.5903013452181378, "grad_norm": 0.033661067485809326, "learning_rate": 1.2247273607270472e-06, "loss": 0.0007, "step": 194470 }, { "epoch": 1.5903831213967372, "grad_norm": 0.0030218351166695356, "learning_rate": 1.2242594979381961e-06, "loss": 0.0006, "step": 194480 }, { "epoch": 1.5904648975753362, "grad_norm": 0.012804711237549782, "learning_rate": 1.2237917120642466e-06, "loss": 0.0012, "step": 194490 }, { "epoch": 1.5905466737539355, "grad_norm": 0.030112091451883316, "learning_rate": 1.223324003114731e-06, "loss": 0.0006, "step": 194500 }, { "epoch": 1.5906284499325345, "grad_norm": 0.01121943723410368, "learning_rate": 1.2228563710991753e-06, "loss": 0.0014, "step": 194510 }, { "epoch": 1.5907102261111339, "grad_norm": 0.019941430538892746, "learning_rate": 1.2223888160271053e-06, "loss": 0.001, "step": 194520 }, { "epoch": 1.590792002289733, "grad_norm": 0.04507586359977722, "learning_rate": 1.2219213379080446e-06, "loss": 0.0008, "step": 194530 }, { "epoch": 1.5908737784683322, "grad_norm": 0.014756288379430771, "learning_rate": 1.2214539367515188e-06, "loss": 0.0006, "step": 194540 }, { "epoch": 1.5909555546469314, "grad_norm": 0.0038310957606881857, "learning_rate": 1.2209866125670472e-06, "loss": 0.0008, "step": 194550 }, { "epoch": 1.5910373308255306, "grad_norm": 0.030877305194735527, "learning_rate": 1.22051936536415e-06, "loss": 0.0006, "step": 194560 }, { "epoch": 1.5911191070041297, "grad_norm": 0.010502531193196774, "learning_rate": 1.2200521951523457e-06, "loss": 0.0009, "step": 194570 }, { "epoch": 1.591200883182729, "grad_norm": 0.03997785970568657, "learning_rate": 1.2195851019411508e-06, "loss": 0.0005, "step": 194580 }, { "epoch": 1.591282659361328, "grad_norm": 0.021475471556186676, "learning_rate": 1.2191180857400798e-06, "loss": 0.0006, "step": 194590 }, { "epoch": 1.5913644355399272, "grad_norm": 0.03604944422841072, "learning_rate": 1.218651146558647e-06, "loss": 0.001, "step": 194600 }, { "epoch": 1.5914462117185264, "grad_norm": 0.0006614528829231858, "learning_rate": 1.2181842844063624e-06, "loss": 0.0006, "step": 194610 }, { "epoch": 1.5915279878971256, "grad_norm": 0.05952529236674309, "learning_rate": 1.21771749929274e-06, "loss": 0.0008, "step": 194620 }, { "epoch": 1.5916097640757247, "grad_norm": 0.03850485011935234, "learning_rate": 1.2172507912272857e-06, "loss": 0.0003, "step": 194630 }, { "epoch": 1.591691540254324, "grad_norm": 0.07523870468139648, "learning_rate": 1.216784160219508e-06, "loss": 0.0008, "step": 194640 }, { "epoch": 1.591773316432923, "grad_norm": 0.0009149126708507538, "learning_rate": 1.21631760627891e-06, "loss": 0.0011, "step": 194650 }, { "epoch": 1.5918550926115222, "grad_norm": 0.028864752501249313, "learning_rate": 1.215851129415e-06, "loss": 0.0004, "step": 194660 }, { "epoch": 1.5919368687901214, "grad_norm": 0.06364057213068008, "learning_rate": 1.2153847296372784e-06, "loss": 0.0012, "step": 194670 }, { "epoch": 1.5920186449687206, "grad_norm": 0.0015833261422812939, "learning_rate": 1.2149184069552461e-06, "loss": 0.0009, "step": 194680 }, { "epoch": 1.5921004211473198, "grad_norm": 0.017385613173246384, "learning_rate": 1.2144521613784022e-06, "loss": 0.0009, "step": 194690 }, { "epoch": 1.592182197325919, "grad_norm": 0.013741201721131802, "learning_rate": 1.213985992916245e-06, "loss": 0.0011, "step": 194700 }, { "epoch": 1.592263973504518, "grad_norm": 0.015322468243539333, "learning_rate": 1.2135199015782707e-06, "loss": 0.0011, "step": 194710 }, { "epoch": 1.5923457496831173, "grad_norm": 0.06880184262990952, "learning_rate": 1.213053887373974e-06, "loss": 0.0014, "step": 194720 }, { "epoch": 1.5924275258617164, "grad_norm": 0.03761113062500954, "learning_rate": 1.2125879503128456e-06, "loss": 0.0006, "step": 194730 }, { "epoch": 1.5925093020403156, "grad_norm": 0.014758185483515263, "learning_rate": 1.2121220904043812e-06, "loss": 0.0008, "step": 194740 }, { "epoch": 1.592591078218915, "grad_norm": 0.022682765498757362, "learning_rate": 1.211656307658069e-06, "loss": 0.0004, "step": 194750 }, { "epoch": 1.592672854397514, "grad_norm": 0.03499361127614975, "learning_rate": 1.2111906020833963e-06, "loss": 0.0003, "step": 194760 }, { "epoch": 1.5927546305761133, "grad_norm": 0.005448976065963507, "learning_rate": 1.2107249736898503e-06, "loss": 0.0007, "step": 194770 }, { "epoch": 1.5928364067547123, "grad_norm": 0.012665823101997375, "learning_rate": 1.2102594224869174e-06, "loss": 0.0008, "step": 194780 }, { "epoch": 1.5929181829333117, "grad_norm": 0.07180821150541306, "learning_rate": 1.209793948484081e-06, "loss": 0.0006, "step": 194790 }, { "epoch": 1.5929999591119106, "grad_norm": 0.04295274242758751, "learning_rate": 1.209328551690822e-06, "loss": 0.0005, "step": 194800 }, { "epoch": 1.59308173529051, "grad_norm": 0.011484534479677677, "learning_rate": 1.208863232116622e-06, "loss": 0.0007, "step": 194810 }, { "epoch": 1.593163511469109, "grad_norm": 0.0309302918612957, "learning_rate": 1.2083979897709596e-06, "loss": 0.0006, "step": 194820 }, { "epoch": 1.5932452876477083, "grad_norm": 0.015556450001895428, "learning_rate": 1.207932824663312e-06, "loss": 0.0013, "step": 194830 }, { "epoch": 1.5933270638263073, "grad_norm": 0.04358702898025513, "learning_rate": 1.2074677368031557e-06, "loss": 0.0007, "step": 194840 }, { "epoch": 1.5934088400049067, "grad_norm": 0.012626494280993938, "learning_rate": 1.2070027261999622e-06, "loss": 0.0007, "step": 194850 }, { "epoch": 1.5934906161835056, "grad_norm": 0.04582529515028, "learning_rate": 1.2065377928632078e-06, "loss": 0.0006, "step": 194860 }, { "epoch": 1.593572392362105, "grad_norm": 0.038254264742136, "learning_rate": 1.2060729368023622e-06, "loss": 0.0008, "step": 194870 }, { "epoch": 1.593654168540704, "grad_norm": 0.001613483764231205, "learning_rate": 1.2056081580268947e-06, "loss": 0.0006, "step": 194880 }, { "epoch": 1.5937359447193034, "grad_norm": 0.045026492327451706, "learning_rate": 1.205143456546272e-06, "loss": 0.0006, "step": 194890 }, { "epoch": 1.5938177208979023, "grad_norm": 0.019636213779449463, "learning_rate": 1.2046788323699632e-06, "loss": 0.0011, "step": 194900 }, { "epoch": 1.5938994970765017, "grad_norm": 0.016678646206855774, "learning_rate": 1.2042142855074313e-06, "loss": 0.0006, "step": 194910 }, { "epoch": 1.5939812732551006, "grad_norm": 0.01731947995722294, "learning_rate": 1.2037498159681394e-06, "loss": 0.0009, "step": 194920 }, { "epoch": 1.5940630494337, "grad_norm": 0.01819528080523014, "learning_rate": 1.20328542376155e-06, "loss": 0.0005, "step": 194930 }, { "epoch": 1.594144825612299, "grad_norm": 0.02423279546201229, "learning_rate": 1.2028211088971225e-06, "loss": 0.0004, "step": 194940 }, { "epoch": 1.5942266017908984, "grad_norm": 0.004981760401278734, "learning_rate": 1.2023568713843158e-06, "loss": 0.0007, "step": 194950 }, { "epoch": 1.5943083779694975, "grad_norm": 0.004641168750822544, "learning_rate": 1.2018927112325862e-06, "loss": 0.001, "step": 194960 }, { "epoch": 1.5943901541480967, "grad_norm": 0.007642118260264397, "learning_rate": 1.2014286284513882e-06, "loss": 0.0007, "step": 194970 }, { "epoch": 1.5944719303266959, "grad_norm": 0.0050567686557769775, "learning_rate": 1.2009646230501775e-06, "loss": 0.0012, "step": 194980 }, { "epoch": 1.594553706505295, "grad_norm": 0.022782690823078156, "learning_rate": 1.2005006950384062e-06, "loss": 0.0022, "step": 194990 }, { "epoch": 1.5946354826838942, "grad_norm": 0.008644385263323784, "learning_rate": 1.2000368444255233e-06, "loss": 0.0004, "step": 195000 }, { "epoch": 1.5947172588624934, "grad_norm": 0.058634400367736816, "learning_rate": 1.199573071220978e-06, "loss": 0.0007, "step": 195010 }, { "epoch": 1.5947990350410925, "grad_norm": 0.013931293040513992, "learning_rate": 1.1991093754342193e-06, "loss": 0.0013, "step": 195020 }, { "epoch": 1.5948808112196917, "grad_norm": 0.03900153934955597, "learning_rate": 1.1986457570746923e-06, "loss": 0.0005, "step": 195030 }, { "epoch": 1.5949625873982909, "grad_norm": 0.0036404835991561413, "learning_rate": 1.1981822161518413e-06, "loss": 0.0006, "step": 195040 }, { "epoch": 1.59504436357689, "grad_norm": 0.03752801939845085, "learning_rate": 1.1977187526751088e-06, "loss": 0.0005, "step": 195050 }, { "epoch": 1.5951261397554892, "grad_norm": 0.08635278046131134, "learning_rate": 1.1972553666539355e-06, "loss": 0.0005, "step": 195060 }, { "epoch": 1.5952079159340884, "grad_norm": 0.06872253865003586, "learning_rate": 1.1967920580977621e-06, "loss": 0.0008, "step": 195070 }, { "epoch": 1.5952896921126876, "grad_norm": 0.02144693210721016, "learning_rate": 1.1963288270160256e-06, "loss": 0.0007, "step": 195080 }, { "epoch": 1.5953714682912867, "grad_norm": 0.0221950002014637, "learning_rate": 1.1958656734181613e-06, "loss": 0.001, "step": 195090 }, { "epoch": 1.595453244469886, "grad_norm": 0.017178064212203026, "learning_rate": 1.1954025973136063e-06, "loss": 0.0009, "step": 195100 }, { "epoch": 1.595535020648485, "grad_norm": 0.0615902915596962, "learning_rate": 1.1949395987117934e-06, "loss": 0.0008, "step": 195110 }, { "epoch": 1.5956167968270842, "grad_norm": 0.05098316818475723, "learning_rate": 1.194476677622154e-06, "loss": 0.0006, "step": 195120 }, { "epoch": 1.5956985730056834, "grad_norm": 0.08367332071065903, "learning_rate": 1.1940138340541158e-06, "loss": 0.0006, "step": 195130 }, { "epoch": 1.5957803491842826, "grad_norm": 0.009344183839857578, "learning_rate": 1.1935510680171114e-06, "loss": 0.0005, "step": 195140 }, { "epoch": 1.5958621253628817, "grad_norm": 0.040207378566265106, "learning_rate": 1.1930883795205656e-06, "loss": 0.0003, "step": 195150 }, { "epoch": 1.595943901541481, "grad_norm": 0.022529996931552887, "learning_rate": 1.192625768573904e-06, "loss": 0.0005, "step": 195160 }, { "epoch": 1.59602567772008, "grad_norm": 0.06705108284950256, "learning_rate": 1.1921632351865503e-06, "loss": 0.0008, "step": 195170 }, { "epoch": 1.5961074538986795, "grad_norm": 0.1233815997838974, "learning_rate": 1.191700779367927e-06, "loss": 0.0003, "step": 195180 }, { "epoch": 1.5961892300772784, "grad_norm": 0.027869608253240585, "learning_rate": 1.191238401127454e-06, "loss": 0.0004, "step": 195190 }, { "epoch": 1.5962710062558778, "grad_norm": 0.036225106567144394, "learning_rate": 1.1907761004745504e-06, "loss": 0.0012, "step": 195200 }, { "epoch": 1.5963527824344768, "grad_norm": 0.026403842493891716, "learning_rate": 1.190313877418633e-06, "loss": 0.0006, "step": 195210 }, { "epoch": 1.5964345586130761, "grad_norm": 0.015902146697044373, "learning_rate": 1.18985173196912e-06, "loss": 0.0016, "step": 195220 }, { "epoch": 1.596516334791675, "grad_norm": 0.008543531410396099, "learning_rate": 1.189389664135424e-06, "loss": 0.0004, "step": 195230 }, { "epoch": 1.5965981109702745, "grad_norm": 0.051057081669569016, "learning_rate": 1.1889276739269578e-06, "loss": 0.0011, "step": 195240 }, { "epoch": 1.5966798871488734, "grad_norm": 0.0005474321660585701, "learning_rate": 1.188465761353132e-06, "loss": 0.0004, "step": 195250 }, { "epoch": 1.5967616633274728, "grad_norm": 0.06394517421722412, "learning_rate": 1.1880039264233572e-06, "loss": 0.001, "step": 195260 }, { "epoch": 1.5968434395060718, "grad_norm": 0.0189034603536129, "learning_rate": 1.1875421691470418e-06, "loss": 0.0002, "step": 195270 }, { "epoch": 1.5969252156846712, "grad_norm": 0.049278538674116135, "learning_rate": 1.1870804895335913e-06, "loss": 0.0004, "step": 195280 }, { "epoch": 1.59700699186327, "grad_norm": 0.0010868918616324663, "learning_rate": 1.18661888759241e-06, "loss": 0.0013, "step": 195290 }, { "epoch": 1.5970887680418695, "grad_norm": 0.049671564251184464, "learning_rate": 1.1861573633329022e-06, "loss": 0.0004, "step": 195300 }, { "epoch": 1.5971705442204684, "grad_norm": 0.011982631869614124, "learning_rate": 1.1856959167644683e-06, "loss": 0.0006, "step": 195310 }, { "epoch": 1.5972523203990678, "grad_norm": 0.02306727133691311, "learning_rate": 1.1852345478965094e-06, "loss": 0.0009, "step": 195320 }, { "epoch": 1.5973340965776668, "grad_norm": 0.0017659153090789914, "learning_rate": 1.1847732567384224e-06, "loss": 0.0005, "step": 195330 }, { "epoch": 1.5974158727562662, "grad_norm": 0.0574578121304512, "learning_rate": 1.1843120432996063e-06, "loss": 0.0015, "step": 195340 }, { "epoch": 1.5974976489348651, "grad_norm": 0.038955215364694595, "learning_rate": 1.1838509075894556e-06, "loss": 0.0005, "step": 195350 }, { "epoch": 1.5975794251134645, "grad_norm": 0.04142266511917114, "learning_rate": 1.183389849617364e-06, "loss": 0.0008, "step": 195360 }, { "epoch": 1.5976612012920635, "grad_norm": 0.02509935200214386, "learning_rate": 1.1829288693927215e-06, "loss": 0.0007, "step": 195370 }, { "epoch": 1.5977429774706629, "grad_norm": 0.05241676792502403, "learning_rate": 1.1824679669249229e-06, "loss": 0.0005, "step": 195380 }, { "epoch": 1.597824753649262, "grad_norm": 0.021974356845021248, "learning_rate": 1.1820071422233543e-06, "loss": 0.0009, "step": 195390 }, { "epoch": 1.5979065298278612, "grad_norm": 0.022247936576604843, "learning_rate": 1.1815463952974033e-06, "loss": 0.001, "step": 195400 }, { "epoch": 1.5979883060064604, "grad_norm": 0.0031376502010971308, "learning_rate": 1.1810857261564562e-06, "loss": 0.0005, "step": 195410 }, { "epoch": 1.5980700821850595, "grad_norm": 0.09988590329885483, "learning_rate": 1.1806251348098974e-06, "loss": 0.0007, "step": 195420 }, { "epoch": 1.5981518583636587, "grad_norm": 0.034033071249723434, "learning_rate": 1.1801646212671091e-06, "loss": 0.0005, "step": 195430 }, { "epoch": 1.5982336345422579, "grad_norm": 0.008131668902933598, "learning_rate": 1.1797041855374724e-06, "loss": 0.0004, "step": 195440 }, { "epoch": 1.598315410720857, "grad_norm": 0.053137462586164474, "learning_rate": 1.1792438276303652e-06, "loss": 0.0007, "step": 195450 }, { "epoch": 1.5983971868994562, "grad_norm": 0.08497091382741928, "learning_rate": 1.1787835475551678e-06, "loss": 0.0009, "step": 195460 }, { "epoch": 1.5984789630780554, "grad_norm": 0.03705524280667305, "learning_rate": 1.1783233453212562e-06, "loss": 0.0013, "step": 195470 }, { "epoch": 1.5985607392566545, "grad_norm": 0.03244364634156227, "learning_rate": 1.1778632209380047e-06, "loss": 0.001, "step": 195480 }, { "epoch": 1.5986425154352537, "grad_norm": 0.00021292699966579676, "learning_rate": 1.177403174414784e-06, "loss": 0.0006, "step": 195490 }, { "epoch": 1.5987242916138529, "grad_norm": 0.16036972403526306, "learning_rate": 1.1769432057609698e-06, "loss": 0.0006, "step": 195500 }, { "epoch": 1.598806067792452, "grad_norm": 0.036613572388887405, "learning_rate": 1.17648331498593e-06, "loss": 0.0007, "step": 195510 }, { "epoch": 1.5988878439710512, "grad_norm": 0.016743235290050507, "learning_rate": 1.176023502099033e-06, "loss": 0.0015, "step": 195520 }, { "epoch": 1.5989696201496504, "grad_norm": 0.012456401251256466, "learning_rate": 1.1755637671096454e-06, "loss": 0.0005, "step": 195530 }, { "epoch": 1.5990513963282496, "grad_norm": 0.022135667502880096, "learning_rate": 1.175104110027132e-06, "loss": 0.0006, "step": 195540 }, { "epoch": 1.5991331725068487, "grad_norm": 0.04412362724542618, "learning_rate": 1.174644530860858e-06, "loss": 0.0006, "step": 195550 }, { "epoch": 1.599214948685448, "grad_norm": 0.017682354897260666, "learning_rate": 1.1741850296201835e-06, "loss": 0.0005, "step": 195560 }, { "epoch": 1.599296724864047, "grad_norm": 0.015692392364144325, "learning_rate": 1.1737256063144686e-06, "loss": 0.0029, "step": 195570 }, { "epoch": 1.5993785010426462, "grad_norm": 0.008360913023352623, "learning_rate": 1.1732662609530743e-06, "loss": 0.0011, "step": 195580 }, { "epoch": 1.5994602772212454, "grad_norm": 0.0750470906496048, "learning_rate": 1.1728069935453574e-06, "loss": 0.0007, "step": 195590 }, { "epoch": 1.5995420533998446, "grad_norm": 0.03300900757312775, "learning_rate": 1.1723478041006726e-06, "loss": 0.0006, "step": 195600 }, { "epoch": 1.599623829578444, "grad_norm": 0.06314665824174881, "learning_rate": 1.1718886926283729e-06, "loss": 0.0008, "step": 195610 }, { "epoch": 1.599705605757043, "grad_norm": 0.03143303096294403, "learning_rate": 1.1714296591378138e-06, "loss": 0.0007, "step": 195620 }, { "epoch": 1.5997873819356423, "grad_norm": 0.022846531122922897, "learning_rate": 1.1709707036383444e-06, "loss": 0.0006, "step": 195630 }, { "epoch": 1.5998691581142412, "grad_norm": 0.012601069174706936, "learning_rate": 1.1705118261393138e-06, "loss": 0.0007, "step": 195640 }, { "epoch": 1.5999509342928406, "grad_norm": 0.020476868376135826, "learning_rate": 1.1700530266500704e-06, "loss": 0.0003, "step": 195650 }, { "epoch": 1.6000327104714396, "grad_norm": 0.03874928504228592, "learning_rate": 1.1695943051799602e-06, "loss": 0.0005, "step": 195660 }, { "epoch": 1.600114486650039, "grad_norm": 0.06459947675466537, "learning_rate": 1.169135661738327e-06, "loss": 0.0009, "step": 195670 }, { "epoch": 1.600196262828638, "grad_norm": 0.07761082053184509, "learning_rate": 1.1686770963345152e-06, "loss": 0.0005, "step": 195680 }, { "epoch": 1.6002780390072373, "grad_norm": 0.03447531908750534, "learning_rate": 1.168218608977863e-06, "loss": 0.0007, "step": 195690 }, { "epoch": 1.6003598151858363, "grad_norm": 0.029959173873066902, "learning_rate": 1.1677601996777138e-06, "loss": 0.0004, "step": 195700 }, { "epoch": 1.6004415913644356, "grad_norm": 0.023364443331956863, "learning_rate": 1.1673018684434046e-06, "loss": 0.0002, "step": 195710 }, { "epoch": 1.6005233675430346, "grad_norm": 0.0017302343621850014, "learning_rate": 1.1668436152842716e-06, "loss": 0.0005, "step": 195720 }, { "epoch": 1.600605143721634, "grad_norm": 0.027367904782295227, "learning_rate": 1.1663854402096482e-06, "loss": 0.0008, "step": 195730 }, { "epoch": 1.600686919900233, "grad_norm": 0.025701701641082764, "learning_rate": 1.1659273432288714e-06, "loss": 0.0008, "step": 195740 }, { "epoch": 1.6007686960788323, "grad_norm": 0.03188355639576912, "learning_rate": 1.1654693243512705e-06, "loss": 0.0009, "step": 195750 }, { "epoch": 1.6008504722574313, "grad_norm": 0.16519160568714142, "learning_rate": 1.165011383586177e-06, "loss": 0.0025, "step": 195760 }, { "epoch": 1.6009322484360307, "grad_norm": 0.03292502090334892, "learning_rate": 1.1645535209429188e-06, "loss": 0.0013, "step": 195770 }, { "epoch": 1.6010140246146296, "grad_norm": 0.05241323634982109, "learning_rate": 1.1640957364308225e-06, "loss": 0.0005, "step": 195780 }, { "epoch": 1.601095800793229, "grad_norm": 0.06496323645114899, "learning_rate": 1.1636380300592142e-06, "loss": 0.0007, "step": 195790 }, { "epoch": 1.601177576971828, "grad_norm": 0.04224396124482155, "learning_rate": 1.1631804018374177e-06, "loss": 0.0011, "step": 195800 }, { "epoch": 1.6012593531504273, "grad_norm": 0.05455843731760979, "learning_rate": 1.1627228517747536e-06, "loss": 0.0007, "step": 195810 }, { "epoch": 1.6013411293290265, "grad_norm": 0.09971033036708832, "learning_rate": 1.1622653798805456e-06, "loss": 0.0012, "step": 195820 }, { "epoch": 1.6014229055076257, "grad_norm": 0.003122196067124605, "learning_rate": 1.1618079861641114e-06, "loss": 0.0009, "step": 195830 }, { "epoch": 1.6015046816862248, "grad_norm": 0.045397479087114334, "learning_rate": 1.1613506706347682e-06, "loss": 0.0013, "step": 195840 }, { "epoch": 1.601586457864824, "grad_norm": 0.03179997205734253, "learning_rate": 1.1608934333018306e-06, "loss": 0.0007, "step": 195850 }, { "epoch": 1.6016682340434232, "grad_norm": 0.043886058032512665, "learning_rate": 1.160436274174616e-06, "loss": 0.0005, "step": 195860 }, { "epoch": 1.6017500102220223, "grad_norm": 0.07931412011384964, "learning_rate": 1.1599791932624355e-06, "loss": 0.0008, "step": 195870 }, { "epoch": 1.6018317864006215, "grad_norm": 0.009463589638471603, "learning_rate": 1.1595221905745995e-06, "loss": 0.0007, "step": 195880 }, { "epoch": 1.6019135625792207, "grad_norm": 0.013932178728282452, "learning_rate": 1.1590652661204204e-06, "loss": 0.0005, "step": 195890 }, { "epoch": 1.6019953387578199, "grad_norm": 0.044793762266635895, "learning_rate": 1.1586084199092007e-06, "loss": 0.0012, "step": 195900 }, { "epoch": 1.602077114936419, "grad_norm": 0.08656784892082214, "learning_rate": 1.1581516519502511e-06, "loss": 0.0006, "step": 195910 }, { "epoch": 1.6021588911150182, "grad_norm": 0.014792989939451218, "learning_rate": 1.1576949622528754e-06, "loss": 0.0004, "step": 195920 }, { "epoch": 1.6022406672936174, "grad_norm": 0.05572449788451195, "learning_rate": 1.1572383508263769e-06, "loss": 0.0005, "step": 195930 }, { "epoch": 1.6023224434722165, "grad_norm": 0.0064007858745753765, "learning_rate": 1.1567818176800549e-06, "loss": 0.0093, "step": 195940 }, { "epoch": 1.6024042196508157, "grad_norm": 0.05320397764444351, "learning_rate": 1.1563253628232124e-06, "loss": 0.001, "step": 195950 }, { "epoch": 1.6024859958294149, "grad_norm": 0.031998783349990845, "learning_rate": 1.1558689862651473e-06, "loss": 0.0007, "step": 195960 }, { "epoch": 1.602567772008014, "grad_norm": 0.0355013832449913, "learning_rate": 1.155412688015155e-06, "loss": 0.0008, "step": 195970 }, { "epoch": 1.6026495481866132, "grad_norm": 0.002113675931468606, "learning_rate": 1.1549564680825298e-06, "loss": 0.0009, "step": 195980 }, { "epoch": 1.6027313243652124, "grad_norm": 0.055541981011629105, "learning_rate": 1.1545003264765687e-06, "loss": 0.0008, "step": 195990 }, { "epoch": 1.6028131005438115, "grad_norm": 0.06226522848010063, "learning_rate": 1.1540442632065613e-06, "loss": 0.0007, "step": 196000 }, { "epoch": 1.6028948767224107, "grad_norm": 0.006676344200968742, "learning_rate": 1.1535882782817997e-06, "loss": 0.0007, "step": 196010 }, { "epoch": 1.60297665290101, "grad_norm": 0.012232803739607334, "learning_rate": 1.153132371711569e-06, "loss": 0.0008, "step": 196020 }, { "epoch": 1.603058429079609, "grad_norm": 0.002741811331361532, "learning_rate": 1.1526765435051602e-06, "loss": 0.0008, "step": 196030 }, { "epoch": 1.6031402052582084, "grad_norm": 0.033131469041109085, "learning_rate": 1.152220793671857e-06, "loss": 0.0008, "step": 196040 }, { "epoch": 1.6032219814368074, "grad_norm": 0.09233128279447556, "learning_rate": 1.1517651222209441e-06, "loss": 0.0007, "step": 196050 }, { "epoch": 1.6033037576154068, "grad_norm": 0.00845398660749197, "learning_rate": 1.1513095291617022e-06, "loss": 0.0005, "step": 196060 }, { "epoch": 1.6033855337940057, "grad_norm": 0.034837692975997925, "learning_rate": 1.1508540145034153e-06, "loss": 0.0004, "step": 196070 }, { "epoch": 1.6034673099726051, "grad_norm": 0.033395566046237946, "learning_rate": 1.1503985782553607e-06, "loss": 0.0004, "step": 196080 }, { "epoch": 1.603549086151204, "grad_norm": 0.02539806067943573, "learning_rate": 1.1499432204268157e-06, "loss": 0.0013, "step": 196090 }, { "epoch": 1.6036308623298035, "grad_norm": 0.001263473299331963, "learning_rate": 1.1494879410270559e-06, "loss": 0.0011, "step": 196100 }, { "epoch": 1.6037126385084024, "grad_norm": 0.0035332508850842714, "learning_rate": 1.1490327400653578e-06, "loss": 0.0011, "step": 196110 }, { "epoch": 1.6037944146870018, "grad_norm": 0.15417762100696564, "learning_rate": 1.1485776175509934e-06, "loss": 0.001, "step": 196120 }, { "epoch": 1.6038761908656007, "grad_norm": 0.019643498584628105, "learning_rate": 1.1481225734932344e-06, "loss": 0.0002, "step": 196130 }, { "epoch": 1.6039579670442001, "grad_norm": 0.027524927631020546, "learning_rate": 1.1476676079013472e-06, "loss": 0.0006, "step": 196140 }, { "epoch": 1.604039743222799, "grad_norm": 0.05242225527763367, "learning_rate": 1.1472127207846034e-06, "loss": 0.0008, "step": 196150 }, { "epoch": 1.6041215194013985, "grad_norm": 0.03761983662843704, "learning_rate": 1.1467579121522681e-06, "loss": 0.0011, "step": 196160 }, { "epoch": 1.6042032955799974, "grad_norm": 0.0750960186123848, "learning_rate": 1.1463031820136061e-06, "loss": 0.0008, "step": 196170 }, { "epoch": 1.6042850717585968, "grad_norm": 0.03485902398824692, "learning_rate": 1.1458485303778794e-06, "loss": 0.0008, "step": 196180 }, { "epoch": 1.6043668479371958, "grad_norm": 0.08049527555704117, "learning_rate": 1.145393957254352e-06, "loss": 0.0005, "step": 196190 }, { "epoch": 1.6044486241157951, "grad_norm": 0.05832784250378609, "learning_rate": 1.1449394626522836e-06, "loss": 0.001, "step": 196200 }, { "epoch": 1.604530400294394, "grad_norm": 0.043575357645750046, "learning_rate": 1.1444850465809316e-06, "loss": 0.0012, "step": 196210 }, { "epoch": 1.6046121764729935, "grad_norm": 0.0014252159744501114, "learning_rate": 1.1440307090495517e-06, "loss": 0.0003, "step": 196220 }, { "epoch": 1.6046939526515924, "grad_norm": 0.05856078863143921, "learning_rate": 1.1435764500674019e-06, "loss": 0.0007, "step": 196230 }, { "epoch": 1.6047757288301918, "grad_norm": 0.05177058279514313, "learning_rate": 1.1431222696437344e-06, "loss": 0.0014, "step": 196240 }, { "epoch": 1.604857505008791, "grad_norm": 0.0013673536013811827, "learning_rate": 1.1426681677878026e-06, "loss": 0.0003, "step": 196250 }, { "epoch": 1.6049392811873902, "grad_norm": 0.0022814280819147825, "learning_rate": 1.142214144508853e-06, "loss": 0.0044, "step": 196260 }, { "epoch": 1.6050210573659893, "grad_norm": 0.056911665946245193, "learning_rate": 1.1417601998161386e-06, "loss": 0.0007, "step": 196270 }, { "epoch": 1.6051028335445885, "grad_norm": 0.037810541689395905, "learning_rate": 1.141306333718905e-06, "loss": 0.001, "step": 196280 }, { "epoch": 1.6051846097231877, "grad_norm": 0.011468390002846718, "learning_rate": 1.1408525462263975e-06, "loss": 0.0005, "step": 196290 }, { "epoch": 1.6052663859017868, "grad_norm": 0.05755418911576271, "learning_rate": 1.1403988373478597e-06, "loss": 0.0006, "step": 196300 }, { "epoch": 1.605348162080386, "grad_norm": 0.033144623041152954, "learning_rate": 1.1399452070925359e-06, "loss": 0.0006, "step": 196310 }, { "epoch": 1.6054299382589852, "grad_norm": 0.0024271283764392138, "learning_rate": 1.1394916554696655e-06, "loss": 0.0005, "step": 196320 }, { "epoch": 1.6055117144375843, "grad_norm": 0.0030517687555402517, "learning_rate": 1.1390381824884888e-06, "loss": 0.0005, "step": 196330 }, { "epoch": 1.6055934906161835, "grad_norm": 0.02355499006807804, "learning_rate": 1.1385847881582407e-06, "loss": 0.0007, "step": 196340 }, { "epoch": 1.6056752667947827, "grad_norm": 0.010731623508036137, "learning_rate": 1.138131472488161e-06, "loss": 0.0007, "step": 196350 }, { "epoch": 1.6057570429733818, "grad_norm": 0.030112603679299355, "learning_rate": 1.1376782354874821e-06, "loss": 0.0006, "step": 196360 }, { "epoch": 1.605838819151981, "grad_norm": 0.04297583922743797, "learning_rate": 1.1372250771654381e-06, "loss": 0.0005, "step": 196370 }, { "epoch": 1.6059205953305802, "grad_norm": 0.019934045150876045, "learning_rate": 1.136771997531257e-06, "loss": 0.0005, "step": 196380 }, { "epoch": 1.6060023715091793, "grad_norm": 0.06690534204244614, "learning_rate": 1.1363189965941713e-06, "loss": 0.0004, "step": 196390 }, { "epoch": 1.6060841476877785, "grad_norm": 0.02299579232931137, "learning_rate": 1.135866074363408e-06, "loss": 0.0016, "step": 196400 }, { "epoch": 1.6061659238663777, "grad_norm": 0.0369059219956398, "learning_rate": 1.1354132308481941e-06, "loss": 0.0004, "step": 196410 }, { "epoch": 1.6062477000449769, "grad_norm": 0.02354622446000576, "learning_rate": 1.134960466057753e-06, "loss": 0.0005, "step": 196420 }, { "epoch": 1.606329476223576, "grad_norm": 0.03678227588534355, "learning_rate": 1.1345077800013093e-06, "loss": 0.0007, "step": 196430 }, { "epoch": 1.6064112524021752, "grad_norm": 0.022286050021648407, "learning_rate": 1.134055172688085e-06, "loss": 0.0004, "step": 196440 }, { "epoch": 1.6064930285807746, "grad_norm": 0.058671239763498306, "learning_rate": 1.1336026441272986e-06, "loss": 0.0003, "step": 196450 }, { "epoch": 1.6065748047593735, "grad_norm": 0.0009708469151519239, "learning_rate": 1.133150194328168e-06, "loss": 0.0002, "step": 196460 }, { "epoch": 1.606656580937973, "grad_norm": 0.03836996108293533, "learning_rate": 1.1326978232999126e-06, "loss": 0.0007, "step": 196470 }, { "epoch": 1.6067383571165719, "grad_norm": 0.030328337103128433, "learning_rate": 1.1322455310517461e-06, "loss": 0.0007, "step": 196480 }, { "epoch": 1.6068201332951713, "grad_norm": 0.04082777351140976, "learning_rate": 1.131793317592883e-06, "loss": 0.0006, "step": 196490 }, { "epoch": 1.6069019094737702, "grad_norm": 0.1261991709470749, "learning_rate": 1.1313411829325316e-06, "loss": 0.0008, "step": 196500 }, { "epoch": 1.6069836856523696, "grad_norm": 0.006852121092379093, "learning_rate": 1.1308891270799066e-06, "loss": 0.0004, "step": 196510 }, { "epoch": 1.6070654618309685, "grad_norm": 0.009618205018341541, "learning_rate": 1.1304371500442146e-06, "loss": 0.0005, "step": 196520 }, { "epoch": 1.607147238009568, "grad_norm": 0.025611622259020805, "learning_rate": 1.1299852518346639e-06, "loss": 0.0008, "step": 196530 }, { "epoch": 1.6072290141881669, "grad_norm": 0.021765433251857758, "learning_rate": 1.1295334324604574e-06, "loss": 0.0005, "step": 196540 }, { "epoch": 1.6073107903667663, "grad_norm": 0.01670807972550392, "learning_rate": 1.1290816919308023e-06, "loss": 0.0007, "step": 196550 }, { "epoch": 1.6073925665453652, "grad_norm": 0.048263274133205414, "learning_rate": 1.1286300302548997e-06, "loss": 0.0007, "step": 196560 }, { "epoch": 1.6074743427239646, "grad_norm": 0.09865642338991165, "learning_rate": 1.12817844744195e-06, "loss": 0.0007, "step": 196570 }, { "epoch": 1.6075561189025636, "grad_norm": 0.02268582209944725, "learning_rate": 1.127726943501151e-06, "loss": 0.0007, "step": 196580 }, { "epoch": 1.607637895081163, "grad_norm": 0.02878551371395588, "learning_rate": 1.1272755184417034e-06, "loss": 0.001, "step": 196590 }, { "epoch": 1.607719671259762, "grad_norm": 0.030664868652820587, "learning_rate": 1.1268241722728018e-06, "loss": 0.0006, "step": 196600 }, { "epoch": 1.6078014474383613, "grad_norm": 0.0018022670410573483, "learning_rate": 1.126372905003641e-06, "loss": 0.0006, "step": 196610 }, { "epoch": 1.6078832236169602, "grad_norm": 0.01721857488155365, "learning_rate": 1.12592171664341e-06, "loss": 0.0004, "step": 196620 }, { "epoch": 1.6079649997955596, "grad_norm": 0.004460847936570644, "learning_rate": 1.1254706072013038e-06, "loss": 0.0004, "step": 196630 }, { "epoch": 1.6080467759741586, "grad_norm": 0.07017232477664948, "learning_rate": 1.1250195766865112e-06, "loss": 0.0008, "step": 196640 }, { "epoch": 1.608128552152758, "grad_norm": 0.0013367810752242804, "learning_rate": 1.1245686251082195e-06, "loss": 0.0005, "step": 196650 }, { "epoch": 1.6082103283313571, "grad_norm": 0.029481306672096252, "learning_rate": 1.124117752475613e-06, "loss": 0.0003, "step": 196660 }, { "epoch": 1.6082921045099563, "grad_norm": 0.007380173075944185, "learning_rate": 1.1236669587978804e-06, "loss": 0.0004, "step": 196670 }, { "epoch": 1.6083738806885555, "grad_norm": 0.03158069774508476, "learning_rate": 1.1232162440842026e-06, "loss": 0.0004, "step": 196680 }, { "epoch": 1.6084556568671546, "grad_norm": 0.00998138077557087, "learning_rate": 1.122765608343761e-06, "loss": 0.0005, "step": 196690 }, { "epoch": 1.6085374330457538, "grad_norm": 0.018808746710419655, "learning_rate": 1.122315051585734e-06, "loss": 0.0003, "step": 196700 }, { "epoch": 1.608619209224353, "grad_norm": 0.0010982482926920056, "learning_rate": 1.121864573819303e-06, "loss": 0.0016, "step": 196710 }, { "epoch": 1.6087009854029521, "grad_norm": 0.0072867083363235, "learning_rate": 1.1214141750536428e-06, "loss": 0.001, "step": 196720 }, { "epoch": 1.6087827615815513, "grad_norm": 0.030239101499319077, "learning_rate": 1.12096385529793e-06, "loss": 0.0008, "step": 196730 }, { "epoch": 1.6088645377601505, "grad_norm": 0.015576415695250034, "learning_rate": 1.1205136145613338e-06, "loss": 0.0007, "step": 196740 }, { "epoch": 1.6089463139387497, "grad_norm": 0.04484008997678757, "learning_rate": 1.1200634528530296e-06, "loss": 0.0004, "step": 196750 }, { "epoch": 1.6090280901173488, "grad_norm": 0.02631498873233795, "learning_rate": 1.119613370182187e-06, "loss": 0.0011, "step": 196760 }, { "epoch": 1.609109866295948, "grad_norm": 0.08141429722309113, "learning_rate": 1.1191633665579744e-06, "loss": 0.0012, "step": 196770 }, { "epoch": 1.6091916424745472, "grad_norm": 0.0038992976769804955, "learning_rate": 1.1187134419895568e-06, "loss": 0.0006, "step": 196780 }, { "epoch": 1.6092734186531463, "grad_norm": 0.042066894471645355, "learning_rate": 1.1182635964861027e-06, "loss": 0.0006, "step": 196790 }, { "epoch": 1.6093551948317455, "grad_norm": 0.01767835207283497, "learning_rate": 1.1178138300567748e-06, "loss": 0.0005, "step": 196800 }, { "epoch": 1.6094369710103447, "grad_norm": 0.08643482625484467, "learning_rate": 1.117364142710734e-06, "loss": 0.0008, "step": 196810 }, { "epoch": 1.6095187471889438, "grad_norm": 0.05582801252603531, "learning_rate": 1.1169145344571408e-06, "loss": 0.0005, "step": 196820 }, { "epoch": 1.609600523367543, "grad_norm": 0.00857907347381115, "learning_rate": 1.1164650053051561e-06, "loss": 0.0004, "step": 196830 }, { "epoch": 1.6096822995461422, "grad_norm": 0.00411970354616642, "learning_rate": 1.1160155552639357e-06, "loss": 0.0011, "step": 196840 }, { "epoch": 1.6097640757247413, "grad_norm": 0.036392636597156525, "learning_rate": 1.1155661843426374e-06, "loss": 0.0005, "step": 196850 }, { "epoch": 1.6098458519033405, "grad_norm": 0.05877270549535751, "learning_rate": 1.1151168925504103e-06, "loss": 0.0008, "step": 196860 }, { "epoch": 1.6099276280819397, "grad_norm": 0.023140378296375275, "learning_rate": 1.1146676798964112e-06, "loss": 0.0011, "step": 196870 }, { "epoch": 1.610009404260539, "grad_norm": 0.020964322611689568, "learning_rate": 1.1142185463897897e-06, "loss": 0.0006, "step": 196880 }, { "epoch": 1.610091180439138, "grad_norm": 0.0018675620667636395, "learning_rate": 1.1137694920396946e-06, "loss": 0.0004, "step": 196890 }, { "epoch": 1.6101729566177374, "grad_norm": 0.04230745881795883, "learning_rate": 1.113320516855273e-06, "loss": 0.0011, "step": 196900 }, { "epoch": 1.6102547327963364, "grad_norm": 0.02102723717689514, "learning_rate": 1.1128716208456725e-06, "loss": 0.0012, "step": 196910 }, { "epoch": 1.6103365089749357, "grad_norm": 0.04339739307761192, "learning_rate": 1.1124228040200375e-06, "loss": 0.0011, "step": 196920 }, { "epoch": 1.6104182851535347, "grad_norm": 0.026468932628631592, "learning_rate": 1.1119740663875096e-06, "loss": 0.0012, "step": 196930 }, { "epoch": 1.610500061332134, "grad_norm": 0.1269412487745285, "learning_rate": 1.1115254079572286e-06, "loss": 0.0006, "step": 196940 }, { "epoch": 1.610581837510733, "grad_norm": 0.050155483186244965, "learning_rate": 1.1110768287383373e-06, "loss": 0.0005, "step": 196950 }, { "epoch": 1.6106636136893324, "grad_norm": 0.05181679129600525, "learning_rate": 1.110628328739972e-06, "loss": 0.0005, "step": 196960 }, { "epoch": 1.6107453898679314, "grad_norm": 0.0344984196126461, "learning_rate": 1.1101799079712705e-06, "loss": 0.0006, "step": 196970 }, { "epoch": 1.6108271660465308, "grad_norm": 0.03100407123565674, "learning_rate": 1.1097315664413638e-06, "loss": 0.0008, "step": 196980 }, { "epoch": 1.6109089422251297, "grad_norm": 0.01773366704583168, "learning_rate": 1.1092833041593882e-06, "loss": 0.0008, "step": 196990 }, { "epoch": 1.610990718403729, "grad_norm": 0.03644203767180443, "learning_rate": 1.1088351211344744e-06, "loss": 0.0005, "step": 197000 }, { "epoch": 1.611072494582328, "grad_norm": 0.03646601736545563, "learning_rate": 1.1083870173757515e-06, "loss": 0.0005, "step": 197010 }, { "epoch": 1.6111542707609274, "grad_norm": 0.023686744272708893, "learning_rate": 1.107938992892348e-06, "loss": 0.0002, "step": 197020 }, { "epoch": 1.6112360469395264, "grad_norm": 0.05817105993628502, "learning_rate": 1.1074910476933913e-06, "loss": 0.0009, "step": 197030 }, { "epoch": 1.6113178231181258, "grad_norm": 0.015598077327013016, "learning_rate": 1.1070431817880062e-06, "loss": 0.001, "step": 197040 }, { "epoch": 1.6113995992967247, "grad_norm": 0.018698161467909813, "learning_rate": 1.1065953951853165e-06, "loss": 0.0009, "step": 197050 }, { "epoch": 1.611481375475324, "grad_norm": 0.05161984637379646, "learning_rate": 1.1061476878944411e-06, "loss": 0.0009, "step": 197060 }, { "epoch": 1.611563151653923, "grad_norm": 0.04259558022022247, "learning_rate": 1.1057000599245038e-06, "loss": 0.0007, "step": 197070 }, { "epoch": 1.6116449278325224, "grad_norm": 0.021762438118457794, "learning_rate": 1.1052525112846224e-06, "loss": 0.0007, "step": 197080 }, { "epoch": 1.6117267040111216, "grad_norm": 0.06433241069316864, "learning_rate": 1.104805041983914e-06, "loss": 0.0007, "step": 197090 }, { "epoch": 1.6118084801897208, "grad_norm": 0.12570863962173462, "learning_rate": 1.1043576520314902e-06, "loss": 0.0015, "step": 197100 }, { "epoch": 1.61189025636832, "grad_norm": 0.05994952842593193, "learning_rate": 1.1039103414364694e-06, "loss": 0.0006, "step": 197110 }, { "epoch": 1.6119720325469191, "grad_norm": 0.029364461079239845, "learning_rate": 1.1034631102079613e-06, "loss": 0.0013, "step": 197120 }, { "epoch": 1.6120538087255183, "grad_norm": 0.03661282733082771, "learning_rate": 1.1030159583550776e-06, "loss": 0.0005, "step": 197130 }, { "epoch": 1.6121355849041175, "grad_norm": 0.031109105795621872, "learning_rate": 1.1025688858869243e-06, "loss": 0.0009, "step": 197140 }, { "epoch": 1.6122173610827166, "grad_norm": 0.027842922136187553, "learning_rate": 1.1021218928126127e-06, "loss": 0.0008, "step": 197150 }, { "epoch": 1.6122991372613158, "grad_norm": 0.11976656317710876, "learning_rate": 1.1016749791412463e-06, "loss": 0.0012, "step": 197160 }, { "epoch": 1.612380913439915, "grad_norm": 0.0518626794219017, "learning_rate": 1.1012281448819296e-06, "loss": 0.0003, "step": 197170 }, { "epoch": 1.6124626896185141, "grad_norm": 0.006882158573716879, "learning_rate": 1.1007813900437625e-06, "loss": 0.0008, "step": 197180 }, { "epoch": 1.6125444657971133, "grad_norm": 0.05844832956790924, "learning_rate": 1.10033471463585e-06, "loss": 0.001, "step": 197190 }, { "epoch": 1.6126262419757125, "grad_norm": 0.08489445596933365, "learning_rate": 1.0998881186672893e-06, "loss": 0.0014, "step": 197200 }, { "epoch": 1.6127080181543116, "grad_norm": 0.031136900186538696, "learning_rate": 1.0994416021471787e-06, "loss": 0.0003, "step": 197210 }, { "epoch": 1.6127897943329108, "grad_norm": 0.015630031004548073, "learning_rate": 1.0989951650846109e-06, "loss": 0.0006, "step": 197220 }, { "epoch": 1.61287157051151, "grad_norm": 0.07330828160047531, "learning_rate": 1.0985488074886834e-06, "loss": 0.0004, "step": 197230 }, { "epoch": 1.6129533466901091, "grad_norm": 0.029930084943771362, "learning_rate": 1.0981025293684882e-06, "loss": 0.0015, "step": 197240 }, { "epoch": 1.6130351228687083, "grad_norm": 0.008663058280944824, "learning_rate": 1.0976563307331162e-06, "loss": 0.0003, "step": 197250 }, { "epoch": 1.6131168990473075, "grad_norm": 0.0005988742923364043, "learning_rate": 1.097210211591655e-06, "loss": 0.0004, "step": 197260 }, { "epoch": 1.6131986752259067, "grad_norm": 0.03706065192818642, "learning_rate": 1.0967641719531958e-06, "loss": 0.0011, "step": 197270 }, { "epoch": 1.6132804514045058, "grad_norm": 0.03028198517858982, "learning_rate": 1.0963182118268228e-06, "loss": 0.0011, "step": 197280 }, { "epoch": 1.613362227583105, "grad_norm": 0.014995434321463108, "learning_rate": 1.0958723312216214e-06, "loss": 0.0006, "step": 197290 }, { "epoch": 1.6134440037617042, "grad_norm": 0.023332979530096054, "learning_rate": 1.095426530146672e-06, "loss": 0.0005, "step": 197300 }, { "epoch": 1.6135257799403036, "grad_norm": 0.0015325293643400073, "learning_rate": 1.0949808086110597e-06, "loss": 0.0006, "step": 197310 }, { "epoch": 1.6136075561189025, "grad_norm": 0.017383571714162827, "learning_rate": 1.094535166623863e-06, "loss": 0.0008, "step": 197320 }, { "epoch": 1.6136893322975019, "grad_norm": 0.020901862531900406, "learning_rate": 1.09408960419416e-06, "loss": 0.001, "step": 197330 }, { "epoch": 1.6137711084761008, "grad_norm": 0.03175941854715347, "learning_rate": 1.0936441213310256e-06, "loss": 0.0003, "step": 197340 }, { "epoch": 1.6138528846547002, "grad_norm": 0.030358081683516502, "learning_rate": 1.0931987180435343e-06, "loss": 0.0008, "step": 197350 }, { "epoch": 1.6139346608332992, "grad_norm": 0.021175477653741837, "learning_rate": 1.0927533943407625e-06, "loss": 0.0011, "step": 197360 }, { "epoch": 1.6140164370118986, "grad_norm": 0.006798373535275459, "learning_rate": 1.0923081502317796e-06, "loss": 0.0004, "step": 197370 }, { "epoch": 1.6140982131904975, "grad_norm": 0.00768451439216733, "learning_rate": 1.0918629857256563e-06, "loss": 0.0004, "step": 197380 }, { "epoch": 1.614179989369097, "grad_norm": 0.022985782474279404, "learning_rate": 1.091417900831459e-06, "loss": 0.0007, "step": 197390 }, { "epoch": 1.6142617655476958, "grad_norm": 0.0069066914729774, "learning_rate": 1.0909728955582577e-06, "loss": 0.001, "step": 197400 }, { "epoch": 1.6143435417262952, "grad_norm": 0.060075968503952026, "learning_rate": 1.090527969915116e-06, "loss": 0.0005, "step": 197410 }, { "epoch": 1.6144253179048942, "grad_norm": 0.023294439539313316, "learning_rate": 1.090083123911097e-06, "loss": 0.0006, "step": 197420 }, { "epoch": 1.6145070940834936, "grad_norm": 0.005906984675675631, "learning_rate": 1.0896383575552617e-06, "loss": 0.0005, "step": 197430 }, { "epoch": 1.6145888702620925, "grad_norm": 0.017637331038713455, "learning_rate": 1.089193670856673e-06, "loss": 0.0004, "step": 197440 }, { "epoch": 1.614670646440692, "grad_norm": 0.08900938928127289, "learning_rate": 1.0887490638243893e-06, "loss": 0.0006, "step": 197450 }, { "epoch": 1.6147524226192909, "grad_norm": 0.06820332258939743, "learning_rate": 1.0883045364674654e-06, "loss": 0.0006, "step": 197460 }, { "epoch": 1.6148341987978903, "grad_norm": 0.025687724351882935, "learning_rate": 1.087860088794956e-06, "loss": 0.0014, "step": 197470 }, { "epoch": 1.6149159749764892, "grad_norm": 0.02968442067503929, "learning_rate": 1.087415720815918e-06, "loss": 0.0006, "step": 197480 }, { "epoch": 1.6149977511550886, "grad_norm": 0.016872067004442215, "learning_rate": 1.086971432539402e-06, "loss": 0.001, "step": 197490 }, { "epoch": 1.6150795273336875, "grad_norm": 0.04971230402588844, "learning_rate": 1.0865272239744583e-06, "loss": 0.0005, "step": 197500 }, { "epoch": 1.615161303512287, "grad_norm": 0.005875040777027607, "learning_rate": 1.0860830951301354e-06, "loss": 0.0009, "step": 197510 }, { "epoch": 1.615243079690886, "grad_norm": 0.049477338790893555, "learning_rate": 1.085639046015482e-06, "loss": 0.0006, "step": 197520 }, { "epoch": 1.6153248558694853, "grad_norm": 0.010015804320573807, "learning_rate": 1.085195076639543e-06, "loss": 0.0007, "step": 197530 }, { "epoch": 1.6154066320480844, "grad_norm": 0.03167193755507469, "learning_rate": 1.084751187011363e-06, "loss": 0.0009, "step": 197540 }, { "epoch": 1.6154884082266836, "grad_norm": 0.12467287480831146, "learning_rate": 1.084307377139982e-06, "loss": 0.0007, "step": 197550 }, { "epoch": 1.6155701844052828, "grad_norm": 0.019941421225667, "learning_rate": 1.0838636470344438e-06, "loss": 0.0006, "step": 197560 }, { "epoch": 1.615651960583882, "grad_norm": 0.015841923654079437, "learning_rate": 1.0834199967037878e-06, "loss": 0.0007, "step": 197570 }, { "epoch": 1.615733736762481, "grad_norm": 0.0175319816917181, "learning_rate": 1.082976426157049e-06, "loss": 0.0004, "step": 197580 }, { "epoch": 1.6158155129410803, "grad_norm": 0.045017555356025696, "learning_rate": 1.0825329354032626e-06, "loss": 0.0004, "step": 197590 }, { "epoch": 1.6158972891196794, "grad_norm": 0.002990782493725419, "learning_rate": 1.0820895244514661e-06, "loss": 0.0007, "step": 197600 }, { "epoch": 1.6159790652982786, "grad_norm": 0.049805644899606705, "learning_rate": 1.0816461933106903e-06, "loss": 0.001, "step": 197610 }, { "epoch": 1.6160608414768778, "grad_norm": 0.050429895520210266, "learning_rate": 1.0812029419899667e-06, "loss": 0.0006, "step": 197620 }, { "epoch": 1.616142617655477, "grad_norm": 0.056790225207805634, "learning_rate": 1.0807597704983235e-06, "loss": 0.0006, "step": 197630 }, { "epoch": 1.6162243938340761, "grad_norm": 0.03320428729057312, "learning_rate": 1.0803166788447904e-06, "loss": 0.0009, "step": 197640 }, { "epoch": 1.6163061700126753, "grad_norm": 0.04921495169401169, "learning_rate": 1.079873667038393e-06, "loss": 0.0007, "step": 197650 }, { "epoch": 1.6163879461912745, "grad_norm": 0.001356682158075273, "learning_rate": 1.079430735088155e-06, "loss": 0.001, "step": 197660 }, { "epoch": 1.6164697223698736, "grad_norm": 0.04699341952800751, "learning_rate": 1.0789878830030986e-06, "loss": 0.0006, "step": 197670 }, { "epoch": 1.6165514985484728, "grad_norm": 0.004277145490050316, "learning_rate": 1.0785451107922478e-06, "loss": 0.0023, "step": 197680 }, { "epoch": 1.616633274727072, "grad_norm": 0.024789536371827126, "learning_rate": 1.0781024184646217e-06, "loss": 0.0007, "step": 197690 }, { "epoch": 1.6167150509056711, "grad_norm": 0.045792315155267715, "learning_rate": 1.077659806029236e-06, "loss": 0.0006, "step": 197700 }, { "epoch": 1.6167968270842703, "grad_norm": 0.05750623717904091, "learning_rate": 1.077217273495107e-06, "loss": 0.0007, "step": 197710 }, { "epoch": 1.6168786032628695, "grad_norm": 0.0025943750515580177, "learning_rate": 1.0767748208712519e-06, "loss": 0.0007, "step": 197720 }, { "epoch": 1.6169603794414686, "grad_norm": 0.04562724009156227, "learning_rate": 1.0763324481666827e-06, "loss": 0.0006, "step": 197730 }, { "epoch": 1.617042155620068, "grad_norm": 0.05688799172639847, "learning_rate": 1.0758901553904105e-06, "loss": 0.0004, "step": 197740 }, { "epoch": 1.617123931798667, "grad_norm": 0.03758887201547623, "learning_rate": 1.0754479425514447e-06, "loss": 0.0008, "step": 197750 }, { "epoch": 1.6172057079772664, "grad_norm": 0.030068719759583473, "learning_rate": 1.0750058096587956e-06, "loss": 0.0003, "step": 197760 }, { "epoch": 1.6172874841558653, "grad_norm": 0.003636789508163929, "learning_rate": 1.0745637567214684e-06, "loss": 0.0008, "step": 197770 }, { "epoch": 1.6173692603344647, "grad_norm": 0.09112133085727692, "learning_rate": 1.074121783748468e-06, "loss": 0.0008, "step": 197780 }, { "epoch": 1.6174510365130637, "grad_norm": 0.0899357870221138, "learning_rate": 1.0736798907487971e-06, "loss": 0.0005, "step": 197790 }, { "epoch": 1.617532812691663, "grad_norm": 0.03700357675552368, "learning_rate": 1.0732380777314594e-06, "loss": 0.0006, "step": 197800 }, { "epoch": 1.617614588870262, "grad_norm": 0.033093348145484924, "learning_rate": 1.0727963447054556e-06, "loss": 0.0006, "step": 197810 }, { "epoch": 1.6176963650488614, "grad_norm": 0.01731952466070652, "learning_rate": 1.0723546916797805e-06, "loss": 0.0008, "step": 197820 }, { "epoch": 1.6177781412274603, "grad_norm": 0.021894371137022972, "learning_rate": 1.0719131186634318e-06, "loss": 0.0006, "step": 197830 }, { "epoch": 1.6178599174060597, "grad_norm": 0.03049161657691002, "learning_rate": 1.0714716256654074e-06, "loss": 0.0007, "step": 197840 }, { "epoch": 1.6179416935846587, "grad_norm": 0.06255900859832764, "learning_rate": 1.0710302126946993e-06, "loss": 0.0004, "step": 197850 }, { "epoch": 1.618023469763258, "grad_norm": 0.059036850929260254, "learning_rate": 1.0705888797602986e-06, "loss": 0.0007, "step": 197860 }, { "epoch": 1.618105245941857, "grad_norm": 0.017745355144143105, "learning_rate": 1.0701476268711957e-06, "loss": 0.0006, "step": 197870 }, { "epoch": 1.6181870221204564, "grad_norm": 0.03586128354072571, "learning_rate": 1.0697064540363811e-06, "loss": 0.0009, "step": 197880 }, { "epoch": 1.6182687982990553, "grad_norm": 0.014224931597709656, "learning_rate": 1.0692653612648413e-06, "loss": 0.0004, "step": 197890 }, { "epoch": 1.6183505744776547, "grad_norm": 0.05416717752814293, "learning_rate": 1.0688243485655604e-06, "loss": 0.0008, "step": 197900 }, { "epoch": 1.6184323506562537, "grad_norm": 0.0017519729444757104, "learning_rate": 1.068383415947522e-06, "loss": 0.0006, "step": 197910 }, { "epoch": 1.618514126834853, "grad_norm": 0.0035591810010373592, "learning_rate": 1.0679425634197104e-06, "loss": 0.0005, "step": 197920 }, { "epoch": 1.618595903013452, "grad_norm": 0.04048411175608635, "learning_rate": 1.0675017909911061e-06, "loss": 0.0008, "step": 197930 }, { "epoch": 1.6186776791920514, "grad_norm": 0.062085557729005814, "learning_rate": 1.0670610986706858e-06, "loss": 0.0007, "step": 197940 }, { "epoch": 1.6187594553706506, "grad_norm": 0.046153582632541656, "learning_rate": 1.0666204864674263e-06, "loss": 0.0004, "step": 197950 }, { "epoch": 1.6188412315492497, "grad_norm": 0.0013623374979943037, "learning_rate": 1.0661799543903062e-06, "loss": 0.0004, "step": 197960 }, { "epoch": 1.618923007727849, "grad_norm": 0.015931108966469765, "learning_rate": 1.0657395024482974e-06, "loss": 0.0006, "step": 197970 }, { "epoch": 1.619004783906448, "grad_norm": 0.00811717752367258, "learning_rate": 1.0652991306503735e-06, "loss": 0.0004, "step": 197980 }, { "epoch": 1.6190865600850473, "grad_norm": 0.026032762601971626, "learning_rate": 1.0648588390055032e-06, "loss": 0.0005, "step": 197990 }, { "epoch": 1.6191683362636464, "grad_norm": 0.06818917393684387, "learning_rate": 1.0644186275226581e-06, "loss": 0.0003, "step": 198000 }, { "epoch": 1.6192501124422456, "grad_norm": 0.021988864988088608, "learning_rate": 1.0639784962108047e-06, "loss": 0.0005, "step": 198010 }, { "epoch": 1.6193318886208448, "grad_norm": 0.002143877325579524, "learning_rate": 1.0635384450789088e-06, "loss": 0.0012, "step": 198020 }, { "epoch": 1.619413664799444, "grad_norm": 0.018781296908855438, "learning_rate": 1.063098474135933e-06, "loss": 0.0003, "step": 198030 }, { "epoch": 1.619495440978043, "grad_norm": 0.013976011425256729, "learning_rate": 1.0626585833908426e-06, "loss": 0.0008, "step": 198040 }, { "epoch": 1.6195772171566423, "grad_norm": 0.01768556237220764, "learning_rate": 1.0622187728525985e-06, "loss": 0.0007, "step": 198050 }, { "epoch": 1.6196589933352414, "grad_norm": 0.012409779243171215, "learning_rate": 1.0617790425301576e-06, "loss": 0.0008, "step": 198060 }, { "epoch": 1.6197407695138406, "grad_norm": 0.04424014315009117, "learning_rate": 1.0613393924324777e-06, "loss": 0.0007, "step": 198070 }, { "epoch": 1.6198225456924398, "grad_norm": 0.03225705400109291, "learning_rate": 1.060899822568517e-06, "loss": 0.0005, "step": 198080 }, { "epoch": 1.619904321871039, "grad_norm": 0.031602129340171814, "learning_rate": 1.060460332947229e-06, "loss": 0.0006, "step": 198090 }, { "epoch": 1.6199860980496381, "grad_norm": 0.0019749293569475412, "learning_rate": 1.060020923577566e-06, "loss": 0.0014, "step": 198100 }, { "epoch": 1.6200678742282373, "grad_norm": 0.015435605309903622, "learning_rate": 1.059581594468478e-06, "loss": 0.0007, "step": 198110 }, { "epoch": 1.6201496504068365, "grad_norm": 0.04214669391512871, "learning_rate": 1.0591423456289168e-06, "loss": 0.0007, "step": 198120 }, { "epoch": 1.6202314265854356, "grad_norm": 0.00191833998542279, "learning_rate": 1.0587031770678303e-06, "loss": 0.0004, "step": 198130 }, { "epoch": 1.6203132027640348, "grad_norm": 0.0156851839274168, "learning_rate": 1.0582640887941631e-06, "loss": 0.0017, "step": 198140 }, { "epoch": 1.620394978942634, "grad_norm": 0.020902879536151886, "learning_rate": 1.0578250808168589e-06, "loss": 0.0006, "step": 198150 }, { "epoch": 1.6204767551212331, "grad_norm": 0.06672653555870056, "learning_rate": 1.0573861531448637e-06, "loss": 0.001, "step": 198160 }, { "epoch": 1.6205585312998325, "grad_norm": 0.06567844748497009, "learning_rate": 1.0569473057871189e-06, "loss": 0.0005, "step": 198170 }, { "epoch": 1.6206403074784315, "grad_norm": 0.08133754879236221, "learning_rate": 1.0565085387525608e-06, "loss": 0.0006, "step": 198180 }, { "epoch": 1.6207220836570309, "grad_norm": 0.026953689754009247, "learning_rate": 1.0560698520501283e-06, "loss": 0.0005, "step": 198190 }, { "epoch": 1.6208038598356298, "grad_norm": 0.09294264763593674, "learning_rate": 1.0556312456887596e-06, "loss": 0.0007, "step": 198200 }, { "epoch": 1.6208856360142292, "grad_norm": 0.04323416203260422, "learning_rate": 1.0551927196773886e-06, "loss": 0.0004, "step": 198210 }, { "epoch": 1.6209674121928281, "grad_norm": 0.014456292614340782, "learning_rate": 1.054754274024949e-06, "loss": 0.0007, "step": 198220 }, { "epoch": 1.6210491883714275, "grad_norm": 0.01986040733754635, "learning_rate": 1.0543159087403698e-06, "loss": 0.0008, "step": 198230 }, { "epoch": 1.6211309645500265, "grad_norm": 0.011637837626039982, "learning_rate": 1.0538776238325843e-06, "loss": 0.0004, "step": 198240 }, { "epoch": 1.6212127407286259, "grad_norm": 0.000829135999083519, "learning_rate": 1.053439419310519e-06, "loss": 0.0004, "step": 198250 }, { "epoch": 1.6212945169072248, "grad_norm": 0.07214254140853882, "learning_rate": 1.0530012951831004e-06, "loss": 0.0005, "step": 198260 }, { "epoch": 1.6213762930858242, "grad_norm": 0.06000905483961105, "learning_rate": 1.0525632514592527e-06, "loss": 0.0004, "step": 198270 }, { "epoch": 1.6214580692644232, "grad_norm": 0.09031976014375687, "learning_rate": 1.0521252881479033e-06, "loss": 0.0008, "step": 198280 }, { "epoch": 1.6215398454430225, "grad_norm": 0.06789706647396088, "learning_rate": 1.051687405257969e-06, "loss": 0.0007, "step": 198290 }, { "epoch": 1.6216216216216215, "grad_norm": 0.0155404694378376, "learning_rate": 1.0512496027983715e-06, "loss": 0.0006, "step": 198300 }, { "epoch": 1.6217033978002209, "grad_norm": 0.015347177162766457, "learning_rate": 1.0508118807780282e-06, "loss": 0.0013, "step": 198310 }, { "epoch": 1.6217851739788198, "grad_norm": 0.019796764478087425, "learning_rate": 1.050374239205858e-06, "loss": 0.0006, "step": 198320 }, { "epoch": 1.6218669501574192, "grad_norm": 0.022058993577957153, "learning_rate": 1.049936678090775e-06, "loss": 0.0007, "step": 198330 }, { "epoch": 1.6219487263360182, "grad_norm": 0.03191640228033066, "learning_rate": 1.049499197441693e-06, "loss": 0.0007, "step": 198340 }, { "epoch": 1.6220305025146176, "grad_norm": 0.04413833096623421, "learning_rate": 1.0490617972675217e-06, "loss": 0.0006, "step": 198350 }, { "epoch": 1.6221122786932165, "grad_norm": 0.03786514699459076, "learning_rate": 1.0486244775771748e-06, "loss": 0.0008, "step": 198360 }, { "epoch": 1.622194054871816, "grad_norm": 0.008159955963492393, "learning_rate": 1.048187238379559e-06, "loss": 0.0006, "step": 198370 }, { "epoch": 1.622275831050415, "grad_norm": 0.059235598891973495, "learning_rate": 1.047750079683581e-06, "loss": 0.0007, "step": 198380 }, { "epoch": 1.6223576072290142, "grad_norm": 0.02344677597284317, "learning_rate": 1.0473130014981453e-06, "loss": 0.0009, "step": 198390 }, { "epoch": 1.6224393834076134, "grad_norm": 0.023817308247089386, "learning_rate": 1.0468760038321597e-06, "loss": 0.0006, "step": 198400 }, { "epoch": 1.6225211595862126, "grad_norm": 0.002622258383780718, "learning_rate": 1.0464390866945219e-06, "loss": 0.001, "step": 198410 }, { "epoch": 1.6226029357648117, "grad_norm": 0.06413324922323227, "learning_rate": 1.046002250094133e-06, "loss": 0.0009, "step": 198420 }, { "epoch": 1.622684711943411, "grad_norm": 0.17418166995048523, "learning_rate": 1.0455654940398908e-06, "loss": 0.001, "step": 198430 }, { "epoch": 1.62276648812201, "grad_norm": 0.04788621887564659, "learning_rate": 1.0451288185406955e-06, "loss": 0.0004, "step": 198440 }, { "epoch": 1.6228482643006092, "grad_norm": 0.029638955369591713, "learning_rate": 1.0446922236054407e-06, "loss": 0.002, "step": 198450 }, { "epoch": 1.6229300404792084, "grad_norm": 0.007102153263986111, "learning_rate": 1.0442557092430206e-06, "loss": 0.0007, "step": 198460 }, { "epoch": 1.6230118166578076, "grad_norm": 0.010833939537405968, "learning_rate": 1.0438192754623256e-06, "loss": 0.0003, "step": 198470 }, { "epoch": 1.6230935928364068, "grad_norm": 0.09450136870145798, "learning_rate": 1.0433829222722492e-06, "loss": 0.001, "step": 198480 }, { "epoch": 1.623175369015006, "grad_norm": 0.029444515705108643, "learning_rate": 1.042946649681678e-06, "loss": 0.0003, "step": 198490 }, { "epoch": 1.623257145193605, "grad_norm": 0.06446605175733566, "learning_rate": 1.042510457699501e-06, "loss": 0.0007, "step": 198500 }, { "epoch": 1.6233389213722043, "grad_norm": 0.05687195435166359, "learning_rate": 1.0420743463346012e-06, "loss": 0.0009, "step": 198510 }, { "epoch": 1.6234206975508034, "grad_norm": 0.009110604412853718, "learning_rate": 1.0416383155958664e-06, "loss": 0.0004, "step": 198520 }, { "epoch": 1.6235024737294026, "grad_norm": 0.04283652454614639, "learning_rate": 1.0412023654921754e-06, "loss": 0.0011, "step": 198530 }, { "epoch": 1.6235842499080018, "grad_norm": 0.03766978904604912, "learning_rate": 1.0407664960324105e-06, "loss": 0.0005, "step": 198540 }, { "epoch": 1.623666026086601, "grad_norm": 0.05744115263223648, "learning_rate": 1.0403307072254487e-06, "loss": 0.0012, "step": 198550 }, { "epoch": 1.6237478022652, "grad_norm": 0.0020688283257186413, "learning_rate": 1.0398949990801703e-06, "loss": 0.0003, "step": 198560 }, { "epoch": 1.6238295784437993, "grad_norm": 0.04904980957508087, "learning_rate": 1.0394593716054497e-06, "loss": 0.0031, "step": 198570 }, { "epoch": 1.6239113546223987, "grad_norm": 0.008380116894841194, "learning_rate": 1.039023824810161e-06, "loss": 0.0007, "step": 198580 }, { "epoch": 1.6239931308009976, "grad_norm": 0.056791432201862335, "learning_rate": 1.0385883587031752e-06, "loss": 0.0014, "step": 198590 }, { "epoch": 1.624074906979597, "grad_norm": 0.08401324599981308, "learning_rate": 1.0381529732933659e-06, "loss": 0.0009, "step": 198600 }, { "epoch": 1.624156683158196, "grad_norm": 0.11384057998657227, "learning_rate": 1.0377176685896012e-06, "loss": 0.0013, "step": 198610 }, { "epoch": 1.6242384593367953, "grad_norm": 0.10503977537155151, "learning_rate": 1.0372824446007478e-06, "loss": 0.0015, "step": 198620 }, { "epoch": 1.6243202355153943, "grad_norm": 0.004227024503052235, "learning_rate": 1.036847301335671e-06, "loss": 0.0006, "step": 198630 }, { "epoch": 1.6244020116939937, "grad_norm": 0.039872851222753525, "learning_rate": 1.0364122388032389e-06, "loss": 0.0009, "step": 198640 }, { "epoch": 1.6244837878725926, "grad_norm": 0.08142194151878357, "learning_rate": 1.0359772570123094e-06, "loss": 0.0004, "step": 198650 }, { "epoch": 1.624565564051192, "grad_norm": 0.06329847872257233, "learning_rate": 1.035542355971746e-06, "loss": 0.0006, "step": 198660 }, { "epoch": 1.624647340229791, "grad_norm": 0.05572556331753731, "learning_rate": 1.0351075356904055e-06, "loss": 0.0006, "step": 198670 }, { "epoch": 1.6247291164083904, "grad_norm": 0.025980956852436066, "learning_rate": 1.0346727961771485e-06, "loss": 0.0008, "step": 198680 }, { "epoch": 1.6248108925869893, "grad_norm": 0.02006734162569046, "learning_rate": 1.0342381374408306e-06, "loss": 0.0008, "step": 198690 }, { "epoch": 1.6248926687655887, "grad_norm": 0.0422092042863369, "learning_rate": 1.0338035594903044e-06, "loss": 0.002, "step": 198700 }, { "epoch": 1.6249744449441876, "grad_norm": 0.005029547028243542, "learning_rate": 1.0333690623344227e-06, "loss": 0.0004, "step": 198710 }, { "epoch": 1.625056221122787, "grad_norm": 0.0448000505566597, "learning_rate": 1.0329346459820389e-06, "loss": 0.0005, "step": 198720 }, { "epoch": 1.625137997301386, "grad_norm": 0.008900608867406845, "learning_rate": 1.0325003104420012e-06, "loss": 0.001, "step": 198730 }, { "epoch": 1.6252197734799854, "grad_norm": 0.08687195926904678, "learning_rate": 1.0320660557231565e-06, "loss": 0.0005, "step": 198740 }, { "epoch": 1.6253015496585843, "grad_norm": 0.07062049955129623, "learning_rate": 1.0316318818343517e-06, "loss": 0.0012, "step": 198750 }, { "epoch": 1.6253833258371837, "grad_norm": 0.01570180058479309, "learning_rate": 1.0311977887844315e-06, "loss": 0.0011, "step": 198760 }, { "epoch": 1.6254651020157826, "grad_norm": 0.03749131038784981, "learning_rate": 1.0307637765822382e-06, "loss": 0.0008, "step": 198770 }, { "epoch": 1.625546878194382, "grad_norm": 0.012766701169312, "learning_rate": 1.0303298452366128e-06, "loss": 0.0003, "step": 198780 }, { "epoch": 1.625628654372981, "grad_norm": 0.03111165203154087, "learning_rate": 1.0298959947563953e-06, "loss": 0.0006, "step": 198790 }, { "epoch": 1.6257104305515804, "grad_norm": 0.07812625169754028, "learning_rate": 1.0294622251504222e-06, "loss": 0.0011, "step": 198800 }, { "epoch": 1.6257922067301795, "grad_norm": 0.028951460495591164, "learning_rate": 1.029028536427532e-06, "loss": 0.0006, "step": 198810 }, { "epoch": 1.6258739829087787, "grad_norm": 0.015714192762970924, "learning_rate": 1.0285949285965585e-06, "loss": 0.0009, "step": 198820 }, { "epoch": 1.6259557590873779, "grad_norm": 0.06689390540122986, "learning_rate": 1.028161401666335e-06, "loss": 0.0014, "step": 198830 }, { "epoch": 1.626037535265977, "grad_norm": 0.023448621854186058, "learning_rate": 1.0277279556456905e-06, "loss": 0.0006, "step": 198840 }, { "epoch": 1.6261193114445762, "grad_norm": 0.003930674400180578, "learning_rate": 1.0272945905434574e-06, "loss": 0.0015, "step": 198850 }, { "epoch": 1.6262010876231754, "grad_norm": 0.1815265715122223, "learning_rate": 1.0268613063684628e-06, "loss": 0.001, "step": 198860 }, { "epoch": 1.6262828638017746, "grad_norm": 0.030522966757416725, "learning_rate": 1.0264281031295332e-06, "loss": 0.0006, "step": 198870 }, { "epoch": 1.6263646399803737, "grad_norm": 0.07298890501260757, "learning_rate": 1.0259949808354923e-06, "loss": 0.0004, "step": 198880 }, { "epoch": 1.626446416158973, "grad_norm": 0.07880190014839172, "learning_rate": 1.0255619394951643e-06, "loss": 0.0005, "step": 198890 }, { "epoch": 1.626528192337572, "grad_norm": 0.00865314994007349, "learning_rate": 1.0251289791173707e-06, "loss": 0.0002, "step": 198900 }, { "epoch": 1.6266099685161712, "grad_norm": 0.06384158134460449, "learning_rate": 1.02469609971093e-06, "loss": 0.0008, "step": 198910 }, { "epoch": 1.6266917446947704, "grad_norm": 0.025005200877785683, "learning_rate": 1.0242633012846603e-06, "loss": 0.0003, "step": 198920 }, { "epoch": 1.6267735208733696, "grad_norm": 0.0016676365630701184, "learning_rate": 1.0238305838473795e-06, "loss": 0.0004, "step": 198930 }, { "epoch": 1.6268552970519687, "grad_norm": 0.07940791547298431, "learning_rate": 1.023397947407902e-06, "loss": 0.0005, "step": 198940 }, { "epoch": 1.626937073230568, "grad_norm": 0.01235299464315176, "learning_rate": 1.022965391975041e-06, "loss": 0.0002, "step": 198950 }, { "epoch": 1.627018849409167, "grad_norm": 0.02466650679707527, "learning_rate": 1.022532917557606e-06, "loss": 0.0007, "step": 198960 }, { "epoch": 1.6271006255877662, "grad_norm": 0.06488218903541565, "learning_rate": 1.02210052416441e-06, "loss": 0.0006, "step": 198970 }, { "epoch": 1.6271824017663654, "grad_norm": 0.03420087322592735, "learning_rate": 1.0216682118042591e-06, "loss": 0.0005, "step": 198980 }, { "epoch": 1.6272641779449646, "grad_norm": 0.03189576789736748, "learning_rate": 1.0212359804859612e-06, "loss": 0.0005, "step": 198990 }, { "epoch": 1.6273459541235638, "grad_norm": 0.009088502265512943, "learning_rate": 1.0208038302183204e-06, "loss": 0.0004, "step": 199000 }, { "epoch": 1.6274277303021631, "grad_norm": 0.0013589807786047459, "learning_rate": 1.02037176101014e-06, "loss": 0.0004, "step": 199010 }, { "epoch": 1.627509506480762, "grad_norm": 0.02613767795264721, "learning_rate": 1.019939772870221e-06, "loss": 0.0008, "step": 199020 }, { "epoch": 1.6275912826593615, "grad_norm": 0.006984501611441374, "learning_rate": 1.019507865807365e-06, "loss": 0.0003, "step": 199030 }, { "epoch": 1.6276730588379604, "grad_norm": 0.10375173389911652, "learning_rate": 1.0190760398303673e-06, "loss": 0.0007, "step": 199040 }, { "epoch": 1.6277548350165598, "grad_norm": 0.018594278022646904, "learning_rate": 1.018644294948028e-06, "loss": 0.0005, "step": 199050 }, { "epoch": 1.6278366111951588, "grad_norm": 0.07410871237516403, "learning_rate": 1.0182126311691404e-06, "loss": 0.0011, "step": 199060 }, { "epoch": 1.6279183873737582, "grad_norm": 0.01510075107216835, "learning_rate": 1.0177810485024986e-06, "loss": 0.0004, "step": 199070 }, { "epoch": 1.628000163552357, "grad_norm": 0.028601782396435738, "learning_rate": 1.0173495469568918e-06, "loss": 0.0008, "step": 199080 }, { "epoch": 1.6280819397309565, "grad_norm": 0.012253099121153355, "learning_rate": 1.0169181265411137e-06, "loss": 0.0007, "step": 199090 }, { "epoch": 1.6281637159095554, "grad_norm": 0.023083919659256935, "learning_rate": 1.016486787263951e-06, "loss": 0.0009, "step": 199100 }, { "epoch": 1.6282454920881548, "grad_norm": 0.04676984250545502, "learning_rate": 1.01605552913419e-06, "loss": 0.0003, "step": 199110 }, { "epoch": 1.6283272682667538, "grad_norm": 0.024631261825561523, "learning_rate": 1.0156243521606168e-06, "loss": 0.0009, "step": 199120 }, { "epoch": 1.6284090444453532, "grad_norm": 0.025729678571224213, "learning_rate": 1.015193256352014e-06, "loss": 0.0005, "step": 199130 }, { "epoch": 1.6284908206239521, "grad_norm": 0.07570181041955948, "learning_rate": 1.0147622417171633e-06, "loss": 0.0006, "step": 199140 }, { "epoch": 1.6285725968025515, "grad_norm": 0.013401104137301445, "learning_rate": 1.0143313082648454e-06, "loss": 0.0005, "step": 199150 }, { "epoch": 1.6286543729811505, "grad_norm": 0.021439874544739723, "learning_rate": 1.0139004560038367e-06, "loss": 0.0008, "step": 199160 }, { "epoch": 1.6287361491597498, "grad_norm": 0.024480603635311127, "learning_rate": 1.0134696849429176e-06, "loss": 0.0005, "step": 199170 }, { "epoch": 1.6288179253383488, "grad_norm": 0.023666832596063614, "learning_rate": 1.0130389950908614e-06, "loss": 0.0006, "step": 199180 }, { "epoch": 1.6288997015169482, "grad_norm": 0.0035236196126788855, "learning_rate": 1.0126083864564413e-06, "loss": 0.0008, "step": 199190 }, { "epoch": 1.6289814776955471, "grad_norm": 0.033485788851976395, "learning_rate": 1.0121778590484283e-06, "loss": 0.0007, "step": 199200 }, { "epoch": 1.6290632538741465, "grad_norm": 0.028496352955698967, "learning_rate": 1.011747412875595e-06, "loss": 0.0004, "step": 199210 }, { "epoch": 1.6291450300527457, "grad_norm": 0.03803316131234169, "learning_rate": 1.0113170479467089e-06, "loss": 0.0005, "step": 199220 }, { "epoch": 1.6292268062313449, "grad_norm": 0.05212356522679329, "learning_rate": 1.0108867642705366e-06, "loss": 0.0008, "step": 199230 }, { "epoch": 1.629308582409944, "grad_norm": 0.03710275515913963, "learning_rate": 1.0104565618558438e-06, "loss": 0.0009, "step": 199240 }, { "epoch": 1.6293903585885432, "grad_norm": 0.017075447365641594, "learning_rate": 1.0100264407113936e-06, "loss": 0.0002, "step": 199250 }, { "epoch": 1.6294721347671424, "grad_norm": 0.0012974588898941875, "learning_rate": 1.0095964008459474e-06, "loss": 0.0004, "step": 199260 }, { "epoch": 1.6295539109457415, "grad_norm": 0.005900654010474682, "learning_rate": 1.0091664422682668e-06, "loss": 0.0009, "step": 199270 }, { "epoch": 1.6296356871243407, "grad_norm": 0.08607517182826996, "learning_rate": 1.0087365649871079e-06, "loss": 0.0005, "step": 199280 }, { "epoch": 1.6297174633029399, "grad_norm": 0.0026284081395715475, "learning_rate": 1.0083067690112308e-06, "loss": 0.0005, "step": 199290 }, { "epoch": 1.629799239481539, "grad_norm": 0.05015601962804794, "learning_rate": 1.0078770543493893e-06, "loss": 0.0014, "step": 199300 }, { "epoch": 1.6298810156601382, "grad_norm": 0.061387136578559875, "learning_rate": 1.0074474210103375e-06, "loss": 0.0005, "step": 199310 }, { "epoch": 1.6299627918387374, "grad_norm": 0.045103769749403, "learning_rate": 1.0070178690028259e-06, "loss": 0.0006, "step": 199320 }, { "epoch": 1.6300445680173365, "grad_norm": 0.02121816575527191, "learning_rate": 1.0065883983356068e-06, "loss": 0.0006, "step": 199330 }, { "epoch": 1.6301263441959357, "grad_norm": 0.017545506358146667, "learning_rate": 1.0061590090174284e-06, "loss": 0.0003, "step": 199340 }, { "epoch": 1.6302081203745349, "grad_norm": 0.02442050538957119, "learning_rate": 1.005729701057037e-06, "loss": 0.0005, "step": 199350 }, { "epoch": 1.630289896553134, "grad_norm": 0.026335014030337334, "learning_rate": 1.0053004744631783e-06, "loss": 0.0004, "step": 199360 }, { "epoch": 1.6303716727317332, "grad_norm": 0.0067180488258600235, "learning_rate": 1.0048713292445966e-06, "loss": 0.0007, "step": 199370 }, { "epoch": 1.6304534489103324, "grad_norm": 0.06353195011615753, "learning_rate": 1.0044422654100327e-06, "loss": 0.0005, "step": 199380 }, { "epoch": 1.6305352250889316, "grad_norm": 0.006534818094223738, "learning_rate": 1.004013282968228e-06, "loss": 0.0005, "step": 199390 }, { "epoch": 1.6306170012675307, "grad_norm": 0.07982257753610611, "learning_rate": 1.0035843819279189e-06, "loss": 0.0008, "step": 199400 }, { "epoch": 1.63069877744613, "grad_norm": 0.07854443788528442, "learning_rate": 1.0031555622978462e-06, "loss": 0.0005, "step": 199410 }, { "epoch": 1.630780553624729, "grad_norm": 0.03334986791014671, "learning_rate": 1.0027268240867438e-06, "loss": 0.0006, "step": 199420 }, { "epoch": 1.6308623298033282, "grad_norm": 0.05850115790963173, "learning_rate": 1.0022981673033445e-06, "loss": 0.0011, "step": 199430 }, { "epoch": 1.6309441059819276, "grad_norm": 0.011958467774093151, "learning_rate": 1.0018695919563798e-06, "loss": 0.0009, "step": 199440 }, { "epoch": 1.6310258821605266, "grad_norm": 0.0030443770810961723, "learning_rate": 1.001441098054583e-06, "loss": 0.0006, "step": 199450 }, { "epoch": 1.631107658339126, "grad_norm": 0.03953242301940918, "learning_rate": 1.001012685606681e-06, "loss": 0.0004, "step": 199460 }, { "epoch": 1.631189434517725, "grad_norm": 0.005156165454536676, "learning_rate": 1.0005843546214012e-06, "loss": 0.001, "step": 199470 }, { "epoch": 1.6312712106963243, "grad_norm": 0.052721843123435974, "learning_rate": 1.0001561051074693e-06, "loss": 0.0014, "step": 199480 }, { "epoch": 1.6313529868749232, "grad_norm": 0.00755496509373188, "learning_rate": 9.997279370736086e-07, "loss": 0.0007, "step": 199490 }, { "epoch": 1.6314347630535226, "grad_norm": 0.03967760130763054, "learning_rate": 9.992998505285411e-07, "loss": 0.0003, "step": 199500 }, { "epoch": 1.6315165392321216, "grad_norm": 0.11782652884721756, "learning_rate": 9.988718454809876e-07, "loss": 0.0009, "step": 199510 }, { "epoch": 1.631598315410721, "grad_norm": 0.025766469538211823, "learning_rate": 9.984439219396658e-07, "loss": 0.0005, "step": 199520 }, { "epoch": 1.63168009158932, "grad_norm": 0.1902969926595688, "learning_rate": 9.980160799132955e-07, "loss": 0.0006, "step": 199530 }, { "epoch": 1.6317618677679193, "grad_norm": 0.05671077221632004, "learning_rate": 9.975883194105902e-07, "loss": 0.0005, "step": 199540 }, { "epoch": 1.6318436439465183, "grad_norm": 0.05250810831785202, "learning_rate": 9.97160640440264e-07, "loss": 0.0005, "step": 199550 }, { "epoch": 1.6319254201251177, "grad_norm": 0.04791095852851868, "learning_rate": 9.967330430110284e-07, "loss": 0.0009, "step": 199560 }, { "epoch": 1.6320071963037166, "grad_norm": 0.022336728870868683, "learning_rate": 9.963055271315958e-07, "loss": 0.0008, "step": 199570 }, { "epoch": 1.632088972482316, "grad_norm": 0.01175403781235218, "learning_rate": 9.958780928106736e-07, "loss": 0.0003, "step": 199580 }, { "epoch": 1.632170748660915, "grad_norm": 0.0352383591234684, "learning_rate": 9.954507400569702e-07, "loss": 0.0004, "step": 199590 }, { "epoch": 1.6322525248395143, "grad_norm": 0.022259945049881935, "learning_rate": 9.9502346887919e-07, "loss": 0.0005, "step": 199600 }, { "epoch": 1.6323343010181133, "grad_norm": 0.05756504833698273, "learning_rate": 9.945962792860364e-07, "loss": 0.0006, "step": 199610 }, { "epoch": 1.6324160771967127, "grad_norm": 0.02850996144115925, "learning_rate": 9.941691712862133e-07, "loss": 0.0007, "step": 199620 }, { "epoch": 1.6324978533753116, "grad_norm": 0.05332096293568611, "learning_rate": 9.9374214488842e-07, "loss": 0.0008, "step": 199630 }, { "epoch": 1.632579629553911, "grad_norm": 0.14469821751117706, "learning_rate": 9.93315200101354e-07, "loss": 0.0015, "step": 199640 }, { "epoch": 1.6326614057325102, "grad_norm": 0.015222007408738136, "learning_rate": 9.92888336933716e-07, "loss": 0.0003, "step": 199650 }, { "epoch": 1.6327431819111093, "grad_norm": 0.011512327939271927, "learning_rate": 9.924615553941996e-07, "loss": 0.0007, "step": 199660 }, { "epoch": 1.6328249580897085, "grad_norm": 0.08351001143455505, "learning_rate": 9.920348554914982e-07, "loss": 0.0012, "step": 199670 }, { "epoch": 1.6329067342683077, "grad_norm": 0.02083723060786724, "learning_rate": 9.916082372343038e-07, "loss": 0.0012, "step": 199680 }, { "epoch": 1.6329885104469068, "grad_norm": 0.024148384109139442, "learning_rate": 9.911817006313096e-07, "loss": 0.0006, "step": 199690 }, { "epoch": 1.633070286625506, "grad_norm": 0.042529601603746414, "learning_rate": 9.907552456912018e-07, "loss": 0.0005, "step": 199700 }, { "epoch": 1.6331520628041052, "grad_norm": 0.03315826132893562, "learning_rate": 9.90328872422669e-07, "loss": 0.0008, "step": 199710 }, { "epoch": 1.6332338389827044, "grad_norm": 0.002065618522465229, "learning_rate": 9.899025808343966e-07, "loss": 0.0007, "step": 199720 }, { "epoch": 1.6333156151613035, "grad_norm": 0.0359916053712368, "learning_rate": 9.894763709350675e-07, "loss": 0.0006, "step": 199730 }, { "epoch": 1.6333973913399027, "grad_norm": 0.019397037103772163, "learning_rate": 9.890502427333648e-07, "loss": 0.0007, "step": 199740 }, { "epoch": 1.6334791675185019, "grad_norm": 0.17462313175201416, "learning_rate": 9.886241962379695e-07, "loss": 0.0012, "step": 199750 }, { "epoch": 1.633560943697101, "grad_norm": 0.025095004588365555, "learning_rate": 9.881982314575578e-07, "loss": 0.0005, "step": 199760 }, { "epoch": 1.6336427198757002, "grad_norm": 0.0011662920005619526, "learning_rate": 9.87772348400811e-07, "loss": 0.0009, "step": 199770 }, { "epoch": 1.6337244960542994, "grad_norm": 0.018125683069229126, "learning_rate": 9.873465470764026e-07, "loss": 0.0006, "step": 199780 }, { "epoch": 1.6338062722328985, "grad_norm": 0.021779846400022507, "learning_rate": 9.869208274930065e-07, "loss": 0.0008, "step": 199790 }, { "epoch": 1.6338880484114977, "grad_norm": 0.06355488300323486, "learning_rate": 9.864951896592938e-07, "loss": 0.0009, "step": 199800 }, { "epoch": 1.6339698245900969, "grad_norm": 0.026336953043937683, "learning_rate": 9.860696335839382e-07, "loss": 0.0007, "step": 199810 }, { "epoch": 1.634051600768696, "grad_norm": 0.04410037025809288, "learning_rate": 9.856441592756067e-07, "loss": 0.0003, "step": 199820 }, { "epoch": 1.6341333769472952, "grad_norm": 0.02379741705954075, "learning_rate": 9.852187667429663e-07, "loss": 0.0007, "step": 199830 }, { "epoch": 1.6342151531258944, "grad_norm": 0.020578308030962944, "learning_rate": 9.847934559946837e-07, "loss": 0.0004, "step": 199840 }, { "epoch": 1.6342969293044936, "grad_norm": 0.017645493149757385, "learning_rate": 9.843682270394216e-07, "loss": 0.0006, "step": 199850 }, { "epoch": 1.6343787054830927, "grad_norm": 0.07434692233800888, "learning_rate": 9.839430798858428e-07, "loss": 0.0006, "step": 199860 }, { "epoch": 1.634460481661692, "grad_norm": 0.03571813553571701, "learning_rate": 9.835180145426081e-07, "loss": 0.0007, "step": 199870 }, { "epoch": 1.634542257840291, "grad_norm": 0.05043457821011543, "learning_rate": 9.830930310183745e-07, "loss": 0.0005, "step": 199880 }, { "epoch": 1.6346240340188904, "grad_norm": 0.028601298108696938, "learning_rate": 9.826681293218027e-07, "loss": 0.001, "step": 199890 }, { "epoch": 1.6347058101974894, "grad_norm": 0.01706857979297638, "learning_rate": 9.822433094615463e-07, "loss": 0.0008, "step": 199900 }, { "epoch": 1.6347875863760888, "grad_norm": 0.01774783991277218, "learning_rate": 9.818185714462597e-07, "loss": 0.0004, "step": 199910 }, { "epoch": 1.6348693625546877, "grad_norm": 0.03986025229096413, "learning_rate": 9.813939152845936e-07, "loss": 0.0008, "step": 199920 }, { "epoch": 1.6349511387332871, "grad_norm": 0.06778678297996521, "learning_rate": 9.809693409852011e-07, "loss": 0.0006, "step": 199930 }, { "epoch": 1.635032914911886, "grad_norm": 0.01939818263053894, "learning_rate": 9.805448485567298e-07, "loss": 0.0006, "step": 199940 }, { "epoch": 1.6351146910904855, "grad_norm": 0.053330473601818085, "learning_rate": 9.801204380078278e-07, "loss": 0.0004, "step": 199950 }, { "epoch": 1.6351964672690844, "grad_norm": 0.016774680465459824, "learning_rate": 9.796961093471392e-07, "loss": 0.0003, "step": 199960 }, { "epoch": 1.6352782434476838, "grad_norm": 0.08599356561899185, "learning_rate": 9.792718625833091e-07, "loss": 0.0009, "step": 199970 }, { "epoch": 1.6353600196262827, "grad_norm": 0.06428360193967819, "learning_rate": 9.788476977249795e-07, "loss": 0.0005, "step": 199980 }, { "epoch": 1.6354417958048821, "grad_norm": 0.021465377882122993, "learning_rate": 9.784236147807902e-07, "loss": 0.0004, "step": 199990 }, { "epoch": 1.635523571983481, "grad_norm": 0.015329452231526375, "learning_rate": 9.7799961375938e-07, "loss": 0.0009, "step": 200000 }, { "epoch": 1.6356053481620805, "grad_norm": 0.036503344774246216, "learning_rate": 9.775756946693882e-07, "loss": 0.0007, "step": 200010 }, { "epoch": 1.6356871243406794, "grad_norm": 0.0077329352498054504, "learning_rate": 9.771518575194494e-07, "loss": 0.0008, "step": 200020 }, { "epoch": 1.6357689005192788, "grad_norm": 0.04924342781305313, "learning_rate": 9.767281023181968e-07, "loss": 0.0012, "step": 200030 }, { "epoch": 1.6358506766978778, "grad_norm": 0.20951732993125916, "learning_rate": 9.763044290742619e-07, "loss": 0.0007, "step": 200040 }, { "epoch": 1.6359324528764772, "grad_norm": 0.01691870205104351, "learning_rate": 9.758808377962776e-07, "loss": 0.0004, "step": 200050 }, { "epoch": 1.636014229055076, "grad_norm": 0.0170306209474802, "learning_rate": 9.754573284928713e-07, "loss": 0.0007, "step": 200060 }, { "epoch": 1.6360960052336755, "grad_norm": 0.003997316118329763, "learning_rate": 9.750339011726707e-07, "loss": 0.0003, "step": 200070 }, { "epoch": 1.6361777814122747, "grad_norm": 0.02521369978785515, "learning_rate": 9.746105558443015e-07, "loss": 0.0004, "step": 200080 }, { "epoch": 1.6362595575908738, "grad_norm": 0.0327334925532341, "learning_rate": 9.74187292516387e-07, "loss": 0.0007, "step": 200090 }, { "epoch": 1.636341333769473, "grad_norm": 0.02408798411488533, "learning_rate": 9.7376411119755e-07, "loss": 0.0004, "step": 200100 }, { "epoch": 1.6364231099480722, "grad_norm": 0.162260964512825, "learning_rate": 9.733410118964104e-07, "loss": 0.0008, "step": 200110 }, { "epoch": 1.6365048861266713, "grad_norm": 0.035904936492443085, "learning_rate": 9.729179946215861e-07, "loss": 0.0008, "step": 200120 }, { "epoch": 1.6365866623052705, "grad_norm": 0.014646163210272789, "learning_rate": 9.724950593816974e-07, "loss": 0.0004, "step": 200130 }, { "epoch": 1.6366684384838697, "grad_norm": 0.010911151766777039, "learning_rate": 9.720722061853577e-07, "loss": 0.0007, "step": 200140 }, { "epoch": 1.6367502146624688, "grad_norm": 0.021786464378237724, "learning_rate": 9.716494350411808e-07, "loss": 0.0005, "step": 200150 }, { "epoch": 1.636831990841068, "grad_norm": 0.04347533732652664, "learning_rate": 9.71226745957779e-07, "loss": 0.0004, "step": 200160 }, { "epoch": 1.6369137670196672, "grad_norm": 0.018720490857958794, "learning_rate": 9.70804138943764e-07, "loss": 0.0006, "step": 200170 }, { "epoch": 1.6369955431982663, "grad_norm": 0.013630462810397148, "learning_rate": 9.703816140077432e-07, "loss": 0.0007, "step": 200180 }, { "epoch": 1.6370773193768655, "grad_norm": 0.013792752288281918, "learning_rate": 9.699591711583251e-07, "loss": 0.0006, "step": 200190 }, { "epoch": 1.6371590955554647, "grad_norm": 0.0785045325756073, "learning_rate": 9.695368104041142e-07, "loss": 0.0004, "step": 200200 }, { "epoch": 1.6372408717340639, "grad_norm": 0.038489650934934616, "learning_rate": 9.691145317537147e-07, "loss": 0.0008, "step": 200210 }, { "epoch": 1.637322647912663, "grad_norm": 0.025310907512903214, "learning_rate": 9.686923352157285e-07, "loss": 0.0011, "step": 200220 }, { "epoch": 1.6374044240912622, "grad_norm": 0.05132181569933891, "learning_rate": 9.682702207987566e-07, "loss": 0.001, "step": 200230 }, { "epoch": 1.6374862002698614, "grad_norm": 0.04299640282988548, "learning_rate": 9.678481885113972e-07, "loss": 0.0005, "step": 200240 }, { "epoch": 1.6375679764484605, "grad_norm": 0.01239801850169897, "learning_rate": 9.674262383622463e-07, "loss": 0.0006, "step": 200250 }, { "epoch": 1.6376497526270597, "grad_norm": 0.012077421881258488, "learning_rate": 9.67004370359902e-07, "loss": 0.0006, "step": 200260 }, { "epoch": 1.6377315288056589, "grad_norm": 0.006316559389233589, "learning_rate": 9.665825845129567e-07, "loss": 0.0006, "step": 200270 }, { "epoch": 1.637813304984258, "grad_norm": 0.007414365652948618, "learning_rate": 9.661608808300033e-07, "loss": 0.0003, "step": 200280 }, { "epoch": 1.6378950811628572, "grad_norm": 0.10597191005945206, "learning_rate": 9.657392593196301e-07, "loss": 0.0013, "step": 200290 }, { "epoch": 1.6379768573414566, "grad_norm": 0.052319131791591644, "learning_rate": 9.653177199904285e-07, "loss": 0.0009, "step": 200300 }, { "epoch": 1.6380586335200555, "grad_norm": 0.01441288460046053, "learning_rate": 9.648962628509844e-07, "loss": 0.0007, "step": 200310 }, { "epoch": 1.638140409698655, "grad_norm": 0.022305067628622055, "learning_rate": 9.644748879098837e-07, "loss": 0.0007, "step": 200320 }, { "epoch": 1.6382221858772539, "grad_norm": 0.04449540376663208, "learning_rate": 9.640535951757097e-07, "loss": 0.0005, "step": 200330 }, { "epoch": 1.6383039620558533, "grad_norm": 0.04835890606045723, "learning_rate": 9.636323846570444e-07, "loss": 0.0036, "step": 200340 }, { "epoch": 1.6383857382344522, "grad_norm": 0.01661272533237934, "learning_rate": 9.632112563624684e-07, "loss": 0.0006, "step": 200350 }, { "epoch": 1.6384675144130516, "grad_norm": 0.06337470561265945, "learning_rate": 9.627902103005605e-07, "loss": 0.0007, "step": 200360 }, { "epoch": 1.6385492905916506, "grad_norm": 0.042492229491472244, "learning_rate": 9.623692464798962e-07, "loss": 0.0004, "step": 200370 }, { "epoch": 1.63863106677025, "grad_norm": 0.03525562956929207, "learning_rate": 9.61948364909054e-07, "loss": 0.0008, "step": 200380 }, { "epoch": 1.638712842948849, "grad_norm": 0.0409574881196022, "learning_rate": 9.61527565596605e-07, "loss": 0.0011, "step": 200390 }, { "epoch": 1.6387946191274483, "grad_norm": 0.01869933307170868, "learning_rate": 9.611068485511227e-07, "loss": 0.0007, "step": 200400 }, { "epoch": 1.6388763953060472, "grad_norm": 0.005739975720643997, "learning_rate": 9.606862137811757e-07, "loss": 0.0005, "step": 200410 }, { "epoch": 1.6389581714846466, "grad_norm": 0.004122851882129908, "learning_rate": 9.602656612953348e-07, "loss": 0.0004, "step": 200420 }, { "epoch": 1.6390399476632456, "grad_norm": 0.03538583591580391, "learning_rate": 9.598451911021662e-07, "loss": 0.0006, "step": 200430 }, { "epoch": 1.639121723841845, "grad_norm": 0.012008558958768845, "learning_rate": 9.594248032102348e-07, "loss": 0.0003, "step": 200440 }, { "epoch": 1.639203500020444, "grad_norm": 0.03935571014881134, "learning_rate": 9.59004497628105e-07, "loss": 0.0009, "step": 200450 }, { "epoch": 1.6392852761990433, "grad_norm": 0.05668915808200836, "learning_rate": 9.58584274364338e-07, "loss": 0.0006, "step": 200460 }, { "epoch": 1.6393670523776422, "grad_norm": 0.15146775543689728, "learning_rate": 9.581641334274944e-07, "loss": 0.001, "step": 200470 }, { "epoch": 1.6394488285562416, "grad_norm": 0.05560297146439552, "learning_rate": 9.57744074826133e-07, "loss": 0.0006, "step": 200480 }, { "epoch": 1.6395306047348406, "grad_norm": 0.06717349588871002, "learning_rate": 9.573240985688088e-07, "loss": 0.0006, "step": 200490 }, { "epoch": 1.63961238091344, "grad_norm": 0.05968429893255234, "learning_rate": 9.569042046640803e-07, "loss": 0.0006, "step": 200500 }, { "epoch": 1.6396941570920391, "grad_norm": 0.04020388051867485, "learning_rate": 9.564843931204992e-07, "loss": 0.0005, "step": 200510 }, { "epoch": 1.6397759332706383, "grad_norm": 0.05133882910013199, "learning_rate": 9.56064663946618e-07, "loss": 0.001, "step": 200520 }, { "epoch": 1.6398577094492375, "grad_norm": 0.006418467033654451, "learning_rate": 9.556450171509857e-07, "loss": 0.0006, "step": 200530 }, { "epoch": 1.6399394856278366, "grad_norm": 0.07412862032651901, "learning_rate": 9.552254527421529e-07, "loss": 0.0009, "step": 200540 }, { "epoch": 1.6400212618064358, "grad_norm": 0.04441480338573456, "learning_rate": 9.548059707286656e-07, "loss": 0.0005, "step": 200550 }, { "epoch": 1.640103037985035, "grad_norm": 0.03514513373374939, "learning_rate": 9.54386571119068e-07, "loss": 0.0006, "step": 200560 }, { "epoch": 1.6401848141636342, "grad_norm": 0.08284670114517212, "learning_rate": 9.539672539219052e-07, "loss": 0.0015, "step": 200570 }, { "epoch": 1.6402665903422333, "grad_norm": 0.07028713077306747, "learning_rate": 9.535480191457175e-07, "loss": 0.0012, "step": 200580 }, { "epoch": 1.6403483665208325, "grad_norm": 0.10162802785634995, "learning_rate": 9.531288667990462e-07, "loss": 0.0011, "step": 200590 }, { "epoch": 1.6404301426994317, "grad_norm": 0.005603487137705088, "learning_rate": 9.527097968904298e-07, "loss": 0.0008, "step": 200600 }, { "epoch": 1.6405119188780308, "grad_norm": 0.06964396685361862, "learning_rate": 9.522908094284028e-07, "loss": 0.0008, "step": 200610 }, { "epoch": 1.64059369505663, "grad_norm": 0.013491851277649403, "learning_rate": 9.518719044215031e-07, "loss": 0.0006, "step": 200620 }, { "epoch": 1.6406754712352292, "grad_norm": 0.004358501639217138, "learning_rate": 9.514530818782635e-07, "loss": 0.0013, "step": 200630 }, { "epoch": 1.6407572474138283, "grad_norm": 0.018244268372654915, "learning_rate": 9.51034341807216e-07, "loss": 0.0008, "step": 200640 }, { "epoch": 1.6408390235924275, "grad_norm": 0.042092837393283844, "learning_rate": 9.506156842168879e-07, "loss": 0.0009, "step": 200650 }, { "epoch": 1.6409207997710267, "grad_norm": 0.015345778316259384, "learning_rate": 9.501971091158119e-07, "loss": 0.0009, "step": 200660 }, { "epoch": 1.6410025759496258, "grad_norm": 0.041836537420749664, "learning_rate": 9.497786165125122e-07, "loss": 0.0006, "step": 200670 }, { "epoch": 1.641084352128225, "grad_norm": 0.08368262648582458, "learning_rate": 9.493602064155144e-07, "loss": 0.0019, "step": 200680 }, { "epoch": 1.6411661283068242, "grad_norm": 0.023599237203598022, "learning_rate": 9.489418788333415e-07, "loss": 0.0008, "step": 200690 }, { "epoch": 1.6412479044854233, "grad_norm": 0.03748705983161926, "learning_rate": 9.485236337745158e-07, "loss": 0.0008, "step": 200700 }, { "epoch": 1.6413296806640225, "grad_norm": 0.07577075809240341, "learning_rate": 9.481054712475568e-07, "loss": 0.0014, "step": 200710 }, { "epoch": 1.6414114568426217, "grad_norm": 0.012804900296032429, "learning_rate": 9.47687391260983e-07, "loss": 0.0008, "step": 200720 }, { "epoch": 1.641493233021221, "grad_norm": 0.013096536509692669, "learning_rate": 9.472693938233091e-07, "loss": 0.0011, "step": 200730 }, { "epoch": 1.64157500919982, "grad_norm": 0.064796581864357, "learning_rate": 9.468514789430533e-07, "loss": 0.0008, "step": 200740 }, { "epoch": 1.6416567853784194, "grad_norm": 0.030765356495976448, "learning_rate": 9.464336466287277e-07, "loss": 0.0005, "step": 200750 }, { "epoch": 1.6417385615570184, "grad_norm": 0.016654813662171364, "learning_rate": 9.460158968888439e-07, "loss": 0.0005, "step": 200760 }, { "epoch": 1.6418203377356178, "grad_norm": 0.10265180468559265, "learning_rate": 9.455982297319094e-07, "loss": 0.0008, "step": 200770 }, { "epoch": 1.6419021139142167, "grad_norm": 0.02237321063876152, "learning_rate": 9.451806451664364e-07, "loss": 0.0008, "step": 200780 }, { "epoch": 1.641983890092816, "grad_norm": 0.02340966835618019, "learning_rate": 9.447631432009296e-07, "loss": 0.0005, "step": 200790 }, { "epoch": 1.642065666271415, "grad_norm": 0.009078043513000011, "learning_rate": 9.443457238438936e-07, "loss": 0.0007, "step": 200800 }, { "epoch": 1.6421474424500144, "grad_norm": 0.024514809250831604, "learning_rate": 9.439283871038324e-07, "loss": 0.0009, "step": 200810 }, { "epoch": 1.6422292186286134, "grad_norm": 0.23186960816383362, "learning_rate": 9.435111329892461e-07, "loss": 0.0006, "step": 200820 }, { "epoch": 1.6423109948072128, "grad_norm": 0.047444410622119904, "learning_rate": 9.43093961508636e-07, "loss": 0.001, "step": 200830 }, { "epoch": 1.6423927709858117, "grad_norm": 0.03232358768582344, "learning_rate": 9.426768726704987e-07, "loss": 0.0011, "step": 200840 }, { "epoch": 1.642474547164411, "grad_norm": 0.007846364751458168, "learning_rate": 9.42259866483331e-07, "loss": 0.0006, "step": 200850 }, { "epoch": 1.64255632334301, "grad_norm": 0.002887221286073327, "learning_rate": 9.418429429556287e-07, "loss": 0.0018, "step": 200860 }, { "epoch": 1.6426380995216094, "grad_norm": 0.030721155926585197, "learning_rate": 9.414261020958848e-07, "loss": 0.0007, "step": 200870 }, { "epoch": 1.6427198757002084, "grad_norm": 0.018089978024363518, "learning_rate": 9.4100934391259e-07, "loss": 0.0004, "step": 200880 }, { "epoch": 1.6428016518788078, "grad_norm": 0.00523373344913125, "learning_rate": 9.40592668414233e-07, "loss": 0.0009, "step": 200890 }, { "epoch": 1.6428834280574067, "grad_norm": 0.048814766108989716, "learning_rate": 9.401760756093042e-07, "loss": 0.0011, "step": 200900 }, { "epoch": 1.6429652042360061, "grad_norm": 0.014163387008011341, "learning_rate": 9.397595655062886e-07, "loss": 0.0006, "step": 200910 }, { "epoch": 1.643046980414605, "grad_norm": 0.016629813238978386, "learning_rate": 9.393431381136708e-07, "loss": 0.0008, "step": 200920 }, { "epoch": 1.6431287565932045, "grad_norm": 0.05111553147435188, "learning_rate": 9.389267934399343e-07, "loss": 0.0011, "step": 200930 }, { "epoch": 1.6432105327718036, "grad_norm": 0.007520090788602829, "learning_rate": 9.385105314935599e-07, "loss": 0.0005, "step": 200940 }, { "epoch": 1.6432923089504028, "grad_norm": 0.027200749143958092, "learning_rate": 9.380943522830277e-07, "loss": 0.0003, "step": 200950 }, { "epoch": 1.643374085129002, "grad_norm": 0.083732470870018, "learning_rate": 9.376782558168146e-07, "loss": 0.0007, "step": 200960 }, { "epoch": 1.6434558613076011, "grad_norm": 0.04592290148139, "learning_rate": 9.372622421033966e-07, "loss": 0.0004, "step": 200970 }, { "epoch": 1.6435376374862003, "grad_norm": 0.010592718608677387, "learning_rate": 9.368463111512505e-07, "loss": 0.0007, "step": 200980 }, { "epoch": 1.6436194136647995, "grad_norm": 0.1350664496421814, "learning_rate": 9.364304629688475e-07, "loss": 0.0008, "step": 200990 }, { "epoch": 1.6437011898433986, "grad_norm": 0.008243400603532791, "learning_rate": 9.360146975646589e-07, "loss": 0.0004, "step": 201000 }, { "epoch": 1.6437829660219978, "grad_norm": 0.0170853603631258, "learning_rate": 9.355990149471528e-07, "loss": 0.0008, "step": 201010 }, { "epoch": 1.643864742200597, "grad_norm": 0.0033181319013237953, "learning_rate": 9.351834151248002e-07, "loss": 0.0006, "step": 201020 }, { "epoch": 1.6439465183791961, "grad_norm": 0.032024890184402466, "learning_rate": 9.347678981060654e-07, "loss": 0.0007, "step": 201030 }, { "epoch": 1.6440282945577953, "grad_norm": 0.0015563424676656723, "learning_rate": 9.343524638994128e-07, "loss": 0.0006, "step": 201040 }, { "epoch": 1.6441100707363945, "grad_norm": 0.008741497062146664, "learning_rate": 9.33937112513305e-07, "loss": 0.0007, "step": 201050 }, { "epoch": 1.6441918469149936, "grad_norm": 0.01663394644856453, "learning_rate": 9.335218439562038e-07, "loss": 0.002, "step": 201060 }, { "epoch": 1.6442736230935928, "grad_norm": 0.03050350397825241, "learning_rate": 9.331066582365677e-07, "loss": 0.0015, "step": 201070 }, { "epoch": 1.644355399272192, "grad_norm": 0.0024179574102163315, "learning_rate": 9.326915553628552e-07, "loss": 0.0005, "step": 201080 }, { "epoch": 1.6444371754507912, "grad_norm": 0.01947121135890484, "learning_rate": 9.322765353435198e-07, "loss": 0.0009, "step": 201090 }, { "epoch": 1.6445189516293903, "grad_norm": 0.1047310158610344, "learning_rate": 9.318615981870194e-07, "loss": 0.0006, "step": 201100 }, { "epoch": 1.6446007278079895, "grad_norm": 0.12821151316165924, "learning_rate": 9.314467439018049e-07, "loss": 0.0007, "step": 201110 }, { "epoch": 1.6446825039865887, "grad_norm": 0.0066971564665436745, "learning_rate": 9.310319724963274e-07, "loss": 0.0005, "step": 201120 }, { "epoch": 1.6447642801651878, "grad_norm": 0.04401236027479172, "learning_rate": 9.30617283979035e-07, "loss": 0.0005, "step": 201130 }, { "epoch": 1.644846056343787, "grad_norm": 0.011502273380756378, "learning_rate": 9.302026783583773e-07, "loss": 0.0004, "step": 201140 }, { "epoch": 1.6449278325223862, "grad_norm": 0.05371628701686859, "learning_rate": 9.29788155642799e-07, "loss": 0.0004, "step": 201150 }, { "epoch": 1.6450096087009856, "grad_norm": 0.02593999169766903, "learning_rate": 9.293737158407446e-07, "loss": 0.0007, "step": 201160 }, { "epoch": 1.6450913848795845, "grad_norm": 0.05480498820543289, "learning_rate": 9.28959358960656e-07, "loss": 0.0007, "step": 201170 }, { "epoch": 1.645173161058184, "grad_norm": 0.022643903270363808, "learning_rate": 9.285450850109751e-07, "loss": 0.0007, "step": 201180 }, { "epoch": 1.6452549372367828, "grad_norm": 0.011242612265050411, "learning_rate": 9.281308940001399e-07, "loss": 0.0011, "step": 201190 }, { "epoch": 1.6453367134153822, "grad_norm": 0.04019701108336449, "learning_rate": 9.277167859365882e-07, "loss": 0.0006, "step": 201200 }, { "epoch": 1.6454184895939812, "grad_norm": 0.036685820668935776, "learning_rate": 9.273027608287544e-07, "loss": 0.0006, "step": 201210 }, { "epoch": 1.6455002657725806, "grad_norm": 0.10328643023967743, "learning_rate": 9.26888818685075e-07, "loss": 0.0004, "step": 201220 }, { "epoch": 1.6455820419511795, "grad_norm": 0.03922630101442337, "learning_rate": 9.26474959513981e-07, "loss": 0.0004, "step": 201230 }, { "epoch": 1.645663818129779, "grad_norm": 0.02379133366048336, "learning_rate": 9.260611833239036e-07, "loss": 0.0002, "step": 201240 }, { "epoch": 1.6457455943083779, "grad_norm": 0.051013872027397156, "learning_rate": 9.256474901232699e-07, "loss": 0.0008, "step": 201250 }, { "epoch": 1.6458273704869772, "grad_norm": 0.024315157905220985, "learning_rate": 9.252338799205096e-07, "loss": 0.0013, "step": 201260 }, { "epoch": 1.6459091466655762, "grad_norm": 0.0038899965584278107, "learning_rate": 9.248203527240479e-07, "loss": 0.0007, "step": 201270 }, { "epoch": 1.6459909228441756, "grad_norm": 0.004912514239549637, "learning_rate": 9.244069085423074e-07, "loss": 0.001, "step": 201280 }, { "epoch": 1.6460726990227745, "grad_norm": 0.016041666269302368, "learning_rate": 9.239935473837114e-07, "loss": 0.0016, "step": 201290 }, { "epoch": 1.646154475201374, "grad_norm": 0.08436641842126846, "learning_rate": 9.235802692566803e-07, "loss": 0.0004, "step": 201300 }, { "epoch": 1.6462362513799729, "grad_norm": 0.020057521760463715, "learning_rate": 9.231670741696319e-07, "loss": 0.0003, "step": 201310 }, { "epoch": 1.6463180275585723, "grad_norm": 0.1258133500814438, "learning_rate": 9.227539621309845e-07, "loss": 0.0012, "step": 201320 }, { "epoch": 1.6463998037371712, "grad_norm": 0.021851567551493645, "learning_rate": 9.223409331491518e-07, "loss": 0.0012, "step": 201330 }, { "epoch": 1.6464815799157706, "grad_norm": 0.010095315054059029, "learning_rate": 9.219279872325498e-07, "loss": 0.0013, "step": 201340 }, { "epoch": 1.6465633560943695, "grad_norm": 0.15854255855083466, "learning_rate": 9.215151243895897e-07, "loss": 0.0006, "step": 201350 }, { "epoch": 1.646645132272969, "grad_norm": 0.0009825039887800813, "learning_rate": 9.211023446286815e-07, "loss": 0.0009, "step": 201360 }, { "epoch": 1.646726908451568, "grad_norm": 0.013248470611870289, "learning_rate": 9.206896479582328e-07, "loss": 0.0004, "step": 201370 }, { "epoch": 1.6468086846301673, "grad_norm": 0.017638003453612328, "learning_rate": 9.20277034386653e-07, "loss": 0.0007, "step": 201380 }, { "epoch": 1.6468904608087664, "grad_norm": 0.022264977917075157, "learning_rate": 9.198645039223464e-07, "loss": 0.0009, "step": 201390 }, { "epoch": 1.6469722369873656, "grad_norm": 0.004413120448589325, "learning_rate": 9.194520565737164e-07, "loss": 0.0004, "step": 201400 }, { "epoch": 1.6470540131659648, "grad_norm": 0.08185932785272598, "learning_rate": 9.190396923491646e-07, "loss": 0.0003, "step": 201410 }, { "epoch": 1.647135789344564, "grad_norm": 0.0023106562439352274, "learning_rate": 9.186274112570914e-07, "loss": 0.0005, "step": 201420 }, { "epoch": 1.6472175655231631, "grad_norm": 0.021916430443525314, "learning_rate": 9.182152133058958e-07, "loss": 0.0011, "step": 201430 }, { "epoch": 1.6472993417017623, "grad_norm": 0.07814885675907135, "learning_rate": 9.178030985039737e-07, "loss": 0.0005, "step": 201440 }, { "epoch": 1.6473811178803615, "grad_norm": 0.04087153449654579, "learning_rate": 9.173910668597197e-07, "loss": 0.0007, "step": 201450 }, { "epoch": 1.6474628940589606, "grad_norm": 0.06158788502216339, "learning_rate": 9.169791183815291e-07, "loss": 0.0006, "step": 201460 }, { "epoch": 1.6475446702375598, "grad_norm": 0.0122281014919281, "learning_rate": 9.16567253077793e-07, "loss": 0.0011, "step": 201470 }, { "epoch": 1.647626446416159, "grad_norm": 0.004390012938529253, "learning_rate": 9.161554709569009e-07, "loss": 0.0007, "step": 201480 }, { "epoch": 1.6477082225947581, "grad_norm": 0.05520971119403839, "learning_rate": 9.1574377202724e-07, "loss": 0.0003, "step": 201490 }, { "epoch": 1.6477899987733573, "grad_norm": 0.02405577525496483, "learning_rate": 9.153321562972001e-07, "loss": 0.0005, "step": 201500 }, { "epoch": 1.6478717749519565, "grad_norm": 0.07162179052829742, "learning_rate": 9.14920623775164e-07, "loss": 0.0015, "step": 201510 }, { "epoch": 1.6479535511305556, "grad_norm": 0.02003643661737442, "learning_rate": 9.14509174469515e-07, "loss": 0.0003, "step": 201520 }, { "epoch": 1.6480353273091548, "grad_norm": 0.04784867540001869, "learning_rate": 9.140978083886354e-07, "loss": 0.0005, "step": 201530 }, { "epoch": 1.648117103487754, "grad_norm": 0.09587026387453079, "learning_rate": 9.136865255409049e-07, "loss": 0.0008, "step": 201540 }, { "epoch": 1.6481988796663531, "grad_norm": 0.035771969705820084, "learning_rate": 9.13275325934701e-07, "loss": 0.0004, "step": 201550 }, { "epoch": 1.6482806558449523, "grad_norm": 0.017866890877485275, "learning_rate": 9.128642095784007e-07, "loss": 0.0008, "step": 201560 }, { "epoch": 1.6483624320235517, "grad_norm": 0.02413405478000641, "learning_rate": 9.124531764803773e-07, "loss": 0.0007, "step": 201570 }, { "epoch": 1.6484442082021507, "grad_norm": 0.06509850174188614, "learning_rate": 9.120422266490065e-07, "loss": 0.0005, "step": 201580 }, { "epoch": 1.64852598438075, "grad_norm": 0.052685435861349106, "learning_rate": 9.116313600926586e-07, "loss": 0.0004, "step": 201590 }, { "epoch": 1.648607760559349, "grad_norm": 0.10055843740701675, "learning_rate": 9.112205768197024e-07, "loss": 0.0006, "step": 201600 }, { "epoch": 1.6486895367379484, "grad_norm": 0.055905431509017944, "learning_rate": 9.108098768385071e-07, "loss": 0.0005, "step": 201610 }, { "epoch": 1.6487713129165473, "grad_norm": 0.007872347719967365, "learning_rate": 9.103992601574374e-07, "loss": 0.0013, "step": 201620 }, { "epoch": 1.6488530890951467, "grad_norm": 0.015538602136075497, "learning_rate": 9.0998872678486e-07, "loss": 0.0004, "step": 201630 }, { "epoch": 1.6489348652737457, "grad_norm": 0.0736030712723732, "learning_rate": 9.095782767291367e-07, "loss": 0.0005, "step": 201640 }, { "epoch": 1.649016641452345, "grad_norm": 0.07684684544801712, "learning_rate": 9.091679099986289e-07, "loss": 0.0009, "step": 201650 }, { "epoch": 1.649098417630944, "grad_norm": 0.07131755352020264, "learning_rate": 9.087576266016962e-07, "loss": 0.0006, "step": 201660 }, { "epoch": 1.6491801938095434, "grad_norm": 0.044608522206544876, "learning_rate": 9.083474265466957e-07, "loss": 0.0004, "step": 201670 }, { "epoch": 1.6492619699881423, "grad_norm": 0.15847310423851013, "learning_rate": 9.079373098419841e-07, "loss": 0.0006, "step": 201680 }, { "epoch": 1.6493437461667417, "grad_norm": 0.02305791899561882, "learning_rate": 9.075272764959159e-07, "loss": 0.0012, "step": 201690 }, { "epoch": 1.6494255223453407, "grad_norm": 0.05324876308441162, "learning_rate": 9.071173265168421e-07, "loss": 0.0025, "step": 201700 }, { "epoch": 1.64950729852394, "grad_norm": 0.02482251077890396, "learning_rate": 9.067074599131164e-07, "loss": 0.0005, "step": 201710 }, { "epoch": 1.649589074702539, "grad_norm": 0.013553346507251263, "learning_rate": 9.06297676693087e-07, "loss": 0.0007, "step": 201720 }, { "epoch": 1.6496708508811384, "grad_norm": 0.03769608587026596, "learning_rate": 9.058879768651018e-07, "loss": 0.0008, "step": 201730 }, { "epoch": 1.6497526270597374, "grad_norm": 0.010235510766506195, "learning_rate": 9.05478360437505e-07, "loss": 0.0004, "step": 201740 }, { "epoch": 1.6498344032383367, "grad_norm": 0.019887186586856842, "learning_rate": 9.050688274186431e-07, "loss": 0.0006, "step": 201750 }, { "epoch": 1.6499161794169357, "grad_norm": 0.02940242923796177, "learning_rate": 9.046593778168583e-07, "loss": 0.0005, "step": 201760 }, { "epoch": 1.649997955595535, "grad_norm": 0.03816710785031319, "learning_rate": 9.042500116404901e-07, "loss": 0.0005, "step": 201770 }, { "epoch": 1.650079731774134, "grad_norm": 0.0011896522482857108, "learning_rate": 9.03840728897879e-07, "loss": 0.0005, "step": 201780 }, { "epoch": 1.6501615079527334, "grad_norm": 0.02591490000486374, "learning_rate": 9.034315295973616e-07, "loss": 0.0007, "step": 201790 }, { "epoch": 1.6502432841313326, "grad_norm": 0.024829024448990822, "learning_rate": 9.030224137472737e-07, "loss": 0.0002, "step": 201800 }, { "epoch": 1.6503250603099318, "grad_norm": 0.022099774330854416, "learning_rate": 9.026133813559496e-07, "loss": 0.0004, "step": 201810 }, { "epoch": 1.650406836488531, "grad_norm": 0.07505159825086594, "learning_rate": 9.022044324317198e-07, "loss": 0.0009, "step": 201820 }, { "epoch": 1.65048861266713, "grad_norm": 0.02867298759520054, "learning_rate": 9.017955669829181e-07, "loss": 0.0013, "step": 201830 }, { "epoch": 1.6505703888457293, "grad_norm": 0.04658406227827072, "learning_rate": 9.013867850178721e-07, "loss": 0.0005, "step": 201840 }, { "epoch": 1.6506521650243284, "grad_norm": 0.011126860044896603, "learning_rate": 9.009780865449086e-07, "loss": 0.0007, "step": 201850 }, { "epoch": 1.6507339412029276, "grad_norm": 0.02885263040661812, "learning_rate": 9.005694715723517e-07, "loss": 0.0007, "step": 201860 }, { "epoch": 1.6508157173815268, "grad_norm": 0.019393229857087135, "learning_rate": 9.001609401085287e-07, "loss": 0.0005, "step": 201870 }, { "epoch": 1.650897493560126, "grad_norm": 0.053414396941661835, "learning_rate": 8.997524921617596e-07, "loss": 0.0008, "step": 201880 }, { "epoch": 1.650979269738725, "grad_norm": 0.03526999428868294, "learning_rate": 8.993441277403647e-07, "loss": 0.0005, "step": 201890 }, { "epoch": 1.6510610459173243, "grad_norm": 0.08639555424451828, "learning_rate": 8.989358468526638e-07, "loss": 0.0006, "step": 201900 }, { "epoch": 1.6511428220959234, "grad_norm": 0.006340011488646269, "learning_rate": 8.985276495069728e-07, "loss": 0.0005, "step": 201910 }, { "epoch": 1.6512245982745226, "grad_norm": 0.05679580196738243, "learning_rate": 8.981195357116074e-07, "loss": 0.0007, "step": 201920 }, { "epoch": 1.6513063744531218, "grad_norm": 0.0027473263908177614, "learning_rate": 8.977115054748814e-07, "loss": 0.0013, "step": 201930 }, { "epoch": 1.651388150631721, "grad_norm": 0.0024478486739099026, "learning_rate": 8.973035588051054e-07, "loss": 0.0007, "step": 201940 }, { "epoch": 1.6514699268103201, "grad_norm": 0.028465837240219116, "learning_rate": 8.968956957105918e-07, "loss": 0.0005, "step": 201950 }, { "epoch": 1.6515517029889193, "grad_norm": 0.020660290494561195, "learning_rate": 8.964879161996481e-07, "loss": 0.0012, "step": 201960 }, { "epoch": 1.6516334791675185, "grad_norm": 0.04619567096233368, "learning_rate": 8.960802202805813e-07, "loss": 0.0005, "step": 201970 }, { "epoch": 1.6517152553461176, "grad_norm": 0.02037588320672512, "learning_rate": 8.956726079616945e-07, "loss": 0.0008, "step": 201980 }, { "epoch": 1.6517970315247168, "grad_norm": 0.005859770346432924, "learning_rate": 8.952650792512945e-07, "loss": 0.0005, "step": 201990 }, { "epoch": 1.6518788077033162, "grad_norm": 0.05960145220160484, "learning_rate": 8.948576341576808e-07, "loss": 0.0005, "step": 202000 }, { "epoch": 1.6519605838819151, "grad_norm": 0.0443556122481823, "learning_rate": 8.944502726891546e-07, "loss": 0.0008, "step": 202010 }, { "epoch": 1.6520423600605145, "grad_norm": 0.1316971480846405, "learning_rate": 8.940429948540131e-07, "loss": 0.001, "step": 202020 }, { "epoch": 1.6521241362391135, "grad_norm": 0.0065269083715975285, "learning_rate": 8.936358006605533e-07, "loss": 0.0008, "step": 202030 }, { "epoch": 1.6522059124177129, "grad_norm": 0.016371751204133034, "learning_rate": 8.932286901170706e-07, "loss": 0.0008, "step": 202040 }, { "epoch": 1.6522876885963118, "grad_norm": 0.06038454547524452, "learning_rate": 8.92821663231857e-07, "loss": 0.0009, "step": 202050 }, { "epoch": 1.6523694647749112, "grad_norm": 0.017673300579190254, "learning_rate": 8.924147200132033e-07, "loss": 0.0006, "step": 202060 }, { "epoch": 1.6524512409535101, "grad_norm": 0.012160668149590492, "learning_rate": 8.920078604694022e-07, "loss": 0.0008, "step": 202070 }, { "epoch": 1.6525330171321095, "grad_norm": 0.06109294295310974, "learning_rate": 8.916010846087403e-07, "loss": 0.0007, "step": 202080 }, { "epoch": 1.6526147933107085, "grad_norm": 0.0009312452166341245, "learning_rate": 8.911943924395033e-07, "loss": 0.0005, "step": 202090 }, { "epoch": 1.6526965694893079, "grad_norm": 0.17204341292381287, "learning_rate": 8.907877839699752e-07, "loss": 0.0006, "step": 202100 }, { "epoch": 1.6527783456679068, "grad_norm": 0.010347956791520119, "learning_rate": 8.903812592084415e-07, "loss": 0.0003, "step": 202110 }, { "epoch": 1.6528601218465062, "grad_norm": 0.0064159841276705265, "learning_rate": 8.899748181631817e-07, "loss": 0.0009, "step": 202120 }, { "epoch": 1.6529418980251052, "grad_norm": 0.035035260021686554, "learning_rate": 8.89568460842476e-07, "loss": 0.0017, "step": 202130 }, { "epoch": 1.6530236742037046, "grad_norm": 0.013681691139936447, "learning_rate": 8.891621872546013e-07, "loss": 0.0008, "step": 202140 }, { "epoch": 1.6531054503823035, "grad_norm": 0.009550140239298344, "learning_rate": 8.887559974078347e-07, "loss": 0.0014, "step": 202150 }, { "epoch": 1.653187226560903, "grad_norm": 0.03606347739696503, "learning_rate": 8.883498913104505e-07, "loss": 0.0008, "step": 202160 }, { "epoch": 1.6532690027395018, "grad_norm": 0.011904174461960793, "learning_rate": 8.879438689707209e-07, "loss": 0.0007, "step": 202170 }, { "epoch": 1.6533507789181012, "grad_norm": 0.02106594853103161, "learning_rate": 8.875379303969156e-07, "loss": 0.0008, "step": 202180 }, { "epoch": 1.6534325550967002, "grad_norm": 0.094929538667202, "learning_rate": 8.871320755973073e-07, "loss": 0.0005, "step": 202190 }, { "epoch": 1.6535143312752996, "grad_norm": 0.010540534742176533, "learning_rate": 8.867263045801611e-07, "loss": 0.0003, "step": 202200 }, { "epoch": 1.6535961074538987, "grad_norm": 0.08784586191177368, "learning_rate": 8.863206173537442e-07, "loss": 0.0009, "step": 202210 }, { "epoch": 1.653677883632498, "grad_norm": 0.058556318283081055, "learning_rate": 8.859150139263184e-07, "loss": 0.0006, "step": 202220 }, { "epoch": 1.653759659811097, "grad_norm": 0.015904072672128677, "learning_rate": 8.855094943061488e-07, "loss": 0.0005, "step": 202230 }, { "epoch": 1.6538414359896962, "grad_norm": 0.017638161778450012, "learning_rate": 8.851040585014958e-07, "loss": 0.0005, "step": 202240 }, { "epoch": 1.6539232121682954, "grad_norm": 0.017903484404087067, "learning_rate": 8.846987065206175e-07, "loss": 0.0029, "step": 202250 }, { "epoch": 1.6540049883468946, "grad_norm": 0.03990655019879341, "learning_rate": 8.842934383717716e-07, "loss": 0.0012, "step": 202260 }, { "epoch": 1.6540867645254937, "grad_norm": 0.07448174804449081, "learning_rate": 8.838882540632144e-07, "loss": 0.0004, "step": 202270 }, { "epoch": 1.654168540704093, "grad_norm": 0.04113836586475372, "learning_rate": 8.834831536031985e-07, "loss": 0.0005, "step": 202280 }, { "epoch": 1.654250316882692, "grad_norm": 0.010612159967422485, "learning_rate": 8.830781369999769e-07, "loss": 0.0005, "step": 202290 }, { "epoch": 1.6543320930612913, "grad_norm": 0.10202817618846893, "learning_rate": 8.826732042617992e-07, "loss": 0.0006, "step": 202300 }, { "epoch": 1.6544138692398904, "grad_norm": 0.03521810844540596, "learning_rate": 8.822683553969163e-07, "loss": 0.0014, "step": 202310 }, { "epoch": 1.6544956454184896, "grad_norm": 0.007833042182028294, "learning_rate": 8.818635904135736e-07, "loss": 0.0005, "step": 202320 }, { "epoch": 1.6545774215970888, "grad_norm": 0.0007444432703778148, "learning_rate": 8.814589093200171e-07, "loss": 0.0012, "step": 202330 }, { "epoch": 1.654659197775688, "grad_norm": 0.04122127965092659, "learning_rate": 8.810543121244897e-07, "loss": 0.0007, "step": 202340 }, { "epoch": 1.654740973954287, "grad_norm": 0.050984568893909454, "learning_rate": 8.806497988352347e-07, "loss": 0.001, "step": 202350 }, { "epoch": 1.6548227501328863, "grad_norm": 0.008669178001582623, "learning_rate": 8.802453694604918e-07, "loss": 0.0006, "step": 202360 }, { "epoch": 1.6549045263114854, "grad_norm": 0.003896723035722971, "learning_rate": 8.798410240084992e-07, "loss": 0.0016, "step": 202370 }, { "epoch": 1.6549863024900846, "grad_norm": 0.011858111247420311, "learning_rate": 8.794367624874944e-07, "loss": 0.0005, "step": 202380 }, { "epoch": 1.6550680786686838, "grad_norm": 0.009365279227495193, "learning_rate": 8.790325849057119e-07, "loss": 0.0005, "step": 202390 }, { "epoch": 1.655149854847283, "grad_norm": 0.08182369917631149, "learning_rate": 8.78628491271385e-07, "loss": 0.0004, "step": 202400 }, { "epoch": 1.655231631025882, "grad_norm": 0.026205264031887054, "learning_rate": 8.78224481592746e-07, "loss": 0.0007, "step": 202410 }, { "epoch": 1.6553134072044813, "grad_norm": 0.0011665503261610866, "learning_rate": 8.778205558780234e-07, "loss": 0.0006, "step": 202420 }, { "epoch": 1.6553951833830807, "grad_norm": 0.030488567426800728, "learning_rate": 8.774167141354484e-07, "loss": 0.0018, "step": 202430 }, { "epoch": 1.6554769595616796, "grad_norm": 0.04833187162876129, "learning_rate": 8.770129563732455e-07, "loss": 0.0006, "step": 202440 }, { "epoch": 1.655558735740279, "grad_norm": 0.0925409123301506, "learning_rate": 8.766092825996402e-07, "loss": 0.0009, "step": 202450 }, { "epoch": 1.655640511918878, "grad_norm": 0.1248302161693573, "learning_rate": 8.76205692822854e-07, "loss": 0.0008, "step": 202460 }, { "epoch": 1.6557222880974773, "grad_norm": 0.0699729472398758, "learning_rate": 8.758021870511113e-07, "loss": 0.001, "step": 202470 }, { "epoch": 1.6558040642760763, "grad_norm": 0.05959997698664665, "learning_rate": 8.753987652926305e-07, "loss": 0.0006, "step": 202480 }, { "epoch": 1.6558858404546757, "grad_norm": 0.019739320501685143, "learning_rate": 8.749954275556289e-07, "loss": 0.0008, "step": 202490 }, { "epoch": 1.6559676166332746, "grad_norm": 0.055621188133955, "learning_rate": 8.745921738483238e-07, "loss": 0.0013, "step": 202500 }, { "epoch": 1.656049392811874, "grad_norm": 0.046335551887750626, "learning_rate": 8.741890041789297e-07, "loss": 0.0005, "step": 202510 }, { "epoch": 1.656131168990473, "grad_norm": 0.03481173515319824, "learning_rate": 8.737859185556585e-07, "loss": 0.0004, "step": 202520 }, { "epoch": 1.6562129451690724, "grad_norm": 0.014326226897537708, "learning_rate": 8.733829169867225e-07, "loss": 0.0007, "step": 202530 }, { "epoch": 1.6562947213476713, "grad_norm": 0.0005364567623473704, "learning_rate": 8.729799994803301e-07, "loss": 0.0008, "step": 202540 }, { "epoch": 1.6563764975262707, "grad_norm": 0.027760351076722145, "learning_rate": 8.725771660446902e-07, "loss": 0.0007, "step": 202550 }, { "epoch": 1.6564582737048696, "grad_norm": 0.11495190858840942, "learning_rate": 8.721744166880086e-07, "loss": 0.0009, "step": 202560 }, { "epoch": 1.656540049883469, "grad_norm": 0.07775639742612839, "learning_rate": 8.717717514184892e-07, "loss": 0.0011, "step": 202570 }, { "epoch": 1.656621826062068, "grad_norm": 0.051192477345466614, "learning_rate": 8.713691702443339e-07, "loss": 0.0005, "step": 202580 }, { "epoch": 1.6567036022406674, "grad_norm": 0.06004192307591438, "learning_rate": 8.709666731737454e-07, "loss": 0.0004, "step": 202590 }, { "epoch": 1.6567853784192663, "grad_norm": 0.002931587165221572, "learning_rate": 8.705642602149223e-07, "loss": 0.0007, "step": 202600 }, { "epoch": 1.6568671545978657, "grad_norm": 0.09864909201860428, "learning_rate": 8.701619313760618e-07, "loss": 0.001, "step": 202610 }, { "epoch": 1.6569489307764647, "grad_norm": 0.035420458763837814, "learning_rate": 8.697596866653596e-07, "loss": 0.0006, "step": 202620 }, { "epoch": 1.657030706955064, "grad_norm": 0.038775525987148285, "learning_rate": 8.693575260910097e-07, "loss": 0.0016, "step": 202630 }, { "epoch": 1.6571124831336632, "grad_norm": 0.013949040323495865, "learning_rate": 8.689554496612046e-07, "loss": 0.0006, "step": 202640 }, { "epoch": 1.6571942593122624, "grad_norm": 0.03904455155134201, "learning_rate": 8.685534573841348e-07, "loss": 0.001, "step": 202650 }, { "epoch": 1.6572760354908616, "grad_norm": 0.021506767719984055, "learning_rate": 8.681515492679882e-07, "loss": 0.0006, "step": 202660 }, { "epoch": 1.6573578116694607, "grad_norm": 0.019214047119021416, "learning_rate": 8.677497253209544e-07, "loss": 0.001, "step": 202670 }, { "epoch": 1.65743958784806, "grad_norm": 0.02264752797782421, "learning_rate": 8.673479855512173e-07, "loss": 0.0007, "step": 202680 }, { "epoch": 1.657521364026659, "grad_norm": 0.06260509043931961, "learning_rate": 8.669463299669611e-07, "loss": 0.0006, "step": 202690 }, { "epoch": 1.6576031402052582, "grad_norm": 0.023074887692928314, "learning_rate": 8.665447585763665e-07, "loss": 0.0009, "step": 202700 }, { "epoch": 1.6576849163838574, "grad_norm": 0.06368893384933472, "learning_rate": 8.661432713876162e-07, "loss": 0.001, "step": 202710 }, { "epoch": 1.6577666925624566, "grad_norm": 0.0489218533039093, "learning_rate": 8.657418684088875e-07, "loss": 0.0011, "step": 202720 }, { "epoch": 1.6578484687410557, "grad_norm": 0.06792791187763214, "learning_rate": 8.653405496483575e-07, "loss": 0.0006, "step": 202730 }, { "epoch": 1.657930244919655, "grad_norm": 0.043799299746751785, "learning_rate": 8.649393151142016e-07, "loss": 0.0006, "step": 202740 }, { "epoch": 1.658012021098254, "grad_norm": 0.005300639197230339, "learning_rate": 8.645381648145923e-07, "loss": 0.0008, "step": 202750 }, { "epoch": 1.6580937972768532, "grad_norm": 0.003991201054304838, "learning_rate": 8.641370987577024e-07, "loss": 0.0008, "step": 202760 }, { "epoch": 1.6581755734554524, "grad_norm": 0.012288999743759632, "learning_rate": 8.637361169517017e-07, "loss": 0.0005, "step": 202770 }, { "epoch": 1.6582573496340516, "grad_norm": 0.08876871317625046, "learning_rate": 8.633352194047573e-07, "loss": 0.0008, "step": 202780 }, { "epoch": 1.6583391258126507, "grad_norm": 0.04607132449746132, "learning_rate": 8.629344061250383e-07, "loss": 0.001, "step": 202790 }, { "epoch": 1.65842090199125, "grad_norm": 0.06317619234323502, "learning_rate": 8.625336771207077e-07, "loss": 0.0004, "step": 202800 }, { "epoch": 1.658502678169849, "grad_norm": 0.03130212053656578, "learning_rate": 8.621330323999294e-07, "loss": 0.0007, "step": 202810 }, { "epoch": 1.6585844543484483, "grad_norm": 0.12419881671667099, "learning_rate": 8.617324719708636e-07, "loss": 0.0032, "step": 202820 }, { "epoch": 1.6586662305270474, "grad_norm": 0.03332483768463135, "learning_rate": 8.613319958416721e-07, "loss": 0.0004, "step": 202830 }, { "epoch": 1.6587480067056466, "grad_norm": 0.0035296259447932243, "learning_rate": 8.609316040205123e-07, "loss": 0.0019, "step": 202840 }, { "epoch": 1.6588297828842458, "grad_norm": 0.05472381412982941, "learning_rate": 8.605312965155399e-07, "loss": 0.0007, "step": 202850 }, { "epoch": 1.6589115590628452, "grad_norm": 0.05395076423883438, "learning_rate": 8.601310733349094e-07, "loss": 0.0006, "step": 202860 }, { "epoch": 1.658993335241444, "grad_norm": 0.04870452359318733, "learning_rate": 8.597309344867744e-07, "loss": 0.0006, "step": 202870 }, { "epoch": 1.6590751114200435, "grad_norm": 0.005995767656713724, "learning_rate": 8.593308799792855e-07, "loss": 0.0007, "step": 202880 }, { "epoch": 1.6591568875986424, "grad_norm": 0.002902837237343192, "learning_rate": 8.589309098205922e-07, "loss": 0.0002, "step": 202890 }, { "epoch": 1.6592386637772418, "grad_norm": 0.12852002680301666, "learning_rate": 8.58531024018841e-07, "loss": 0.0005, "step": 202900 }, { "epoch": 1.6593204399558408, "grad_norm": 0.01485242135822773, "learning_rate": 8.581312225821808e-07, "loss": 0.0004, "step": 202910 }, { "epoch": 1.6594022161344402, "grad_norm": 0.007105425000190735, "learning_rate": 8.577315055187541e-07, "loss": 0.0003, "step": 202920 }, { "epoch": 1.6594839923130391, "grad_norm": 0.0005410978919826448, "learning_rate": 8.573318728367036e-07, "loss": 0.0011, "step": 202930 }, { "epoch": 1.6595657684916385, "grad_norm": 0.004336968995630741, "learning_rate": 8.569323245441691e-07, "loss": 0.0003, "step": 202940 }, { "epoch": 1.6596475446702375, "grad_norm": 0.03844678774476051, "learning_rate": 8.565328606492918e-07, "loss": 0.0007, "step": 202950 }, { "epoch": 1.6597293208488368, "grad_norm": 0.027916735038161278, "learning_rate": 8.561334811602084e-07, "loss": 0.0014, "step": 202960 }, { "epoch": 1.6598110970274358, "grad_norm": 0.046956416219472885, "learning_rate": 8.557341860850538e-07, "loss": 0.0004, "step": 202970 }, { "epoch": 1.6598928732060352, "grad_norm": 0.056856777518987656, "learning_rate": 8.553349754319629e-07, "loss": 0.0005, "step": 202980 }, { "epoch": 1.6599746493846341, "grad_norm": 0.061808906495571136, "learning_rate": 8.549358492090676e-07, "loss": 0.0009, "step": 202990 }, { "epoch": 1.6600564255632335, "grad_norm": 0.03739243745803833, "learning_rate": 8.545368074244981e-07, "loss": 0.0009, "step": 203000 }, { "epoch": 1.6601382017418325, "grad_norm": 0.017082132399082184, "learning_rate": 8.541378500863834e-07, "loss": 0.001, "step": 203010 }, { "epoch": 1.6602199779204319, "grad_norm": 0.05062657222151756, "learning_rate": 8.537389772028493e-07, "loss": 0.0003, "step": 203020 }, { "epoch": 1.6603017540990308, "grad_norm": 0.036006659269332886, "learning_rate": 8.533401887820236e-07, "loss": 0.0008, "step": 203030 }, { "epoch": 1.6603835302776302, "grad_norm": 0.01906915009021759, "learning_rate": 8.529414848320294e-07, "loss": 0.0009, "step": 203040 }, { "epoch": 1.6604653064562291, "grad_norm": 0.0099005326628685, "learning_rate": 8.525428653609874e-07, "loss": 0.0004, "step": 203050 }, { "epoch": 1.6605470826348285, "grad_norm": 0.0364772193133831, "learning_rate": 8.521443303770189e-07, "loss": 0.0008, "step": 203060 }, { "epoch": 1.6606288588134277, "grad_norm": 0.014941786415874958, "learning_rate": 8.517458798882405e-07, "loss": 0.0011, "step": 203070 }, { "epoch": 1.6607106349920269, "grad_norm": 0.00654391897842288, "learning_rate": 8.513475139027716e-07, "loss": 0.0006, "step": 203080 }, { "epoch": 1.660792411170626, "grad_norm": 0.07665062695741653, "learning_rate": 8.509492324287261e-07, "loss": 0.0007, "step": 203090 }, { "epoch": 1.6608741873492252, "grad_norm": 0.01205146312713623, "learning_rate": 8.505510354742174e-07, "loss": 0.0005, "step": 203100 }, { "epoch": 1.6609559635278244, "grad_norm": 0.01574944332242012, "learning_rate": 8.501529230473571e-07, "loss": 0.0011, "step": 203110 }, { "epoch": 1.6610377397064235, "grad_norm": 0.025157146155834198, "learning_rate": 8.497548951562546e-07, "loss": 0.0004, "step": 203120 }, { "epoch": 1.6611195158850227, "grad_norm": 0.019160445779561996, "learning_rate": 8.493569518090183e-07, "loss": 0.0003, "step": 203130 }, { "epoch": 1.6612012920636219, "grad_norm": 0.00637420080602169, "learning_rate": 8.489590930137547e-07, "loss": 0.0005, "step": 203140 }, { "epoch": 1.661283068242221, "grad_norm": 0.029935479164123535, "learning_rate": 8.485613187785674e-07, "loss": 0.0007, "step": 203150 }, { "epoch": 1.6613648444208202, "grad_norm": 0.05314217880368233, "learning_rate": 8.481636291115619e-07, "loss": 0.0029, "step": 203160 }, { "epoch": 1.6614466205994194, "grad_norm": 0.08612001687288284, "learning_rate": 8.477660240208379e-07, "loss": 0.0006, "step": 203170 }, { "epoch": 1.6615283967780186, "grad_norm": 0.013344553299248219, "learning_rate": 8.473685035144951e-07, "loss": 0.0007, "step": 203180 }, { "epoch": 1.6616101729566177, "grad_norm": 0.08042077720165253, "learning_rate": 8.469710676006304e-07, "loss": 0.0007, "step": 203190 }, { "epoch": 1.661691949135217, "grad_norm": 0.014137819409370422, "learning_rate": 8.465737162873417e-07, "loss": 0.0006, "step": 203200 }, { "epoch": 1.661773725313816, "grad_norm": 0.08785278350114822, "learning_rate": 8.461764495827224e-07, "loss": 0.0011, "step": 203210 }, { "epoch": 1.6618555014924152, "grad_norm": 0.0006302619003690779, "learning_rate": 8.45779267494865e-07, "loss": 0.0005, "step": 203220 }, { "epoch": 1.6619372776710144, "grad_norm": 0.07922621071338654, "learning_rate": 8.453821700318609e-07, "loss": 0.0004, "step": 203230 }, { "epoch": 1.6620190538496136, "grad_norm": 0.006084777880460024, "learning_rate": 8.449851572017991e-07, "loss": 0.0006, "step": 203240 }, { "epoch": 1.6621008300282127, "grad_norm": 0.0647835060954094, "learning_rate": 8.445882290127671e-07, "loss": 0.0008, "step": 203250 }, { "epoch": 1.662182606206812, "grad_norm": 0.038842085748910904, "learning_rate": 8.441913854728506e-07, "loss": 0.0006, "step": 203260 }, { "epoch": 1.662264382385411, "grad_norm": 0.0391983836889267, "learning_rate": 8.437946265901326e-07, "loss": 0.0007, "step": 203270 }, { "epoch": 1.6623461585640102, "grad_norm": 0.021258000284433365, "learning_rate": 8.433979523726971e-07, "loss": 0.0008, "step": 203280 }, { "epoch": 1.6624279347426096, "grad_norm": 0.10624372959136963, "learning_rate": 8.430013628286237e-07, "loss": 0.0007, "step": 203290 }, { "epoch": 1.6625097109212086, "grad_norm": 0.026098616421222687, "learning_rate": 8.426048579659923e-07, "loss": 0.0007, "step": 203300 }, { "epoch": 1.662591487099808, "grad_norm": 0.009847491048276424, "learning_rate": 8.422084377928774e-07, "loss": 0.0006, "step": 203310 }, { "epoch": 1.662673263278407, "grad_norm": 0.005444129463285208, "learning_rate": 8.418121023173581e-07, "loss": 0.0003, "step": 203320 }, { "epoch": 1.6627550394570063, "grad_norm": 0.022322524338960648, "learning_rate": 8.414158515475057e-07, "loss": 0.0005, "step": 203330 }, { "epoch": 1.6628368156356053, "grad_norm": 0.03625759109854698, "learning_rate": 8.410196854913926e-07, "loss": 0.001, "step": 203340 }, { "epoch": 1.6629185918142046, "grad_norm": 0.029094984754920006, "learning_rate": 8.406236041570898e-07, "loss": 0.0007, "step": 203350 }, { "epoch": 1.6630003679928036, "grad_norm": 0.03858589380979538, "learning_rate": 8.402276075526644e-07, "loss": 0.001, "step": 203360 }, { "epoch": 1.663082144171403, "grad_norm": 0.0021093266550451517, "learning_rate": 8.39831695686184e-07, "loss": 0.0009, "step": 203370 }, { "epoch": 1.663163920350002, "grad_norm": 0.09367488324642181, "learning_rate": 8.394358685657139e-07, "loss": 0.0005, "step": 203380 }, { "epoch": 1.6632456965286013, "grad_norm": 0.058945491909980774, "learning_rate": 8.390401261993153e-07, "loss": 0.0009, "step": 203390 }, { "epoch": 1.6633274727072003, "grad_norm": 0.0935683399438858, "learning_rate": 8.386444685950529e-07, "loss": 0.0011, "step": 203400 }, { "epoch": 1.6634092488857997, "grad_norm": 0.01048720721155405, "learning_rate": 8.382488957609852e-07, "loss": 0.0006, "step": 203410 }, { "epoch": 1.6634910250643986, "grad_norm": 0.0600004643201828, "learning_rate": 8.378534077051703e-07, "loss": 0.0027, "step": 203420 }, { "epoch": 1.663572801242998, "grad_norm": 0.04958774149417877, "learning_rate": 8.374580044356639e-07, "loss": 0.0006, "step": 203430 }, { "epoch": 1.663654577421597, "grad_norm": 0.054767489433288574, "learning_rate": 8.370626859605219e-07, "loss": 0.0007, "step": 203440 }, { "epoch": 1.6637363536001963, "grad_norm": 0.008233251050114632, "learning_rate": 8.366674522877971e-07, "loss": 0.0005, "step": 203450 }, { "epoch": 1.6638181297787953, "grad_norm": 0.02443365380167961, "learning_rate": 8.36272303425541e-07, "loss": 0.0015, "step": 203460 }, { "epoch": 1.6638999059573947, "grad_norm": 0.00486246170476079, "learning_rate": 8.358772393818015e-07, "loss": 0.0006, "step": 203470 }, { "epoch": 1.6639816821359936, "grad_norm": 0.020260600373148918, "learning_rate": 8.354822601646284e-07, "loss": 0.0003, "step": 203480 }, { "epoch": 1.664063458314593, "grad_norm": 0.04149278998374939, "learning_rate": 8.350873657820663e-07, "loss": 0.0011, "step": 203490 }, { "epoch": 1.6641452344931922, "grad_norm": 0.0014743844512850046, "learning_rate": 8.346925562421599e-07, "loss": 0.0015, "step": 203500 }, { "epoch": 1.6642270106717914, "grad_norm": 0.07661297172307968, "learning_rate": 8.342978315529504e-07, "loss": 0.0007, "step": 203510 }, { "epoch": 1.6643087868503905, "grad_norm": 0.04228230565786362, "learning_rate": 8.339031917224816e-07, "loss": 0.0006, "step": 203520 }, { "epoch": 1.6643905630289897, "grad_norm": 0.0014775379095226526, "learning_rate": 8.335086367587913e-07, "loss": 0.0005, "step": 203530 }, { "epoch": 1.6644723392075889, "grad_norm": 0.0179368257522583, "learning_rate": 8.331141666699166e-07, "loss": 0.0004, "step": 203540 }, { "epoch": 1.664554115386188, "grad_norm": 0.019786451011896133, "learning_rate": 8.327197814638921e-07, "loss": 0.0004, "step": 203550 }, { "epoch": 1.6646358915647872, "grad_norm": 0.01462513580918312, "learning_rate": 8.323254811487547e-07, "loss": 0.001, "step": 203560 }, { "epoch": 1.6647176677433864, "grad_norm": 0.012272246181964874, "learning_rate": 8.31931265732534e-07, "loss": 0.0005, "step": 203570 }, { "epoch": 1.6647994439219855, "grad_norm": 0.015904638916254044, "learning_rate": 8.31537135223262e-07, "loss": 0.0007, "step": 203580 }, { "epoch": 1.6648812201005847, "grad_norm": 0.026717744767665863, "learning_rate": 8.311430896289669e-07, "loss": 0.0008, "step": 203590 }, { "epoch": 1.6649629962791839, "grad_norm": 0.046374157071113586, "learning_rate": 8.307491289576757e-07, "loss": 0.0007, "step": 203600 }, { "epoch": 1.665044772457783, "grad_norm": 0.053688693791627884, "learning_rate": 8.303552532174136e-07, "loss": 0.0008, "step": 203610 }, { "epoch": 1.6651265486363822, "grad_norm": 0.16562888026237488, "learning_rate": 8.299614624162039e-07, "loss": 0.0005, "step": 203620 }, { "epoch": 1.6652083248149814, "grad_norm": 0.008574326522648335, "learning_rate": 8.29567756562068e-07, "loss": 0.0004, "step": 203630 }, { "epoch": 1.6652901009935805, "grad_norm": 0.013668379746377468, "learning_rate": 8.291741356630279e-07, "loss": 0.0006, "step": 203640 }, { "epoch": 1.6653718771721797, "grad_norm": 0.09891015291213989, "learning_rate": 8.287805997271003e-07, "loss": 0.0008, "step": 203650 }, { "epoch": 1.6654536533507789, "grad_norm": 0.0011639156145974994, "learning_rate": 8.28387148762303e-07, "loss": 0.0011, "step": 203660 }, { "epoch": 1.665535429529378, "grad_norm": 0.0004176045476924628, "learning_rate": 8.279937827766493e-07, "loss": 0.0012, "step": 203670 }, { "epoch": 1.6656172057079772, "grad_norm": 0.0416514128446579, "learning_rate": 8.276005017781541e-07, "loss": 0.0006, "step": 203680 }, { "epoch": 1.6656989818865764, "grad_norm": 0.046129725873470306, "learning_rate": 8.272073057748287e-07, "loss": 0.0007, "step": 203690 }, { "epoch": 1.6657807580651756, "grad_norm": 0.025956183671951294, "learning_rate": 8.268141947746821e-07, "loss": 0.0002, "step": 203700 }, { "epoch": 1.6658625342437747, "grad_norm": 0.03650103509426117, "learning_rate": 8.264211687857227e-07, "loss": 0.0005, "step": 203710 }, { "epoch": 1.6659443104223741, "grad_norm": 0.11144199967384338, "learning_rate": 8.26028227815956e-07, "loss": 0.0005, "step": 203720 }, { "epoch": 1.666026086600973, "grad_norm": 0.03002440370619297, "learning_rate": 8.256353718733873e-07, "loss": 0.0011, "step": 203730 }, { "epoch": 1.6661078627795725, "grad_norm": 0.02375972829759121, "learning_rate": 8.252426009660192e-07, "loss": 0.0003, "step": 203740 }, { "epoch": 1.6661896389581714, "grad_norm": 0.036312032490968704, "learning_rate": 8.248499151018518e-07, "loss": 0.001, "step": 203750 }, { "epoch": 1.6662714151367708, "grad_norm": 0.0013157823123037815, "learning_rate": 8.244573142888862e-07, "loss": 0.0023, "step": 203760 }, { "epoch": 1.6663531913153697, "grad_norm": 0.0308565441519022, "learning_rate": 8.240647985351191e-07, "loss": 0.0005, "step": 203770 }, { "epoch": 1.6664349674939691, "grad_norm": 0.0006906657945364714, "learning_rate": 8.23672367848547e-07, "loss": 0.0004, "step": 203780 }, { "epoch": 1.666516743672568, "grad_norm": 0.00414996687322855, "learning_rate": 8.232800222371618e-07, "loss": 0.0005, "step": 203790 }, { "epoch": 1.6665985198511675, "grad_norm": 0.042733531445264816, "learning_rate": 8.228877617089587e-07, "loss": 0.0008, "step": 203800 }, { "epoch": 1.6666802960297664, "grad_norm": 0.026931047439575195, "learning_rate": 8.224955862719275e-07, "loss": 0.001, "step": 203810 }, { "epoch": 1.6667620722083658, "grad_norm": 0.020105596631765366, "learning_rate": 8.221034959340568e-07, "loss": 0.0006, "step": 203820 }, { "epoch": 1.6668438483869648, "grad_norm": 0.03858734667301178, "learning_rate": 8.217114907033336e-07, "loss": 0.001, "step": 203830 }, { "epoch": 1.6669256245655641, "grad_norm": 0.036532651633024216, "learning_rate": 8.213195705877441e-07, "loss": 0.0038, "step": 203840 }, { "epoch": 1.667007400744163, "grad_norm": 0.012559772469103336, "learning_rate": 8.20927735595271e-07, "loss": 0.0006, "step": 203850 }, { "epoch": 1.6670891769227625, "grad_norm": 0.012050984427332878, "learning_rate": 8.205359857338974e-07, "loss": 0.0008, "step": 203860 }, { "epoch": 1.6671709531013614, "grad_norm": 0.03135624900460243, "learning_rate": 8.201443210116011e-07, "loss": 0.0006, "step": 203870 }, { "epoch": 1.6672527292799608, "grad_norm": 0.029330071061849594, "learning_rate": 8.197527414363643e-07, "loss": 0.0008, "step": 203880 }, { "epoch": 1.6673345054585598, "grad_norm": 0.01401582546532154, "learning_rate": 8.193612470161616e-07, "loss": 0.0004, "step": 203890 }, { "epoch": 1.6674162816371592, "grad_norm": 0.02421218529343605, "learning_rate": 8.189698377589689e-07, "loss": 0.0009, "step": 203900 }, { "epoch": 1.667498057815758, "grad_norm": 0.03793303295969963, "learning_rate": 8.185785136727581e-07, "loss": 0.001, "step": 203910 }, { "epoch": 1.6675798339943575, "grad_norm": 0.0021098044235259295, "learning_rate": 8.181872747655023e-07, "loss": 0.0004, "step": 203920 }, { "epoch": 1.6676616101729567, "grad_norm": 0.017924437299370766, "learning_rate": 8.177961210451712e-07, "loss": 0.0003, "step": 203930 }, { "epoch": 1.6677433863515558, "grad_norm": 0.005289058666676283, "learning_rate": 8.17405052519733e-07, "loss": 0.0006, "step": 203940 }, { "epoch": 1.667825162530155, "grad_norm": 0.001192069030366838, "learning_rate": 8.170140691971528e-07, "loss": 0.0006, "step": 203950 }, { "epoch": 1.6679069387087542, "grad_norm": 0.01218423992395401, "learning_rate": 8.166231710853967e-07, "loss": 0.0006, "step": 203960 }, { "epoch": 1.6679887148873533, "grad_norm": 0.03748071566224098, "learning_rate": 8.16232358192427e-07, "loss": 0.0003, "step": 203970 }, { "epoch": 1.6680704910659525, "grad_norm": 0.03494982421398163, "learning_rate": 8.158416305262052e-07, "loss": 0.0006, "step": 203980 }, { "epoch": 1.6681522672445517, "grad_norm": 0.03278636559844017, "learning_rate": 8.154509880946887e-07, "loss": 0.0006, "step": 203990 }, { "epoch": 1.6682340434231508, "grad_norm": 0.062215112149715424, "learning_rate": 8.150604309058385e-07, "loss": 0.0007, "step": 204000 }, { "epoch": 1.66831581960175, "grad_norm": 0.0010218987008556724, "learning_rate": 8.146699589676088e-07, "loss": 0.0008, "step": 204010 }, { "epoch": 1.6683975957803492, "grad_norm": 0.02124803699553013, "learning_rate": 8.142795722879537e-07, "loss": 0.0005, "step": 204020 }, { "epoch": 1.6684793719589484, "grad_norm": 0.018036650493741035, "learning_rate": 8.138892708748253e-07, "loss": 0.0004, "step": 204030 }, { "epoch": 1.6685611481375475, "grad_norm": 0.04025410860776901, "learning_rate": 8.134990547361765e-07, "loss": 0.0015, "step": 204040 }, { "epoch": 1.6686429243161467, "grad_norm": 0.08423071354627609, "learning_rate": 8.131089238799544e-07, "loss": 0.0004, "step": 204050 }, { "epoch": 1.6687247004947459, "grad_norm": 0.0018674496095627546, "learning_rate": 8.127188783141071e-07, "loss": 0.0008, "step": 204060 }, { "epoch": 1.668806476673345, "grad_norm": 0.02400847151875496, "learning_rate": 8.123289180465798e-07, "loss": 0.001, "step": 204070 }, { "epoch": 1.6688882528519442, "grad_norm": 0.014326268807053566, "learning_rate": 8.119390430853163e-07, "loss": 0.0001, "step": 204080 }, { "epoch": 1.6689700290305434, "grad_norm": 0.015130478888750076, "learning_rate": 8.115492534382585e-07, "loss": 0.0005, "step": 204090 }, { "epoch": 1.6690518052091425, "grad_norm": 0.27261611819267273, "learning_rate": 8.111595491133473e-07, "loss": 0.0009, "step": 204100 }, { "epoch": 1.6691335813877417, "grad_norm": 0.08526843786239624, "learning_rate": 8.107699301185196e-07, "loss": 0.0011, "step": 204110 }, { "epoch": 1.6692153575663409, "grad_norm": 0.09350743144750595, "learning_rate": 8.103803964617152e-07, "loss": 0.0008, "step": 204120 }, { "epoch": 1.6692971337449403, "grad_norm": 0.08359076082706451, "learning_rate": 8.099909481508672e-07, "loss": 0.0007, "step": 204130 }, { "epoch": 1.6693789099235392, "grad_norm": 0.05491999536752701, "learning_rate": 8.096015851939093e-07, "loss": 0.0013, "step": 204140 }, { "epoch": 1.6694606861021386, "grad_norm": 0.009288431145250797, "learning_rate": 8.092123075987723e-07, "loss": 0.0005, "step": 204150 }, { "epoch": 1.6695424622807375, "grad_norm": 0.06060471013188362, "learning_rate": 8.088231153733878e-07, "loss": 0.0006, "step": 204160 }, { "epoch": 1.669624238459337, "grad_norm": 0.01667293719947338, "learning_rate": 8.084340085256837e-07, "loss": 0.0004, "step": 204170 }, { "epoch": 1.6697060146379359, "grad_norm": 0.002350527560338378, "learning_rate": 8.080449870635859e-07, "loss": 0.0005, "step": 204180 }, { "epoch": 1.6697877908165353, "grad_norm": 0.036046676337718964, "learning_rate": 8.076560509950187e-07, "loss": 0.0006, "step": 204190 }, { "epoch": 1.6698695669951342, "grad_norm": 0.05300450325012207, "learning_rate": 8.072672003279058e-07, "loss": 0.0007, "step": 204200 }, { "epoch": 1.6699513431737336, "grad_norm": 0.04266384243965149, "learning_rate": 8.068784350701681e-07, "loss": 0.0015, "step": 204210 }, { "epoch": 1.6700331193523326, "grad_norm": 0.027045294642448425, "learning_rate": 8.064897552297246e-07, "loss": 0.0007, "step": 204220 }, { "epoch": 1.670114895530932, "grad_norm": 0.01184246875345707, "learning_rate": 8.061011608144926e-07, "loss": 0.0006, "step": 204230 }, { "epoch": 1.670196671709531, "grad_norm": 0.001116617931984365, "learning_rate": 8.057126518323899e-07, "loss": 0.0006, "step": 204240 }, { "epoch": 1.6702784478881303, "grad_norm": 0.016059545800089836, "learning_rate": 8.053242282913298e-07, "loss": 0.0006, "step": 204250 }, { "epoch": 1.6703602240667292, "grad_norm": 0.011583340354263783, "learning_rate": 8.049358901992255e-07, "loss": 0.001, "step": 204260 }, { "epoch": 1.6704420002453286, "grad_norm": 0.0923415943980217, "learning_rate": 8.045476375639849e-07, "loss": 0.0009, "step": 204270 }, { "epoch": 1.6705237764239276, "grad_norm": 0.02808959037065506, "learning_rate": 8.041594703935207e-07, "loss": 0.0008, "step": 204280 }, { "epoch": 1.670605552602527, "grad_norm": 0.01075727678835392, "learning_rate": 8.037713886957388e-07, "loss": 0.0013, "step": 204290 }, { "epoch": 1.670687328781126, "grad_norm": 0.032208990305662155, "learning_rate": 8.033833924785445e-07, "loss": 0.0004, "step": 204300 }, { "epoch": 1.6707691049597253, "grad_norm": 0.01049484871327877, "learning_rate": 8.029954817498414e-07, "loss": 0.0015, "step": 204310 }, { "epoch": 1.6708508811383243, "grad_norm": 0.04686904326081276, "learning_rate": 8.026076565175317e-07, "loss": 0.001, "step": 204320 }, { "epoch": 1.6709326573169236, "grad_norm": 0.0366094745695591, "learning_rate": 8.022199167895162e-07, "loss": 0.0004, "step": 204330 }, { "epoch": 1.6710144334955226, "grad_norm": 0.02650650218129158, "learning_rate": 8.018322625736935e-07, "loss": 0.0005, "step": 204340 }, { "epoch": 1.671096209674122, "grad_norm": 0.016262007877230644, "learning_rate": 8.01444693877958e-07, "loss": 0.0047, "step": 204350 }, { "epoch": 1.6711779858527211, "grad_norm": 0.026028240099549294, "learning_rate": 8.010572107102088e-07, "loss": 0.0008, "step": 204360 }, { "epoch": 1.6712597620313203, "grad_norm": 0.04051118716597557, "learning_rate": 8.006698130783369e-07, "loss": 0.0008, "step": 204370 }, { "epoch": 1.6713415382099195, "grad_norm": 0.005495788063853979, "learning_rate": 8.002825009902343e-07, "loss": 0.0005, "step": 204380 }, { "epoch": 1.6714233143885187, "grad_norm": 0.03516891226172447, "learning_rate": 7.998952744537897e-07, "loss": 0.0003, "step": 204390 }, { "epoch": 1.6715050905671178, "grad_norm": 0.015527712181210518, "learning_rate": 7.995081334768939e-07, "loss": 0.0013, "step": 204400 }, { "epoch": 1.671586866745717, "grad_norm": 0.060483213514089584, "learning_rate": 7.991210780674314e-07, "loss": 0.0011, "step": 204410 }, { "epoch": 1.6716686429243162, "grad_norm": 0.007286472246050835, "learning_rate": 7.987341082332872e-07, "loss": 0.0004, "step": 204420 }, { "epoch": 1.6717504191029153, "grad_norm": 0.026875896379351616, "learning_rate": 7.983472239823442e-07, "loss": 0.0005, "step": 204430 }, { "epoch": 1.6718321952815145, "grad_norm": 0.0012962332693859935, "learning_rate": 7.979604253224837e-07, "loss": 0.0009, "step": 204440 }, { "epoch": 1.6719139714601137, "grad_norm": 0.012725546024739742, "learning_rate": 7.975737122615846e-07, "loss": 0.0004, "step": 204450 }, { "epoch": 1.6719957476387128, "grad_norm": 0.024346673861145973, "learning_rate": 7.971870848075252e-07, "loss": 0.0005, "step": 204460 }, { "epoch": 1.672077523817312, "grad_norm": 0.013752085156738758, "learning_rate": 7.968005429681796e-07, "loss": 0.0014, "step": 204470 }, { "epoch": 1.6721592999959112, "grad_norm": 0.020360175520181656, "learning_rate": 7.964140867514247e-07, "loss": 0.0003, "step": 204480 }, { "epoch": 1.6722410761745103, "grad_norm": 0.040698081254959106, "learning_rate": 7.96027716165132e-07, "loss": 0.0007, "step": 204490 }, { "epoch": 1.6723228523531095, "grad_norm": 0.0336984358727932, "learning_rate": 7.956414312171717e-07, "loss": 0.0007, "step": 204500 }, { "epoch": 1.6724046285317087, "grad_norm": 0.11147342622280121, "learning_rate": 7.952552319154128e-07, "loss": 0.0005, "step": 204510 }, { "epoch": 1.6724864047103079, "grad_norm": 0.008071362972259521, "learning_rate": 7.948691182677215e-07, "loss": 0.0004, "step": 204520 }, { "epoch": 1.672568180888907, "grad_norm": 0.041798386722803116, "learning_rate": 7.944830902819656e-07, "loss": 0.0004, "step": 204530 }, { "epoch": 1.6726499570675062, "grad_norm": 0.050727248191833496, "learning_rate": 7.940971479660076e-07, "loss": 0.0008, "step": 204540 }, { "epoch": 1.6727317332461054, "grad_norm": 0.08004763722419739, "learning_rate": 7.937112913277095e-07, "loss": 0.0009, "step": 204550 }, { "epoch": 1.6728135094247047, "grad_norm": 0.06890247017145157, "learning_rate": 7.933255203749312e-07, "loss": 0.0007, "step": 204560 }, { "epoch": 1.6728952856033037, "grad_norm": 0.005737487226724625, "learning_rate": 7.929398351155315e-07, "loss": 0.0007, "step": 204570 }, { "epoch": 1.672977061781903, "grad_norm": 0.01847652718424797, "learning_rate": 7.925542355573673e-07, "loss": 0.001, "step": 204580 }, { "epoch": 1.673058837960502, "grad_norm": 0.013801716268062592, "learning_rate": 7.921687217082929e-07, "loss": 0.0006, "step": 204590 }, { "epoch": 1.6731406141391014, "grad_norm": 0.029512714594602585, "learning_rate": 7.91783293576161e-07, "loss": 0.0003, "step": 204600 }, { "epoch": 1.6732223903177004, "grad_norm": 0.04797187075018883, "learning_rate": 7.913979511688252e-07, "loss": 0.0005, "step": 204610 }, { "epoch": 1.6733041664962998, "grad_norm": 0.03213268145918846, "learning_rate": 7.910126944941343e-07, "loss": 0.0004, "step": 204620 }, { "epoch": 1.6733859426748987, "grad_norm": 0.0006784879951737821, "learning_rate": 7.906275235599354e-07, "loss": 0.0003, "step": 204630 }, { "epoch": 1.673467718853498, "grad_norm": 0.0923563539981842, "learning_rate": 7.902424383740748e-07, "loss": 0.0008, "step": 204640 }, { "epoch": 1.673549495032097, "grad_norm": 0.025678303092718124, "learning_rate": 7.898574389443986e-07, "loss": 0.0006, "step": 204650 }, { "epoch": 1.6736312712106964, "grad_norm": 0.0653458833694458, "learning_rate": 7.894725252787483e-07, "loss": 0.0007, "step": 204660 }, { "epoch": 1.6737130473892954, "grad_norm": 0.018239835277199745, "learning_rate": 7.890876973849654e-07, "loss": 0.0004, "step": 204670 }, { "epoch": 1.6737948235678948, "grad_norm": 0.11213317513465881, "learning_rate": 7.887029552708892e-07, "loss": 0.0011, "step": 204680 }, { "epoch": 1.6738765997464937, "grad_norm": 0.013980718329548836, "learning_rate": 7.883182989443561e-07, "loss": 0.0005, "step": 204690 }, { "epoch": 1.6739583759250931, "grad_norm": 0.060602184385061264, "learning_rate": 7.879337284132033e-07, "loss": 0.0009, "step": 204700 }, { "epoch": 1.674040152103692, "grad_norm": 0.09208653122186661, "learning_rate": 7.875492436852638e-07, "loss": 0.0008, "step": 204710 }, { "epoch": 1.6741219282822914, "grad_norm": 0.00035022557131014764, "learning_rate": 7.871648447683694e-07, "loss": 0.0007, "step": 204720 }, { "epoch": 1.6742037044608904, "grad_norm": 0.04708458110690117, "learning_rate": 7.867805316703525e-07, "loss": 0.0008, "step": 204730 }, { "epoch": 1.6742854806394898, "grad_norm": 0.004587559960782528, "learning_rate": 7.863963043990414e-07, "loss": 0.0006, "step": 204740 }, { "epoch": 1.6743672568180887, "grad_norm": 0.005197860766202211, "learning_rate": 7.860121629622619e-07, "loss": 0.0003, "step": 204750 }, { "epoch": 1.6744490329966881, "grad_norm": 0.011523534543812275, "learning_rate": 7.85628107367839e-07, "loss": 0.0014, "step": 204760 }, { "epoch": 1.6745308091752873, "grad_norm": 0.03237690031528473, "learning_rate": 7.852441376235981e-07, "loss": 0.0011, "step": 204770 }, { "epoch": 1.6746125853538865, "grad_norm": 0.06143312156200409, "learning_rate": 7.848602537373601e-07, "loss": 0.0008, "step": 204780 }, { "epoch": 1.6746943615324856, "grad_norm": 0.0696905255317688, "learning_rate": 7.844764557169454e-07, "loss": 0.0006, "step": 204790 }, { "epoch": 1.6747761377110848, "grad_norm": 0.013551507145166397, "learning_rate": 7.840927435701712e-07, "loss": 0.0006, "step": 204800 }, { "epoch": 1.674857913889684, "grad_norm": 0.008077732287347317, "learning_rate": 7.83709117304855e-07, "loss": 0.001, "step": 204810 }, { "epoch": 1.6749396900682831, "grad_norm": 0.06321290135383606, "learning_rate": 7.833255769288118e-07, "loss": 0.0007, "step": 204820 }, { "epoch": 1.6750214662468823, "grad_norm": 0.0337720662355423, "learning_rate": 7.829421224498534e-07, "loss": 0.0011, "step": 204830 }, { "epoch": 1.6751032424254815, "grad_norm": 0.011405212804675102, "learning_rate": 7.825587538757906e-07, "loss": 0.0003, "step": 204840 }, { "epoch": 1.6751850186040806, "grad_norm": 0.050971850752830505, "learning_rate": 7.821754712144352e-07, "loss": 0.0006, "step": 204850 }, { "epoch": 1.6752667947826798, "grad_norm": 0.17089897394180298, "learning_rate": 7.817922744735945e-07, "loss": 0.0037, "step": 204860 }, { "epoch": 1.675348570961279, "grad_norm": 0.0540076345205307, "learning_rate": 7.814091636610733e-07, "loss": 0.0005, "step": 204870 }, { "epoch": 1.6754303471398782, "grad_norm": 0.009774678386747837, "learning_rate": 7.810261387846757e-07, "loss": 0.0013, "step": 204880 }, { "epoch": 1.6755121233184773, "grad_norm": 0.0054516904056072235, "learning_rate": 7.806431998522057e-07, "loss": 0.0005, "step": 204890 }, { "epoch": 1.6755938994970765, "grad_norm": 0.11352238059043884, "learning_rate": 7.802603468714637e-07, "loss": 0.0008, "step": 204900 }, { "epoch": 1.6756756756756757, "grad_norm": 0.0008099967380985618, "learning_rate": 7.798775798502484e-07, "loss": 0.0007, "step": 204910 }, { "epoch": 1.6757574518542748, "grad_norm": 0.05691329762339592, "learning_rate": 7.794948987963574e-07, "loss": 0.0005, "step": 204920 }, { "epoch": 1.675839228032874, "grad_norm": 0.05294866859912872, "learning_rate": 7.791123037175852e-07, "loss": 0.0003, "step": 204930 }, { "epoch": 1.6759210042114732, "grad_norm": 0.001242773956619203, "learning_rate": 7.787297946217271e-07, "loss": 0.0003, "step": 204940 }, { "epoch": 1.6760027803900723, "grad_norm": 0.01374092698097229, "learning_rate": 7.783473715165734e-07, "loss": 0.0003, "step": 204950 }, { "epoch": 1.6760845565686715, "grad_norm": 0.08079786598682404, "learning_rate": 7.779650344099149e-07, "loss": 0.0008, "step": 204960 }, { "epoch": 1.6761663327472707, "grad_norm": 0.02590232528746128, "learning_rate": 7.775827833095412e-07, "loss": 0.0009, "step": 204970 }, { "epoch": 1.6762481089258698, "grad_norm": 0.016715845093131065, "learning_rate": 7.772006182232384e-07, "loss": 0.0005, "step": 204980 }, { "epoch": 1.6763298851044692, "grad_norm": 0.006065580528229475, "learning_rate": 7.768185391587918e-07, "loss": 0.0004, "step": 204990 }, { "epoch": 1.6764116612830682, "grad_norm": 0.06574320793151855, "learning_rate": 7.764365461239825e-07, "loss": 0.0004, "step": 205000 }, { "epoch": 1.6764934374616676, "grad_norm": 0.04991494119167328, "learning_rate": 7.760546391265955e-07, "loss": 0.0012, "step": 205010 }, { "epoch": 1.6765752136402665, "grad_norm": 0.015924639999866486, "learning_rate": 7.756728181744089e-07, "loss": 0.0008, "step": 205020 }, { "epoch": 1.676656989818866, "grad_norm": 0.009803594090044498, "learning_rate": 7.752910832752009e-07, "loss": 0.0003, "step": 205030 }, { "epoch": 1.6767387659974649, "grad_norm": 0.03069246932864189, "learning_rate": 7.749094344367469e-07, "loss": 0.0008, "step": 205040 }, { "epoch": 1.6768205421760642, "grad_norm": 0.04496019333600998, "learning_rate": 7.745278716668225e-07, "loss": 0.0007, "step": 205050 }, { "epoch": 1.6769023183546632, "grad_norm": 0.0719335749745369, "learning_rate": 7.741463949732003e-07, "loss": 0.0005, "step": 205060 }, { "epoch": 1.6769840945332626, "grad_norm": 0.005271810106933117, "learning_rate": 7.737650043636502e-07, "loss": 0.0005, "step": 205070 }, { "epoch": 1.6770658707118615, "grad_norm": 0.025397028774023056, "learning_rate": 7.733836998459416e-07, "loss": 0.0008, "step": 205080 }, { "epoch": 1.677147646890461, "grad_norm": 0.012353433296084404, "learning_rate": 7.730024814278436e-07, "loss": 0.0004, "step": 205090 }, { "epoch": 1.6772294230690599, "grad_norm": 0.035528797656297684, "learning_rate": 7.726213491171208e-07, "loss": 0.0005, "step": 205100 }, { "epoch": 1.6773111992476593, "grad_norm": 0.0552731454372406, "learning_rate": 7.722403029215375e-07, "loss": 0.0013, "step": 205110 }, { "epoch": 1.6773929754262582, "grad_norm": 0.0004847430682275444, "learning_rate": 7.718593428488547e-07, "loss": 0.0005, "step": 205120 }, { "epoch": 1.6774747516048576, "grad_norm": 0.007669172715395689, "learning_rate": 7.714784689068349e-07, "loss": 0.0005, "step": 205130 }, { "epoch": 1.6775565277834565, "grad_norm": 0.12006887793540955, "learning_rate": 7.710976811032356e-07, "loss": 0.0004, "step": 205140 }, { "epoch": 1.677638303962056, "grad_norm": 0.00971703976392746, "learning_rate": 7.707169794458141e-07, "loss": 0.0004, "step": 205150 }, { "epoch": 1.6777200801406549, "grad_norm": 0.019162897020578384, "learning_rate": 7.703363639423256e-07, "loss": 0.0004, "step": 205160 }, { "epoch": 1.6778018563192543, "grad_norm": 0.029838472604751587, "learning_rate": 7.699558346005232e-07, "loss": 0.0008, "step": 205170 }, { "epoch": 1.6778836324978532, "grad_norm": 0.010411007329821587, "learning_rate": 7.69575391428159e-07, "loss": 0.001, "step": 205180 }, { "epoch": 1.6779654086764526, "grad_norm": 0.0631500780582428, "learning_rate": 7.69195034432983e-07, "loss": 0.0007, "step": 205190 }, { "epoch": 1.6780471848550518, "grad_norm": 0.03923480212688446, "learning_rate": 7.688147636227411e-07, "loss": 0.0005, "step": 205200 }, { "epoch": 1.678128961033651, "grad_norm": 0.054587721824645996, "learning_rate": 7.684345790051839e-07, "loss": 0.0006, "step": 205210 }, { "epoch": 1.6782107372122501, "grad_norm": 0.01577530987560749, "learning_rate": 7.68054480588053e-07, "loss": 0.0008, "step": 205220 }, { "epoch": 1.6782925133908493, "grad_norm": 0.0012003544252365828, "learning_rate": 7.676744683790927e-07, "loss": 0.0005, "step": 205230 }, { "epoch": 1.6783742895694485, "grad_norm": 0.01430409587919712, "learning_rate": 7.672945423860428e-07, "loss": 0.0008, "step": 205240 }, { "epoch": 1.6784560657480476, "grad_norm": 0.021779252216219902, "learning_rate": 7.669147026166446e-07, "loss": 0.0006, "step": 205250 }, { "epoch": 1.6785378419266468, "grad_norm": 0.0017282187473028898, "learning_rate": 7.665349490786344e-07, "loss": 0.0006, "step": 205260 }, { "epoch": 1.678619618105246, "grad_norm": 0.0028101506177335978, "learning_rate": 7.661552817797485e-07, "loss": 0.0009, "step": 205270 }, { "epoch": 1.6787013942838451, "grad_norm": 0.029897958040237427, "learning_rate": 7.657757007277211e-07, "loss": 0.0008, "step": 205280 }, { "epoch": 1.6787831704624443, "grad_norm": 0.047579653561115265, "learning_rate": 7.653962059302844e-07, "loss": 0.0008, "step": 205290 }, { "epoch": 1.6788649466410435, "grad_norm": 0.05977384373545647, "learning_rate": 7.650167973951689e-07, "loss": 0.0021, "step": 205300 }, { "epoch": 1.6789467228196426, "grad_norm": 0.045156173408031464, "learning_rate": 7.646374751301039e-07, "loss": 0.0004, "step": 205310 }, { "epoch": 1.6790284989982418, "grad_norm": 0.022207336500287056, "learning_rate": 7.642582391428149e-07, "loss": 0.0003, "step": 205320 }, { "epoch": 1.679110275176841, "grad_norm": 0.014350242912769318, "learning_rate": 7.638790894410297e-07, "loss": 0.0012, "step": 205330 }, { "epoch": 1.6791920513554401, "grad_norm": 0.07468418776988983, "learning_rate": 7.635000260324704e-07, "loss": 0.0004, "step": 205340 }, { "epoch": 1.6792738275340393, "grad_norm": 0.046621955931186676, "learning_rate": 7.631210489248597e-07, "loss": 0.0005, "step": 205350 }, { "epoch": 1.6793556037126385, "grad_norm": 0.16801488399505615, "learning_rate": 7.627421581259158e-07, "loss": 0.0006, "step": 205360 }, { "epoch": 1.6794373798912376, "grad_norm": 0.01991105079650879, "learning_rate": 7.623633536433595e-07, "loss": 0.0003, "step": 205370 }, { "epoch": 1.6795191560698368, "grad_norm": 0.0078091747127473354, "learning_rate": 7.619846354849059e-07, "loss": 0.0005, "step": 205380 }, { "epoch": 1.679600932248436, "grad_norm": 0.03463834524154663, "learning_rate": 7.616060036582706e-07, "loss": 0.0006, "step": 205390 }, { "epoch": 1.6796827084270352, "grad_norm": 0.047859784215688705, "learning_rate": 7.61227458171166e-07, "loss": 0.0004, "step": 205400 }, { "epoch": 1.6797644846056343, "grad_norm": 0.002538283122703433, "learning_rate": 7.608489990313033e-07, "loss": 0.0006, "step": 205410 }, { "epoch": 1.6798462607842337, "grad_norm": 0.028666211292147636, "learning_rate": 7.604706262463924e-07, "loss": 0.0006, "step": 205420 }, { "epoch": 1.6799280369628327, "grad_norm": 0.04803888127207756, "learning_rate": 7.600923398241411e-07, "loss": 0.0004, "step": 205430 }, { "epoch": 1.680009813141432, "grad_norm": 0.013971618376672268, "learning_rate": 7.597141397722541e-07, "loss": 0.0003, "step": 205440 }, { "epoch": 1.680091589320031, "grad_norm": 0.04513987898826599, "learning_rate": 7.593360260984373e-07, "loss": 0.0007, "step": 205450 }, { "epoch": 1.6801733654986304, "grad_norm": 0.026936648413538933, "learning_rate": 7.589579988103934e-07, "loss": 0.001, "step": 205460 }, { "epoch": 1.6802551416772293, "grad_norm": 0.0020397775806486607, "learning_rate": 7.58580057915822e-07, "loss": 0.0008, "step": 205470 }, { "epoch": 1.6803369178558287, "grad_norm": 0.0029619624838232994, "learning_rate": 7.582022034224212e-07, "loss": 0.0005, "step": 205480 }, { "epoch": 1.6804186940344277, "grad_norm": 0.013704882003366947, "learning_rate": 7.578244353378905e-07, "loss": 0.0009, "step": 205490 }, { "epoch": 1.680500470213027, "grad_norm": 0.006443624850362539, "learning_rate": 7.574467536699243e-07, "loss": 0.0008, "step": 205500 }, { "epoch": 1.680582246391626, "grad_norm": 0.02540082484483719, "learning_rate": 7.570691584262163e-07, "loss": 0.001, "step": 205510 }, { "epoch": 1.6806640225702254, "grad_norm": 0.006040238309651613, "learning_rate": 7.566916496144589e-07, "loss": 0.0005, "step": 205520 }, { "epoch": 1.6807457987488243, "grad_norm": 0.06419489532709122, "learning_rate": 7.563142272423407e-07, "loss": 0.0005, "step": 205530 }, { "epoch": 1.6808275749274237, "grad_norm": 0.09634792059659958, "learning_rate": 7.559368913175518e-07, "loss": 0.0006, "step": 205540 }, { "epoch": 1.6809093511060227, "grad_norm": 0.01361936330795288, "learning_rate": 7.555596418477784e-07, "loss": 0.0004, "step": 205550 }, { "epoch": 1.680991127284622, "grad_norm": 0.0004039389896206558, "learning_rate": 7.551824788407037e-07, "loss": 0.0003, "step": 205560 }, { "epoch": 1.681072903463221, "grad_norm": 0.05614311620593071, "learning_rate": 7.54805402304013e-07, "loss": 0.0007, "step": 205570 }, { "epoch": 1.6811546796418204, "grad_norm": 0.011074338108301163, "learning_rate": 7.544284122453876e-07, "loss": 0.0004, "step": 205580 }, { "epoch": 1.6812364558204194, "grad_norm": 0.020885201171040535, "learning_rate": 7.540515086725059e-07, "loss": 0.0009, "step": 205590 }, { "epoch": 1.6813182319990188, "grad_norm": 0.02931292913854122, "learning_rate": 7.536746915930454e-07, "loss": 0.0006, "step": 205600 }, { "epoch": 1.6814000081776177, "grad_norm": 0.031173313036561012, "learning_rate": 7.53297961014684e-07, "loss": 0.0013, "step": 205610 }, { "epoch": 1.681481784356217, "grad_norm": 0.06712708622217178, "learning_rate": 7.52921316945095e-07, "loss": 0.0007, "step": 205620 }, { "epoch": 1.6815635605348163, "grad_norm": 0.00806593056768179, "learning_rate": 7.52544759391951e-07, "loss": 0.0003, "step": 205630 }, { "epoch": 1.6816453367134154, "grad_norm": 0.015538652427494526, "learning_rate": 7.521682883629227e-07, "loss": 0.0005, "step": 205640 }, { "epoch": 1.6817271128920146, "grad_norm": 0.017085516825318336, "learning_rate": 7.517919038656795e-07, "loss": 0.0007, "step": 205650 }, { "epoch": 1.6818088890706138, "grad_norm": 0.05086909607052803, "learning_rate": 7.514156059078881e-07, "loss": 0.0009, "step": 205660 }, { "epoch": 1.681890665249213, "grad_norm": 0.011458780616521835, "learning_rate": 7.510393944972144e-07, "loss": 0.0006, "step": 205670 }, { "epoch": 1.681972441427812, "grad_norm": 0.028989043086767197, "learning_rate": 7.506632696413208e-07, "loss": 0.0007, "step": 205680 }, { "epoch": 1.6820542176064113, "grad_norm": 0.005951812956482172, "learning_rate": 7.502872313478716e-07, "loss": 0.001, "step": 205690 }, { "epoch": 1.6821359937850104, "grad_norm": 0.0377063974738121, "learning_rate": 7.499112796245256e-07, "loss": 0.0006, "step": 205700 }, { "epoch": 1.6822177699636096, "grad_norm": 0.030442144721746445, "learning_rate": 7.495354144789419e-07, "loss": 0.0004, "step": 205710 }, { "epoch": 1.6822995461422088, "grad_norm": 0.14693741500377655, "learning_rate": 7.491596359187747e-07, "loss": 0.0005, "step": 205720 }, { "epoch": 1.682381322320808, "grad_norm": 0.042169369757175446, "learning_rate": 7.48783943951683e-07, "loss": 0.0004, "step": 205730 }, { "epoch": 1.6824630984994071, "grad_norm": 0.02539418637752533, "learning_rate": 7.484083385853175e-07, "loss": 0.0008, "step": 205740 }, { "epoch": 1.6825448746780063, "grad_norm": 0.019622160121798515, "learning_rate": 7.480328198273301e-07, "loss": 0.0011, "step": 205750 }, { "epoch": 1.6826266508566055, "grad_norm": 0.06614606082439423, "learning_rate": 7.476573876853704e-07, "loss": 0.0008, "step": 205760 }, { "epoch": 1.6827084270352046, "grad_norm": 0.038609445095062256, "learning_rate": 7.472820421670862e-07, "loss": 0.0007, "step": 205770 }, { "epoch": 1.6827902032138038, "grad_norm": 0.019597632810473442, "learning_rate": 7.469067832801236e-07, "loss": 0.0006, "step": 205780 }, { "epoch": 1.682871979392403, "grad_norm": 0.10454995185136795, "learning_rate": 7.465316110321269e-07, "loss": 0.0006, "step": 205790 }, { "epoch": 1.6829537555710021, "grad_norm": 0.09320510178804398, "learning_rate": 7.46156525430738e-07, "loss": 0.0013, "step": 205800 }, { "epoch": 1.6830355317496013, "grad_norm": 0.02969062328338623, "learning_rate": 7.457815264835988e-07, "loss": 0.0005, "step": 205810 }, { "epoch": 1.6831173079282005, "grad_norm": 0.019299913197755814, "learning_rate": 7.45406614198348e-07, "loss": 0.0009, "step": 205820 }, { "epoch": 1.6831990841067996, "grad_norm": 0.03289001062512398, "learning_rate": 7.45031788582623e-07, "loss": 0.0005, "step": 205830 }, { "epoch": 1.6832808602853988, "grad_norm": 0.03672158345580101, "learning_rate": 7.44657049644058e-07, "loss": 0.0003, "step": 205840 }, { "epoch": 1.6833626364639982, "grad_norm": 0.03417637199163437, "learning_rate": 7.442823973902885e-07, "loss": 0.0005, "step": 205850 }, { "epoch": 1.6834444126425971, "grad_norm": 0.012615927495062351, "learning_rate": 7.439078318289461e-07, "loss": 0.0015, "step": 205860 }, { "epoch": 1.6835261888211965, "grad_norm": 0.02085421048104763, "learning_rate": 7.435333529676608e-07, "loss": 0.0004, "step": 205870 }, { "epoch": 1.6836079649997955, "grad_norm": 0.019710805267095566, "learning_rate": 7.431589608140611e-07, "loss": 0.0009, "step": 205880 }, { "epoch": 1.6836897411783949, "grad_norm": 0.028121987357735634, "learning_rate": 7.427846553757729e-07, "loss": 0.0008, "step": 205890 }, { "epoch": 1.6837715173569938, "grad_norm": 0.016764679923653603, "learning_rate": 7.424104366604223e-07, "loss": 0.0005, "step": 205900 }, { "epoch": 1.6838532935355932, "grad_norm": 0.0005563950981013477, "learning_rate": 7.420363046756313e-07, "loss": 0.0005, "step": 205910 }, { "epoch": 1.6839350697141922, "grad_norm": 0.14378629624843597, "learning_rate": 7.416622594290223e-07, "loss": 0.0004, "step": 205920 }, { "epoch": 1.6840168458927915, "grad_norm": 0.047054775059223175, "learning_rate": 7.412883009282129e-07, "loss": 0.0014, "step": 205930 }, { "epoch": 1.6840986220713905, "grad_norm": 0.1096964105963707, "learning_rate": 7.40914429180824e-07, "loss": 0.0011, "step": 205940 }, { "epoch": 1.6841803982499899, "grad_norm": 0.02626664750277996, "learning_rate": 7.405406441944702e-07, "loss": 0.0004, "step": 205950 }, { "epoch": 1.6842621744285888, "grad_norm": 0.04576437175273895, "learning_rate": 7.401669459767652e-07, "loss": 0.0005, "step": 205960 }, { "epoch": 1.6843439506071882, "grad_norm": 0.004221511073410511, "learning_rate": 7.397933345353214e-07, "loss": 0.0005, "step": 205970 }, { "epoch": 1.6844257267857872, "grad_norm": 0.05921265110373497, "learning_rate": 7.394198098777511e-07, "loss": 0.0005, "step": 205980 }, { "epoch": 1.6845075029643866, "grad_norm": 0.01685553975403309, "learning_rate": 7.390463720116625e-07, "loss": 0.001, "step": 205990 }, { "epoch": 1.6845892791429855, "grad_norm": 0.01662653684616089, "learning_rate": 7.38673020944663e-07, "loss": 0.0004, "step": 206000 }, { "epoch": 1.684671055321585, "grad_norm": 0.02500949241220951, "learning_rate": 7.382997566843575e-07, "loss": 0.0007, "step": 206010 }, { "epoch": 1.6847528315001838, "grad_norm": 0.11597361415624619, "learning_rate": 7.3792657923835e-07, "loss": 0.0008, "step": 206020 }, { "epoch": 1.6848346076787832, "grad_norm": 0.029130855575203896, "learning_rate": 7.375534886142427e-07, "loss": 0.0009, "step": 206030 }, { "epoch": 1.6849163838573822, "grad_norm": 0.08595727384090424, "learning_rate": 7.371804848196357e-07, "loss": 0.0013, "step": 206040 }, { "epoch": 1.6849981600359816, "grad_norm": 0.014475410804152489, "learning_rate": 7.368075678621261e-07, "loss": 0.0003, "step": 206050 }, { "epoch": 1.6850799362145807, "grad_norm": 0.04595265910029411, "learning_rate": 7.364347377493125e-07, "loss": 0.0012, "step": 206060 }, { "epoch": 1.68516171239318, "grad_norm": 0.036522600799798965, "learning_rate": 7.360619944887886e-07, "loss": 0.0007, "step": 206070 }, { "epoch": 1.685243488571779, "grad_norm": 0.049369435757398605, "learning_rate": 7.356893380881486e-07, "loss": 0.0003, "step": 206080 }, { "epoch": 1.6853252647503782, "grad_norm": 0.05305035412311554, "learning_rate": 7.353167685549811e-07, "loss": 0.0005, "step": 206090 }, { "epoch": 1.6854070409289774, "grad_norm": 0.01377018541097641, "learning_rate": 7.349442858968791e-07, "loss": 0.0005, "step": 206100 }, { "epoch": 1.6854888171075766, "grad_norm": 0.0658927708864212, "learning_rate": 7.345718901214289e-07, "loss": 0.0008, "step": 206110 }, { "epoch": 1.6855705932861758, "grad_norm": 0.019522814080119133, "learning_rate": 7.341995812362157e-07, "loss": 0.0008, "step": 206120 }, { "epoch": 1.685652369464775, "grad_norm": 0.03826078027486801, "learning_rate": 7.338273592488255e-07, "loss": 0.0003, "step": 206130 }, { "epoch": 1.685734145643374, "grad_norm": 0.003457523649558425, "learning_rate": 7.334552241668385e-07, "loss": 0.0003, "step": 206140 }, { "epoch": 1.6858159218219733, "grad_norm": 0.002220138907432556, "learning_rate": 7.330831759978374e-07, "loss": 0.0002, "step": 206150 }, { "epoch": 1.6858976980005724, "grad_norm": 0.012741880491375923, "learning_rate": 7.327112147494003e-07, "loss": 0.0022, "step": 206160 }, { "epoch": 1.6859794741791716, "grad_norm": 0.10776611417531967, "learning_rate": 7.323393404291029e-07, "loss": 0.0007, "step": 206170 }, { "epoch": 1.6860612503577708, "grad_norm": 0.013498705811798573, "learning_rate": 7.319675530445236e-07, "loss": 0.0005, "step": 206180 }, { "epoch": 1.68614302653637, "grad_norm": 0.002103244885802269, "learning_rate": 7.315958526032346e-07, "loss": 0.0005, "step": 206190 }, { "epoch": 1.686224802714969, "grad_norm": 0.009065303951501846, "learning_rate": 7.312242391128071e-07, "loss": 0.0004, "step": 206200 }, { "epoch": 1.6863065788935683, "grad_norm": 0.10825743526220322, "learning_rate": 7.308527125808107e-07, "loss": 0.001, "step": 206210 }, { "epoch": 1.6863883550721674, "grad_norm": 0.028987620025873184, "learning_rate": 7.304812730148158e-07, "loss": 0.0005, "step": 206220 }, { "epoch": 1.6864701312507666, "grad_norm": 0.03121432662010193, "learning_rate": 7.301099204223882e-07, "loss": 0.0008, "step": 206230 }, { "epoch": 1.6865519074293658, "grad_norm": 0.04675588384270668, "learning_rate": 7.297386548110919e-07, "loss": 0.001, "step": 206240 }, { "epoch": 1.686633683607965, "grad_norm": 0.024939142167568207, "learning_rate": 7.293674761884901e-07, "loss": 0.0007, "step": 206250 }, { "epoch": 1.6867154597865641, "grad_norm": 0.10717324167490005, "learning_rate": 7.289963845621445e-07, "loss": 0.0004, "step": 206260 }, { "epoch": 1.6867972359651633, "grad_norm": 0.09339920431375504, "learning_rate": 7.286253799396142e-07, "loss": 0.0008, "step": 206270 }, { "epoch": 1.6868790121437627, "grad_norm": 0.03687084838747978, "learning_rate": 7.282544623284566e-07, "loss": 0.0007, "step": 206280 }, { "epoch": 1.6869607883223616, "grad_norm": 0.0165325365960598, "learning_rate": 7.27883631736227e-07, "loss": 0.0014, "step": 206290 }, { "epoch": 1.687042564500961, "grad_norm": 0.03384783864021301, "learning_rate": 7.275128881704812e-07, "loss": 0.0006, "step": 206300 }, { "epoch": 1.68712434067956, "grad_norm": 0.0038010047283023596, "learning_rate": 7.27142231638771e-07, "loss": 0.0006, "step": 206310 }, { "epoch": 1.6872061168581594, "grad_norm": 0.05446847528219223, "learning_rate": 7.267716621486464e-07, "loss": 0.0008, "step": 206320 }, { "epoch": 1.6872878930367583, "grad_norm": 0.003680146997794509, "learning_rate": 7.264011797076554e-07, "loss": 0.0003, "step": 206330 }, { "epoch": 1.6873696692153577, "grad_norm": 0.03533371165394783, "learning_rate": 7.260307843233477e-07, "loss": 0.0004, "step": 206340 }, { "epoch": 1.6874514453939566, "grad_norm": 0.024836063385009766, "learning_rate": 7.256604760032665e-07, "loss": 0.0006, "step": 206350 }, { "epoch": 1.687533221572556, "grad_norm": 0.04232259467244148, "learning_rate": 7.252902547549557e-07, "loss": 0.0006, "step": 206360 }, { "epoch": 1.687614997751155, "grad_norm": 0.026347283273935318, "learning_rate": 7.249201205859573e-07, "loss": 0.0003, "step": 206370 }, { "epoch": 1.6876967739297544, "grad_norm": 0.006357148289680481, "learning_rate": 7.245500735038113e-07, "loss": 0.0005, "step": 206380 }, { "epoch": 1.6877785501083533, "grad_norm": 0.007452561054378748, "learning_rate": 7.241801135160553e-07, "loss": 0.0006, "step": 206390 }, { "epoch": 1.6878603262869527, "grad_norm": 0.03279183432459831, "learning_rate": 7.238102406302255e-07, "loss": 0.0007, "step": 206400 }, { "epoch": 1.6879421024655517, "grad_norm": 0.06434199213981628, "learning_rate": 7.234404548538565e-07, "loss": 0.0006, "step": 206410 }, { "epoch": 1.688023878644151, "grad_norm": 0.03488403186202049, "learning_rate": 7.230707561944822e-07, "loss": 0.0007, "step": 206420 }, { "epoch": 1.68810565482275, "grad_norm": 0.019966034218668938, "learning_rate": 7.227011446596333e-07, "loss": 0.0004, "step": 206430 }, { "epoch": 1.6881874310013494, "grad_norm": 0.027550239115953445, "learning_rate": 7.223316202568392e-07, "loss": 0.0003, "step": 206440 }, { "epoch": 1.6882692071799483, "grad_norm": 0.09810832887887955, "learning_rate": 7.219621829936257e-07, "loss": 0.0009, "step": 206450 }, { "epoch": 1.6883509833585477, "grad_norm": 0.0015943646430969238, "learning_rate": 7.215928328775207e-07, "loss": 0.0009, "step": 206460 }, { "epoch": 1.6884327595371467, "grad_norm": 0.01905120350420475, "learning_rate": 7.212235699160475e-07, "loss": 0.0004, "step": 206470 }, { "epoch": 1.688514535715746, "grad_norm": 0.01980079896748066, "learning_rate": 7.208543941167284e-07, "loss": 0.0007, "step": 206480 }, { "epoch": 1.6885963118943452, "grad_norm": 0.005809019319713116, "learning_rate": 7.204853054870831e-07, "loss": 0.0012, "step": 206490 }, { "epoch": 1.6886780880729444, "grad_norm": 0.03527253493666649, "learning_rate": 7.20116304034631e-07, "loss": 0.0003, "step": 206500 }, { "epoch": 1.6887598642515436, "grad_norm": 0.219846710562706, "learning_rate": 7.197473897668888e-07, "loss": 0.0019, "step": 206510 }, { "epoch": 1.6888416404301427, "grad_norm": 0.11759908497333527, "learning_rate": 7.193785626913713e-07, "loss": 0.0007, "step": 206520 }, { "epoch": 1.688923416608742, "grad_norm": 0.08878560364246368, "learning_rate": 7.190098228155906e-07, "loss": 0.001, "step": 206530 }, { "epoch": 1.689005192787341, "grad_norm": 0.04904954135417938, "learning_rate": 7.186411701470614e-07, "loss": 0.0004, "step": 206540 }, { "epoch": 1.6890869689659402, "grad_norm": 0.16329658031463623, "learning_rate": 7.182726046932909e-07, "loss": 0.0023, "step": 206550 }, { "epoch": 1.6891687451445394, "grad_norm": 0.035632893443107605, "learning_rate": 7.17904126461788e-07, "loss": 0.0013, "step": 206560 }, { "epoch": 1.6892505213231386, "grad_norm": 0.0007211858755908906, "learning_rate": 7.175357354600576e-07, "loss": 0.0003, "step": 206570 }, { "epoch": 1.6893322975017377, "grad_norm": 0.042965952306985855, "learning_rate": 7.171674316956062e-07, "loss": 0.0011, "step": 206580 }, { "epoch": 1.689414073680337, "grad_norm": 0.053166259080171585, "learning_rate": 7.167992151759356e-07, "loss": 0.0008, "step": 206590 }, { "epoch": 1.689495849858936, "grad_norm": 0.0805736631155014, "learning_rate": 7.164310859085466e-07, "loss": 0.0009, "step": 206600 }, { "epoch": 1.6895776260375353, "grad_norm": 0.10523431748151779, "learning_rate": 7.16063043900938e-07, "loss": 0.0009, "step": 206610 }, { "epoch": 1.6896594022161344, "grad_norm": 0.06233742833137512, "learning_rate": 7.156950891606079e-07, "loss": 0.0008, "step": 206620 }, { "epoch": 1.6897411783947336, "grad_norm": 0.04500522091984749, "learning_rate": 7.153272216950508e-07, "loss": 0.0007, "step": 206630 }, { "epoch": 1.6898229545733328, "grad_norm": 0.02319944277405739, "learning_rate": 7.149594415117611e-07, "loss": 0.0006, "step": 206640 }, { "epoch": 1.689904730751932, "grad_norm": 0.0025370221119374037, "learning_rate": 7.145917486182297e-07, "loss": 0.0009, "step": 206650 }, { "epoch": 1.689986506930531, "grad_norm": 0.0017573559889569879, "learning_rate": 7.142241430219487e-07, "loss": 0.0013, "step": 206660 }, { "epoch": 1.6900682831091303, "grad_norm": 0.12173675000667572, "learning_rate": 7.138566247304058e-07, "loss": 0.0007, "step": 206670 }, { "epoch": 1.6901500592877294, "grad_norm": 0.028683114796876907, "learning_rate": 7.134891937510869e-07, "loss": 0.0004, "step": 206680 }, { "epoch": 1.6902318354663288, "grad_norm": 0.019767532125115395, "learning_rate": 7.131218500914767e-07, "loss": 0.0005, "step": 206690 }, { "epoch": 1.6903136116449278, "grad_norm": 0.023660873994231224, "learning_rate": 7.127545937590597e-07, "loss": 0.0003, "step": 206700 }, { "epoch": 1.6903953878235272, "grad_norm": 0.005348937585949898, "learning_rate": 7.123874247613166e-07, "loss": 0.0003, "step": 206710 }, { "epoch": 1.690477164002126, "grad_norm": 0.028506020084023476, "learning_rate": 7.120203431057271e-07, "loss": 0.0009, "step": 206720 }, { "epoch": 1.6905589401807255, "grad_norm": 0.007117576897144318, "learning_rate": 7.116533487997685e-07, "loss": 0.0004, "step": 206730 }, { "epoch": 1.6906407163593244, "grad_norm": 0.03405661880970001, "learning_rate": 7.112864418509169e-07, "loss": 0.0004, "step": 206740 }, { "epoch": 1.6907224925379238, "grad_norm": 0.03452545031905174, "learning_rate": 7.109196222666465e-07, "loss": 0.0005, "step": 206750 }, { "epoch": 1.6908042687165228, "grad_norm": 0.03838936612010002, "learning_rate": 7.105528900544295e-07, "loss": 0.0006, "step": 206760 }, { "epoch": 1.6908860448951222, "grad_norm": 0.026225371286273003, "learning_rate": 7.101862452217356e-07, "loss": 0.0005, "step": 206770 }, { "epoch": 1.6909678210737211, "grad_norm": 0.018792640417814255, "learning_rate": 7.098196877760366e-07, "loss": 0.0006, "step": 206780 }, { "epoch": 1.6910495972523205, "grad_norm": 0.0063233571127057076, "learning_rate": 7.094532177247971e-07, "loss": 0.0006, "step": 206790 }, { "epoch": 1.6911313734309195, "grad_norm": 0.016383035108447075, "learning_rate": 7.090868350754837e-07, "loss": 0.0007, "step": 206800 }, { "epoch": 1.6912131496095189, "grad_norm": 0.0007898375624790788, "learning_rate": 7.087205398355579e-07, "loss": 0.0005, "step": 206810 }, { "epoch": 1.6912949257881178, "grad_norm": 0.030982738360762596, "learning_rate": 7.083543320124836e-07, "loss": 0.0006, "step": 206820 }, { "epoch": 1.6913767019667172, "grad_norm": 0.02430691570043564, "learning_rate": 7.079882116137204e-07, "loss": 0.0006, "step": 206830 }, { "epoch": 1.6914584781453161, "grad_norm": 0.016881505027413368, "learning_rate": 7.076221786467269e-07, "loss": 0.0004, "step": 206840 }, { "epoch": 1.6915402543239155, "grad_norm": 0.03837544471025467, "learning_rate": 7.072562331189569e-07, "loss": 0.0009, "step": 206850 }, { "epoch": 1.6916220305025145, "grad_norm": 0.01121398713439703, "learning_rate": 7.068903750378675e-07, "loss": 0.001, "step": 206860 }, { "epoch": 1.6917038066811139, "grad_norm": 0.006675133015960455, "learning_rate": 7.065246044109109e-07, "loss": 0.0009, "step": 206870 }, { "epoch": 1.6917855828597128, "grad_norm": 0.00583757646381855, "learning_rate": 7.061589212455378e-07, "loss": 0.0009, "step": 206880 }, { "epoch": 1.6918673590383122, "grad_norm": 0.04471621289849281, "learning_rate": 7.057933255491967e-07, "loss": 0.0006, "step": 206890 }, { "epoch": 1.6919491352169111, "grad_norm": 0.17707571387290955, "learning_rate": 7.054278173293372e-07, "loss": 0.0014, "step": 206900 }, { "epoch": 1.6920309113955105, "grad_norm": 0.017973290756344795, "learning_rate": 7.05062396593404e-07, "loss": 0.0004, "step": 206910 }, { "epoch": 1.6921126875741097, "grad_norm": 0.01634904555976391, "learning_rate": 7.046970633488409e-07, "loss": 0.0008, "step": 206920 }, { "epoch": 1.6921944637527089, "grad_norm": 0.014236390590667725, "learning_rate": 7.043318176030883e-07, "loss": 0.0008, "step": 206930 }, { "epoch": 1.692276239931308, "grad_norm": 0.017906587570905685, "learning_rate": 7.039666593635902e-07, "loss": 0.0005, "step": 206940 }, { "epoch": 1.6923580161099072, "grad_norm": 0.021367594599723816, "learning_rate": 7.036015886377828e-07, "loss": 0.0007, "step": 206950 }, { "epoch": 1.6924397922885064, "grad_norm": 0.1690969318151474, "learning_rate": 7.032366054331047e-07, "loss": 0.0006, "step": 206960 }, { "epoch": 1.6925215684671056, "grad_norm": 0.049068208783864975, "learning_rate": 7.02871709756987e-07, "loss": 0.0006, "step": 206970 }, { "epoch": 1.6926033446457047, "grad_norm": 0.031359702348709106, "learning_rate": 7.025069016168668e-07, "loss": 0.0004, "step": 206980 }, { "epoch": 1.692685120824304, "grad_norm": 0.003937064670026302, "learning_rate": 7.021421810201739e-07, "loss": 0.0003, "step": 206990 }, { "epoch": 1.692766897002903, "grad_norm": 0.0871475338935852, "learning_rate": 7.017775479743383e-07, "loss": 0.0007, "step": 207000 }, { "epoch": 1.6928486731815022, "grad_norm": 0.009738062508404255, "learning_rate": 7.014130024867871e-07, "loss": 0.0005, "step": 207010 }, { "epoch": 1.6929304493601014, "grad_norm": 0.04674387350678444, "learning_rate": 7.010485445649478e-07, "loss": 0.0011, "step": 207020 }, { "epoch": 1.6930122255387006, "grad_norm": 0.0009829450864344835, "learning_rate": 7.00684174216244e-07, "loss": 0.0012, "step": 207030 }, { "epoch": 1.6930940017172997, "grad_norm": 0.002566348295658827, "learning_rate": 7.003198914480986e-07, "loss": 0.0005, "step": 207040 }, { "epoch": 1.693175777895899, "grad_norm": 0.0044176927767694, "learning_rate": 6.999556962679305e-07, "loss": 0.0005, "step": 207050 }, { "epoch": 1.693257554074498, "grad_norm": 0.023559775203466415, "learning_rate": 6.995915886831611e-07, "loss": 0.0004, "step": 207060 }, { "epoch": 1.6933393302530972, "grad_norm": 0.03467691317200661, "learning_rate": 6.992275687012073e-07, "loss": 0.0008, "step": 207070 }, { "epoch": 1.6934211064316964, "grad_norm": 0.10099146515130997, "learning_rate": 6.988636363294843e-07, "loss": 0.0008, "step": 207080 }, { "epoch": 1.6935028826102956, "grad_norm": 0.018210584297776222, "learning_rate": 6.984997915754033e-07, "loss": 0.0014, "step": 207090 }, { "epoch": 1.6935846587888947, "grad_norm": 0.045553091913461685, "learning_rate": 6.981360344463789e-07, "loss": 0.0008, "step": 207100 }, { "epoch": 1.693666434967494, "grad_norm": 0.18539869785308838, "learning_rate": 6.977723649498202e-07, "loss": 0.0008, "step": 207110 }, { "epoch": 1.6937482111460933, "grad_norm": 0.04082641005516052, "learning_rate": 6.974087830931353e-07, "loss": 0.001, "step": 207120 }, { "epoch": 1.6938299873246923, "grad_norm": 0.12175210565328598, "learning_rate": 6.970452888837298e-07, "loss": 0.0004, "step": 207130 }, { "epoch": 1.6939117635032916, "grad_norm": 0.015109353698790073, "learning_rate": 6.966818823290105e-07, "loss": 0.0008, "step": 207140 }, { "epoch": 1.6939935396818906, "grad_norm": 0.06955298036336899, "learning_rate": 6.963185634363795e-07, "loss": 0.0006, "step": 207150 }, { "epoch": 1.69407531586049, "grad_norm": 0.029914753511548042, "learning_rate": 6.959553322132373e-07, "loss": 0.0005, "step": 207160 }, { "epoch": 1.694157092039089, "grad_norm": 0.009817062877118587, "learning_rate": 6.95592188666982e-07, "loss": 0.0005, "step": 207170 }, { "epoch": 1.6942388682176883, "grad_norm": 0.03786180913448334, "learning_rate": 6.952291328050142e-07, "loss": 0.0009, "step": 207180 }, { "epoch": 1.6943206443962873, "grad_norm": 0.0013864103239029646, "learning_rate": 6.948661646347277e-07, "loss": 0.0006, "step": 207190 }, { "epoch": 1.6944024205748867, "grad_norm": 0.0331263542175293, "learning_rate": 6.945032841635186e-07, "loss": 0.0003, "step": 207200 }, { "epoch": 1.6944841967534856, "grad_norm": 0.10184670984745026, "learning_rate": 6.941404913987743e-07, "loss": 0.0007, "step": 207210 }, { "epoch": 1.694565972932085, "grad_norm": 0.05486704409122467, "learning_rate": 6.937777863478895e-07, "loss": 0.0004, "step": 207220 }, { "epoch": 1.694647749110684, "grad_norm": 0.0021798477973788977, "learning_rate": 6.934151690182516e-07, "loss": 0.0007, "step": 207230 }, { "epoch": 1.6947295252892833, "grad_norm": 0.05825298652052879, "learning_rate": 6.930526394172476e-07, "loss": 0.0014, "step": 207240 }, { "epoch": 1.6948113014678823, "grad_norm": 0.08071929216384888, "learning_rate": 6.926901975522604e-07, "loss": 0.0005, "step": 207250 }, { "epoch": 1.6948930776464817, "grad_norm": 0.058597348630428314, "learning_rate": 6.923278434306768e-07, "loss": 0.0008, "step": 207260 }, { "epoch": 1.6949748538250806, "grad_norm": 0.019861310720443726, "learning_rate": 6.91965577059876e-07, "loss": 0.0012, "step": 207270 }, { "epoch": 1.69505663000368, "grad_norm": 0.023889929056167603, "learning_rate": 6.916033984472387e-07, "loss": 0.0009, "step": 207280 }, { "epoch": 1.695138406182279, "grad_norm": 0.007228900212794542, "learning_rate": 6.912413076001406e-07, "loss": 0.0012, "step": 207290 }, { "epoch": 1.6952201823608783, "grad_norm": 0.030908070504665375, "learning_rate": 6.908793045259604e-07, "loss": 0.0009, "step": 207300 }, { "epoch": 1.6953019585394773, "grad_norm": 0.09111952781677246, "learning_rate": 6.905173892320715e-07, "loss": 0.0005, "step": 207310 }, { "epoch": 1.6953837347180767, "grad_norm": 0.02894355170428753, "learning_rate": 6.901555617258477e-07, "loss": 0.0005, "step": 207320 }, { "epoch": 1.6954655108966759, "grad_norm": 0.03486952185630798, "learning_rate": 6.897938220146561e-07, "loss": 0.0009, "step": 207330 }, { "epoch": 1.695547287075275, "grad_norm": 0.03408101573586464, "learning_rate": 6.894321701058687e-07, "loss": 0.0005, "step": 207340 }, { "epoch": 1.6956290632538742, "grad_norm": 0.03483487665653229, "learning_rate": 6.89070606006852e-07, "loss": 0.0004, "step": 207350 }, { "epoch": 1.6957108394324734, "grad_norm": 0.02827521786093712, "learning_rate": 6.88709129724971e-07, "loss": 0.0008, "step": 207360 }, { "epoch": 1.6957926156110725, "grad_norm": 0.07483791559934616, "learning_rate": 6.883477412675893e-07, "loss": 0.0005, "step": 207370 }, { "epoch": 1.6958743917896717, "grad_norm": 0.00373904244042933, "learning_rate": 6.879864406420683e-07, "loss": 0.0006, "step": 207380 }, { "epoch": 1.6959561679682709, "grad_norm": 0.10390563309192657, "learning_rate": 6.876252278557688e-07, "loss": 0.0006, "step": 207390 }, { "epoch": 1.69603794414687, "grad_norm": 0.01623103767633438, "learning_rate": 6.872641029160493e-07, "loss": 0.0006, "step": 207400 }, { "epoch": 1.6961197203254692, "grad_norm": 0.018377967178821564, "learning_rate": 6.869030658302656e-07, "loss": 0.0007, "step": 207410 }, { "epoch": 1.6962014965040684, "grad_norm": 0.0016231167828664184, "learning_rate": 6.865421166057707e-07, "loss": 0.0003, "step": 207420 }, { "epoch": 1.6962832726826675, "grad_norm": 0.011931076645851135, "learning_rate": 6.861812552499203e-07, "loss": 0.0024, "step": 207430 }, { "epoch": 1.6963650488612667, "grad_norm": 0.015613628551363945, "learning_rate": 6.858204817700653e-07, "loss": 0.0009, "step": 207440 }, { "epoch": 1.6964468250398659, "grad_norm": 0.008836740627884865, "learning_rate": 6.854597961735531e-07, "loss": 0.0004, "step": 207450 }, { "epoch": 1.696528601218465, "grad_norm": 0.027899863198399544, "learning_rate": 6.850991984677302e-07, "loss": 0.0004, "step": 207460 }, { "epoch": 1.6966103773970642, "grad_norm": 0.0014529310865327716, "learning_rate": 6.847386886599455e-07, "loss": 0.0008, "step": 207470 }, { "epoch": 1.6966921535756634, "grad_norm": 0.01820690929889679, "learning_rate": 6.843782667575411e-07, "loss": 0.0007, "step": 207480 }, { "epoch": 1.6967739297542626, "grad_norm": 0.04885190725326538, "learning_rate": 6.840179327678598e-07, "loss": 0.0009, "step": 207490 }, { "epoch": 1.6968557059328617, "grad_norm": 0.05346057191491127, "learning_rate": 6.836576866982397e-07, "loss": 0.0012, "step": 207500 }, { "epoch": 1.696937482111461, "grad_norm": 0.0715756043791771, "learning_rate": 6.832975285560223e-07, "loss": 0.0004, "step": 207510 }, { "epoch": 1.69701925829006, "grad_norm": 0.0473812073469162, "learning_rate": 6.829374583485431e-07, "loss": 0.0005, "step": 207520 }, { "epoch": 1.6971010344686592, "grad_norm": 0.021401217207312584, "learning_rate": 6.825774760831372e-07, "loss": 0.0003, "step": 207530 }, { "epoch": 1.6971828106472584, "grad_norm": 0.012387442402541637, "learning_rate": 6.822175817671355e-07, "loss": 0.0007, "step": 207540 }, { "epoch": 1.6972645868258578, "grad_norm": 0.06393496692180634, "learning_rate": 6.818577754078737e-07, "loss": 0.0008, "step": 207550 }, { "epoch": 1.6973463630044567, "grad_norm": 0.0192177165299654, "learning_rate": 6.814980570126794e-07, "loss": 0.0004, "step": 207560 }, { "epoch": 1.6974281391830561, "grad_norm": 0.03565094992518425, "learning_rate": 6.811384265888788e-07, "loss": 0.0006, "step": 207570 }, { "epoch": 1.697509915361655, "grad_norm": 0.020758453756570816, "learning_rate": 6.807788841437979e-07, "loss": 0.0009, "step": 207580 }, { "epoch": 1.6975916915402545, "grad_norm": 0.020394792780280113, "learning_rate": 6.804194296847633e-07, "loss": 0.0003, "step": 207590 }, { "epoch": 1.6976734677188534, "grad_norm": 0.006157360505312681, "learning_rate": 6.800600632190957e-07, "loss": 0.0005, "step": 207600 }, { "epoch": 1.6977552438974528, "grad_norm": 0.060064345598220825, "learning_rate": 6.79700784754116e-07, "loss": 0.0009, "step": 207610 }, { "epoch": 1.6978370200760518, "grad_norm": 0.0013473997823894024, "learning_rate": 6.793415942971421e-07, "loss": 0.0006, "step": 207620 }, { "epoch": 1.6979187962546511, "grad_norm": 0.051476724445819855, "learning_rate": 6.789824918554932e-07, "loss": 0.003, "step": 207630 }, { "epoch": 1.69800057243325, "grad_norm": 0.019565340131521225, "learning_rate": 6.78623477436483e-07, "loss": 0.0007, "step": 207640 }, { "epoch": 1.6980823486118495, "grad_norm": 0.012473488226532936, "learning_rate": 6.782645510474256e-07, "loss": 0.0004, "step": 207650 }, { "epoch": 1.6981641247904484, "grad_norm": 0.06317155063152313, "learning_rate": 6.779057126956306e-07, "loss": 0.0007, "step": 207660 }, { "epoch": 1.6982459009690478, "grad_norm": 0.049398068338632584, "learning_rate": 6.775469623884106e-07, "loss": 0.0008, "step": 207670 }, { "epoch": 1.6983276771476468, "grad_norm": 0.04203151538968086, "learning_rate": 6.771883001330737e-07, "loss": 0.0004, "step": 207680 }, { "epoch": 1.6984094533262462, "grad_norm": 0.03701309859752655, "learning_rate": 6.768297259369239e-07, "loss": 0.0014, "step": 207690 }, { "epoch": 1.698491229504845, "grad_norm": 0.041108883917331696, "learning_rate": 6.764712398072653e-07, "loss": 0.0011, "step": 207700 }, { "epoch": 1.6985730056834445, "grad_norm": 0.0006211588624864817, "learning_rate": 6.761128417514029e-07, "loss": 0.0003, "step": 207710 }, { "epoch": 1.6986547818620434, "grad_norm": 0.0203546229749918, "learning_rate": 6.757545317766368e-07, "loss": 0.0009, "step": 207720 }, { "epoch": 1.6987365580406428, "grad_norm": 0.03507682681083679, "learning_rate": 6.753963098902655e-07, "loss": 0.0006, "step": 207730 }, { "epoch": 1.6988183342192418, "grad_norm": 0.021786626428365707, "learning_rate": 6.750381760995855e-07, "loss": 0.0005, "step": 207740 }, { "epoch": 1.6989001103978412, "grad_norm": 0.021584048867225647, "learning_rate": 6.74680130411895e-07, "loss": 0.0004, "step": 207750 }, { "epoch": 1.6989818865764403, "grad_norm": 0.03916703164577484, "learning_rate": 6.743221728344856e-07, "loss": 0.0004, "step": 207760 }, { "epoch": 1.6990636627550395, "grad_norm": 0.01654605194926262, "learning_rate": 6.739643033746496e-07, "loss": 0.001, "step": 207770 }, { "epoch": 1.6991454389336387, "grad_norm": 0.025829490274190903, "learning_rate": 6.736065220396759e-07, "loss": 0.0005, "step": 207780 }, { "epoch": 1.6992272151122378, "grad_norm": 0.08821187168359756, "learning_rate": 6.732488288368555e-07, "loss": 0.001, "step": 207790 }, { "epoch": 1.699308991290837, "grad_norm": 0.032495301216840744, "learning_rate": 6.728912237734747e-07, "loss": 0.0008, "step": 207800 }, { "epoch": 1.6993907674694362, "grad_norm": 0.07073378562927246, "learning_rate": 6.725337068568155e-07, "loss": 0.0009, "step": 207810 }, { "epoch": 1.6994725436480353, "grad_norm": 0.00847615860402584, "learning_rate": 6.72176278094161e-07, "loss": 0.0004, "step": 207820 }, { "epoch": 1.6995543198266345, "grad_norm": 0.10974664986133575, "learning_rate": 6.718189374927947e-07, "loss": 0.002, "step": 207830 }, { "epoch": 1.6996360960052337, "grad_norm": 0.10306022316217422, "learning_rate": 6.714616850599948e-07, "loss": 0.0011, "step": 207840 }, { "epoch": 1.6997178721838329, "grad_norm": 0.08589930087327957, "learning_rate": 6.711045208030387e-07, "loss": 0.0011, "step": 207850 }, { "epoch": 1.699799648362432, "grad_norm": 0.01744239032268524, "learning_rate": 6.707474447292012e-07, "loss": 0.0004, "step": 207860 }, { "epoch": 1.6998814245410312, "grad_norm": 0.03977061063051224, "learning_rate": 6.703904568457587e-07, "loss": 0.0003, "step": 207870 }, { "epoch": 1.6999632007196304, "grad_norm": 0.0053579071536660194, "learning_rate": 6.700335571599815e-07, "loss": 0.0005, "step": 207880 }, { "epoch": 1.7000449768982295, "grad_norm": 0.054303210228681564, "learning_rate": 6.696767456791403e-07, "loss": 0.0007, "step": 207890 }, { "epoch": 1.7001267530768287, "grad_norm": 0.0315762422978878, "learning_rate": 6.693200224105023e-07, "loss": 0.0004, "step": 207900 }, { "epoch": 1.7002085292554279, "grad_norm": 0.0763479471206665, "learning_rate": 6.689633873613372e-07, "loss": 0.0005, "step": 207910 }, { "epoch": 1.700290305434027, "grad_norm": 0.022694380953907967, "learning_rate": 6.68606840538909e-07, "loss": 0.0006, "step": 207920 }, { "epoch": 1.7003720816126262, "grad_norm": 0.06306340545415878, "learning_rate": 6.682503819504793e-07, "loss": 0.0004, "step": 207930 }, { "epoch": 1.7004538577912254, "grad_norm": 0.004442790523171425, "learning_rate": 6.678940116033095e-07, "loss": 0.0004, "step": 207940 }, { "epoch": 1.7005356339698245, "grad_norm": 0.005656104069203138, "learning_rate": 6.675377295046609e-07, "loss": 0.0005, "step": 207950 }, { "epoch": 1.7006174101484237, "grad_norm": 0.025615625083446503, "learning_rate": 6.671815356617905e-07, "loss": 0.0006, "step": 207960 }, { "epoch": 1.7006991863270229, "grad_norm": 0.05119645223021507, "learning_rate": 6.668254300819538e-07, "loss": 0.0008, "step": 207970 }, { "epoch": 1.7007809625056223, "grad_norm": 0.12699812650680542, "learning_rate": 6.664694127724042e-07, "loss": 0.0016, "step": 207980 }, { "epoch": 1.7008627386842212, "grad_norm": 0.0007153292535804212, "learning_rate": 6.661134837403965e-07, "loss": 0.0006, "step": 207990 }, { "epoch": 1.7009445148628206, "grad_norm": 0.035523414611816406, "learning_rate": 6.657576429931795e-07, "loss": 0.0005, "step": 208000 }, { "epoch": 1.7010262910414196, "grad_norm": 0.004385926760733128, "learning_rate": 6.654018905380027e-07, "loss": 0.0004, "step": 208010 }, { "epoch": 1.701108067220019, "grad_norm": 0.05783892795443535, "learning_rate": 6.650462263821117e-07, "loss": 0.0004, "step": 208020 }, { "epoch": 1.701189843398618, "grad_norm": 0.024046430364251137, "learning_rate": 6.646906505327538e-07, "loss": 0.0007, "step": 208030 }, { "epoch": 1.7012716195772173, "grad_norm": 0.0528118722140789, "learning_rate": 6.643351629971723e-07, "loss": 0.0004, "step": 208040 }, { "epoch": 1.7013533957558162, "grad_norm": 0.03246631100773811, "learning_rate": 6.639797637826062e-07, "loss": 0.0004, "step": 208050 }, { "epoch": 1.7014351719344156, "grad_norm": 0.00524304760619998, "learning_rate": 6.636244528962959e-07, "loss": 0.0005, "step": 208060 }, { "epoch": 1.7015169481130146, "grad_norm": 0.011865156702697277, "learning_rate": 6.632692303454818e-07, "loss": 0.0005, "step": 208070 }, { "epoch": 1.701598724291614, "grad_norm": 0.005948696751147509, "learning_rate": 6.629140961373981e-07, "loss": 0.0004, "step": 208080 }, { "epoch": 1.701680500470213, "grad_norm": 0.03940321132540703, "learning_rate": 6.6255905027928e-07, "loss": 0.0011, "step": 208090 }, { "epoch": 1.7017622766488123, "grad_norm": 0.08535057306289673, "learning_rate": 6.622040927783585e-07, "loss": 0.0007, "step": 208100 }, { "epoch": 1.7018440528274112, "grad_norm": 0.014979484491050243, "learning_rate": 6.618492236418661e-07, "loss": 0.0003, "step": 208110 }, { "epoch": 1.7019258290060106, "grad_norm": 0.03985595703125, "learning_rate": 6.61494442877032e-07, "loss": 0.0004, "step": 208120 }, { "epoch": 1.7020076051846096, "grad_norm": 0.09685583412647247, "learning_rate": 6.611397504910822e-07, "loss": 0.0011, "step": 208130 }, { "epoch": 1.702089381363209, "grad_norm": 0.007986454293131828, "learning_rate": 6.607851464912418e-07, "loss": 0.0007, "step": 208140 }, { "epoch": 1.702171157541808, "grad_norm": 0.005190311465412378, "learning_rate": 6.604306308847358e-07, "loss": 0.0002, "step": 208150 }, { "epoch": 1.7022529337204073, "grad_norm": 0.05401008948683739, "learning_rate": 6.600762036787861e-07, "loss": 0.0009, "step": 208160 }, { "epoch": 1.7023347098990063, "grad_norm": 0.001458899350836873, "learning_rate": 6.597218648806108e-07, "loss": 0.0003, "step": 208170 }, { "epoch": 1.7024164860776057, "grad_norm": 0.02219722792506218, "learning_rate": 6.593676144974287e-07, "loss": 0.0007, "step": 208180 }, { "epoch": 1.7024982622562048, "grad_norm": 0.004962075036019087, "learning_rate": 6.590134525364566e-07, "loss": 0.0019, "step": 208190 }, { "epoch": 1.702580038434804, "grad_norm": 0.07494751363992691, "learning_rate": 6.586593790049095e-07, "loss": 0.0009, "step": 208200 }, { "epoch": 1.7026618146134032, "grad_norm": 0.023435385897755623, "learning_rate": 6.583053939099998e-07, "loss": 0.0005, "step": 208210 }, { "epoch": 1.7027435907920023, "grad_norm": 0.017867259681224823, "learning_rate": 6.579514972589374e-07, "loss": 0.0009, "step": 208220 }, { "epoch": 1.7028253669706015, "grad_norm": 0.021840617060661316, "learning_rate": 6.575976890589331e-07, "loss": 0.0003, "step": 208230 }, { "epoch": 1.7029071431492007, "grad_norm": 0.0023529708851128817, "learning_rate": 6.572439693171934e-07, "loss": 0.0007, "step": 208240 }, { "epoch": 1.7029889193277998, "grad_norm": 0.03405731916427612, "learning_rate": 6.568903380409242e-07, "loss": 0.0003, "step": 208250 }, { "epoch": 1.703070695506399, "grad_norm": 0.03910977393388748, "learning_rate": 6.565367952373281e-07, "loss": 0.0007, "step": 208260 }, { "epoch": 1.7031524716849982, "grad_norm": 0.09295249730348587, "learning_rate": 6.561833409136093e-07, "loss": 0.0003, "step": 208270 }, { "epoch": 1.7032342478635973, "grad_norm": 0.024200353771448135, "learning_rate": 6.558299750769676e-07, "loss": 0.0005, "step": 208280 }, { "epoch": 1.7033160240421965, "grad_norm": 0.1168447807431221, "learning_rate": 6.554766977345988e-07, "loss": 0.0004, "step": 208290 }, { "epoch": 1.7033978002207957, "grad_norm": 0.031331826001405716, "learning_rate": 6.551235088937008e-07, "loss": 0.0007, "step": 208300 }, { "epoch": 1.7034795763993948, "grad_norm": 0.01365096028894186, "learning_rate": 6.547704085614692e-07, "loss": 0.0006, "step": 208310 }, { "epoch": 1.703561352577994, "grad_norm": 0.03320321440696716, "learning_rate": 6.544173967450967e-07, "loss": 0.0003, "step": 208320 }, { "epoch": 1.7036431287565932, "grad_norm": 0.029929976910352707, "learning_rate": 6.540644734517737e-07, "loss": 0.0005, "step": 208330 }, { "epoch": 1.7037249049351924, "grad_norm": 0.03610312193632126, "learning_rate": 6.537116386886894e-07, "loss": 0.001, "step": 208340 }, { "epoch": 1.7038066811137915, "grad_norm": 0.057587265968322754, "learning_rate": 6.533588924630324e-07, "loss": 0.0006, "step": 208350 }, { "epoch": 1.7038884572923907, "grad_norm": 0.06605558842420578, "learning_rate": 6.530062347819882e-07, "loss": 0.0006, "step": 208360 }, { "epoch": 1.7039702334709899, "grad_norm": 0.008117441087961197, "learning_rate": 6.526536656527405e-07, "loss": 0.0003, "step": 208370 }, { "epoch": 1.704052009649589, "grad_norm": 0.005110448691993952, "learning_rate": 6.523011850824695e-07, "loss": 0.0012, "step": 208380 }, { "epoch": 1.7041337858281882, "grad_norm": 0.045249853283166885, "learning_rate": 6.519487930783592e-07, "loss": 0.0012, "step": 208390 }, { "epoch": 1.7042155620067874, "grad_norm": 0.02095826156437397, "learning_rate": 6.515964896475874e-07, "loss": 0.0037, "step": 208400 }, { "epoch": 1.7042973381853868, "grad_norm": 0.14547905325889587, "learning_rate": 6.51244274797328e-07, "loss": 0.0005, "step": 208410 }, { "epoch": 1.7043791143639857, "grad_norm": 0.002934165531769395, "learning_rate": 6.508921485347569e-07, "loss": 0.0018, "step": 208420 }, { "epoch": 1.704460890542585, "grad_norm": 0.039791740477085114, "learning_rate": 6.50540110867049e-07, "loss": 0.0017, "step": 208430 }, { "epoch": 1.704542666721184, "grad_norm": 0.0821477547287941, "learning_rate": 6.501881618013739e-07, "loss": 0.0009, "step": 208440 }, { "epoch": 1.7046244428997834, "grad_norm": 0.00445972615852952, "learning_rate": 6.498363013449022e-07, "loss": 0.0005, "step": 208450 }, { "epoch": 1.7047062190783824, "grad_norm": 0.017516274005174637, "learning_rate": 6.494845295047996e-07, "loss": 0.0023, "step": 208460 }, { "epoch": 1.7047879952569818, "grad_norm": 0.15153515338897705, "learning_rate": 6.491328462882351e-07, "loss": 0.0013, "step": 208470 }, { "epoch": 1.7048697714355807, "grad_norm": 0.010827325284481049, "learning_rate": 6.487812517023706e-07, "loss": 0.0004, "step": 208480 }, { "epoch": 1.70495154761418, "grad_norm": 0.04913806915283203, "learning_rate": 6.484297457543693e-07, "loss": 0.0003, "step": 208490 }, { "epoch": 1.705033323792779, "grad_norm": 0.033009402453899384, "learning_rate": 6.480783284513892e-07, "loss": 0.0011, "step": 208500 }, { "epoch": 1.7051150999713784, "grad_norm": 0.015958517789840698, "learning_rate": 6.47726999800593e-07, "loss": 0.0002, "step": 208510 }, { "epoch": 1.7051968761499774, "grad_norm": 0.03182225674390793, "learning_rate": 6.473757598091363e-07, "loss": 0.0005, "step": 208520 }, { "epoch": 1.7052786523285768, "grad_norm": 0.007279432378709316, "learning_rate": 6.470246084841719e-07, "loss": 0.0029, "step": 208530 }, { "epoch": 1.7053604285071757, "grad_norm": 0.004298021551221609, "learning_rate": 6.466735458328538e-07, "loss": 0.0009, "step": 208540 }, { "epoch": 1.7054422046857751, "grad_norm": 0.13253988325595856, "learning_rate": 6.46322571862335e-07, "loss": 0.0008, "step": 208550 }, { "epoch": 1.705523980864374, "grad_norm": 0.021259360015392303, "learning_rate": 6.459716865797644e-07, "loss": 0.0007, "step": 208560 }, { "epoch": 1.7056057570429735, "grad_norm": 0.01252605952322483, "learning_rate": 6.4562088999229e-07, "loss": 0.0007, "step": 208570 }, { "epoch": 1.7056875332215724, "grad_norm": 0.06903848797082901, "learning_rate": 6.452701821070562e-07, "loss": 0.001, "step": 208580 }, { "epoch": 1.7057693094001718, "grad_norm": 0.06852411478757858, "learning_rate": 6.449195629312094e-07, "loss": 0.0008, "step": 208590 }, { "epoch": 1.7058510855787707, "grad_norm": 0.13558436930179596, "learning_rate": 6.445690324718912e-07, "loss": 0.0021, "step": 208600 }, { "epoch": 1.7059328617573701, "grad_norm": 0.00405079172924161, "learning_rate": 6.442185907362419e-07, "loss": 0.001, "step": 208610 }, { "epoch": 1.7060146379359693, "grad_norm": 0.029673395678400993, "learning_rate": 6.438682377313993e-07, "loss": 0.0008, "step": 208620 }, { "epoch": 1.7060964141145685, "grad_norm": 0.01353352889418602, "learning_rate": 6.435179734645031e-07, "loss": 0.0011, "step": 208630 }, { "epoch": 1.7061781902931676, "grad_norm": 0.009069659747183323, "learning_rate": 6.431677979426875e-07, "loss": 0.0008, "step": 208640 }, { "epoch": 1.7062599664717668, "grad_norm": 0.0031392250675708055, "learning_rate": 6.428177111730843e-07, "loss": 0.0005, "step": 208650 }, { "epoch": 1.706341742650366, "grad_norm": 0.03365061432123184, "learning_rate": 6.424677131628254e-07, "loss": 0.0006, "step": 208660 }, { "epoch": 1.7064235188289651, "grad_norm": 0.028199750930070877, "learning_rate": 6.421178039190418e-07, "loss": 0.0005, "step": 208670 }, { "epoch": 1.7065052950075643, "grad_norm": 0.005030372645705938, "learning_rate": 6.41767983448861e-07, "loss": 0.0007, "step": 208680 }, { "epoch": 1.7065870711861635, "grad_norm": 0.04760827124118805, "learning_rate": 6.414182517594086e-07, "loss": 0.0006, "step": 208690 }, { "epoch": 1.7066688473647627, "grad_norm": 0.041369881480932236, "learning_rate": 6.410686088578083e-07, "loss": 0.0004, "step": 208700 }, { "epoch": 1.7067506235433618, "grad_norm": 0.02335025928914547, "learning_rate": 6.407190547511849e-07, "loss": 0.0008, "step": 208710 }, { "epoch": 1.706832399721961, "grad_norm": 0.05037085339426994, "learning_rate": 6.403695894466577e-07, "loss": 0.0013, "step": 208720 }, { "epoch": 1.7069141759005602, "grad_norm": 0.005922986660152674, "learning_rate": 6.400202129513455e-07, "loss": 0.0003, "step": 208730 }, { "epoch": 1.7069959520791593, "grad_norm": 0.04695367068052292, "learning_rate": 6.396709252723643e-07, "loss": 0.0006, "step": 208740 }, { "epoch": 1.7070777282577585, "grad_norm": 0.03771013766527176, "learning_rate": 6.393217264168317e-07, "loss": 0.0007, "step": 208750 }, { "epoch": 1.7071595044363577, "grad_norm": 0.007813679054379463, "learning_rate": 6.389726163918614e-07, "loss": 0.0003, "step": 208760 }, { "epoch": 1.7072412806149568, "grad_norm": 0.09133918583393097, "learning_rate": 6.386235952045627e-07, "loss": 0.0009, "step": 208770 }, { "epoch": 1.707323056793556, "grad_norm": 0.04445631429553032, "learning_rate": 6.382746628620462e-07, "loss": 0.0006, "step": 208780 }, { "epoch": 1.7074048329721552, "grad_norm": 0.03430072218179703, "learning_rate": 6.379258193714194e-07, "loss": 0.0006, "step": 208790 }, { "epoch": 1.7074866091507543, "grad_norm": 0.012166537344455719, "learning_rate": 6.3757706473979e-07, "loss": 0.0005, "step": 208800 }, { "epoch": 1.7075683853293535, "grad_norm": 0.019280046224594116, "learning_rate": 6.372283989742623e-07, "loss": 0.0008, "step": 208810 }, { "epoch": 1.7076501615079527, "grad_norm": 0.023313498124480247, "learning_rate": 6.368798220819378e-07, "loss": 0.0005, "step": 208820 }, { "epoch": 1.7077319376865518, "grad_norm": 0.04569125548005104, "learning_rate": 6.365313340699175e-07, "loss": 0.0009, "step": 208830 }, { "epoch": 1.7078137138651512, "grad_norm": 0.052347730845212936, "learning_rate": 6.361829349453013e-07, "loss": 0.001, "step": 208840 }, { "epoch": 1.7078954900437502, "grad_norm": 0.0012932957615703344, "learning_rate": 6.358346247151859e-07, "loss": 0.0005, "step": 208850 }, { "epoch": 1.7079772662223496, "grad_norm": 0.14521940052509308, "learning_rate": 6.354864033866664e-07, "loss": 0.0005, "step": 208860 }, { "epoch": 1.7080590424009485, "grad_norm": 0.019480863586068153, "learning_rate": 6.351382709668353e-07, "loss": 0.0006, "step": 208870 }, { "epoch": 1.708140818579548, "grad_norm": 0.09269510954618454, "learning_rate": 6.347902274627882e-07, "loss": 0.0005, "step": 208880 }, { "epoch": 1.7082225947581469, "grad_norm": 0.0467166043817997, "learning_rate": 6.344422728816108e-07, "loss": 0.0015, "step": 208890 }, { "epoch": 1.7083043709367463, "grad_norm": 0.02803868055343628, "learning_rate": 6.340944072303934e-07, "loss": 0.0006, "step": 208900 }, { "epoch": 1.7083861471153452, "grad_norm": 0.001282865065149963, "learning_rate": 6.337466305162204e-07, "loss": 0.0004, "step": 208910 }, { "epoch": 1.7084679232939446, "grad_norm": 0.033314306288957596, "learning_rate": 6.333989427461784e-07, "loss": 0.0007, "step": 208920 }, { "epoch": 1.7085496994725435, "grad_norm": 0.019619164988398552, "learning_rate": 6.330513439273495e-07, "loss": 0.0004, "step": 208930 }, { "epoch": 1.708631475651143, "grad_norm": 0.0506427176296711, "learning_rate": 6.327038340668145e-07, "loss": 0.0007, "step": 208940 }, { "epoch": 1.7087132518297419, "grad_norm": 0.0007475563324987888, "learning_rate": 6.323564131716508e-07, "loss": 0.0004, "step": 208950 }, { "epoch": 1.7087950280083413, "grad_norm": 0.0030253238510340452, "learning_rate": 6.320090812489388e-07, "loss": 0.0003, "step": 208960 }, { "epoch": 1.7088768041869402, "grad_norm": 0.043488629162311554, "learning_rate": 6.316618383057521e-07, "loss": 0.0006, "step": 208970 }, { "epoch": 1.7089585803655396, "grad_norm": 0.04386650398373604, "learning_rate": 6.313146843491641e-07, "loss": 0.0007, "step": 208980 }, { "epoch": 1.7090403565441386, "grad_norm": 0.08025168627500534, "learning_rate": 6.309676193862463e-07, "loss": 0.0008, "step": 208990 }, { "epoch": 1.709122132722738, "grad_norm": 0.06643962115049362, "learning_rate": 6.306206434240719e-07, "loss": 0.0005, "step": 209000 }, { "epoch": 1.7092039089013369, "grad_norm": 0.0027633272111415863, "learning_rate": 6.302737564697053e-07, "loss": 0.0004, "step": 209010 }, { "epoch": 1.7092856850799363, "grad_norm": 0.009632970206439495, "learning_rate": 6.299269585302137e-07, "loss": 0.0006, "step": 209020 }, { "epoch": 1.7093674612585352, "grad_norm": 0.059831421822309494, "learning_rate": 6.295802496126618e-07, "loss": 0.0005, "step": 209030 }, { "epoch": 1.7094492374371346, "grad_norm": 0.03520934656262398, "learning_rate": 6.292336297241131e-07, "loss": 0.0009, "step": 209040 }, { "epoch": 1.7095310136157338, "grad_norm": 0.028261378407478333, "learning_rate": 6.288870988716284e-07, "loss": 0.0005, "step": 209050 }, { "epoch": 1.709612789794333, "grad_norm": 0.0445261150598526, "learning_rate": 6.285406570622665e-07, "loss": 0.0006, "step": 209060 }, { "epoch": 1.7096945659729321, "grad_norm": 0.048017967492341995, "learning_rate": 6.281943043030836e-07, "loss": 0.0007, "step": 209070 }, { "epoch": 1.7097763421515313, "grad_norm": 0.03430822491645813, "learning_rate": 6.278480406011378e-07, "loss": 0.0007, "step": 209080 }, { "epoch": 1.7098581183301305, "grad_norm": 0.011986869387328625, "learning_rate": 6.275018659634807e-07, "loss": 0.0009, "step": 209090 }, { "epoch": 1.7099398945087296, "grad_norm": 0.05789092555642128, "learning_rate": 6.271557803971651e-07, "loss": 0.0005, "step": 209100 }, { "epoch": 1.7100216706873288, "grad_norm": 0.05161901190876961, "learning_rate": 6.26809783909239e-07, "loss": 0.0007, "step": 209110 }, { "epoch": 1.710103446865928, "grad_norm": 0.11666113138198853, "learning_rate": 6.264638765067555e-07, "loss": 0.0005, "step": 209120 }, { "epoch": 1.7101852230445271, "grad_norm": 0.1353820562362671, "learning_rate": 6.261180581967558e-07, "loss": 0.0007, "step": 209130 }, { "epoch": 1.7102669992231263, "grad_norm": 0.10062485188245773, "learning_rate": 6.257723289862872e-07, "loss": 0.0006, "step": 209140 }, { "epoch": 1.7103487754017255, "grad_norm": 0.11881854385137558, "learning_rate": 6.254266888823901e-07, "loss": 0.0015, "step": 209150 }, { "epoch": 1.7104305515803246, "grad_norm": 0.01902141235768795, "learning_rate": 6.250811378921085e-07, "loss": 0.0005, "step": 209160 }, { "epoch": 1.7105123277589238, "grad_norm": 0.08341769129037857, "learning_rate": 6.247356760224804e-07, "loss": 0.0008, "step": 209170 }, { "epoch": 1.710594103937523, "grad_norm": 0.02049371972680092, "learning_rate": 6.243903032805426e-07, "loss": 0.0009, "step": 209180 }, { "epoch": 1.7106758801161221, "grad_norm": 0.04933442175388336, "learning_rate": 6.240450196733305e-07, "loss": 0.0007, "step": 209190 }, { "epoch": 1.7107576562947213, "grad_norm": 0.010252062231302261, "learning_rate": 6.236998252078791e-07, "loss": 0.0005, "step": 209200 }, { "epoch": 1.7108394324733205, "grad_norm": 0.05114077776670456, "learning_rate": 6.233547198912193e-07, "loss": 0.0005, "step": 209210 }, { "epoch": 1.7109212086519197, "grad_norm": 0.04968268796801567, "learning_rate": 6.230097037303817e-07, "loss": 0.0004, "step": 209220 }, { "epoch": 1.7110029848305188, "grad_norm": 0.0033207067754119635, "learning_rate": 6.226647767323929e-07, "loss": 0.0009, "step": 209230 }, { "epoch": 1.711084761009118, "grad_norm": 0.0006055954727344215, "learning_rate": 6.223199389042828e-07, "loss": 0.0005, "step": 209240 }, { "epoch": 1.7111665371877172, "grad_norm": 0.031988032162189484, "learning_rate": 6.219751902530729e-07, "loss": 0.0006, "step": 209250 }, { "epoch": 1.7112483133663163, "grad_norm": 0.024545911699533463, "learning_rate": 6.216305307857873e-07, "loss": 0.0005, "step": 209260 }, { "epoch": 1.7113300895449157, "grad_norm": 0.009758833795785904, "learning_rate": 6.212859605094451e-07, "loss": 0.0004, "step": 209270 }, { "epoch": 1.7114118657235147, "grad_norm": 0.02615996263921261, "learning_rate": 6.209414794310687e-07, "loss": 0.0008, "step": 209280 }, { "epoch": 1.711493641902114, "grad_norm": 0.0030981791205704212, "learning_rate": 6.205970875576733e-07, "loss": 0.0006, "step": 209290 }, { "epoch": 1.711575418080713, "grad_norm": 0.01372522208839655, "learning_rate": 6.202527848962758e-07, "loss": 0.0005, "step": 209300 }, { "epoch": 1.7116571942593124, "grad_norm": 0.02071203663945198, "learning_rate": 6.199085714538872e-07, "loss": 0.0006, "step": 209310 }, { "epoch": 1.7117389704379113, "grad_norm": 0.023375844582915306, "learning_rate": 6.195644472375228e-07, "loss": 0.0007, "step": 209320 }, { "epoch": 1.7118207466165107, "grad_norm": 0.0025833144318312407, "learning_rate": 6.192204122541912e-07, "loss": 0.0004, "step": 209330 }, { "epoch": 1.7119025227951097, "grad_norm": 0.07024764269590378, "learning_rate": 6.188764665109004e-07, "loss": 0.0007, "step": 209340 }, { "epoch": 1.711984298973709, "grad_norm": 0.07049128413200378, "learning_rate": 6.185326100146565e-07, "loss": 0.0006, "step": 209350 }, { "epoch": 1.712066075152308, "grad_norm": 0.04931814968585968, "learning_rate": 6.181888427724663e-07, "loss": 0.0005, "step": 209360 }, { "epoch": 1.7121478513309074, "grad_norm": 0.015146099962294102, "learning_rate": 6.178451647913309e-07, "loss": 0.0006, "step": 209370 }, { "epoch": 1.7122296275095064, "grad_norm": 0.01978924870491028, "learning_rate": 6.175015760782504e-07, "loss": 0.0017, "step": 209380 }, { "epoch": 1.7123114036881057, "grad_norm": 0.0028210096061229706, "learning_rate": 6.171580766402246e-07, "loss": 0.0007, "step": 209390 }, { "epoch": 1.7123931798667047, "grad_norm": 0.010238946415483952, "learning_rate": 6.168146664842528e-07, "loss": 0.0005, "step": 209400 }, { "epoch": 1.712474956045304, "grad_norm": 0.057830166071653366, "learning_rate": 6.164713456173288e-07, "loss": 0.0006, "step": 209410 }, { "epoch": 1.712556732223903, "grad_norm": 0.04307573661208153, "learning_rate": 6.161281140464465e-07, "loss": 0.0008, "step": 209420 }, { "epoch": 1.7126385084025024, "grad_norm": 0.0014939028769731522, "learning_rate": 6.157849717785963e-07, "loss": 0.0006, "step": 209430 }, { "epoch": 1.7127202845811014, "grad_norm": 0.010456462390720844, "learning_rate": 6.154419188207716e-07, "loss": 0.0005, "step": 209440 }, { "epoch": 1.7128020607597008, "grad_norm": 0.11629895865917206, "learning_rate": 6.150989551799591e-07, "loss": 0.0011, "step": 209450 }, { "epoch": 1.7128838369382997, "grad_norm": 0.026855820789933205, "learning_rate": 6.147560808631447e-07, "loss": 0.0007, "step": 209460 }, { "epoch": 1.712965613116899, "grad_norm": 0.011011730879545212, "learning_rate": 6.144132958773136e-07, "loss": 0.0004, "step": 209470 }, { "epoch": 1.7130473892954983, "grad_norm": 0.01819445565342903, "learning_rate": 6.140706002294484e-07, "loss": 0.0005, "step": 209480 }, { "epoch": 1.7131291654740974, "grad_norm": 0.01691248081624508, "learning_rate": 6.1372799392653e-07, "loss": 0.0005, "step": 209490 }, { "epoch": 1.7132109416526966, "grad_norm": 0.031036360189318657, "learning_rate": 6.133854769755377e-07, "loss": 0.0004, "step": 209500 }, { "epoch": 1.7132927178312958, "grad_norm": 0.0008343122317455709, "learning_rate": 6.130430493834483e-07, "loss": 0.0011, "step": 209510 }, { "epoch": 1.713374494009895, "grad_norm": 0.00980412308126688, "learning_rate": 6.127007111572387e-07, "loss": 0.0003, "step": 209520 }, { "epoch": 1.7134562701884941, "grad_norm": 0.01433449424803257, "learning_rate": 6.123584623038814e-07, "loss": 0.0005, "step": 209530 }, { "epoch": 1.7135380463670933, "grad_norm": 0.036186639219522476, "learning_rate": 6.120163028303488e-07, "loss": 0.0019, "step": 209540 }, { "epoch": 1.7136198225456925, "grad_norm": 0.11122867465019226, "learning_rate": 6.116742327436104e-07, "loss": 0.0004, "step": 209550 }, { "epoch": 1.7137015987242916, "grad_norm": 0.0051432084292173386, "learning_rate": 6.113322520506354e-07, "loss": 0.0006, "step": 209560 }, { "epoch": 1.7137833749028908, "grad_norm": 0.03533480316400528, "learning_rate": 6.109903607583895e-07, "loss": 0.0014, "step": 209570 }, { "epoch": 1.71386515108149, "grad_norm": 0.0051587410271167755, "learning_rate": 6.106485588738381e-07, "loss": 0.0004, "step": 209580 }, { "epoch": 1.7139469272600891, "grad_norm": 0.02138562873005867, "learning_rate": 6.10306846403943e-07, "loss": 0.0003, "step": 209590 }, { "epoch": 1.7140287034386883, "grad_norm": 0.05644657835364342, "learning_rate": 6.099652233556658e-07, "loss": 0.001, "step": 209600 }, { "epoch": 1.7141104796172875, "grad_norm": 0.0538456067442894, "learning_rate": 6.096236897359653e-07, "loss": 0.0006, "step": 209610 }, { "epoch": 1.7141922557958866, "grad_norm": 0.05425791069865227, "learning_rate": 6.092822455517988e-07, "loss": 0.0011, "step": 209620 }, { "epoch": 1.7142740319744858, "grad_norm": 0.10023349523544312, "learning_rate": 6.089408908101207e-07, "loss": 0.0011, "step": 209630 }, { "epoch": 1.714355808153085, "grad_norm": 0.017774533480405807, "learning_rate": 6.085996255178872e-07, "loss": 0.0003, "step": 209640 }, { "epoch": 1.7144375843316841, "grad_norm": 0.03161930292844772, "learning_rate": 6.082584496820493e-07, "loss": 0.0007, "step": 209650 }, { "epoch": 1.7145193605102833, "grad_norm": 0.023555884137749672, "learning_rate": 6.079173633095558e-07, "loss": 0.0006, "step": 209660 }, { "epoch": 1.7146011366888825, "grad_norm": 0.004746720660477877, "learning_rate": 6.075763664073553e-07, "loss": 0.0009, "step": 209670 }, { "epoch": 1.7146829128674819, "grad_norm": 0.012059456668794155, "learning_rate": 6.072354589823948e-07, "loss": 0.0006, "step": 209680 }, { "epoch": 1.7147646890460808, "grad_norm": 0.006318903993815184, "learning_rate": 6.068946410416193e-07, "loss": 0.0009, "step": 209690 }, { "epoch": 1.7148464652246802, "grad_norm": 0.07879181951284409, "learning_rate": 6.065539125919706e-07, "loss": 0.0006, "step": 209700 }, { "epoch": 1.7149282414032792, "grad_norm": 0.0179763101041317, "learning_rate": 6.062132736403897e-07, "loss": 0.0006, "step": 209710 }, { "epoch": 1.7150100175818785, "grad_norm": 0.050877757370471954, "learning_rate": 6.058727241938161e-07, "loss": 0.0005, "step": 209720 }, { "epoch": 1.7150917937604775, "grad_norm": 0.004734465386718512, "learning_rate": 6.055322642591871e-07, "loss": 0.0005, "step": 209730 }, { "epoch": 1.7151735699390769, "grad_norm": 0.06301059573888779, "learning_rate": 6.051918938434376e-07, "loss": 0.0005, "step": 209740 }, { "epoch": 1.7152553461176758, "grad_norm": 0.013984475284814835, "learning_rate": 6.048516129535004e-07, "loss": 0.0007, "step": 209750 }, { "epoch": 1.7153371222962752, "grad_norm": 0.023715829476714134, "learning_rate": 6.045114215963094e-07, "loss": 0.0003, "step": 209760 }, { "epoch": 1.7154188984748742, "grad_norm": 0.002914899028837681, "learning_rate": 6.041713197787935e-07, "loss": 0.0006, "step": 209770 }, { "epoch": 1.7155006746534736, "grad_norm": 0.06574256718158722, "learning_rate": 6.038313075078811e-07, "loss": 0.0008, "step": 209780 }, { "epoch": 1.7155824508320725, "grad_norm": 0.004359145648777485, "learning_rate": 6.034913847904972e-07, "loss": 0.0003, "step": 209790 }, { "epoch": 1.715664227010672, "grad_norm": 0.03664366528391838, "learning_rate": 6.03151551633569e-07, "loss": 0.001, "step": 209800 }, { "epoch": 1.7157460031892708, "grad_norm": 0.021203605458140373, "learning_rate": 6.02811808044017e-07, "loss": 0.0008, "step": 209810 }, { "epoch": 1.7158277793678702, "grad_norm": 0.028496889397501945, "learning_rate": 6.024721540287632e-07, "loss": 0.0007, "step": 209820 }, { "epoch": 1.7159095555464692, "grad_norm": 0.030030833557248116, "learning_rate": 6.021325895947255e-07, "loss": 0.0008, "step": 209830 }, { "epoch": 1.7159913317250686, "grad_norm": 0.04170927777886391, "learning_rate": 6.01793114748822e-07, "loss": 0.001, "step": 209840 }, { "epoch": 1.7160731079036675, "grad_norm": 0.06490566581487656, "learning_rate": 6.014537294979678e-07, "loss": 0.0005, "step": 209850 }, { "epoch": 1.716154884082267, "grad_norm": 0.05915537476539612, "learning_rate": 6.011144338490765e-07, "loss": 0.0006, "step": 209860 }, { "epoch": 1.7162366602608659, "grad_norm": 0.0031939276959747076, "learning_rate": 6.007752278090589e-07, "loss": 0.0007, "step": 209870 }, { "epoch": 1.7163184364394652, "grad_norm": 0.03879522532224655, "learning_rate": 6.004361113848267e-07, "loss": 0.0009, "step": 209880 }, { "epoch": 1.7164002126180642, "grad_norm": 0.04176490753889084, "learning_rate": 6.000970845832871e-07, "loss": 0.0011, "step": 209890 }, { "epoch": 1.7164819887966636, "grad_norm": 0.04788553714752197, "learning_rate": 5.997581474113462e-07, "loss": 0.0009, "step": 209900 }, { "epoch": 1.7165637649752628, "grad_norm": 0.02882957085967064, "learning_rate": 5.994192998759074e-07, "loss": 0.0005, "step": 209910 }, { "epoch": 1.716645541153862, "grad_norm": 0.045842599123716354, "learning_rate": 5.990805419838752e-07, "loss": 0.0004, "step": 209920 }, { "epoch": 1.716727317332461, "grad_norm": 0.018966495990753174, "learning_rate": 5.987418737421502e-07, "loss": 0.0006, "step": 209930 }, { "epoch": 1.7168090935110603, "grad_norm": 0.036652822047472, "learning_rate": 5.984032951576308e-07, "loss": 0.0009, "step": 209940 }, { "epoch": 1.7168908696896594, "grad_norm": 0.03552411124110222, "learning_rate": 5.980648062372135e-07, "loss": 0.0008, "step": 209950 }, { "epoch": 1.7169726458682586, "grad_norm": 0.019631098955869675, "learning_rate": 5.977264069877948e-07, "loss": 0.0003, "step": 209960 }, { "epoch": 1.7170544220468578, "grad_norm": 0.015501008369028568, "learning_rate": 5.973880974162672e-07, "loss": 0.0003, "step": 209970 }, { "epoch": 1.717136198225457, "grad_norm": 0.03492750972509384, "learning_rate": 5.970498775295225e-07, "loss": 0.0004, "step": 209980 }, { "epoch": 1.717217974404056, "grad_norm": 0.008397197350859642, "learning_rate": 5.967117473344497e-07, "loss": 0.0006, "step": 209990 }, { "epoch": 1.7172997505826553, "grad_norm": 0.05860750377178192, "learning_rate": 5.963737068379388e-07, "loss": 0.0007, "step": 210000 }, { "epoch": 1.7173815267612544, "grad_norm": 0.010248304344713688, "learning_rate": 5.960357560468754e-07, "loss": 0.001, "step": 210010 }, { "epoch": 1.7174633029398536, "grad_norm": 0.020440949127078056, "learning_rate": 5.95697894968143e-07, "loss": 0.0005, "step": 210020 }, { "epoch": 1.7175450791184528, "grad_norm": 0.07738757133483887, "learning_rate": 5.953601236086237e-07, "loss": 0.0008, "step": 210030 }, { "epoch": 1.717626855297052, "grad_norm": 0.02498946525156498, "learning_rate": 5.950224419751998e-07, "loss": 0.0006, "step": 210040 }, { "epoch": 1.7177086314756511, "grad_norm": 0.06704992055892944, "learning_rate": 5.946848500747493e-07, "loss": 0.0005, "step": 210050 }, { "epoch": 1.7177904076542503, "grad_norm": 0.01056290976703167, "learning_rate": 5.943473479141493e-07, "loss": 0.0002, "step": 210060 }, { "epoch": 1.7178721838328495, "grad_norm": 0.07593987882137299, "learning_rate": 5.940099355002749e-07, "loss": 0.0014, "step": 210070 }, { "epoch": 1.7179539600114486, "grad_norm": 0.005043750628829002, "learning_rate": 5.936726128399994e-07, "loss": 0.0004, "step": 210080 }, { "epoch": 1.7180357361900478, "grad_norm": 0.003911886364221573, "learning_rate": 5.933353799401942e-07, "loss": 0.0032, "step": 210090 }, { "epoch": 1.718117512368647, "grad_norm": 0.0016194513300433755, "learning_rate": 5.929982368077297e-07, "loss": 0.0006, "step": 210100 }, { "epoch": 1.7181992885472464, "grad_norm": 0.024252891540527344, "learning_rate": 5.926611834494722e-07, "loss": 0.0006, "step": 210110 }, { "epoch": 1.7182810647258453, "grad_norm": 0.023368433117866516, "learning_rate": 5.923242198722895e-07, "loss": 0.0006, "step": 210120 }, { "epoch": 1.7183628409044447, "grad_norm": 0.14128313958644867, "learning_rate": 5.919873460830455e-07, "loss": 0.0015, "step": 210130 }, { "epoch": 1.7184446170830436, "grad_norm": 0.04021099582314491, "learning_rate": 5.916505620886021e-07, "loss": 0.0012, "step": 210140 }, { "epoch": 1.718526393261643, "grad_norm": 0.04490454122424126, "learning_rate": 5.913138678958192e-07, "loss": 0.0006, "step": 210150 }, { "epoch": 1.718608169440242, "grad_norm": 0.013391564600169659, "learning_rate": 5.909772635115573e-07, "loss": 0.0009, "step": 210160 }, { "epoch": 1.7186899456188414, "grad_norm": 0.05187717825174332, "learning_rate": 5.906407489426724e-07, "loss": 0.0005, "step": 210170 }, { "epoch": 1.7187717217974403, "grad_norm": 0.041836608201265335, "learning_rate": 5.903043241960193e-07, "loss": 0.0008, "step": 210180 }, { "epoch": 1.7188534979760397, "grad_norm": 0.04349629208445549, "learning_rate": 5.899679892784521e-07, "loss": 0.0004, "step": 210190 }, { "epoch": 1.7189352741546386, "grad_norm": 0.002357285236939788, "learning_rate": 5.896317441968213e-07, "loss": 0.0005, "step": 210200 }, { "epoch": 1.719017050333238, "grad_norm": 0.0017557218670845032, "learning_rate": 5.892955889579766e-07, "loss": 0.0007, "step": 210210 }, { "epoch": 1.719098826511837, "grad_norm": 0.03557297959923744, "learning_rate": 5.889595235687662e-07, "loss": 0.0013, "step": 210220 }, { "epoch": 1.7191806026904364, "grad_norm": 0.00710515258833766, "learning_rate": 5.886235480360358e-07, "loss": 0.0003, "step": 210230 }, { "epoch": 1.7192623788690353, "grad_norm": 0.010325298644602299, "learning_rate": 5.882876623666283e-07, "loss": 0.0003, "step": 210240 }, { "epoch": 1.7193441550476347, "grad_norm": 0.0008896707440726459, "learning_rate": 5.879518665673878e-07, "loss": 0.0002, "step": 210250 }, { "epoch": 1.7194259312262337, "grad_norm": 0.02031339518725872, "learning_rate": 5.876161606451547e-07, "loss": 0.0006, "step": 210260 }, { "epoch": 1.719507707404833, "grad_norm": 0.044346198439598083, "learning_rate": 5.87280544606767e-07, "loss": 0.0012, "step": 210270 }, { "epoch": 1.719589483583432, "grad_norm": 0.047001294791698456, "learning_rate": 5.869450184590598e-07, "loss": 0.0005, "step": 210280 }, { "epoch": 1.7196712597620314, "grad_norm": 0.007089319638907909, "learning_rate": 5.866095822088713e-07, "loss": 0.0006, "step": 210290 }, { "epoch": 1.7197530359406303, "grad_norm": 0.016861753538250923, "learning_rate": 5.862742358630325e-07, "loss": 0.0012, "step": 210300 }, { "epoch": 1.7198348121192297, "grad_norm": 0.07301554828882217, "learning_rate": 5.859389794283754e-07, "loss": 0.0007, "step": 210310 }, { "epoch": 1.719916588297829, "grad_norm": 0.03761151805520058, "learning_rate": 5.856038129117298e-07, "loss": 0.0006, "step": 210320 }, { "epoch": 1.719998364476428, "grad_norm": 0.049022406339645386, "learning_rate": 5.852687363199222e-07, "loss": 0.0007, "step": 210330 }, { "epoch": 1.7200801406550272, "grad_norm": 0.10230111330747604, "learning_rate": 5.849337496597785e-07, "loss": 0.0016, "step": 210340 }, { "epoch": 1.7201619168336264, "grad_norm": 0.01611807383596897, "learning_rate": 5.845988529381236e-07, "loss": 0.0007, "step": 210350 }, { "epoch": 1.7202436930122256, "grad_norm": 0.007482263725250959, "learning_rate": 5.842640461617782e-07, "loss": 0.0008, "step": 210360 }, { "epoch": 1.7203254691908247, "grad_norm": 0.004526758100837469, "learning_rate": 5.839293293375641e-07, "loss": 0.0004, "step": 210370 }, { "epoch": 1.720407245369424, "grad_norm": 0.0039594476111233234, "learning_rate": 5.835947024722993e-07, "loss": 0.0005, "step": 210380 }, { "epoch": 1.720489021548023, "grad_norm": 0.010136056691408157, "learning_rate": 5.832601655727999e-07, "loss": 0.0006, "step": 210390 }, { "epoch": 1.7205707977266222, "grad_norm": 0.016167975962162018, "learning_rate": 5.829257186458803e-07, "loss": 0.0005, "step": 210400 }, { "epoch": 1.7206525739052214, "grad_norm": 0.01217914279550314, "learning_rate": 5.825913616983553e-07, "loss": 0.001, "step": 210410 }, { "epoch": 1.7207343500838206, "grad_norm": 0.01995452307164669, "learning_rate": 5.822570947370348e-07, "loss": 0.0008, "step": 210420 }, { "epoch": 1.7208161262624198, "grad_norm": 0.04119870811700821, "learning_rate": 5.819229177687286e-07, "loss": 0.0006, "step": 210430 }, { "epoch": 1.720897902441019, "grad_norm": 0.041046757251024246, "learning_rate": 5.815888308002432e-07, "loss": 0.0009, "step": 210440 }, { "epoch": 1.720979678619618, "grad_norm": 0.011208518408238888, "learning_rate": 5.81254833838385e-07, "loss": 0.0006, "step": 210450 }, { "epoch": 1.7210614547982173, "grad_norm": 0.01990240439772606, "learning_rate": 5.809209268899574e-07, "loss": 0.001, "step": 210460 }, { "epoch": 1.7211432309768164, "grad_norm": 0.05606112629175186, "learning_rate": 5.805871099617627e-07, "loss": 0.0007, "step": 210470 }, { "epoch": 1.7212250071554156, "grad_norm": 0.019369080662727356, "learning_rate": 5.802533830606e-07, "loss": 0.0003, "step": 210480 }, { "epoch": 1.7213067833340148, "grad_norm": 0.004877640400081873, "learning_rate": 5.799197461932693e-07, "loss": 0.0003, "step": 210490 }, { "epoch": 1.721388559512614, "grad_norm": 0.03481120243668556, "learning_rate": 5.795861993665664e-07, "loss": 0.0007, "step": 210500 }, { "epoch": 1.721470335691213, "grad_norm": 0.018469715490937233, "learning_rate": 5.792527425872851e-07, "loss": 0.0005, "step": 210510 }, { "epoch": 1.7215521118698123, "grad_norm": 0.11465582996606827, "learning_rate": 5.78919375862218e-07, "loss": 0.0008, "step": 210520 }, { "epoch": 1.7216338880484114, "grad_norm": 0.04061312973499298, "learning_rate": 5.785860991981584e-07, "loss": 0.0006, "step": 210530 }, { "epoch": 1.7217156642270108, "grad_norm": 0.04152119159698486, "learning_rate": 5.782529126018932e-07, "loss": 0.0006, "step": 210540 }, { "epoch": 1.7217974404056098, "grad_norm": 0.040730591863393784, "learning_rate": 5.779198160802107e-07, "loss": 0.0013, "step": 210550 }, { "epoch": 1.7218792165842092, "grad_norm": 0.014379354193806648, "learning_rate": 5.775868096398962e-07, "loss": 0.0007, "step": 210560 }, { "epoch": 1.7219609927628081, "grad_norm": 0.0226247888058424, "learning_rate": 5.772538932877325e-07, "loss": 0.0009, "step": 210570 }, { "epoch": 1.7220427689414075, "grad_norm": 0.11359091103076935, "learning_rate": 5.769210670305025e-07, "loss": 0.0007, "step": 210580 }, { "epoch": 1.7221245451200065, "grad_norm": 0.007283340208232403, "learning_rate": 5.765883308749854e-07, "loss": 0.0011, "step": 210590 }, { "epoch": 1.7222063212986058, "grad_norm": 0.05481727793812752, "learning_rate": 5.762556848279588e-07, "loss": 0.0006, "step": 210600 }, { "epoch": 1.7222880974772048, "grad_norm": 0.06245895102620125, "learning_rate": 5.759231288962002e-07, "loss": 0.0004, "step": 210610 }, { "epoch": 1.7223698736558042, "grad_norm": 0.01920308731496334, "learning_rate": 5.755906630864838e-07, "loss": 0.0009, "step": 210620 }, { "epoch": 1.7224516498344031, "grad_norm": 0.00248571764677763, "learning_rate": 5.752582874055824e-07, "loss": 0.0004, "step": 210630 }, { "epoch": 1.7225334260130025, "grad_norm": 0.2240210622549057, "learning_rate": 5.749260018602648e-07, "loss": 0.0014, "step": 210640 }, { "epoch": 1.7226152021916015, "grad_norm": 0.038828808814287186, "learning_rate": 5.74593806457302e-07, "loss": 0.0009, "step": 210650 }, { "epoch": 1.7226969783702009, "grad_norm": 0.07352352142333984, "learning_rate": 5.742617012034613e-07, "loss": 0.0007, "step": 210660 }, { "epoch": 1.7227787545487998, "grad_norm": 0.0072259316220879555, "learning_rate": 5.739296861055066e-07, "loss": 0.0004, "step": 210670 }, { "epoch": 1.7228605307273992, "grad_norm": 0.029121972620487213, "learning_rate": 5.735977611702021e-07, "loss": 0.0008, "step": 210680 }, { "epoch": 1.7229423069059981, "grad_norm": 0.05491799861192703, "learning_rate": 5.732659264043094e-07, "loss": 0.0006, "step": 210690 }, { "epoch": 1.7230240830845975, "grad_norm": 0.036831557750701904, "learning_rate": 5.729341818145878e-07, "loss": 0.0008, "step": 210700 }, { "epoch": 1.7231058592631965, "grad_norm": 0.03325850889086723, "learning_rate": 5.726025274077956e-07, "loss": 0.0004, "step": 210710 }, { "epoch": 1.7231876354417959, "grad_norm": 0.17586804926395416, "learning_rate": 5.722709631906876e-07, "loss": 0.0009, "step": 210720 }, { "epoch": 1.7232694116203948, "grad_norm": 0.0018911209190264344, "learning_rate": 5.719394891700197e-07, "loss": 0.0006, "step": 210730 }, { "epoch": 1.7233511877989942, "grad_norm": 0.02075938507914543, "learning_rate": 5.71608105352544e-07, "loss": 0.0003, "step": 210740 }, { "epoch": 1.7234329639775934, "grad_norm": 0.046587977558374405, "learning_rate": 5.712768117450113e-07, "loss": 0.0004, "step": 210750 }, { "epoch": 1.7235147401561925, "grad_norm": 0.011214486323297024, "learning_rate": 5.709456083541682e-07, "loss": 0.0003, "step": 210760 }, { "epoch": 1.7235965163347917, "grad_norm": 0.08598244935274124, "learning_rate": 5.706144951867643e-07, "loss": 0.0007, "step": 210770 }, { "epoch": 1.7236782925133909, "grad_norm": 0.0023423139937222004, "learning_rate": 5.702834722495431e-07, "loss": 0.0003, "step": 210780 }, { "epoch": 1.72376006869199, "grad_norm": 0.046124543994665146, "learning_rate": 5.699525395492489e-07, "loss": 0.0009, "step": 210790 }, { "epoch": 1.7238418448705892, "grad_norm": 0.02896764501929283, "learning_rate": 5.696216970926222e-07, "loss": 0.0004, "step": 210800 }, { "epoch": 1.7239236210491884, "grad_norm": 0.00311940535902977, "learning_rate": 5.692909448864026e-07, "loss": 0.0012, "step": 210810 }, { "epoch": 1.7240053972277876, "grad_norm": 0.01383906789124012, "learning_rate": 5.68960282937328e-07, "loss": 0.0008, "step": 210820 }, { "epoch": 1.7240871734063867, "grad_norm": 0.05044190213084221, "learning_rate": 5.686297112521338e-07, "loss": 0.0008, "step": 210830 }, { "epoch": 1.724168949584986, "grad_norm": 0.04986192286014557, "learning_rate": 5.682992298375539e-07, "loss": 0.0004, "step": 210840 }, { "epoch": 1.724250725763585, "grad_norm": 0.0069183846935629845, "learning_rate": 5.679688387003218e-07, "loss": 0.0006, "step": 210850 }, { "epoch": 1.7243325019421842, "grad_norm": 0.03186054900288582, "learning_rate": 5.676385378471672e-07, "loss": 0.0006, "step": 210860 }, { "epoch": 1.7244142781207834, "grad_norm": 0.03125925362110138, "learning_rate": 5.673083272848179e-07, "loss": 0.0007, "step": 210870 }, { "epoch": 1.7244960542993826, "grad_norm": 0.1052277609705925, "learning_rate": 5.669782070200003e-07, "loss": 0.0008, "step": 210880 }, { "epoch": 1.7245778304779817, "grad_norm": 0.01459401287138462, "learning_rate": 5.666481770594412e-07, "loss": 0.0006, "step": 210890 }, { "epoch": 1.724659606656581, "grad_norm": 0.007081855554133654, "learning_rate": 5.66318237409862e-07, "loss": 0.0005, "step": 210900 }, { "epoch": 1.72474138283518, "grad_norm": 0.0034523915965110064, "learning_rate": 5.659883880779843e-07, "loss": 0.0011, "step": 210910 }, { "epoch": 1.7248231590137793, "grad_norm": 0.010885505937039852, "learning_rate": 5.656586290705274e-07, "loss": 0.0007, "step": 210920 }, { "epoch": 1.7249049351923784, "grad_norm": 0.029315518215298653, "learning_rate": 5.65328960394208e-07, "loss": 0.0007, "step": 210930 }, { "epoch": 1.7249867113709776, "grad_norm": 0.007792809512466192, "learning_rate": 5.649993820557431e-07, "loss": 0.0005, "step": 210940 }, { "epoch": 1.7250684875495768, "grad_norm": 0.03365973383188248, "learning_rate": 5.646698940618456e-07, "loss": 0.0003, "step": 210950 }, { "epoch": 1.725150263728176, "grad_norm": 0.051415689289569855, "learning_rate": 5.643404964192261e-07, "loss": 0.0006, "step": 210960 }, { "epoch": 1.7252320399067753, "grad_norm": 0.005835509393364191, "learning_rate": 5.640111891345978e-07, "loss": 0.0001, "step": 210970 }, { "epoch": 1.7253138160853743, "grad_norm": 0.006100264377892017, "learning_rate": 5.636819722146675e-07, "loss": 0.0006, "step": 210980 }, { "epoch": 1.7253955922639737, "grad_norm": 0.024638239294290543, "learning_rate": 5.633528456661408e-07, "loss": 0.0005, "step": 210990 }, { "epoch": 1.7254773684425726, "grad_norm": 0.046778738498687744, "learning_rate": 5.630238094957219e-07, "loss": 0.0004, "step": 211000 }, { "epoch": 1.725559144621172, "grad_norm": 0.012819968163967133, "learning_rate": 5.626948637101159e-07, "loss": 0.0012, "step": 211010 }, { "epoch": 1.725640920799771, "grad_norm": 0.030194668099284172, "learning_rate": 5.623660083160226e-07, "loss": 0.0007, "step": 211020 }, { "epoch": 1.7257226969783703, "grad_norm": 0.00631309486925602, "learning_rate": 5.620372433201404e-07, "loss": 0.0007, "step": 211030 }, { "epoch": 1.7258044731569693, "grad_norm": 0.0037511398550122976, "learning_rate": 5.617085687291668e-07, "loss": 0.0005, "step": 211040 }, { "epoch": 1.7258862493355687, "grad_norm": 0.015197954140603542, "learning_rate": 5.613799845497975e-07, "loss": 0.0004, "step": 211050 }, { "epoch": 1.7259680255141676, "grad_norm": 0.09119699895381927, "learning_rate": 5.61051490788726e-07, "loss": 0.0005, "step": 211060 }, { "epoch": 1.726049801692767, "grad_norm": 0.06727687269449234, "learning_rate": 5.607230874526432e-07, "loss": 0.0006, "step": 211070 }, { "epoch": 1.726131577871366, "grad_norm": 0.04191076010465622, "learning_rate": 5.603947745482391e-07, "loss": 0.0007, "step": 211080 }, { "epoch": 1.7262133540499653, "grad_norm": 0.006215307861566544, "learning_rate": 5.600665520822029e-07, "loss": 0.0003, "step": 211090 }, { "epoch": 1.7262951302285643, "grad_norm": 0.016511259600520134, "learning_rate": 5.597384200612199e-07, "loss": 0.0005, "step": 211100 }, { "epoch": 1.7263769064071637, "grad_norm": 0.02110254392027855, "learning_rate": 5.594103784919747e-07, "loss": 0.0006, "step": 211110 }, { "epoch": 1.7264586825857626, "grad_norm": 0.03210601210594177, "learning_rate": 5.590824273811485e-07, "loss": 0.0018, "step": 211120 }, { "epoch": 1.726540458764362, "grad_norm": 0.009825285524129868, "learning_rate": 5.587545667354239e-07, "loss": 0.0006, "step": 211130 }, { "epoch": 1.726622234942961, "grad_norm": 0.04695611447095871, "learning_rate": 5.584267965614792e-07, "loss": 0.0006, "step": 211140 }, { "epoch": 1.7267040111215604, "grad_norm": 0.007863178849220276, "learning_rate": 5.580991168659905e-07, "loss": 0.0005, "step": 211150 }, { "epoch": 1.7267857873001593, "grad_norm": 0.04884995147585869, "learning_rate": 5.577715276556334e-07, "loss": 0.0005, "step": 211160 }, { "epoch": 1.7268675634787587, "grad_norm": 0.04714261367917061, "learning_rate": 5.574440289370813e-07, "loss": 0.0009, "step": 211170 }, { "epoch": 1.7269493396573579, "grad_norm": 0.0525147020816803, "learning_rate": 5.571166207170053e-07, "loss": 0.0004, "step": 211180 }, { "epoch": 1.727031115835957, "grad_norm": 0.0031036813743412495, "learning_rate": 5.567893030020755e-07, "loss": 0.0006, "step": 211190 }, { "epoch": 1.7271128920145562, "grad_norm": 0.043541669845581055, "learning_rate": 5.564620757989575e-07, "loss": 0.0007, "step": 211200 }, { "epoch": 1.7271946681931554, "grad_norm": 0.02145337127149105, "learning_rate": 5.561349391143206e-07, "loss": 0.0005, "step": 211210 }, { "epoch": 1.7272764443717545, "grad_norm": 0.004191593732684851, "learning_rate": 5.558078929548267e-07, "loss": 0.0005, "step": 211220 }, { "epoch": 1.7273582205503537, "grad_norm": 0.020445039495825768, "learning_rate": 5.554809373271386e-07, "loss": 0.0002, "step": 211230 }, { "epoch": 1.7274399967289529, "grad_norm": 0.007712217513471842, "learning_rate": 5.551540722379156e-07, "loss": 0.001, "step": 211240 }, { "epoch": 1.727521772907552, "grad_norm": 0.02458806522190571, "learning_rate": 5.548272976938179e-07, "loss": 0.0005, "step": 211250 }, { "epoch": 1.7276035490861512, "grad_norm": 0.02353096194565296, "learning_rate": 5.545006137015013e-07, "loss": 0.0003, "step": 211260 }, { "epoch": 1.7276853252647504, "grad_norm": 0.16403506696224213, "learning_rate": 5.54174020267621e-07, "loss": 0.0009, "step": 211270 }, { "epoch": 1.7277671014433496, "grad_norm": 0.023856407031416893, "learning_rate": 5.538475173988295e-07, "loss": 0.0008, "step": 211280 }, { "epoch": 1.7278488776219487, "grad_norm": 0.03251275420188904, "learning_rate": 5.535211051017781e-07, "loss": 0.0004, "step": 211290 }, { "epoch": 1.7279306538005479, "grad_norm": 0.025002866983413696, "learning_rate": 5.531947833831158e-07, "loss": 0.0006, "step": 211300 }, { "epoch": 1.728012429979147, "grad_norm": 0.03022778034210205, "learning_rate": 5.528685522494904e-07, "loss": 0.0007, "step": 211310 }, { "epoch": 1.7280942061577462, "grad_norm": 0.014809338375926018, "learning_rate": 5.525424117075467e-07, "loss": 0.0006, "step": 211320 }, { "epoch": 1.7281759823363454, "grad_norm": 0.07661006599664688, "learning_rate": 5.522163617639297e-07, "loss": 0.0006, "step": 211330 }, { "epoch": 1.7282577585149446, "grad_norm": 0.10561441630125046, "learning_rate": 5.518904024252813e-07, "loss": 0.0008, "step": 211340 }, { "epoch": 1.7283395346935437, "grad_norm": 0.029641440138220787, "learning_rate": 5.515645336982406e-07, "loss": 0.0005, "step": 211350 }, { "epoch": 1.728421310872143, "grad_norm": 0.007336243987083435, "learning_rate": 5.512387555894456e-07, "loss": 0.0013, "step": 211360 }, { "epoch": 1.728503087050742, "grad_norm": 0.0026743861380964518, "learning_rate": 5.509130681055341e-07, "loss": 0.0003, "step": 211370 }, { "epoch": 1.7285848632293412, "grad_norm": 0.0021813628263771534, "learning_rate": 5.505874712531394e-07, "loss": 0.0007, "step": 211380 }, { "epoch": 1.7286666394079404, "grad_norm": 0.13434790074825287, "learning_rate": 5.502619650388952e-07, "loss": 0.0006, "step": 211390 }, { "epoch": 1.7287484155865398, "grad_norm": 0.020116517320275307, "learning_rate": 5.499365494694314e-07, "loss": 0.0004, "step": 211400 }, { "epoch": 1.7288301917651387, "grad_norm": 0.1702236831188202, "learning_rate": 5.496112245513774e-07, "loss": 0.0019, "step": 211410 }, { "epoch": 1.7289119679437381, "grad_norm": 0.05365516245365143, "learning_rate": 5.492859902913599e-07, "loss": 0.0007, "step": 211420 }, { "epoch": 1.728993744122337, "grad_norm": 0.03178683668375015, "learning_rate": 5.489608466960044e-07, "loss": 0.0004, "step": 211430 }, { "epoch": 1.7290755203009365, "grad_norm": 0.037582624703645706, "learning_rate": 5.486357937719339e-07, "loss": 0.0007, "step": 211440 }, { "epoch": 1.7291572964795354, "grad_norm": 0.056306950747966766, "learning_rate": 5.483108315257718e-07, "loss": 0.0008, "step": 211450 }, { "epoch": 1.7292390726581348, "grad_norm": 0.0018956671701744199, "learning_rate": 5.479859599641358e-07, "loss": 0.0003, "step": 211460 }, { "epoch": 1.7293208488367338, "grad_norm": 0.06413526087999344, "learning_rate": 5.476611790936454e-07, "loss": 0.0005, "step": 211470 }, { "epoch": 1.7294026250153332, "grad_norm": 0.08012039214372635, "learning_rate": 5.473364889209143e-07, "loss": 0.0003, "step": 211480 }, { "epoch": 1.729484401193932, "grad_norm": 0.12821029126644135, "learning_rate": 5.470118894525588e-07, "loss": 0.0034, "step": 211490 }, { "epoch": 1.7295661773725315, "grad_norm": 0.004051438067108393, "learning_rate": 5.466873806951917e-07, "loss": 0.0003, "step": 211500 }, { "epoch": 1.7296479535511304, "grad_norm": 0.02063995972275734, "learning_rate": 5.463629626554218e-07, "loss": 0.0004, "step": 211510 }, { "epoch": 1.7297297297297298, "grad_norm": 0.004708837252110243, "learning_rate": 5.460386353398583e-07, "loss": 0.0008, "step": 211520 }, { "epoch": 1.7298115059083288, "grad_norm": 0.08541599661111832, "learning_rate": 5.457143987551089e-07, "loss": 0.0007, "step": 211530 }, { "epoch": 1.7298932820869282, "grad_norm": 0.018911074846982956, "learning_rate": 5.453902529077771e-07, "loss": 0.0009, "step": 211540 }, { "epoch": 1.729975058265527, "grad_norm": 0.07592667639255524, "learning_rate": 5.450661978044669e-07, "loss": 0.0009, "step": 211550 }, { "epoch": 1.7300568344441265, "grad_norm": 0.03953532874584198, "learning_rate": 5.447422334517788e-07, "loss": 0.001, "step": 211560 }, { "epoch": 1.7301386106227254, "grad_norm": 0.0533769428730011, "learning_rate": 5.444183598563135e-07, "loss": 0.0007, "step": 211570 }, { "epoch": 1.7302203868013248, "grad_norm": 0.05404309555888176, "learning_rate": 5.440945770246681e-07, "loss": 0.0004, "step": 211580 }, { "epoch": 1.7303021629799238, "grad_norm": 0.06344011425971985, "learning_rate": 5.437708849634376e-07, "loss": 0.0011, "step": 211590 }, { "epoch": 1.7303839391585232, "grad_norm": 0.01431242749094963, "learning_rate": 5.43447283679216e-07, "loss": 0.0005, "step": 211600 }, { "epoch": 1.7304657153371223, "grad_norm": 0.011040161363780499, "learning_rate": 5.431237731785966e-07, "loss": 0.0007, "step": 211610 }, { "epoch": 1.7305474915157215, "grad_norm": 0.05218668654561043, "learning_rate": 5.428003534681686e-07, "loss": 0.0005, "step": 211620 }, { "epoch": 1.7306292676943207, "grad_norm": 0.021925339475274086, "learning_rate": 5.424770245545203e-07, "loss": 0.0006, "step": 211630 }, { "epoch": 1.7307110438729199, "grad_norm": 0.05915200710296631, "learning_rate": 5.421537864442383e-07, "loss": 0.0006, "step": 211640 }, { "epoch": 1.730792820051519, "grad_norm": 0.09348554909229279, "learning_rate": 5.418306391439071e-07, "loss": 0.0007, "step": 211650 }, { "epoch": 1.7308745962301182, "grad_norm": 0.048184026032686234, "learning_rate": 5.415075826601096e-07, "loss": 0.001, "step": 211660 }, { "epoch": 1.7309563724087174, "grad_norm": 0.009961300529539585, "learning_rate": 5.411846169994267e-07, "loss": 0.0003, "step": 211670 }, { "epoch": 1.7310381485873165, "grad_norm": 0.04843764752149582, "learning_rate": 5.408617421684376e-07, "loss": 0.0005, "step": 211680 }, { "epoch": 1.7311199247659157, "grad_norm": 0.05103189870715141, "learning_rate": 5.40538958173718e-07, "loss": 0.0011, "step": 211690 }, { "epoch": 1.7312017009445149, "grad_norm": 0.009172914549708366, "learning_rate": 5.402162650218462e-07, "loss": 0.0003, "step": 211700 }, { "epoch": 1.731283477123114, "grad_norm": 0.04183577001094818, "learning_rate": 5.398936627193934e-07, "loss": 0.0007, "step": 211710 }, { "epoch": 1.7313652533017132, "grad_norm": 0.009597720578312874, "learning_rate": 5.395711512729329e-07, "loss": 0.0003, "step": 211720 }, { "epoch": 1.7314470294803124, "grad_norm": 0.05595460534095764, "learning_rate": 5.392487306890321e-07, "loss": 0.0007, "step": 211730 }, { "epoch": 1.7315288056589115, "grad_norm": 0.057351287454366684, "learning_rate": 5.389264009742617e-07, "loss": 0.0002, "step": 211740 }, { "epoch": 1.7316105818375107, "grad_norm": 0.12541179358959198, "learning_rate": 5.386041621351868e-07, "loss": 0.0015, "step": 211750 }, { "epoch": 1.7316923580161099, "grad_norm": 0.08485454320907593, "learning_rate": 5.382820141783712e-07, "loss": 0.0009, "step": 211760 }, { "epoch": 1.731774134194709, "grad_norm": 0.012739350087940693, "learning_rate": 5.379599571103783e-07, "loss": 0.0003, "step": 211770 }, { "epoch": 1.7318559103733082, "grad_norm": 0.011854995042085648, "learning_rate": 5.376379909377671e-07, "loss": 0.0002, "step": 211780 }, { "epoch": 1.7319376865519074, "grad_norm": 0.00527982460334897, "learning_rate": 5.373161156670981e-07, "loss": 0.0008, "step": 211790 }, { "epoch": 1.7320194627305066, "grad_norm": 0.08446852117776871, "learning_rate": 5.36994331304927e-07, "loss": 0.0008, "step": 211800 }, { "epoch": 1.7321012389091057, "grad_norm": 0.016031906008720398, "learning_rate": 5.366726378578075e-07, "loss": 0.0011, "step": 211810 }, { "epoch": 1.732183015087705, "grad_norm": 0.033660437911748886, "learning_rate": 5.363510353322959e-07, "loss": 0.001, "step": 211820 }, { "epoch": 1.7322647912663043, "grad_norm": 0.045696161687374115, "learning_rate": 5.360295237349417e-07, "loss": 0.0006, "step": 211830 }, { "epoch": 1.7323465674449032, "grad_norm": 0.002129939151927829, "learning_rate": 5.35708103072295e-07, "loss": 0.0004, "step": 211840 }, { "epoch": 1.7324283436235026, "grad_norm": 0.011094421148300171, "learning_rate": 5.353867733509016e-07, "loss": 0.0009, "step": 211850 }, { "epoch": 1.7325101198021016, "grad_norm": 0.1025220975279808, "learning_rate": 5.350655345773098e-07, "loss": 0.0008, "step": 211860 }, { "epoch": 1.732591895980701, "grad_norm": 0.043290212750434875, "learning_rate": 5.347443867580621e-07, "loss": 0.001, "step": 211870 }, { "epoch": 1.7326736721593, "grad_norm": 0.033998746424913406, "learning_rate": 5.344233298997015e-07, "loss": 0.0007, "step": 211880 }, { "epoch": 1.7327554483378993, "grad_norm": 0.06343897432088852, "learning_rate": 5.341023640087667e-07, "loss": 0.0004, "step": 211890 }, { "epoch": 1.7328372245164982, "grad_norm": 0.008983614854514599, "learning_rate": 5.337814890917969e-07, "loss": 0.0005, "step": 211900 }, { "epoch": 1.7329190006950976, "grad_norm": 0.03943852335214615, "learning_rate": 5.334607051553286e-07, "loss": 0.0008, "step": 211910 }, { "epoch": 1.7330007768736966, "grad_norm": 0.0072098178789019585, "learning_rate": 5.331400122058966e-07, "loss": 0.0009, "step": 211920 }, { "epoch": 1.733082553052296, "grad_norm": 0.04514174908399582, "learning_rate": 5.328194102500323e-07, "loss": 0.0007, "step": 211930 }, { "epoch": 1.733164329230895, "grad_norm": 0.008847849443554878, "learning_rate": 5.324988992942687e-07, "loss": 0.0006, "step": 211940 }, { "epoch": 1.7332461054094943, "grad_norm": 0.10555411875247955, "learning_rate": 5.321784793451335e-07, "loss": 0.0015, "step": 211950 }, { "epoch": 1.7333278815880933, "grad_norm": 0.047560565173625946, "learning_rate": 5.318581504091547e-07, "loss": 0.0006, "step": 211960 }, { "epoch": 1.7334096577666926, "grad_norm": 0.14757421612739563, "learning_rate": 5.31537912492856e-07, "loss": 0.0007, "step": 211970 }, { "epoch": 1.7334914339452916, "grad_norm": 0.07462812215089798, "learning_rate": 5.312177656027634e-07, "loss": 0.0005, "step": 211980 }, { "epoch": 1.733573210123891, "grad_norm": 0.010255946777760983, "learning_rate": 5.308977097453977e-07, "loss": 0.0005, "step": 211990 }, { "epoch": 1.73365498630249, "grad_norm": 0.023787526413798332, "learning_rate": 5.305777449272776e-07, "loss": 0.0005, "step": 212000 }, { "epoch": 1.7337367624810893, "grad_norm": 0.021058283746242523, "learning_rate": 5.302578711549222e-07, "loss": 0.0006, "step": 212010 }, { "epoch": 1.7338185386596883, "grad_norm": 0.21620623767375946, "learning_rate": 5.29938088434847e-07, "loss": 0.001, "step": 212020 }, { "epoch": 1.7339003148382877, "grad_norm": 0.03988874331116676, "learning_rate": 5.296183967735663e-07, "loss": 0.0009, "step": 212030 }, { "epoch": 1.7339820910168868, "grad_norm": 0.013824253343045712, "learning_rate": 5.292987961775925e-07, "loss": 0.0006, "step": 212040 }, { "epoch": 1.734063867195486, "grad_norm": 0.0431017242372036, "learning_rate": 5.289792866534355e-07, "loss": 0.0005, "step": 212050 }, { "epoch": 1.7341456433740852, "grad_norm": 0.07716850191354752, "learning_rate": 5.286598682076055e-07, "loss": 0.0012, "step": 212060 }, { "epoch": 1.7342274195526843, "grad_norm": 0.11549952626228333, "learning_rate": 5.283405408466086e-07, "loss": 0.0009, "step": 212070 }, { "epoch": 1.7343091957312835, "grad_norm": 0.10207700729370117, "learning_rate": 5.280213045769495e-07, "loss": 0.0008, "step": 212080 }, { "epoch": 1.7343909719098827, "grad_norm": 0.023098863661289215, "learning_rate": 5.277021594051307e-07, "loss": 0.0002, "step": 212090 }, { "epoch": 1.7344727480884818, "grad_norm": 0.1689005345106125, "learning_rate": 5.273831053376549e-07, "loss": 0.0015, "step": 212100 }, { "epoch": 1.734554524267081, "grad_norm": 0.027732333168387413, "learning_rate": 5.270641423810208e-07, "loss": 0.0025, "step": 212110 }, { "epoch": 1.7346363004456802, "grad_norm": 0.023303890600800514, "learning_rate": 5.267452705417258e-07, "loss": 0.0009, "step": 212120 }, { "epoch": 1.7347180766242793, "grad_norm": 0.092391237616539, "learning_rate": 5.264264898262661e-07, "loss": 0.0004, "step": 212130 }, { "epoch": 1.7347998528028785, "grad_norm": 0.04084574803709984, "learning_rate": 5.261078002411346e-07, "loss": 0.0009, "step": 212140 }, { "epoch": 1.7348816289814777, "grad_norm": 0.02969193644821644, "learning_rate": 5.25789201792824e-07, "loss": 0.0009, "step": 212150 }, { "epoch": 1.7349634051600769, "grad_norm": 0.051396243274211884, "learning_rate": 5.25470694487824e-07, "loss": 0.0006, "step": 212160 }, { "epoch": 1.735045181338676, "grad_norm": 0.02206563390791416, "learning_rate": 5.251522783326224e-07, "loss": 0.0005, "step": 212170 }, { "epoch": 1.7351269575172752, "grad_norm": 0.04516742005944252, "learning_rate": 5.248339533337071e-07, "loss": 0.0005, "step": 212180 }, { "epoch": 1.7352087336958744, "grad_norm": 0.045905835926532745, "learning_rate": 5.245157194975614e-07, "loss": 0.0003, "step": 212190 }, { "epoch": 1.7352905098744735, "grad_norm": 0.0014072065241634846, "learning_rate": 5.24197576830669e-07, "loss": 0.0006, "step": 212200 }, { "epoch": 1.7353722860530727, "grad_norm": 0.039929185062646866, "learning_rate": 5.238795253395085e-07, "loss": 0.0007, "step": 212210 }, { "epoch": 1.7354540622316719, "grad_norm": 0.04515977203845978, "learning_rate": 5.235615650305614e-07, "loss": 0.0007, "step": 212220 }, { "epoch": 1.735535838410271, "grad_norm": 0.026627451181411743, "learning_rate": 5.232436959103043e-07, "loss": 0.0007, "step": 212230 }, { "epoch": 1.7356176145888704, "grad_norm": 0.0585031658411026, "learning_rate": 5.22925917985212e-07, "loss": 0.0008, "step": 212240 }, { "epoch": 1.7356993907674694, "grad_norm": 0.01892547868192196, "learning_rate": 5.226082312617575e-07, "loss": 0.0012, "step": 212250 }, { "epoch": 1.7357811669460688, "grad_norm": 0.0027293418534100056, "learning_rate": 5.22290635746413e-07, "loss": 0.0003, "step": 212260 }, { "epoch": 1.7358629431246677, "grad_norm": 0.006490516010671854, "learning_rate": 5.219731314456477e-07, "loss": 0.0006, "step": 212270 }, { "epoch": 1.735944719303267, "grad_norm": 0.0241699256002903, "learning_rate": 5.216557183659299e-07, "loss": 0.0005, "step": 212280 }, { "epoch": 1.736026495481866, "grad_norm": 0.003467107657343149, "learning_rate": 5.213383965137243e-07, "loss": 0.0004, "step": 212290 }, { "epoch": 1.7361082716604654, "grad_norm": 0.10051281750202179, "learning_rate": 5.210211658954967e-07, "loss": 0.0006, "step": 212300 }, { "epoch": 1.7361900478390644, "grad_norm": 0.08007475733757019, "learning_rate": 5.207040265177093e-07, "loss": 0.0022, "step": 212310 }, { "epoch": 1.7362718240176638, "grad_norm": 0.015210255980491638, "learning_rate": 5.203869783868209e-07, "loss": 0.0004, "step": 212320 }, { "epoch": 1.7363536001962627, "grad_norm": 0.07088744640350342, "learning_rate": 5.200700215092907e-07, "loss": 0.0007, "step": 212330 }, { "epoch": 1.7364353763748621, "grad_norm": 0.031429097056388855, "learning_rate": 5.197531558915764e-07, "loss": 0.0008, "step": 212340 }, { "epoch": 1.736517152553461, "grad_norm": 0.032813575118780136, "learning_rate": 5.19436381540132e-07, "loss": 0.0003, "step": 212350 }, { "epoch": 1.7365989287320605, "grad_norm": 0.08008228987455368, "learning_rate": 5.191196984614105e-07, "loss": 0.0006, "step": 212360 }, { "epoch": 1.7366807049106594, "grad_norm": 0.005568745546042919, "learning_rate": 5.188031066618631e-07, "loss": 0.001, "step": 212370 }, { "epoch": 1.7367624810892588, "grad_norm": 0.045544445514678955, "learning_rate": 5.184866061479388e-07, "loss": 0.0005, "step": 212380 }, { "epoch": 1.7368442572678577, "grad_norm": 0.012825828976929188, "learning_rate": 5.181701969260849e-07, "loss": 0.0005, "step": 212390 }, { "epoch": 1.7369260334464571, "grad_norm": 0.019068149849772453, "learning_rate": 5.178538790027476e-07, "loss": 0.0005, "step": 212400 }, { "epoch": 1.737007809625056, "grad_norm": 0.03088955394923687, "learning_rate": 5.17537652384369e-07, "loss": 0.0007, "step": 212410 }, { "epoch": 1.7370895858036555, "grad_norm": 0.0399368554353714, "learning_rate": 5.172215170773926e-07, "loss": 0.0005, "step": 212420 }, { "epoch": 1.7371713619822544, "grad_norm": 0.016512855887413025, "learning_rate": 5.169054730882578e-07, "loss": 0.0009, "step": 212430 }, { "epoch": 1.7372531381608538, "grad_norm": 0.026927078142762184, "learning_rate": 5.165895204234023e-07, "loss": 0.0009, "step": 212440 }, { "epoch": 1.7373349143394528, "grad_norm": 0.01830398291349411, "learning_rate": 5.16273659089262e-07, "loss": 0.0005, "step": 212450 }, { "epoch": 1.7374166905180521, "grad_norm": 0.02244427800178528, "learning_rate": 5.159578890922728e-07, "loss": 0.0004, "step": 212460 }, { "epoch": 1.7374984666966513, "grad_norm": 0.0012773318449035287, "learning_rate": 5.156422104388659e-07, "loss": 0.0017, "step": 212470 }, { "epoch": 1.7375802428752505, "grad_norm": 0.002359124133363366, "learning_rate": 5.153266231354725e-07, "loss": 0.0002, "step": 212480 }, { "epoch": 1.7376620190538496, "grad_norm": 0.025583267211914062, "learning_rate": 5.150111271885211e-07, "loss": 0.0007, "step": 212490 }, { "epoch": 1.7377437952324488, "grad_norm": 0.007603846490383148, "learning_rate": 5.146957226044385e-07, "loss": 0.0018, "step": 212500 }, { "epoch": 1.737825571411048, "grad_norm": 0.04698578268289566, "learning_rate": 5.143804093896499e-07, "loss": 0.0008, "step": 212510 }, { "epoch": 1.7379073475896472, "grad_norm": 0.06858784705400467, "learning_rate": 5.140651875505786e-07, "loss": 0.0006, "step": 212520 }, { "epoch": 1.7379891237682463, "grad_norm": 0.04355073720216751, "learning_rate": 5.137500570936443e-07, "loss": 0.0007, "step": 212530 }, { "epoch": 1.7380708999468455, "grad_norm": 0.024104256182909012, "learning_rate": 5.134350180252695e-07, "loss": 0.0003, "step": 212540 }, { "epoch": 1.7381526761254447, "grad_norm": 0.0033253037836402655, "learning_rate": 5.131200703518702e-07, "loss": 0.0029, "step": 212550 }, { "epoch": 1.7382344523040438, "grad_norm": 0.10514599829912186, "learning_rate": 5.128052140798617e-07, "loss": 0.0007, "step": 212560 }, { "epoch": 1.738316228482643, "grad_norm": 0.057668715715408325, "learning_rate": 5.124904492156579e-07, "loss": 0.0005, "step": 212570 }, { "epoch": 1.7383980046612422, "grad_norm": 0.008893102407455444, "learning_rate": 5.12175775765672e-07, "loss": 0.0006, "step": 212580 }, { "epoch": 1.7384797808398413, "grad_norm": 0.04248974844813347, "learning_rate": 5.118611937363138e-07, "loss": 0.0017, "step": 212590 }, { "epoch": 1.7385615570184405, "grad_norm": 0.013690480031073093, "learning_rate": 5.11546703133991e-07, "loss": 0.0004, "step": 212600 }, { "epoch": 1.7386433331970397, "grad_norm": 0.0016641895053908229, "learning_rate": 5.112323039651101e-07, "loss": 0.0005, "step": 212610 }, { "epoch": 1.7387251093756388, "grad_norm": 0.04535284265875816, "learning_rate": 5.10917996236076e-07, "loss": 0.0006, "step": 212620 }, { "epoch": 1.738806885554238, "grad_norm": 0.0039252047426998615, "learning_rate": 5.106037799532909e-07, "loss": 0.0004, "step": 212630 }, { "epoch": 1.7388886617328372, "grad_norm": 0.00343586434610188, "learning_rate": 5.102896551231562e-07, "loss": 0.0002, "step": 212640 }, { "epoch": 1.7389704379114364, "grad_norm": 0.020229633897542953, "learning_rate": 5.099756217520696e-07, "loss": 0.0006, "step": 212650 }, { "epoch": 1.7390522140900355, "grad_norm": 0.12165974825620651, "learning_rate": 5.096616798464305e-07, "loss": 0.0006, "step": 212660 }, { "epoch": 1.739133990268635, "grad_norm": 0.002299159299582243, "learning_rate": 5.093478294126325e-07, "loss": 0.0009, "step": 212670 }, { "epoch": 1.7392157664472339, "grad_norm": 0.07226867973804474, "learning_rate": 5.090340704570701e-07, "loss": 0.0009, "step": 212680 }, { "epoch": 1.7392975426258332, "grad_norm": 0.0009121893672272563, "learning_rate": 5.087204029861326e-07, "loss": 0.0007, "step": 212690 }, { "epoch": 1.7393793188044322, "grad_norm": 0.009858405217528343, "learning_rate": 5.08406827006212e-07, "loss": 0.0007, "step": 212700 }, { "epoch": 1.7394610949830316, "grad_norm": 0.02022344432771206, "learning_rate": 5.080933425236955e-07, "loss": 0.001, "step": 212710 }, { "epoch": 1.7395428711616305, "grad_norm": 0.05363445729017258, "learning_rate": 5.077799495449692e-07, "loss": 0.0011, "step": 212720 }, { "epoch": 1.73962464734023, "grad_norm": 0.010727456770837307, "learning_rate": 5.074666480764162e-07, "loss": 0.0006, "step": 212730 }, { "epoch": 1.7397064235188289, "grad_norm": 0.03629537671804428, "learning_rate": 5.071534381244198e-07, "loss": 0.0006, "step": 212740 }, { "epoch": 1.7397881996974283, "grad_norm": 0.03310488536953926, "learning_rate": 5.068403196953592e-07, "loss": 0.0004, "step": 212750 }, { "epoch": 1.7398699758760272, "grad_norm": 0.011428878642618656, "learning_rate": 5.065272927956139e-07, "loss": 0.0006, "step": 212760 }, { "epoch": 1.7399517520546266, "grad_norm": 0.05506521835923195, "learning_rate": 5.062143574315593e-07, "loss": 0.0022, "step": 212770 }, { "epoch": 1.7400335282332255, "grad_norm": 0.014663324691355228, "learning_rate": 5.059015136095719e-07, "loss": 0.0003, "step": 212780 }, { "epoch": 1.740115304411825, "grad_norm": 0.027353888377547264, "learning_rate": 5.055887613360233e-07, "loss": 0.0008, "step": 212790 }, { "epoch": 1.7401970805904239, "grad_norm": 0.03536815196275711, "learning_rate": 5.052761006172852e-07, "loss": 0.0005, "step": 212800 }, { "epoch": 1.7402788567690233, "grad_norm": 0.008587421849370003, "learning_rate": 5.049635314597251e-07, "loss": 0.0003, "step": 212810 }, { "epoch": 1.7403606329476222, "grad_norm": 0.023462822660803795, "learning_rate": 5.04651053869713e-07, "loss": 0.0006, "step": 212820 }, { "epoch": 1.7404424091262216, "grad_norm": 0.01563052088022232, "learning_rate": 5.043386678536128e-07, "loss": 0.0012, "step": 212830 }, { "epoch": 1.7405241853048206, "grad_norm": 0.023285528644919395, "learning_rate": 5.04026373417788e-07, "loss": 0.0003, "step": 212840 }, { "epoch": 1.74060596148342, "grad_norm": 0.012875864282250404, "learning_rate": 5.03714170568601e-07, "loss": 0.0005, "step": 212850 }, { "epoch": 1.740687737662019, "grad_norm": 0.020084049552679062, "learning_rate": 5.034020593124106e-07, "loss": 0.0003, "step": 212860 }, { "epoch": 1.7407695138406183, "grad_norm": 0.03239684924483299, "learning_rate": 5.030900396555754e-07, "loss": 0.0006, "step": 212870 }, { "epoch": 1.7408512900192175, "grad_norm": 0.1319034993648529, "learning_rate": 5.027781116044517e-07, "loss": 0.0011, "step": 212880 }, { "epoch": 1.7409330661978166, "grad_norm": 0.018612651154398918, "learning_rate": 5.024662751653919e-07, "loss": 0.0006, "step": 212890 }, { "epoch": 1.7410148423764158, "grad_norm": 0.012996191158890724, "learning_rate": 5.021545303447511e-07, "loss": 0.0008, "step": 212900 }, { "epoch": 1.741096618555015, "grad_norm": 0.02637871354818344, "learning_rate": 5.018428771488786e-07, "loss": 0.0008, "step": 212910 }, { "epoch": 1.7411783947336141, "grad_norm": 0.005871583707630634, "learning_rate": 5.015313155841228e-07, "loss": 0.0005, "step": 212920 }, { "epoch": 1.7412601709122133, "grad_norm": 0.034665822982788086, "learning_rate": 5.012198456568295e-07, "loss": 0.001, "step": 212930 }, { "epoch": 1.7413419470908125, "grad_norm": 0.14051595330238342, "learning_rate": 5.009084673733461e-07, "loss": 0.0006, "step": 212940 }, { "epoch": 1.7414237232694116, "grad_norm": 0.06345433741807938, "learning_rate": 5.005971807400145e-07, "loss": 0.0007, "step": 212950 }, { "epoch": 1.7415054994480108, "grad_norm": 0.024912556633353233, "learning_rate": 5.002859857631748e-07, "loss": 0.0007, "step": 212960 }, { "epoch": 1.74158727562661, "grad_norm": 0.07134335488080978, "learning_rate": 4.999748824491679e-07, "loss": 0.001, "step": 212970 }, { "epoch": 1.7416690518052091, "grad_norm": 0.003600466065108776, "learning_rate": 4.9966387080433e-07, "loss": 0.0005, "step": 212980 }, { "epoch": 1.7417508279838083, "grad_norm": 0.024764329195022583, "learning_rate": 4.99352950834997e-07, "loss": 0.0007, "step": 212990 }, { "epoch": 1.7418326041624075, "grad_norm": 0.02144446223974228, "learning_rate": 4.990421225475034e-07, "loss": 0.0006, "step": 213000 }, { "epoch": 1.7419143803410067, "grad_norm": 0.02486341819167137, "learning_rate": 4.987313859481785e-07, "loss": 0.0002, "step": 213010 }, { "epoch": 1.7419961565196058, "grad_norm": 0.057639606297016144, "learning_rate": 4.984207410433556e-07, "loss": 0.0005, "step": 213020 }, { "epoch": 1.742077932698205, "grad_norm": 0.053722232580184937, "learning_rate": 4.981101878393613e-07, "loss": 0.002, "step": 213030 }, { "epoch": 1.7421597088768042, "grad_norm": 0.0635237842798233, "learning_rate": 4.977997263425216e-07, "loss": 0.0003, "step": 213040 }, { "epoch": 1.7422414850554033, "grad_norm": 0.09402138739824295, "learning_rate": 4.974893565591598e-07, "loss": 0.0002, "step": 213050 }, { "epoch": 1.7423232612340025, "grad_norm": 0.05073641985654831, "learning_rate": 4.97179078495601e-07, "loss": 0.0007, "step": 213060 }, { "epoch": 1.7424050374126017, "grad_norm": 0.0455222986638546, "learning_rate": 4.968688921581643e-07, "loss": 0.0007, "step": 213070 }, { "epoch": 1.7424868135912008, "grad_norm": 0.03959387540817261, "learning_rate": 4.965587975531688e-07, "loss": 0.001, "step": 213080 }, { "epoch": 1.7425685897698, "grad_norm": 0.02788730151951313, "learning_rate": 4.96248794686931e-07, "loss": 0.0011, "step": 213090 }, { "epoch": 1.7426503659483994, "grad_norm": 0.013609671033918858, "learning_rate": 4.959388835657663e-07, "loss": 0.001, "step": 213100 }, { "epoch": 1.7427321421269983, "grad_norm": 0.0010308632627129555, "learning_rate": 4.956290641959871e-07, "loss": 0.0006, "step": 213110 }, { "epoch": 1.7428139183055977, "grad_norm": 0.05996045842766762, "learning_rate": 4.953193365839054e-07, "loss": 0.0005, "step": 213120 }, { "epoch": 1.7428956944841967, "grad_norm": 0.030896764248609543, "learning_rate": 4.950097007358306e-07, "loss": 0.0008, "step": 213130 }, { "epoch": 1.742977470662796, "grad_norm": 0.1334441602230072, "learning_rate": 4.947001566580695e-07, "loss": 0.0006, "step": 213140 }, { "epoch": 1.743059246841395, "grad_norm": 0.047497671097517014, "learning_rate": 4.943907043569285e-07, "loss": 0.0005, "step": 213150 }, { "epoch": 1.7431410230199944, "grad_norm": 0.08918236196041107, "learning_rate": 4.940813438387115e-07, "loss": 0.0008, "step": 213160 }, { "epoch": 1.7432227991985934, "grad_norm": 0.03713309019804001, "learning_rate": 4.937720751097203e-07, "loss": 0.0005, "step": 213170 }, { "epoch": 1.7433045753771927, "grad_norm": 0.009858128614723682, "learning_rate": 4.934628981762535e-07, "loss": 0.0006, "step": 213180 }, { "epoch": 1.7433863515557917, "grad_norm": 0.017395205795764923, "learning_rate": 4.931538130446112e-07, "loss": 0.0015, "step": 213190 }, { "epoch": 1.743468127734391, "grad_norm": 0.012919985689222813, "learning_rate": 4.9284481972109e-07, "loss": 0.0008, "step": 213200 }, { "epoch": 1.74354990391299, "grad_norm": 0.0243269894272089, "learning_rate": 4.925359182119827e-07, "loss": 0.0008, "step": 213210 }, { "epoch": 1.7436316800915894, "grad_norm": 0.021161654964089394, "learning_rate": 4.92227108523583e-07, "loss": 0.0005, "step": 213220 }, { "epoch": 1.7437134562701884, "grad_norm": 0.021857190877199173, "learning_rate": 4.91918390662181e-07, "loss": 0.0003, "step": 213230 }, { "epoch": 1.7437952324487878, "grad_norm": 0.0546647273004055, "learning_rate": 4.916097646340656e-07, "loss": 0.0006, "step": 213240 }, { "epoch": 1.7438770086273867, "grad_norm": 0.0068012503907084465, "learning_rate": 4.91301230445524e-07, "loss": 0.0008, "step": 213250 }, { "epoch": 1.743958784805986, "grad_norm": 0.0010526047553867102, "learning_rate": 4.909927881028403e-07, "loss": 0.0003, "step": 213260 }, { "epoch": 1.744040560984585, "grad_norm": 0.02023734711110592, "learning_rate": 4.906844376122999e-07, "loss": 0.0004, "step": 213270 }, { "epoch": 1.7441223371631844, "grad_norm": 0.16663798689842224, "learning_rate": 4.903761789801825e-07, "loss": 0.0008, "step": 213280 }, { "epoch": 1.7442041133417834, "grad_norm": 0.007506683934479952, "learning_rate": 4.900680122127677e-07, "loss": 0.0005, "step": 213290 }, { "epoch": 1.7442858895203828, "grad_norm": 0.05418878048658371, "learning_rate": 4.897599373163325e-07, "loss": 0.0005, "step": 213300 }, { "epoch": 1.744367665698982, "grad_norm": 0.09540930390357971, "learning_rate": 4.894519542971549e-07, "loss": 0.0004, "step": 213310 }, { "epoch": 1.744449441877581, "grad_norm": 0.100633405148983, "learning_rate": 4.891440631615069e-07, "loss": 0.0008, "step": 213320 }, { "epoch": 1.7445312180561803, "grad_norm": 0.03612590581178665, "learning_rate": 4.888362639156613e-07, "loss": 0.0008, "step": 213330 }, { "epoch": 1.7446129942347794, "grad_norm": 0.01536958385258913, "learning_rate": 4.885285565658876e-07, "loss": 0.0005, "step": 213340 }, { "epoch": 1.7446947704133786, "grad_norm": 0.03565377742052078, "learning_rate": 4.88220941118454e-07, "loss": 0.0004, "step": 213350 }, { "epoch": 1.7447765465919778, "grad_norm": 0.0026258158031851053, "learning_rate": 4.879134175796279e-07, "loss": 0.0004, "step": 213360 }, { "epoch": 1.744858322770577, "grad_norm": 0.019308963790535927, "learning_rate": 4.876059859556726e-07, "loss": 0.0009, "step": 213370 }, { "epoch": 1.7449400989491761, "grad_norm": 0.02139059454202652, "learning_rate": 4.872986462528506e-07, "loss": 0.0008, "step": 213380 }, { "epoch": 1.7450218751277753, "grad_norm": 0.003214652184396982, "learning_rate": 4.869913984774239e-07, "loss": 0.0004, "step": 213390 }, { "epoch": 1.7451036513063745, "grad_norm": 0.0014730868861079216, "learning_rate": 4.866842426356511e-07, "loss": 0.0006, "step": 213400 }, { "epoch": 1.7451854274849736, "grad_norm": 0.04361503943800926, "learning_rate": 4.863771787337884e-07, "loss": 0.0002, "step": 213410 }, { "epoch": 1.7452672036635728, "grad_norm": 0.06971085071563721, "learning_rate": 4.860702067780909e-07, "loss": 0.0009, "step": 213420 }, { "epoch": 1.745348979842172, "grad_norm": 0.044516634196043015, "learning_rate": 4.857633267748136e-07, "loss": 0.0005, "step": 213430 }, { "epoch": 1.7454307560207711, "grad_norm": 0.006722358055412769, "learning_rate": 4.854565387302063e-07, "loss": 0.0007, "step": 213440 }, { "epoch": 1.7455125321993703, "grad_norm": 0.03215064853429794, "learning_rate": 4.851498426505185e-07, "loss": 0.0007, "step": 213450 }, { "epoch": 1.7455943083779695, "grad_norm": 0.012233119457960129, "learning_rate": 4.848432385419987e-07, "loss": 0.0005, "step": 213460 }, { "epoch": 1.7456760845565686, "grad_norm": 0.016885118559002876, "learning_rate": 4.845367264108924e-07, "loss": 0.0006, "step": 213470 }, { "epoch": 1.7457578607351678, "grad_norm": 0.011356199160218239, "learning_rate": 4.842303062634429e-07, "loss": 0.0004, "step": 213480 }, { "epoch": 1.745839636913767, "grad_norm": 0.025105206295847893, "learning_rate": 4.83923978105893e-07, "loss": 0.0004, "step": 213490 }, { "epoch": 1.7459214130923661, "grad_norm": 0.006548046600073576, "learning_rate": 4.836177419444815e-07, "loss": 0.0004, "step": 213500 }, { "epoch": 1.7460031892709653, "grad_norm": 0.04011572524905205, "learning_rate": 4.833115977854491e-07, "loss": 0.001, "step": 213510 }, { "epoch": 1.7460849654495645, "grad_norm": 0.02568749524652958, "learning_rate": 4.830055456350302e-07, "loss": 0.0008, "step": 213520 }, { "epoch": 1.7461667416281639, "grad_norm": 0.03557606413960457, "learning_rate": 4.826995854994598e-07, "loss": 0.0002, "step": 213530 }, { "epoch": 1.7462485178067628, "grad_norm": 0.10233408212661743, "learning_rate": 4.823937173849702e-07, "loss": 0.0005, "step": 213540 }, { "epoch": 1.7463302939853622, "grad_norm": 0.06141836568713188, "learning_rate": 4.820879412977936e-07, "loss": 0.0005, "step": 213550 }, { "epoch": 1.7464120701639612, "grad_norm": 0.05203387886285782, "learning_rate": 4.817822572441578e-07, "loss": 0.0006, "step": 213560 }, { "epoch": 1.7464938463425606, "grad_norm": 0.030500195920467377, "learning_rate": 4.8147666523029e-07, "loss": 0.0005, "step": 213570 }, { "epoch": 1.7465756225211595, "grad_norm": 0.09464865922927856, "learning_rate": 4.811711652624151e-07, "loss": 0.0009, "step": 213580 }, { "epoch": 1.746657398699759, "grad_norm": 0.008386613801121712, "learning_rate": 4.808657573467573e-07, "loss": 0.0003, "step": 213590 }, { "epoch": 1.7467391748783578, "grad_norm": 0.07124590128660202, "learning_rate": 4.80560441489537e-07, "loss": 0.0004, "step": 213600 }, { "epoch": 1.7468209510569572, "grad_norm": 0.046008091419935226, "learning_rate": 4.802552176969738e-07, "loss": 0.0006, "step": 213610 }, { "epoch": 1.7469027272355562, "grad_norm": 0.0053662401624023914, "learning_rate": 4.799500859752853e-07, "loss": 0.0009, "step": 213620 }, { "epoch": 1.7469845034141556, "grad_norm": 0.03129817172884941, "learning_rate": 4.796450463306884e-07, "loss": 0.0003, "step": 213630 }, { "epoch": 1.7470662795927545, "grad_norm": 0.10140752047300339, "learning_rate": 4.793400987693958e-07, "loss": 0.0006, "step": 213640 }, { "epoch": 1.747148055771354, "grad_norm": 0.040343400090932846, "learning_rate": 4.790352432976203e-07, "loss": 0.0004, "step": 213650 }, { "epoch": 1.7472298319499528, "grad_norm": 0.011048424988985062, "learning_rate": 4.787304799215709e-07, "loss": 0.0004, "step": 213660 }, { "epoch": 1.7473116081285522, "grad_norm": 0.0026394911110401154, "learning_rate": 4.784258086474574e-07, "loss": 0.0008, "step": 213670 }, { "epoch": 1.7473933843071512, "grad_norm": 0.07792454212903976, "learning_rate": 4.781212294814858e-07, "loss": 0.0008, "step": 213680 }, { "epoch": 1.7474751604857506, "grad_norm": 0.036303889006376266, "learning_rate": 4.778167424298602e-07, "loss": 0.0004, "step": 213690 }, { "epoch": 1.7475569366643495, "grad_norm": 0.04447534307837486, "learning_rate": 4.775123474987831e-07, "loss": 0.0006, "step": 213700 }, { "epoch": 1.747638712842949, "grad_norm": 0.08806213736534119, "learning_rate": 4.772080446944561e-07, "loss": 0.0006, "step": 213710 }, { "epoch": 1.7477204890215479, "grad_norm": 0.0858229249715805, "learning_rate": 4.76903834023077e-07, "loss": 0.0007, "step": 213720 }, { "epoch": 1.7478022652001473, "grad_norm": 0.0011550483759492636, "learning_rate": 4.765997154908436e-07, "loss": 0.0002, "step": 213730 }, { "epoch": 1.7478840413787464, "grad_norm": 0.004152436275035143, "learning_rate": 4.7629568910395e-07, "loss": 0.0004, "step": 213740 }, { "epoch": 1.7479658175573456, "grad_norm": 0.010259650647640228, "learning_rate": 4.7599175486859107e-07, "loss": 0.0003, "step": 213750 }, { "epoch": 1.7480475937359448, "grad_norm": 0.06032314896583557, "learning_rate": 4.756879127909569e-07, "loss": 0.0007, "step": 213760 }, { "epoch": 1.748129369914544, "grad_norm": 0.06199444457888603, "learning_rate": 4.753841628772382e-07, "loss": 0.0008, "step": 213770 }, { "epoch": 1.748211146093143, "grad_norm": 0.004651614464819431, "learning_rate": 4.750805051336205e-07, "loss": 0.0006, "step": 213780 }, { "epoch": 1.7482929222717423, "grad_norm": 0.01805395446717739, "learning_rate": 4.747769395662921e-07, "loss": 0.0006, "step": 213790 }, { "epoch": 1.7483746984503414, "grad_norm": 0.045187342911958694, "learning_rate": 4.7447346618143595e-07, "loss": 0.0005, "step": 213800 }, { "epoch": 1.7484564746289406, "grad_norm": 0.02229049988090992, "learning_rate": 4.741700849852332e-07, "loss": 0.0006, "step": 213810 }, { "epoch": 1.7485382508075398, "grad_norm": 0.06471867859363556, "learning_rate": 4.73866795983865e-07, "loss": 0.0005, "step": 213820 }, { "epoch": 1.748620026986139, "grad_norm": 0.09185317158699036, "learning_rate": 4.7356359918350915e-07, "loss": 0.0005, "step": 213830 }, { "epoch": 1.7487018031647381, "grad_norm": 0.021808870136737823, "learning_rate": 4.732604945903424e-07, "loss": 0.0006, "step": 213840 }, { "epoch": 1.7487835793433373, "grad_norm": 0.015628760680556297, "learning_rate": 4.729574822105387e-07, "loss": 0.0012, "step": 213850 }, { "epoch": 1.7488653555219364, "grad_norm": 0.010933032259345055, "learning_rate": 4.7265456205026984e-07, "loss": 0.001, "step": 213860 }, { "epoch": 1.7489471317005356, "grad_norm": 0.006528758909553289, "learning_rate": 4.72351734115708e-07, "loss": 0.0008, "step": 213870 }, { "epoch": 1.7490289078791348, "grad_norm": 0.029287803918123245, "learning_rate": 4.720489984130222e-07, "loss": 0.0008, "step": 213880 }, { "epoch": 1.749110684057734, "grad_norm": 0.031535208225250244, "learning_rate": 4.717463549483786e-07, "loss": 0.0005, "step": 213890 }, { "epoch": 1.7491924602363331, "grad_norm": 0.018759125843644142, "learning_rate": 4.714438037279417e-07, "loss": 0.0006, "step": 213900 }, { "epoch": 1.7492742364149323, "grad_norm": 0.02203252911567688, "learning_rate": 4.7114134475787667e-07, "loss": 0.0008, "step": 213910 }, { "epoch": 1.7493560125935315, "grad_norm": 0.0008380596409551799, "learning_rate": 4.7083897804434286e-07, "loss": 0.0006, "step": 213920 }, { "epoch": 1.7494377887721306, "grad_norm": 0.0712834894657135, "learning_rate": 4.7053670359350157e-07, "loss": 0.0007, "step": 213930 }, { "epoch": 1.7495195649507298, "grad_norm": 0.03484271466732025, "learning_rate": 4.702345214115084e-07, "loss": 0.0002, "step": 213940 }, { "epoch": 1.749601341129329, "grad_norm": 0.03757186233997345, "learning_rate": 4.699324315045206e-07, "loss": 0.0006, "step": 213950 }, { "epoch": 1.7496831173079284, "grad_norm": 0.024369968101382256, "learning_rate": 4.696304338786917e-07, "loss": 0.0005, "step": 213960 }, { "epoch": 1.7497648934865273, "grad_norm": 0.019255822524428368, "learning_rate": 4.6932852854017275e-07, "loss": 0.0006, "step": 213970 }, { "epoch": 1.7498466696651267, "grad_norm": 0.0007060236530378461, "learning_rate": 4.6902671549511337e-07, "loss": 0.0009, "step": 213980 }, { "epoch": 1.7499284458437256, "grad_norm": 0.025242209434509277, "learning_rate": 4.6872499474966417e-07, "loss": 0.0014, "step": 213990 }, { "epoch": 1.750010222022325, "grad_norm": 0.08791934698820114, "learning_rate": 4.6842336630996963e-07, "loss": 0.0006, "step": 214000 }, { "epoch": 1.750091998200924, "grad_norm": 0.0020679279696196318, "learning_rate": 4.6812183018217493e-07, "loss": 0.0008, "step": 214010 }, { "epoch": 1.7501737743795234, "grad_norm": 0.06240114942193031, "learning_rate": 4.678203863724212e-07, "loss": 0.0008, "step": 214020 }, { "epoch": 1.7502555505581223, "grad_norm": 0.021585548296570778, "learning_rate": 4.675190348868508e-07, "loss": 0.0003, "step": 214030 }, { "epoch": 1.7503373267367217, "grad_norm": 0.03271359205245972, "learning_rate": 4.672177757316021e-07, "loss": 0.0005, "step": 214040 }, { "epoch": 1.7504191029153207, "grad_norm": 0.002470935694873333, "learning_rate": 4.6691660891281133e-07, "loss": 0.0013, "step": 214050 }, { "epoch": 1.75050087909392, "grad_norm": 0.013678806833922863, "learning_rate": 4.6661553443661465e-07, "loss": 0.0035, "step": 214060 }, { "epoch": 1.750582655272519, "grad_norm": 0.002303092274814844, "learning_rate": 4.663145523091439e-07, "loss": 0.0003, "step": 214070 }, { "epoch": 1.7506644314511184, "grad_norm": 0.06678339093923569, "learning_rate": 4.6601366253653134e-07, "loss": 0.0006, "step": 214080 }, { "epoch": 1.7507462076297173, "grad_norm": 0.020962538197636604, "learning_rate": 4.6571286512490546e-07, "loss": 0.0003, "step": 214090 }, { "epoch": 1.7508279838083167, "grad_norm": 0.021274615079164505, "learning_rate": 4.6541216008039347e-07, "loss": 0.0007, "step": 214100 }, { "epoch": 1.7509097599869157, "grad_norm": 0.026302779093384743, "learning_rate": 4.651115474091222e-07, "loss": 0.0007, "step": 214110 }, { "epoch": 1.750991536165515, "grad_norm": 0.028679542243480682, "learning_rate": 4.6481102711721513e-07, "loss": 0.0004, "step": 214120 }, { "epoch": 1.751073312344114, "grad_norm": 0.020868176594376564, "learning_rate": 4.64510599210794e-07, "loss": 0.0004, "step": 214130 }, { "epoch": 1.7511550885227134, "grad_norm": 0.06267914921045303, "learning_rate": 4.6421026369597775e-07, "loss": 0.0003, "step": 214140 }, { "epoch": 1.7512368647013123, "grad_norm": 0.08561267703771591, "learning_rate": 4.6391002057888603e-07, "loss": 0.0003, "step": 214150 }, { "epoch": 1.7513186408799117, "grad_norm": 0.06853088736534119, "learning_rate": 4.6360986986563385e-07, "loss": 0.0003, "step": 214160 }, { "epoch": 1.751400417058511, "grad_norm": 0.03467622026801109, "learning_rate": 4.633098115623369e-07, "loss": 0.0017, "step": 214170 }, { "epoch": 1.75148219323711, "grad_norm": 0.005084151867777109, "learning_rate": 4.630098456751059e-07, "loss": 0.0004, "step": 214180 }, { "epoch": 1.7515639694157092, "grad_norm": 0.040181759744882584, "learning_rate": 4.627099722100525e-07, "loss": 0.0004, "step": 214190 }, { "epoch": 1.7516457455943084, "grad_norm": 0.030166789889335632, "learning_rate": 4.6241019117328534e-07, "loss": 0.0006, "step": 214200 }, { "epoch": 1.7517275217729076, "grad_norm": 0.016926484182476997, "learning_rate": 4.621105025709105e-07, "loss": 0.0004, "step": 214210 }, { "epoch": 1.7518092979515067, "grad_norm": 0.0277058407664299, "learning_rate": 4.6181090640903256e-07, "loss": 0.0008, "step": 214220 }, { "epoch": 1.751891074130106, "grad_norm": 0.016246315091848373, "learning_rate": 4.6151140269375615e-07, "loss": 0.0005, "step": 214230 }, { "epoch": 1.751972850308705, "grad_norm": 0.04529081657528877, "learning_rate": 4.6121199143118134e-07, "loss": 0.0007, "step": 214240 }, { "epoch": 1.7520546264873043, "grad_norm": 0.02252158150076866, "learning_rate": 4.6091267262740823e-07, "loss": 0.0009, "step": 214250 }, { "epoch": 1.7521364026659034, "grad_norm": 0.0005880750250071287, "learning_rate": 4.60613446288532e-07, "loss": 0.0005, "step": 214260 }, { "epoch": 1.7522181788445026, "grad_norm": 0.020670130848884583, "learning_rate": 4.6031431242065106e-07, "loss": 0.0003, "step": 214270 }, { "epoch": 1.7522999550231018, "grad_norm": 0.03450706601142883, "learning_rate": 4.6001527102985723e-07, "loss": 0.0007, "step": 214280 }, { "epoch": 1.752381731201701, "grad_norm": 0.022783590480685234, "learning_rate": 4.5971632212224283e-07, "loss": 0.0004, "step": 214290 }, { "epoch": 1.7524635073803, "grad_norm": 0.004337949212640524, "learning_rate": 4.594174657038969e-07, "loss": 0.0001, "step": 214300 }, { "epoch": 1.7525452835588993, "grad_norm": 0.06088563799858093, "learning_rate": 4.5911870178090847e-07, "loss": 0.0003, "step": 214310 }, { "epoch": 1.7526270597374984, "grad_norm": 0.0046447692438960075, "learning_rate": 4.588200303593632e-07, "loss": 0.0004, "step": 214320 }, { "epoch": 1.7527088359160976, "grad_norm": 0.016558252274990082, "learning_rate": 4.585214514453451e-07, "loss": 0.0005, "step": 214330 }, { "epoch": 1.7527906120946968, "grad_norm": 0.007159022614359856, "learning_rate": 4.5822296504493546e-07, "loss": 0.0007, "step": 214340 }, { "epoch": 1.752872388273296, "grad_norm": 0.014208104461431503, "learning_rate": 4.5792457116421716e-07, "loss": 0.0008, "step": 214350 }, { "epoch": 1.7529541644518951, "grad_norm": 0.0206100195646286, "learning_rate": 4.576262698092665e-07, "loss": 0.0005, "step": 214360 }, { "epoch": 1.7530359406304943, "grad_norm": 0.032052937895059586, "learning_rate": 4.573280609861619e-07, "loss": 0.0003, "step": 214370 }, { "epoch": 1.7531177168090935, "grad_norm": 0.07698627561330795, "learning_rate": 4.5702994470097574e-07, "loss": 0.0007, "step": 214380 }, { "epoch": 1.7531994929876928, "grad_norm": 0.0030726222321391106, "learning_rate": 4.5673192095978316e-07, "loss": 0.0004, "step": 214390 }, { "epoch": 1.7532812691662918, "grad_norm": 0.030413512140512466, "learning_rate": 4.5643398976865427e-07, "loss": 0.0006, "step": 214400 }, { "epoch": 1.7533630453448912, "grad_norm": 0.0023584391456097364, "learning_rate": 4.5613615113365817e-07, "loss": 0.0006, "step": 214410 }, { "epoch": 1.7534448215234901, "grad_norm": 0.054089684039354324, "learning_rate": 4.558384050608622e-07, "loss": 0.001, "step": 214420 }, { "epoch": 1.7535265977020895, "grad_norm": 0.12634596228599548, "learning_rate": 4.55540751556332e-07, "loss": 0.0012, "step": 214430 }, { "epoch": 1.7536083738806885, "grad_norm": 0.1158435046672821, "learning_rate": 4.552431906261301e-07, "loss": 0.0005, "step": 214440 }, { "epoch": 1.7536901500592879, "grad_norm": 0.03100237436592579, "learning_rate": 4.5494572227631874e-07, "loss": 0.0004, "step": 214450 }, { "epoch": 1.7537719262378868, "grad_norm": 0.029120994731783867, "learning_rate": 4.546483465129564e-07, "loss": 0.001, "step": 214460 }, { "epoch": 1.7538537024164862, "grad_norm": 0.006878990679979324, "learning_rate": 4.5435106334210335e-07, "loss": 0.0003, "step": 214470 }, { "epoch": 1.7539354785950851, "grad_norm": 0.017222164198756218, "learning_rate": 4.540538727698135e-07, "loss": 0.0005, "step": 214480 }, { "epoch": 1.7540172547736845, "grad_norm": 0.05625404417514801, "learning_rate": 4.5375677480214153e-07, "loss": 0.0009, "step": 214490 }, { "epoch": 1.7540990309522835, "grad_norm": 0.017005296424031258, "learning_rate": 4.5345976944513926e-07, "loss": 0.0008, "step": 214500 }, { "epoch": 1.7541808071308829, "grad_norm": 0.019746027886867523, "learning_rate": 4.5316285670485624e-07, "loss": 0.0006, "step": 214510 }, { "epoch": 1.7542625833094818, "grad_norm": 0.04079965874552727, "learning_rate": 4.5286603658734273e-07, "loss": 0.0006, "step": 214520 }, { "epoch": 1.7543443594880812, "grad_norm": 0.00807745847851038, "learning_rate": 4.5256930909864436e-07, "loss": 0.0008, "step": 214530 }, { "epoch": 1.7544261356666802, "grad_norm": 0.010064659640192986, "learning_rate": 4.5227267424480525e-07, "loss": 0.0004, "step": 214540 }, { "epoch": 1.7545079118452795, "grad_norm": 0.014528443105518818, "learning_rate": 4.519761320318683e-07, "loss": 0.0004, "step": 214550 }, { "epoch": 1.7545896880238785, "grad_norm": 0.00770438089966774, "learning_rate": 4.5167968246587425e-07, "loss": 0.0003, "step": 214560 }, { "epoch": 1.7546714642024779, "grad_norm": 0.008554904721677303, "learning_rate": 4.513833255528621e-07, "loss": 0.0008, "step": 214570 }, { "epoch": 1.7547532403810768, "grad_norm": 0.06496073305606842, "learning_rate": 4.5108706129886936e-07, "loss": 0.0008, "step": 214580 }, { "epoch": 1.7548350165596762, "grad_norm": 0.5151431560516357, "learning_rate": 4.5079088970992947e-07, "loss": 0.0015, "step": 214590 }, { "epoch": 1.7549167927382754, "grad_norm": 0.05034331977367401, "learning_rate": 4.504948107920781e-07, "loss": 0.0004, "step": 214600 }, { "epoch": 1.7549985689168746, "grad_norm": 0.00861447211354971, "learning_rate": 4.50198824551345e-07, "loss": 0.0007, "step": 214610 }, { "epoch": 1.7550803450954737, "grad_norm": 0.036180462688207626, "learning_rate": 4.4990293099376024e-07, "loss": 0.0006, "step": 214620 }, { "epoch": 1.755162121274073, "grad_norm": 0.03456244617700577, "learning_rate": 4.4960713012535074e-07, "loss": 0.0003, "step": 214630 }, { "epoch": 1.755243897452672, "grad_norm": 0.09912465512752533, "learning_rate": 4.493114219521433e-07, "loss": 0.0006, "step": 214640 }, { "epoch": 1.7553256736312712, "grad_norm": 0.025522397831082344, "learning_rate": 4.490158064801614e-07, "loss": 0.0004, "step": 214650 }, { "epoch": 1.7554074498098704, "grad_norm": 0.007609810680150986, "learning_rate": 4.48720283715427e-07, "loss": 0.0004, "step": 214660 }, { "epoch": 1.7554892259884696, "grad_norm": 0.04006507620215416, "learning_rate": 4.484248536639591e-07, "loss": 0.0003, "step": 214670 }, { "epoch": 1.7555710021670687, "grad_norm": 0.011253363452851772, "learning_rate": 4.4812951633177727e-07, "loss": 0.0004, "step": 214680 }, { "epoch": 1.755652778345668, "grad_norm": 0.0418836884200573, "learning_rate": 4.478342717248968e-07, "loss": 0.0008, "step": 214690 }, { "epoch": 1.755734554524267, "grad_norm": 0.10335352271795273, "learning_rate": 4.4753911984933283e-07, "loss": 0.0008, "step": 214700 }, { "epoch": 1.7558163307028662, "grad_norm": 0.0021843647118657827, "learning_rate": 4.4724406071109616e-07, "loss": 0.0006, "step": 214710 }, { "epoch": 1.7558981068814654, "grad_norm": 0.03693593293428421, "learning_rate": 4.469490943161997e-07, "loss": 0.0006, "step": 214720 }, { "epoch": 1.7559798830600646, "grad_norm": 0.06023697182536125, "learning_rate": 4.466542206706509e-07, "loss": 0.0008, "step": 214730 }, { "epoch": 1.7560616592386638, "grad_norm": 0.0318242646753788, "learning_rate": 4.463594397804572e-07, "loss": 0.0004, "step": 214740 }, { "epoch": 1.756143435417263, "grad_norm": 0.028307387605309486, "learning_rate": 4.4606475165162143e-07, "loss": 0.0006, "step": 214750 }, { "epoch": 1.756225211595862, "grad_norm": 0.001709606614895165, "learning_rate": 4.4577015629015006e-07, "loss": 0.0003, "step": 214760 }, { "epoch": 1.7563069877744613, "grad_norm": 0.04914551228284836, "learning_rate": 4.4547565370204213e-07, "loss": 0.0011, "step": 214770 }, { "epoch": 1.7563887639530604, "grad_norm": 0.027812687680125237, "learning_rate": 4.4518124389329675e-07, "loss": 0.0006, "step": 214780 }, { "epoch": 1.7564705401316596, "grad_norm": 0.06566407531499863, "learning_rate": 4.448869268699124e-07, "loss": 0.0005, "step": 214790 }, { "epoch": 1.756552316310259, "grad_norm": 0.041353486478328705, "learning_rate": 4.445927026378838e-07, "loss": 0.0007, "step": 214800 }, { "epoch": 1.756634092488858, "grad_norm": 0.06518329679965973, "learning_rate": 4.442985712032044e-07, "loss": 0.0006, "step": 214810 }, { "epoch": 1.7567158686674573, "grad_norm": 0.0254363976418972, "learning_rate": 4.440045325718667e-07, "loss": 0.0007, "step": 214820 }, { "epoch": 1.7567976448460563, "grad_norm": 0.0179145447909832, "learning_rate": 4.437105867498587e-07, "loss": 0.0004, "step": 214830 }, { "epoch": 1.7568794210246557, "grad_norm": 0.021623052656650543, "learning_rate": 4.434167337431705e-07, "loss": 0.0006, "step": 214840 }, { "epoch": 1.7569611972032546, "grad_norm": 0.053360529243946075, "learning_rate": 4.4312297355778686e-07, "loss": 0.0007, "step": 214850 }, { "epoch": 1.757042973381854, "grad_norm": 0.0009087193175218999, "learning_rate": 4.42829306199693e-07, "loss": 0.0004, "step": 214860 }, { "epoch": 1.757124749560453, "grad_norm": 0.0682779997587204, "learning_rate": 4.4253573167486907e-07, "loss": 0.0004, "step": 214870 }, { "epoch": 1.7572065257390523, "grad_norm": 0.02190873585641384, "learning_rate": 4.4224224998929756e-07, "loss": 0.0044, "step": 214880 }, { "epoch": 1.7572883019176513, "grad_norm": 0.008023757487535477, "learning_rate": 4.419488611489564e-07, "loss": 0.0016, "step": 214890 }, { "epoch": 1.7573700780962507, "grad_norm": 0.04132143408060074, "learning_rate": 4.41655565159822e-07, "loss": 0.0007, "step": 214900 }, { "epoch": 1.7574518542748496, "grad_norm": 0.019259095191955566, "learning_rate": 4.41362362027869e-07, "loss": 0.0006, "step": 214910 }, { "epoch": 1.757533630453449, "grad_norm": 0.10169680416584015, "learning_rate": 4.410692517590698e-07, "loss": 0.0005, "step": 214920 }, { "epoch": 1.757615406632048, "grad_norm": 0.015382734127342701, "learning_rate": 4.4077623435939577e-07, "loss": 0.0009, "step": 214930 }, { "epoch": 1.7576971828106474, "grad_norm": 0.0010857422603294253, "learning_rate": 4.40483309834816e-07, "loss": 0.0006, "step": 214940 }, { "epoch": 1.7577789589892463, "grad_norm": 0.013933786191046238, "learning_rate": 4.4019047819129637e-07, "loss": 0.0006, "step": 214950 }, { "epoch": 1.7578607351678457, "grad_norm": 0.005839883349835873, "learning_rate": 4.3989773943480363e-07, "loss": 0.0008, "step": 214960 }, { "epoch": 1.7579425113464446, "grad_norm": 0.037157777696847916, "learning_rate": 4.396050935713009e-07, "loss": 0.0004, "step": 214970 }, { "epoch": 1.758024287525044, "grad_norm": 0.011780385859310627, "learning_rate": 4.3931254060674943e-07, "loss": 0.0004, "step": 214980 }, { "epoch": 1.758106063703643, "grad_norm": 0.03323956951498985, "learning_rate": 4.390200805471073e-07, "loss": 0.0004, "step": 214990 }, { "epoch": 1.7581878398822424, "grad_norm": 0.060871023684740067, "learning_rate": 4.3872771339833474e-07, "loss": 0.0009, "step": 215000 }, { "epoch": 1.7582696160608413, "grad_norm": 0.029642276465892792, "learning_rate": 4.384354391663859e-07, "loss": 0.0004, "step": 215010 }, { "epoch": 1.7583513922394407, "grad_norm": 0.00154603342525661, "learning_rate": 4.3814325785721533e-07, "loss": 0.0008, "step": 215020 }, { "epoch": 1.7584331684180399, "grad_norm": 0.0028870562091469765, "learning_rate": 4.3785116947677454e-07, "loss": 0.0003, "step": 215030 }, { "epoch": 1.758514944596639, "grad_norm": 0.040583688765764236, "learning_rate": 4.3755917403101366e-07, "loss": 0.0002, "step": 215040 }, { "epoch": 1.7585967207752382, "grad_norm": 0.023539427667856216, "learning_rate": 4.3726727152588077e-07, "loss": 0.0006, "step": 215050 }, { "epoch": 1.7586784969538374, "grad_norm": 0.02578694000840187, "learning_rate": 4.369754619673228e-07, "loss": 0.0005, "step": 215060 }, { "epoch": 1.7587602731324365, "grad_norm": 0.025747662410140038, "learning_rate": 4.366837453612827e-07, "loss": 0.0013, "step": 215070 }, { "epoch": 1.7588420493110357, "grad_norm": 0.023195166140794754, "learning_rate": 4.363921217137046e-07, "loss": 0.0007, "step": 215080 }, { "epoch": 1.7589238254896349, "grad_norm": 0.021669408306479454, "learning_rate": 4.3610059103052825e-07, "loss": 0.0021, "step": 215090 }, { "epoch": 1.759005601668234, "grad_norm": 0.09290850907564163, "learning_rate": 4.3580915331769335e-07, "loss": 0.0005, "step": 215100 }, { "epoch": 1.7590873778468332, "grad_norm": 0.009637316688895226, "learning_rate": 4.3551780858113455e-07, "loss": 0.0005, "step": 215110 }, { "epoch": 1.7591691540254324, "grad_norm": 0.0882839560508728, "learning_rate": 4.352265568267888e-07, "loss": 0.0006, "step": 215120 }, { "epoch": 1.7592509302040316, "grad_norm": 0.01190457958728075, "learning_rate": 4.3493539806058915e-07, "loss": 0.001, "step": 215130 }, { "epoch": 1.7593327063826307, "grad_norm": 0.008465789258480072, "learning_rate": 4.3464433228846583e-07, "loss": 0.0003, "step": 215140 }, { "epoch": 1.75941448256123, "grad_norm": 0.10385280847549438, "learning_rate": 4.343533595163485e-07, "loss": 0.0007, "step": 215150 }, { "epoch": 1.759496258739829, "grad_norm": 0.013096536509692669, "learning_rate": 4.3406247975016414e-07, "loss": 0.0051, "step": 215160 }, { "epoch": 1.7595780349184282, "grad_norm": 0.031236128881573677, "learning_rate": 4.3377169299583853e-07, "loss": 0.0016, "step": 215170 }, { "epoch": 1.7596598110970274, "grad_norm": 0.060098376125097275, "learning_rate": 4.334809992592953e-07, "loss": 0.0004, "step": 215180 }, { "epoch": 1.7597415872756266, "grad_norm": 0.024652646854519844, "learning_rate": 4.331903985464547e-07, "loss": 0.0007, "step": 215190 }, { "epoch": 1.7598233634542257, "grad_norm": 0.13443748652935028, "learning_rate": 4.3289989086323927e-07, "loss": 0.0008, "step": 215200 }, { "epoch": 1.759905139632825, "grad_norm": 0.10314908623695374, "learning_rate": 4.326094762155647e-07, "loss": 0.0004, "step": 215210 }, { "epoch": 1.759986915811424, "grad_norm": 0.0026728566735982895, "learning_rate": 4.3231915460934804e-07, "loss": 0.0003, "step": 215220 }, { "epoch": 1.7600686919900235, "grad_norm": 0.009512088261544704, "learning_rate": 4.320289260505023e-07, "loss": 0.0004, "step": 215230 }, { "epoch": 1.7601504681686224, "grad_norm": 0.032467007637023926, "learning_rate": 4.3173879054494106e-07, "loss": 0.0004, "step": 215240 }, { "epoch": 1.7602322443472218, "grad_norm": 0.02554217539727688, "learning_rate": 4.3144874809857407e-07, "loss": 0.0006, "step": 215250 }, { "epoch": 1.7603140205258208, "grad_norm": 0.09622301906347275, "learning_rate": 4.3115879871730936e-07, "loss": 0.0014, "step": 215260 }, { "epoch": 1.7603957967044201, "grad_norm": 0.02932392805814743, "learning_rate": 4.3086894240705335e-07, "loss": 0.0007, "step": 215270 }, { "epoch": 1.760477572883019, "grad_norm": 0.01339507382363081, "learning_rate": 4.305791791737113e-07, "loss": 0.0002, "step": 215280 }, { "epoch": 1.7605593490616185, "grad_norm": 0.0371810644865036, "learning_rate": 4.3028950902318566e-07, "loss": 0.0004, "step": 215290 }, { "epoch": 1.7606411252402174, "grad_norm": 0.014754123985767365, "learning_rate": 4.2999993196137734e-07, "loss": 0.0008, "step": 215300 }, { "epoch": 1.7607229014188168, "grad_norm": 0.03257450461387634, "learning_rate": 4.2971044799418383e-07, "loss": 0.0007, "step": 215310 }, { "epoch": 1.7608046775974158, "grad_norm": 0.06221978738903999, "learning_rate": 4.2942105712750483e-07, "loss": 0.0003, "step": 215320 }, { "epoch": 1.7608864537760152, "grad_norm": 0.04671618342399597, "learning_rate": 4.291317593672334e-07, "loss": 0.0008, "step": 215330 }, { "epoch": 1.760968229954614, "grad_norm": 0.0097306277602911, "learning_rate": 4.288425547192637e-07, "loss": 0.0019, "step": 215340 }, { "epoch": 1.7610500061332135, "grad_norm": 0.023314788937568665, "learning_rate": 4.285534431894861e-07, "loss": 0.0003, "step": 215350 }, { "epoch": 1.7611317823118124, "grad_norm": 0.022619662806391716, "learning_rate": 4.2826442478379136e-07, "loss": 0.0004, "step": 215360 }, { "epoch": 1.7612135584904118, "grad_norm": 0.04482109844684601, "learning_rate": 4.279754995080665e-07, "loss": 0.0004, "step": 215370 }, { "epoch": 1.7612953346690108, "grad_norm": 0.011571360751986504, "learning_rate": 4.2768666736819677e-07, "loss": 0.0005, "step": 215380 }, { "epoch": 1.7613771108476102, "grad_norm": 0.08962029218673706, "learning_rate": 4.2739792837006634e-07, "loss": 0.0011, "step": 215390 }, { "epoch": 1.7614588870262091, "grad_norm": 0.014494159258902073, "learning_rate": 4.271092825195572e-07, "loss": 0.0004, "step": 215400 }, { "epoch": 1.7615406632048085, "grad_norm": 0.02449723891913891, "learning_rate": 4.268207298225485e-07, "loss": 0.0007, "step": 215410 }, { "epoch": 1.7616224393834075, "grad_norm": 0.16803999245166779, "learning_rate": 4.2653227028491893e-07, "loss": 0.0017, "step": 215420 }, { "epoch": 1.7617042155620068, "grad_norm": 0.04446712136268616, "learning_rate": 4.2624390391254375e-07, "loss": 0.0009, "step": 215430 }, { "epoch": 1.761785991740606, "grad_norm": 0.03652815520763397, "learning_rate": 4.259556307112994e-07, "loss": 0.0012, "step": 215440 }, { "epoch": 1.7618677679192052, "grad_norm": 0.006781425327062607, "learning_rate": 4.2566745068705615e-07, "loss": 0.0005, "step": 215450 }, { "epoch": 1.7619495440978044, "grad_norm": 0.032393865287303925, "learning_rate": 4.2537936384568546e-07, "loss": 0.0004, "step": 215460 }, { "epoch": 1.7620313202764035, "grad_norm": 0.03330178186297417, "learning_rate": 4.2509137019305426e-07, "loss": 0.0006, "step": 215470 }, { "epoch": 1.7621130964550027, "grad_norm": 0.05054892599582672, "learning_rate": 4.248034697350317e-07, "loss": 0.0004, "step": 215480 }, { "epoch": 1.7621948726336019, "grad_norm": 0.2199217528104782, "learning_rate": 4.2451566247748156e-07, "loss": 0.0016, "step": 215490 }, { "epoch": 1.762276648812201, "grad_norm": 0.0026940149255096912, "learning_rate": 4.2422794842626626e-07, "loss": 0.0011, "step": 215500 }, { "epoch": 1.7623584249908002, "grad_norm": 0.018865106627345085, "learning_rate": 4.2394032758724666e-07, "loss": 0.0006, "step": 215510 }, { "epoch": 1.7624402011693994, "grad_norm": 0.06839648634195328, "learning_rate": 4.236527999662826e-07, "loss": 0.001, "step": 215520 }, { "epoch": 1.7625219773479985, "grad_norm": 0.03031935915350914, "learning_rate": 4.23365365569231e-07, "loss": 0.0007, "step": 215530 }, { "epoch": 1.7626037535265977, "grad_norm": 0.03888219594955444, "learning_rate": 4.2307802440194665e-07, "loss": 0.0005, "step": 215540 }, { "epoch": 1.7626855297051969, "grad_norm": 0.08845561742782593, "learning_rate": 4.2279077647028264e-07, "loss": 0.0005, "step": 215550 }, { "epoch": 1.762767305883796, "grad_norm": 0.016494592651724815, "learning_rate": 4.225036217800921e-07, "loss": 0.0002, "step": 215560 }, { "epoch": 1.7628490820623952, "grad_norm": 0.009624057449400425, "learning_rate": 4.2221656033722303e-07, "loss": 0.0008, "step": 215570 }, { "epoch": 1.7629308582409944, "grad_norm": 0.01692551001906395, "learning_rate": 4.2192959214752415e-07, "loss": 0.0002, "step": 215580 }, { "epoch": 1.7630126344195935, "grad_norm": 0.006184152327477932, "learning_rate": 4.216427172168397e-07, "loss": 0.0005, "step": 215590 }, { "epoch": 1.7630944105981927, "grad_norm": 0.05396448075771332, "learning_rate": 4.213559355510149e-07, "loss": 0.0005, "step": 215600 }, { "epoch": 1.7631761867767919, "grad_norm": 0.0025720426347106695, "learning_rate": 4.2106924715589193e-07, "loss": 0.0005, "step": 215610 }, { "epoch": 1.763257962955391, "grad_norm": 0.012360106222331524, "learning_rate": 4.207826520373104e-07, "loss": 0.0005, "step": 215620 }, { "epoch": 1.7633397391339902, "grad_norm": 0.027187885716557503, "learning_rate": 4.2049615020110844e-07, "loss": 0.0005, "step": 215630 }, { "epoch": 1.7634215153125894, "grad_norm": 0.04361334443092346, "learning_rate": 4.20209741653122e-07, "loss": 0.0003, "step": 215640 }, { "epoch": 1.7635032914911886, "grad_norm": 0.02711525745689869, "learning_rate": 4.1992342639918583e-07, "loss": 0.0002, "step": 215650 }, { "epoch": 1.763585067669788, "grad_norm": 0.04685693979263306, "learning_rate": 4.196372044451325e-07, "loss": 0.0008, "step": 215660 }, { "epoch": 1.763666843848387, "grad_norm": 0.031310684978961945, "learning_rate": 4.193510757967917e-07, "loss": 0.0013, "step": 215670 }, { "epoch": 1.7637486200269863, "grad_norm": 0.0005939237307757139, "learning_rate": 4.190650404599933e-07, "loss": 0.0003, "step": 215680 }, { "epoch": 1.7638303962055852, "grad_norm": 0.01944715902209282, "learning_rate": 4.1877909844056376e-07, "loss": 0.0008, "step": 215690 }, { "epoch": 1.7639121723841846, "grad_norm": 0.056930556893348694, "learning_rate": 4.1849324974432784e-07, "loss": 0.0008, "step": 215700 }, { "epoch": 1.7639939485627836, "grad_norm": 0.026347873732447624, "learning_rate": 4.182074943771075e-07, "loss": 0.0006, "step": 215710 }, { "epoch": 1.764075724741383, "grad_norm": 0.05199331045150757, "learning_rate": 4.1792183234472595e-07, "loss": 0.0007, "step": 215720 }, { "epoch": 1.764157500919982, "grad_norm": 0.005057350266724825, "learning_rate": 4.176362636530007e-07, "loss": 0.0003, "step": 215730 }, { "epoch": 1.7642392770985813, "grad_norm": 0.06391061097383499, "learning_rate": 4.173507883077499e-07, "loss": 0.0013, "step": 215740 }, { "epoch": 1.7643210532771803, "grad_norm": 0.0016940736677497625, "learning_rate": 4.1706540631478833e-07, "loss": 0.0008, "step": 215750 }, { "epoch": 1.7644028294557796, "grad_norm": 0.015490693971514702, "learning_rate": 4.167801176799291e-07, "loss": 0.0005, "step": 215760 }, { "epoch": 1.7644846056343786, "grad_norm": 0.01805531233549118, "learning_rate": 4.1649492240898484e-07, "loss": 0.0003, "step": 215770 }, { "epoch": 1.764566381812978, "grad_norm": 0.10466509312391281, "learning_rate": 4.1620982050776473e-07, "loss": 0.0009, "step": 215780 }, { "epoch": 1.764648157991577, "grad_norm": 0.11986783891916275, "learning_rate": 4.159248119820758e-07, "loss": 0.0004, "step": 215790 }, { "epoch": 1.7647299341701763, "grad_norm": 0.0030752539169043303, "learning_rate": 4.156398968377251e-07, "loss": 0.0009, "step": 215800 }, { "epoch": 1.7648117103487753, "grad_norm": 0.057423822581768036, "learning_rate": 4.153550750805163e-07, "loss": 0.0008, "step": 215810 }, { "epoch": 1.7648934865273747, "grad_norm": 0.036140646785497665, "learning_rate": 4.150703467162509e-07, "loss": 0.0003, "step": 215820 }, { "epoch": 1.7649752627059736, "grad_norm": 0.011841919273138046, "learning_rate": 4.1478571175072867e-07, "loss": 0.0003, "step": 215830 }, { "epoch": 1.765057038884573, "grad_norm": 0.0019102544756606221, "learning_rate": 4.145011701897489e-07, "loss": 0.0006, "step": 215840 }, { "epoch": 1.765138815063172, "grad_norm": 0.01127880159765482, "learning_rate": 4.1421672203910855e-07, "loss": 0.001, "step": 215850 }, { "epoch": 1.7652205912417713, "grad_norm": 0.06749095022678375, "learning_rate": 4.139323673046003e-07, "loss": 0.0015, "step": 215860 }, { "epoch": 1.7653023674203705, "grad_norm": 0.020703818649053574, "learning_rate": 4.136481059920178e-07, "loss": 0.0004, "step": 215870 }, { "epoch": 1.7653841435989697, "grad_norm": 0.024423271417617798, "learning_rate": 4.1336393810715134e-07, "loss": 0.0005, "step": 215880 }, { "epoch": 1.7654659197775688, "grad_norm": 0.10580329596996307, "learning_rate": 4.1307986365578977e-07, "loss": 0.0008, "step": 215890 }, { "epoch": 1.765547695956168, "grad_norm": 0.03530995920300484, "learning_rate": 4.127958826437195e-07, "loss": 0.0006, "step": 215900 }, { "epoch": 1.7656294721347672, "grad_norm": 0.1071067526936531, "learning_rate": 4.125119950767248e-07, "loss": 0.0006, "step": 215910 }, { "epoch": 1.7657112483133663, "grad_norm": 0.05387295037508011, "learning_rate": 4.12228200960591e-07, "loss": 0.0002, "step": 215920 }, { "epoch": 1.7657930244919655, "grad_norm": 0.058560263365507126, "learning_rate": 4.1194450030109744e-07, "loss": 0.0009, "step": 215930 }, { "epoch": 1.7658748006705647, "grad_norm": 0.0436827577650547, "learning_rate": 4.116608931040239e-07, "loss": 0.0011, "step": 215940 }, { "epoch": 1.7659565768491639, "grad_norm": 0.013948339968919754, "learning_rate": 4.1137737937514797e-07, "loss": 0.0005, "step": 215950 }, { "epoch": 1.766038353027763, "grad_norm": 0.04136810824275017, "learning_rate": 4.110939591202434e-07, "loss": 0.0006, "step": 215960 }, { "epoch": 1.7661201292063622, "grad_norm": 0.019037481397390366, "learning_rate": 4.108106323450861e-07, "loss": 0.0004, "step": 215970 }, { "epoch": 1.7662019053849614, "grad_norm": 0.05797388404607773, "learning_rate": 4.1052739905544593e-07, "loss": 0.0006, "step": 215980 }, { "epoch": 1.7662836815635605, "grad_norm": 0.03938599303364754, "learning_rate": 4.1024425925709376e-07, "loss": 0.0006, "step": 215990 }, { "epoch": 1.7663654577421597, "grad_norm": 0.007564443163573742, "learning_rate": 4.0996121295579616e-07, "loss": 0.0009, "step": 216000 }, { "epoch": 1.7664472339207589, "grad_norm": 0.03203636035323143, "learning_rate": 4.0967826015732016e-07, "loss": 0.0006, "step": 216010 }, { "epoch": 1.766529010099358, "grad_norm": 0.031475409865379333, "learning_rate": 4.093954008674289e-07, "loss": 0.0005, "step": 216020 }, { "epoch": 1.7666107862779572, "grad_norm": 0.018165405839681625, "learning_rate": 4.0911263509188504e-07, "loss": 0.0004, "step": 216030 }, { "epoch": 1.7666925624565564, "grad_norm": 0.0354769341647625, "learning_rate": 4.0882996283644727e-07, "loss": 0.0009, "step": 216040 }, { "epoch": 1.7667743386351555, "grad_norm": 0.08885294198989868, "learning_rate": 4.08547384106876e-07, "loss": 0.0004, "step": 216050 }, { "epoch": 1.7668561148137547, "grad_norm": 0.051060453057289124, "learning_rate": 4.082648989089266e-07, "loss": 0.0007, "step": 216060 }, { "epoch": 1.7669378909923539, "grad_norm": 0.03869372606277466, "learning_rate": 4.07982507248354e-07, "loss": 0.0003, "step": 216070 }, { "epoch": 1.767019667170953, "grad_norm": 0.0364515483379364, "learning_rate": 4.0770020913090904e-07, "loss": 0.0003, "step": 216080 }, { "epoch": 1.7671014433495524, "grad_norm": 0.014586780220270157, "learning_rate": 4.0741800456234437e-07, "loss": 0.0007, "step": 216090 }, { "epoch": 1.7671832195281514, "grad_norm": 0.030223121866583824, "learning_rate": 4.071358935484082e-07, "loss": 0.0005, "step": 216100 }, { "epoch": 1.7672649957067508, "grad_norm": 0.07245650142431259, "learning_rate": 4.0685387609484704e-07, "loss": 0.0006, "step": 216110 }, { "epoch": 1.7673467718853497, "grad_norm": 0.10907609015703201, "learning_rate": 4.065719522074063e-07, "loss": 0.0008, "step": 216120 }, { "epoch": 1.7674285480639491, "grad_norm": 0.045034147799015045, "learning_rate": 4.0629012189182806e-07, "loss": 0.0005, "step": 216130 }, { "epoch": 1.767510324242548, "grad_norm": 0.033316582441329956, "learning_rate": 4.060083851538543e-07, "loss": 0.0009, "step": 216140 }, { "epoch": 1.7675921004211474, "grad_norm": 0.009700472466647625, "learning_rate": 4.057267419992239e-07, "loss": 0.0009, "step": 216150 }, { "epoch": 1.7676738765997464, "grad_norm": 0.0662631243467331, "learning_rate": 4.054451924336733e-07, "loss": 0.0004, "step": 216160 }, { "epoch": 1.7677556527783458, "grad_norm": 0.06980321556329727, "learning_rate": 4.051637364629396e-07, "loss": 0.0006, "step": 216170 }, { "epoch": 1.7678374289569447, "grad_norm": 0.03309985250234604, "learning_rate": 4.0488237409275545e-07, "loss": 0.0003, "step": 216180 }, { "epoch": 1.7679192051355441, "grad_norm": 0.016725728288292885, "learning_rate": 4.0460110532885233e-07, "loss": 0.0006, "step": 216190 }, { "epoch": 1.768000981314143, "grad_norm": 0.07055684179067612, "learning_rate": 4.043199301769596e-07, "loss": 0.0008, "step": 216200 }, { "epoch": 1.7680827574927425, "grad_norm": 0.02729487046599388, "learning_rate": 4.0403884864280596e-07, "loss": 0.0008, "step": 216210 }, { "epoch": 1.7681645336713414, "grad_norm": 0.005039965268224478, "learning_rate": 4.0375786073211686e-07, "loss": 0.0008, "step": 216220 }, { "epoch": 1.7682463098499408, "grad_norm": 0.07407877594232559, "learning_rate": 4.034769664506161e-07, "loss": 0.0009, "step": 216230 }, { "epoch": 1.7683280860285397, "grad_norm": 0.1063503846526146, "learning_rate": 4.0319616580402567e-07, "loss": 0.0007, "step": 216240 }, { "epoch": 1.7684098622071391, "grad_norm": 0.08810346573591232, "learning_rate": 4.029154587980655e-07, "loss": 0.0004, "step": 216250 }, { "epoch": 1.768491638385738, "grad_norm": 0.051354724913835526, "learning_rate": 4.026348454384549e-07, "loss": 0.0007, "step": 216260 }, { "epoch": 1.7685734145643375, "grad_norm": 0.044471416622400284, "learning_rate": 4.023543257309087e-07, "loss": 0.0005, "step": 216270 }, { "epoch": 1.7686551907429364, "grad_norm": 0.004051995929330587, "learning_rate": 4.0207389968114187e-07, "loss": 0.0015, "step": 216280 }, { "epoch": 1.7687369669215358, "grad_norm": 0.03321671858429909, "learning_rate": 4.017935672948675e-07, "loss": 0.0005, "step": 216290 }, { "epoch": 1.768818743100135, "grad_norm": 0.012958678416907787, "learning_rate": 4.015133285777961e-07, "loss": 0.0006, "step": 216300 }, { "epoch": 1.7689005192787342, "grad_norm": 0.06748562306165695, "learning_rate": 4.012331835356359e-07, "loss": 0.0005, "step": 216310 }, { "epoch": 1.7689822954573333, "grad_norm": 0.09544575214385986, "learning_rate": 4.0095313217409283e-07, "loss": 0.0008, "step": 216320 }, { "epoch": 1.7690640716359325, "grad_norm": 0.03860259801149368, "learning_rate": 4.006731744988734e-07, "loss": 0.0006, "step": 216330 }, { "epoch": 1.7691458478145317, "grad_norm": 0.029187725856900215, "learning_rate": 4.003933105156804e-07, "loss": 0.0005, "step": 216340 }, { "epoch": 1.7692276239931308, "grad_norm": 0.039271675050258636, "learning_rate": 4.001135402302142e-07, "loss": 0.0008, "step": 216350 }, { "epoch": 1.76930940017173, "grad_norm": 0.027963250875473022, "learning_rate": 3.998338636481741e-07, "loss": 0.0008, "step": 216360 }, { "epoch": 1.7693911763503292, "grad_norm": 0.11917093396186829, "learning_rate": 3.995542807752578e-07, "loss": 0.0004, "step": 216370 }, { "epoch": 1.7694729525289283, "grad_norm": 0.0024705769028514624, "learning_rate": 3.9927479161715966e-07, "loss": 0.0004, "step": 216380 }, { "epoch": 1.7695547287075275, "grad_norm": 0.0033381085377186537, "learning_rate": 3.9899539617957394e-07, "loss": 0.0007, "step": 216390 }, { "epoch": 1.7696365048861267, "grad_norm": 0.1430136114358902, "learning_rate": 3.9871609446819114e-07, "loss": 0.0005, "step": 216400 }, { "epoch": 1.7697182810647258, "grad_norm": 0.06279769539833069, "learning_rate": 3.9843688648870227e-07, "loss": 0.0011, "step": 216410 }, { "epoch": 1.769800057243325, "grad_norm": 0.04055090993642807, "learning_rate": 3.981577722467944e-07, "loss": 0.0007, "step": 216420 }, { "epoch": 1.7698818334219242, "grad_norm": 0.02805539034307003, "learning_rate": 3.9787875174815306e-07, "loss": 0.0005, "step": 216430 }, { "epoch": 1.7699636096005233, "grad_norm": 0.037027835845947266, "learning_rate": 3.9759982499846196e-07, "loss": 0.0005, "step": 216440 }, { "epoch": 1.7700453857791225, "grad_norm": 0.010100873187184334, "learning_rate": 3.973209920034038e-07, "loss": 0.0007, "step": 216450 }, { "epoch": 1.7701271619577217, "grad_norm": 0.002422360936179757, "learning_rate": 3.9704225276865903e-07, "loss": 0.0003, "step": 216460 }, { "epoch": 1.7702089381363209, "grad_norm": 0.06365318596363068, "learning_rate": 3.9676360729990427e-07, "loss": 0.0005, "step": 216470 }, { "epoch": 1.77029071431492, "grad_norm": 0.02496480755507946, "learning_rate": 3.9648505560281713e-07, "loss": 0.0004, "step": 216480 }, { "epoch": 1.7703724904935192, "grad_norm": 0.013562692329287529, "learning_rate": 3.962065976830709e-07, "loss": 0.0005, "step": 216490 }, { "epoch": 1.7704542666721184, "grad_norm": 0.07627330720424652, "learning_rate": 3.9592823354633823e-07, "loss": 0.0003, "step": 216500 }, { "epoch": 1.7705360428507175, "grad_norm": 0.016519734635949135, "learning_rate": 3.956499631982902e-07, "loss": 0.0005, "step": 216510 }, { "epoch": 1.770617819029317, "grad_norm": 0.0019850018434226513, "learning_rate": 3.953717866445944e-07, "loss": 0.0007, "step": 216520 }, { "epoch": 1.7706995952079159, "grad_norm": 0.029356930404901505, "learning_rate": 3.9509370389091864e-07, "loss": 0.0007, "step": 216530 }, { "epoch": 1.7707813713865153, "grad_norm": 0.003559915814548731, "learning_rate": 3.9481571494292714e-07, "loss": 0.0005, "step": 216540 }, { "epoch": 1.7708631475651142, "grad_norm": 0.035582926124334335, "learning_rate": 3.945378198062827e-07, "loss": 0.0006, "step": 216550 }, { "epoch": 1.7709449237437136, "grad_norm": 0.025573240593075752, "learning_rate": 3.9426001848664573e-07, "loss": 0.0009, "step": 216560 }, { "epoch": 1.7710266999223125, "grad_norm": 0.0009714775369502604, "learning_rate": 3.939823109896773e-07, "loss": 0.0009, "step": 216570 }, { "epoch": 1.771108476100912, "grad_norm": 0.009619015268981457, "learning_rate": 3.9370469732103235e-07, "loss": 0.0006, "step": 216580 }, { "epoch": 1.7711902522795109, "grad_norm": 0.055419519543647766, "learning_rate": 3.934271774863674e-07, "loss": 0.0011, "step": 216590 }, { "epoch": 1.7712720284581103, "grad_norm": 0.01602439023554325, "learning_rate": 3.931497514913346e-07, "loss": 0.0004, "step": 216600 }, { "epoch": 1.7713538046367092, "grad_norm": 0.026665136218070984, "learning_rate": 3.928724193415867e-07, "loss": 0.0005, "step": 216610 }, { "epoch": 1.7714355808153086, "grad_norm": 0.045624349266290665, "learning_rate": 3.925951810427725e-07, "loss": 0.0007, "step": 216620 }, { "epoch": 1.7715173569939076, "grad_norm": 0.005930849816650152, "learning_rate": 3.9231803660053914e-07, "loss": 0.0006, "step": 216630 }, { "epoch": 1.771599133172507, "grad_norm": 0.04322626441717148, "learning_rate": 3.9204098602053206e-07, "loss": 0.0003, "step": 216640 }, { "epoch": 1.771680909351106, "grad_norm": 0.055324215441942215, "learning_rate": 3.917640293083963e-07, "loss": 0.0008, "step": 216650 }, { "epoch": 1.7717626855297053, "grad_norm": 0.025185097008943558, "learning_rate": 3.914871664697728e-07, "loss": 0.0005, "step": 216660 }, { "epoch": 1.7718444617083042, "grad_norm": 0.03395399451255798, "learning_rate": 3.912103975103021e-07, "loss": 0.0007, "step": 216670 }, { "epoch": 1.7719262378869036, "grad_norm": 0.0138374799862504, "learning_rate": 3.9093372243562076e-07, "loss": 0.0005, "step": 216680 }, { "epoch": 1.7720080140655026, "grad_norm": 0.014300045557320118, "learning_rate": 3.906571412513671e-07, "loss": 0.0003, "step": 216690 }, { "epoch": 1.772089790244102, "grad_norm": 0.014567721635103226, "learning_rate": 3.9038065396317384e-07, "loss": 0.001, "step": 216700 }, { "epoch": 1.772171566422701, "grad_norm": 0.15037083625793457, "learning_rate": 3.901042605766736e-07, "loss": 0.0012, "step": 216710 }, { "epoch": 1.7722533426013003, "grad_norm": 0.03275178745388985, "learning_rate": 3.89827961097497e-07, "loss": 0.0012, "step": 216720 }, { "epoch": 1.7723351187798995, "grad_norm": 0.025860317051410675, "learning_rate": 3.8955175553127167e-07, "loss": 0.0005, "step": 216730 }, { "epoch": 1.7724168949584986, "grad_norm": 0.0003684656403493136, "learning_rate": 3.892756438836254e-07, "loss": 0.0004, "step": 216740 }, { "epoch": 1.7724986711370978, "grad_norm": 0.018128884956240654, "learning_rate": 3.8899962616018136e-07, "loss": 0.0011, "step": 216750 }, { "epoch": 1.772580447315697, "grad_norm": 0.04545598849654198, "learning_rate": 3.8872370236656244e-07, "loss": 0.0007, "step": 216760 }, { "epoch": 1.7726622234942961, "grad_norm": 0.0017200689762830734, "learning_rate": 3.884478725083901e-07, "loss": 0.0003, "step": 216770 }, { "epoch": 1.7727439996728953, "grad_norm": 0.019703170284628868, "learning_rate": 3.881721365912838e-07, "loss": 0.0003, "step": 216780 }, { "epoch": 1.7728257758514945, "grad_norm": 0.03549492359161377, "learning_rate": 3.8789649462085965e-07, "loss": 0.0003, "step": 216790 }, { "epoch": 1.7729075520300936, "grad_norm": 0.010318158194422722, "learning_rate": 3.87620946602732e-07, "loss": 0.0005, "step": 216800 }, { "epoch": 1.7729893282086928, "grad_norm": 0.016296064481139183, "learning_rate": 3.8734549254251517e-07, "loss": 0.0003, "step": 216810 }, { "epoch": 1.773071104387292, "grad_norm": 0.02360234037041664, "learning_rate": 3.8707013244582037e-07, "loss": 0.0009, "step": 216820 }, { "epoch": 1.7731528805658912, "grad_norm": 0.04483257234096527, "learning_rate": 3.8679486631825633e-07, "loss": 0.0006, "step": 216830 }, { "epoch": 1.7732346567444903, "grad_norm": 0.044644128531217575, "learning_rate": 3.865196941654309e-07, "loss": 0.0007, "step": 216840 }, { "epoch": 1.7733164329230895, "grad_norm": 0.0036278723273426294, "learning_rate": 3.8624461599294947e-07, "loss": 0.0009, "step": 216850 }, { "epoch": 1.7733982091016887, "grad_norm": 0.04681297391653061, "learning_rate": 3.8596963180641543e-07, "loss": 0.0012, "step": 216860 }, { "epoch": 1.7734799852802878, "grad_norm": 0.0005742681096307933, "learning_rate": 3.856947416114304e-07, "loss": 0.0002, "step": 216870 }, { "epoch": 1.773561761458887, "grad_norm": 0.013099364005029202, "learning_rate": 3.854199454135932e-07, "loss": 0.0009, "step": 216880 }, { "epoch": 1.7736435376374862, "grad_norm": 0.018758319318294525, "learning_rate": 3.851452432185032e-07, "loss": 0.0011, "step": 216890 }, { "epoch": 1.7737253138160853, "grad_norm": 0.0364685133099556, "learning_rate": 3.84870635031756e-07, "loss": 0.0004, "step": 216900 }, { "epoch": 1.7738070899946845, "grad_norm": 0.023434247821569443, "learning_rate": 3.845961208589455e-07, "loss": 0.0004, "step": 216910 }, { "epoch": 1.7738888661732837, "grad_norm": 0.11926590651273727, "learning_rate": 3.8432170070566263e-07, "loss": 0.0008, "step": 216920 }, { "epoch": 1.7739706423518828, "grad_norm": 0.0451943576335907, "learning_rate": 3.840473745774992e-07, "loss": 0.0003, "step": 216930 }, { "epoch": 1.774052418530482, "grad_norm": 0.006116872187703848, "learning_rate": 3.837731424800434e-07, "loss": 0.001, "step": 216940 }, { "epoch": 1.7741341947090814, "grad_norm": 0.028357146307826042, "learning_rate": 3.8349900441888024e-07, "loss": 0.001, "step": 216950 }, { "epoch": 1.7742159708876803, "grad_norm": 0.03455321490764618, "learning_rate": 3.832249603995952e-07, "loss": 0.001, "step": 216960 }, { "epoch": 1.7742977470662797, "grad_norm": 0.03231365978717804, "learning_rate": 3.829510104277706e-07, "loss": 0.0005, "step": 216970 }, { "epoch": 1.7743795232448787, "grad_norm": 0.0020361472852528095, "learning_rate": 3.8267715450898637e-07, "loss": 0.0016, "step": 216980 }, { "epoch": 1.774461299423478, "grad_norm": 0.09274569898843765, "learning_rate": 3.8240339264882184e-07, "loss": 0.0005, "step": 216990 }, { "epoch": 1.774543075602077, "grad_norm": 0.05035685375332832, "learning_rate": 3.821297248528527e-07, "loss": 0.0007, "step": 217000 }, { "epoch": 1.7746248517806764, "grad_norm": 0.028707230463624, "learning_rate": 3.8185615112665553e-07, "loss": 0.0006, "step": 217010 }, { "epoch": 1.7747066279592754, "grad_norm": 0.08036583662033081, "learning_rate": 3.815826714758025e-07, "loss": 0.0006, "step": 217020 }, { "epoch": 1.7747884041378748, "grad_norm": 0.09202633053064346, "learning_rate": 3.813092859058642e-07, "loss": 0.0005, "step": 217030 }, { "epoch": 1.7748701803164737, "grad_norm": 0.027062101289629936, "learning_rate": 3.8103599442240956e-07, "loss": 0.0006, "step": 217040 }, { "epoch": 1.774951956495073, "grad_norm": 0.04286613687872887, "learning_rate": 3.807627970310068e-07, "loss": 0.0005, "step": 217050 }, { "epoch": 1.775033732673672, "grad_norm": 0.030587242916226387, "learning_rate": 3.8048969373722046e-07, "loss": 0.0005, "step": 217060 }, { "epoch": 1.7751155088522714, "grad_norm": 0.006644631735980511, "learning_rate": 3.8021668454661433e-07, "loss": 0.0009, "step": 217070 }, { "epoch": 1.7751972850308704, "grad_norm": 0.011018538847565651, "learning_rate": 3.799437694647495e-07, "loss": 0.0004, "step": 217080 }, { "epoch": 1.7752790612094698, "grad_norm": 0.06818480044603348, "learning_rate": 3.79670948497185e-07, "loss": 0.0009, "step": 217090 }, { "epoch": 1.7753608373880687, "grad_norm": 0.12047635018825531, "learning_rate": 3.7939822164947906e-07, "loss": 0.0008, "step": 217100 }, { "epoch": 1.775442613566668, "grad_norm": 0.07727299630641937, "learning_rate": 3.791255889271872e-07, "loss": 0.0009, "step": 217110 }, { "epoch": 1.775524389745267, "grad_norm": 0.013244773261249065, "learning_rate": 3.788530503358623e-07, "loss": 0.0005, "step": 217120 }, { "epoch": 1.7756061659238664, "grad_norm": 0.029729261994361877, "learning_rate": 3.7858060588105817e-07, "loss": 0.0003, "step": 217130 }, { "epoch": 1.7756879421024654, "grad_norm": 0.04231296852231026, "learning_rate": 3.783082555683232e-07, "loss": 0.0003, "step": 217140 }, { "epoch": 1.7757697182810648, "grad_norm": 0.011867920868098736, "learning_rate": 3.780359994032057e-07, "loss": 0.0007, "step": 217150 }, { "epoch": 1.775851494459664, "grad_norm": 0.0631655678153038, "learning_rate": 3.777638373912512e-07, "loss": 0.0004, "step": 217160 }, { "epoch": 1.7759332706382631, "grad_norm": 0.035317808389663696, "learning_rate": 3.7749176953800535e-07, "loss": 0.0004, "step": 217170 }, { "epoch": 1.7760150468168623, "grad_norm": 0.041813306510448456, "learning_rate": 3.7721979584900927e-07, "loss": 0.0005, "step": 217180 }, { "epoch": 1.7760968229954615, "grad_norm": 0.0020349377300590277, "learning_rate": 3.7694791632980346e-07, "loss": 0.0005, "step": 217190 }, { "epoch": 1.7761785991740606, "grad_norm": 0.00690054427832365, "learning_rate": 3.766761309859268e-07, "loss": 0.0004, "step": 217200 }, { "epoch": 1.7762603753526598, "grad_norm": 0.014983776956796646, "learning_rate": 3.7640443982291497e-07, "loss": 0.0004, "step": 217210 }, { "epoch": 1.776342151531259, "grad_norm": 0.02624954842031002, "learning_rate": 3.7613284284630345e-07, "loss": 0.0007, "step": 217220 }, { "epoch": 1.7764239277098581, "grad_norm": 0.0025302080903202295, "learning_rate": 3.75861340061624e-07, "loss": 0.0005, "step": 217230 }, { "epoch": 1.7765057038884573, "grad_norm": 0.07789475470781326, "learning_rate": 3.755899314744071e-07, "loss": 0.0006, "step": 217240 }, { "epoch": 1.7765874800670565, "grad_norm": 0.03378549590706825, "learning_rate": 3.753186170901829e-07, "loss": 0.0009, "step": 217250 }, { "epoch": 1.7766692562456556, "grad_norm": 0.0032056684140115976, "learning_rate": 3.7504739691447746e-07, "loss": 0.0005, "step": 217260 }, { "epoch": 1.7767510324242548, "grad_norm": 0.002666471293196082, "learning_rate": 3.7477627095281634e-07, "loss": 0.0006, "step": 217270 }, { "epoch": 1.776832808602854, "grad_norm": 0.032761018723249435, "learning_rate": 3.7450523921072123e-07, "loss": 0.0004, "step": 217280 }, { "epoch": 1.7769145847814531, "grad_norm": 0.012096703052520752, "learning_rate": 3.7423430169371445e-07, "loss": 0.0007, "step": 217290 }, { "epoch": 1.7769963609600523, "grad_norm": 0.06942789256572723, "learning_rate": 3.739634584073154e-07, "loss": 0.0007, "step": 217300 }, { "epoch": 1.7770781371386515, "grad_norm": 0.01062056701630354, "learning_rate": 3.7369270935704084e-07, "loss": 0.001, "step": 217310 }, { "epoch": 1.7771599133172507, "grad_norm": 0.057151976972818375, "learning_rate": 3.734220545484063e-07, "loss": 0.0005, "step": 217320 }, { "epoch": 1.7772416894958498, "grad_norm": 0.027256887406110764, "learning_rate": 3.731514939869252e-07, "loss": 0.0007, "step": 217330 }, { "epoch": 1.777323465674449, "grad_norm": 0.01841878332197666, "learning_rate": 3.728810276781092e-07, "loss": 0.0005, "step": 217340 }, { "epoch": 1.7774052418530482, "grad_norm": 0.03366464748978615, "learning_rate": 3.726106556274678e-07, "loss": 0.0006, "step": 217350 }, { "epoch": 1.7774870180316475, "grad_norm": 0.027359887957572937, "learning_rate": 3.7234037784050767e-07, "loss": 0.0006, "step": 217360 }, { "epoch": 1.7775687942102465, "grad_norm": 0.01967841386795044, "learning_rate": 3.7207019432273616e-07, "loss": 0.0005, "step": 217370 }, { "epoch": 1.7776505703888459, "grad_norm": 0.006112389732152224, "learning_rate": 3.7180010507965714e-07, "loss": 0.0002, "step": 217380 }, { "epoch": 1.7777323465674448, "grad_norm": 0.02301167882978916, "learning_rate": 3.7153011011677177e-07, "loss": 0.0007, "step": 217390 }, { "epoch": 1.7778141227460442, "grad_norm": 0.028115147724747658, "learning_rate": 3.7126020943958005e-07, "loss": 0.0006, "step": 217400 }, { "epoch": 1.7778958989246432, "grad_norm": 0.027224784716963768, "learning_rate": 3.709904030535794e-07, "loss": 0.0013, "step": 217410 }, { "epoch": 1.7779776751032426, "grad_norm": 0.029228271916508675, "learning_rate": 3.70720690964268e-07, "loss": 0.0006, "step": 217420 }, { "epoch": 1.7780594512818415, "grad_norm": 0.04843485355377197, "learning_rate": 3.704510731771388e-07, "loss": 0.0007, "step": 217430 }, { "epoch": 1.778141227460441, "grad_norm": 0.1225341409444809, "learning_rate": 3.7018154969768516e-07, "loss": 0.0005, "step": 217440 }, { "epoch": 1.7782230036390398, "grad_norm": 0.057636406272649765, "learning_rate": 3.6991212053139493e-07, "loss": 0.0008, "step": 217450 }, { "epoch": 1.7783047798176392, "grad_norm": 0.028866805136203766, "learning_rate": 3.696427856837598e-07, "loss": 0.0004, "step": 217460 }, { "epoch": 1.7783865559962382, "grad_norm": 0.08189229667186737, "learning_rate": 3.6937354516026427e-07, "loss": 0.0002, "step": 217470 }, { "epoch": 1.7784683321748376, "grad_norm": 0.05114108324050903, "learning_rate": 3.691043989663934e-07, "loss": 0.0009, "step": 217480 }, { "epoch": 1.7785501083534365, "grad_norm": 0.007835679687559605, "learning_rate": 3.688353471076295e-07, "loss": 0.0004, "step": 217490 }, { "epoch": 1.778631884532036, "grad_norm": 0.11212079226970673, "learning_rate": 3.685663895894548e-07, "loss": 0.0007, "step": 217500 }, { "epoch": 1.7787136607106349, "grad_norm": 0.0035988923627883196, "learning_rate": 3.6829752641734775e-07, "loss": 0.0007, "step": 217510 }, { "epoch": 1.7787954368892342, "grad_norm": 0.042530711740255356, "learning_rate": 3.6802875759678446e-07, "loss": 0.0006, "step": 217520 }, { "epoch": 1.7788772130678332, "grad_norm": 0.06977548450231552, "learning_rate": 3.6776008313324007e-07, "loss": 0.0009, "step": 217530 }, { "epoch": 1.7789589892464326, "grad_norm": 0.050941772758960724, "learning_rate": 3.674915030321885e-07, "loss": 0.0003, "step": 217540 }, { "epoch": 1.7790407654250315, "grad_norm": 0.07183236628770828, "learning_rate": 3.6722301729910037e-07, "loss": 0.0006, "step": 217550 }, { "epoch": 1.779122541603631, "grad_norm": 0.024156782776117325, "learning_rate": 3.669546259394463e-07, "loss": 0.0002, "step": 217560 }, { "epoch": 1.7792043177822299, "grad_norm": 0.0012636102037504315, "learning_rate": 3.666863289586903e-07, "loss": 0.0003, "step": 217570 }, { "epoch": 1.7792860939608293, "grad_norm": 0.046073541045188904, "learning_rate": 3.6641812636230124e-07, "loss": 0.0004, "step": 217580 }, { "epoch": 1.7793678701394284, "grad_norm": 0.006723989732563496, "learning_rate": 3.661500181557409e-07, "loss": 0.0004, "step": 217590 }, { "epoch": 1.7794496463180276, "grad_norm": 0.004792458843439817, "learning_rate": 3.6588200434447163e-07, "loss": 0.0004, "step": 217600 }, { "epoch": 1.7795314224966268, "grad_norm": 0.06480412930250168, "learning_rate": 3.656140849339518e-07, "loss": 0.0004, "step": 217610 }, { "epoch": 1.779613198675226, "grad_norm": 0.06851611286401749, "learning_rate": 3.653462599296409e-07, "loss": 0.0006, "step": 217620 }, { "epoch": 1.779694974853825, "grad_norm": 0.006884871982038021, "learning_rate": 3.6507852933699406e-07, "loss": 0.0004, "step": 217630 }, { "epoch": 1.7797767510324243, "grad_norm": 0.0005849375738762319, "learning_rate": 3.6481089316146466e-07, "loss": 0.0005, "step": 217640 }, { "epoch": 1.7798585272110234, "grad_norm": 0.01905251108109951, "learning_rate": 3.645433514085045e-07, "loss": 0.0009, "step": 217650 }, { "epoch": 1.7799403033896226, "grad_norm": 0.012826344929635525, "learning_rate": 3.642759040835653e-07, "loss": 0.0006, "step": 217660 }, { "epoch": 1.7800220795682218, "grad_norm": 0.02053554356098175, "learning_rate": 3.6400855119209323e-07, "loss": 0.0007, "step": 217670 }, { "epoch": 1.780103855746821, "grad_norm": 0.12564034759998322, "learning_rate": 3.637412927395367e-07, "loss": 0.0012, "step": 217680 }, { "epoch": 1.7801856319254201, "grad_norm": 0.037634845823049545, "learning_rate": 3.6347412873133704e-07, "loss": 0.0004, "step": 217690 }, { "epoch": 1.7802674081040193, "grad_norm": 0.019308675080537796, "learning_rate": 3.632070591729386e-07, "loss": 0.0003, "step": 217700 }, { "epoch": 1.7803491842826185, "grad_norm": 0.02608473226428032, "learning_rate": 3.629400840697811e-07, "loss": 0.0004, "step": 217710 }, { "epoch": 1.7804309604612176, "grad_norm": 0.07755958288908005, "learning_rate": 3.6267320342730396e-07, "loss": 0.0003, "step": 217720 }, { "epoch": 1.7805127366398168, "grad_norm": 0.05730582773685455, "learning_rate": 3.624064172509417e-07, "loss": 0.0003, "step": 217730 }, { "epoch": 1.780594512818416, "grad_norm": 0.017780102789402008, "learning_rate": 3.621397255461312e-07, "loss": 0.0014, "step": 217740 }, { "epoch": 1.7806762889970151, "grad_norm": 0.04239258915185928, "learning_rate": 3.6187312831830467e-07, "loss": 0.0008, "step": 217750 }, { "epoch": 1.7807580651756143, "grad_norm": 0.010500837117433548, "learning_rate": 3.616066255728923e-07, "loss": 0.0007, "step": 217760 }, { "epoch": 1.7808398413542135, "grad_norm": 0.0022183640394359827, "learning_rate": 3.6134021731532297e-07, "loss": 0.0005, "step": 217770 }, { "epoch": 1.7809216175328126, "grad_norm": 0.011324713937938213, "learning_rate": 3.610739035510241e-07, "loss": 0.001, "step": 217780 }, { "epoch": 1.781003393711412, "grad_norm": 0.038964394479990005, "learning_rate": 3.608076842854208e-07, "loss": 0.0004, "step": 217790 }, { "epoch": 1.781085169890011, "grad_norm": 0.04641628637909889, "learning_rate": 3.6054155952393643e-07, "loss": 0.0008, "step": 217800 }, { "epoch": 1.7811669460686104, "grad_norm": 0.03193972632288933, "learning_rate": 3.6027552927199004e-07, "loss": 0.0005, "step": 217810 }, { "epoch": 1.7812487222472093, "grad_norm": 0.05339169502258301, "learning_rate": 3.6000959353500397e-07, "loss": 0.0008, "step": 217820 }, { "epoch": 1.7813304984258087, "grad_norm": 0.02543969638645649, "learning_rate": 3.597437523183933e-07, "loss": 0.0007, "step": 217830 }, { "epoch": 1.7814122746044077, "grad_norm": 0.08085696399211884, "learning_rate": 3.594780056275743e-07, "loss": 0.0004, "step": 217840 }, { "epoch": 1.781494050783007, "grad_norm": 0.001735311234369874, "learning_rate": 3.592123534679598e-07, "loss": 0.0005, "step": 217850 }, { "epoch": 1.781575826961606, "grad_norm": 0.035679735243320465, "learning_rate": 3.5894679584496273e-07, "loss": 0.0004, "step": 217860 }, { "epoch": 1.7816576031402054, "grad_norm": 0.021300066262483597, "learning_rate": 3.586813327639915e-07, "loss": 0.0004, "step": 217870 }, { "epoch": 1.7817393793188043, "grad_norm": 0.025522001087665558, "learning_rate": 3.584159642304541e-07, "loss": 0.0005, "step": 217880 }, { "epoch": 1.7818211554974037, "grad_norm": 0.051141947507858276, "learning_rate": 3.581506902497561e-07, "loss": 0.0006, "step": 217890 }, { "epoch": 1.7819029316760027, "grad_norm": 0.02226855233311653, "learning_rate": 3.5788551082730216e-07, "loss": 0.0003, "step": 217900 }, { "epoch": 1.781984707854602, "grad_norm": 0.0509052649140358, "learning_rate": 3.5762042596849345e-07, "loss": 0.0006, "step": 217910 }, { "epoch": 1.782066484033201, "grad_norm": 0.15376418828964233, "learning_rate": 3.5735543567873124e-07, "loss": 0.0011, "step": 217920 }, { "epoch": 1.7821482602118004, "grad_norm": 0.1121075376868248, "learning_rate": 3.570905399634111e-07, "loss": 0.0007, "step": 217930 }, { "epoch": 1.7822300363903993, "grad_norm": 0.01566065475344658, "learning_rate": 3.5682573882793113e-07, "loss": 0.0016, "step": 217940 }, { "epoch": 1.7823118125689987, "grad_norm": 0.03804849460721016, "learning_rate": 3.5656103227768524e-07, "loss": 0.0012, "step": 217950 }, { "epoch": 1.7823935887475977, "grad_norm": 0.024837225675582886, "learning_rate": 3.5629642031806523e-07, "loss": 0.0005, "step": 217960 }, { "epoch": 1.782475364926197, "grad_norm": 0.021715471521019936, "learning_rate": 3.560319029544612e-07, "loss": 0.0011, "step": 217970 }, { "epoch": 1.782557141104796, "grad_norm": 0.015950003638863564, "learning_rate": 3.5576748019226227e-07, "loss": 0.0007, "step": 217980 }, { "epoch": 1.7826389172833954, "grad_norm": 0.0666738897562027, "learning_rate": 3.555031520368557e-07, "loss": 0.0006, "step": 217990 }, { "epoch": 1.7827206934619944, "grad_norm": 0.010248306207358837, "learning_rate": 3.552389184936245e-07, "loss": 0.0013, "step": 218000 }, { "epoch": 1.7828024696405937, "grad_norm": 0.0009392197243869305, "learning_rate": 3.5497477956795145e-07, "loss": 0.0002, "step": 218010 }, { "epoch": 1.782884245819193, "grad_norm": 0.03078552894294262, "learning_rate": 3.547107352652185e-07, "loss": 0.0004, "step": 218020 }, { "epoch": 1.782966021997792, "grad_norm": 0.008426619693636894, "learning_rate": 3.5444678559080457e-07, "loss": 0.0005, "step": 218030 }, { "epoch": 1.7830477981763913, "grad_norm": 0.0023173987865448, "learning_rate": 3.541829305500855e-07, "loss": 0.0002, "step": 218040 }, { "epoch": 1.7831295743549904, "grad_norm": 0.029447225853800774, "learning_rate": 3.5391917014843457e-07, "loss": 0.0004, "step": 218050 }, { "epoch": 1.7832113505335896, "grad_norm": 0.049117784947156906, "learning_rate": 3.536555043912282e-07, "loss": 0.0006, "step": 218060 }, { "epoch": 1.7832931267121888, "grad_norm": 0.005160725675523281, "learning_rate": 3.533919332838359e-07, "loss": 0.0006, "step": 218070 }, { "epoch": 1.783374902890788, "grad_norm": 0.12752914428710938, "learning_rate": 3.531284568316268e-07, "loss": 0.0004, "step": 218080 }, { "epoch": 1.783456679069387, "grad_norm": 0.0765778049826622, "learning_rate": 3.52865075039967e-07, "loss": 0.0009, "step": 218090 }, { "epoch": 1.7835384552479863, "grad_norm": 0.10646718740463257, "learning_rate": 3.526017879142246e-07, "loss": 0.0004, "step": 218100 }, { "epoch": 1.7836202314265854, "grad_norm": 0.04829498752951622, "learning_rate": 3.523385954597608e-07, "loss": 0.0012, "step": 218110 }, { "epoch": 1.7837020076051846, "grad_norm": 0.04894334450364113, "learning_rate": 3.5207549768193795e-07, "loss": 0.0011, "step": 218120 }, { "epoch": 1.7837837837837838, "grad_norm": 0.030568866059184074, "learning_rate": 3.51812494586114e-07, "loss": 0.0011, "step": 218130 }, { "epoch": 1.783865559962383, "grad_norm": 0.03628193214535713, "learning_rate": 3.5154958617764857e-07, "loss": 0.0006, "step": 218140 }, { "epoch": 1.783947336140982, "grad_norm": 0.03862854093313217, "learning_rate": 3.512867724618979e-07, "loss": 0.0003, "step": 218150 }, { "epoch": 1.7840291123195813, "grad_norm": 0.0077231163159012794, "learning_rate": 3.510240534442133e-07, "loss": 0.0004, "step": 218160 }, { "epoch": 1.7841108884981804, "grad_norm": 0.09156803041696548, "learning_rate": 3.507614291299466e-07, "loss": 0.0009, "step": 218170 }, { "epoch": 1.7841926646767796, "grad_norm": 0.04871368780732155, "learning_rate": 3.504988995244496e-07, "loss": 0.0014, "step": 218180 }, { "epoch": 1.7842744408553788, "grad_norm": 0.039403900504112244, "learning_rate": 3.5023646463306917e-07, "loss": 0.0003, "step": 218190 }, { "epoch": 1.784356217033978, "grad_norm": 0.012919052504003048, "learning_rate": 3.4997412446115156e-07, "loss": 0.0004, "step": 218200 }, { "epoch": 1.7844379932125771, "grad_norm": 0.02408089116215706, "learning_rate": 3.497118790140402e-07, "loss": 0.0004, "step": 218210 }, { "epoch": 1.7845197693911765, "grad_norm": 0.03554269298911095, "learning_rate": 3.4944972829707824e-07, "loss": 0.0005, "step": 218220 }, { "epoch": 1.7846015455697755, "grad_norm": 0.021939018741250038, "learning_rate": 3.491876723156057e-07, "loss": 0.0001, "step": 218230 }, { "epoch": 1.7846833217483749, "grad_norm": 0.00485972361639142, "learning_rate": 3.4892571107496056e-07, "loss": 0.0003, "step": 218240 }, { "epoch": 1.7847650979269738, "grad_norm": 0.030759792774915695, "learning_rate": 3.4866384458047853e-07, "loss": 0.0012, "step": 218250 }, { "epoch": 1.7848468741055732, "grad_norm": 0.033381927758455276, "learning_rate": 3.484020728374954e-07, "loss": 0.0003, "step": 218260 }, { "epoch": 1.7849286502841721, "grad_norm": 0.016146082431077957, "learning_rate": 3.481403958513441e-07, "loss": 0.0009, "step": 218270 }, { "epoch": 1.7850104264627715, "grad_norm": 0.03623836860060692, "learning_rate": 3.4787881362735363e-07, "loss": 0.0003, "step": 218280 }, { "epoch": 1.7850922026413705, "grad_norm": 0.03678332641720772, "learning_rate": 3.4761732617085155e-07, "loss": 0.0007, "step": 218290 }, { "epoch": 1.7851739788199699, "grad_norm": 0.021499427035450935, "learning_rate": 3.473559334871679e-07, "loss": 0.0006, "step": 218300 }, { "epoch": 1.7852557549985688, "grad_norm": 0.05898093432188034, "learning_rate": 3.470946355816251e-07, "loss": 0.0005, "step": 218310 }, { "epoch": 1.7853375311771682, "grad_norm": 0.04827253147959709, "learning_rate": 3.468334324595468e-07, "loss": 0.0019, "step": 218320 }, { "epoch": 1.7854193073557671, "grad_norm": 0.05621224269270897, "learning_rate": 3.465723241262531e-07, "loss": 0.0039, "step": 218330 }, { "epoch": 1.7855010835343665, "grad_norm": 0.01056500431150198, "learning_rate": 3.463113105870641e-07, "loss": 0.0007, "step": 218340 }, { "epoch": 1.7855828597129655, "grad_norm": 0.016728326678276062, "learning_rate": 3.4605039184729684e-07, "loss": 0.0002, "step": 218350 }, { "epoch": 1.7856646358915649, "grad_norm": 0.012002475559711456, "learning_rate": 3.457895679122658e-07, "loss": 0.0005, "step": 218360 }, { "epoch": 1.7857464120701638, "grad_norm": 0.016977576538920403, "learning_rate": 3.4552883878728347e-07, "loss": 0.0004, "step": 218370 }, { "epoch": 1.7858281882487632, "grad_norm": 0.009631275199353695, "learning_rate": 3.452682044776634e-07, "loss": 0.0006, "step": 218380 }, { "epoch": 1.7859099644273622, "grad_norm": 0.033848077058792114, "learning_rate": 3.4500766498871407e-07, "loss": 0.0005, "step": 218390 }, { "epoch": 1.7859917406059616, "grad_norm": 0.012226277031004429, "learning_rate": 3.447472203257413e-07, "loss": 0.0003, "step": 218400 }, { "epoch": 1.7860735167845605, "grad_norm": 0.003581266850233078, "learning_rate": 3.444868704940507e-07, "loss": 0.0012, "step": 218410 }, { "epoch": 1.78615529296316, "grad_norm": 0.04580236226320267, "learning_rate": 3.442266154989482e-07, "loss": 0.0001, "step": 218420 }, { "epoch": 1.786237069141759, "grad_norm": 0.0010231006890535355, "learning_rate": 3.4396645534573337e-07, "loss": 0.0001, "step": 218430 }, { "epoch": 1.7863188453203582, "grad_norm": 0.06557943671941757, "learning_rate": 3.43706390039707e-07, "loss": 0.0008, "step": 218440 }, { "epoch": 1.7864006214989574, "grad_norm": 0.0061146896332502365, "learning_rate": 3.4344641958616475e-07, "loss": 0.0009, "step": 218450 }, { "epoch": 1.7864823976775566, "grad_norm": 0.04270866513252258, "learning_rate": 3.431865439904053e-07, "loss": 0.0012, "step": 218460 }, { "epoch": 1.7865641738561557, "grad_norm": 0.06759832054376602, "learning_rate": 3.42926763257721e-07, "loss": 0.0004, "step": 218470 }, { "epoch": 1.786645950034755, "grad_norm": 0.12288182228803635, "learning_rate": 3.426670773934043e-07, "loss": 0.001, "step": 218480 }, { "epoch": 1.786727726213354, "grad_norm": 0.007819633930921555, "learning_rate": 3.424074864027438e-07, "loss": 0.0006, "step": 218490 }, { "epoch": 1.7868095023919532, "grad_norm": 0.010696832090616226, "learning_rate": 3.421479902910296e-07, "loss": 0.0004, "step": 218500 }, { "epoch": 1.7868912785705524, "grad_norm": 0.0012315659550949931, "learning_rate": 3.418885890635476e-07, "loss": 0.0003, "step": 218510 }, { "epoch": 1.7869730547491516, "grad_norm": 0.015372778289020061, "learning_rate": 3.416292827255807e-07, "loss": 0.0018, "step": 218520 }, { "epoch": 1.7870548309277507, "grad_norm": 9.874694660538808e-05, "learning_rate": 3.4137007128241083e-07, "loss": 0.0008, "step": 218530 }, { "epoch": 1.78713660710635, "grad_norm": 0.004223728086799383, "learning_rate": 3.411109547393204e-07, "loss": 0.001, "step": 218540 }, { "epoch": 1.787218383284949, "grad_norm": 0.013072717934846878, "learning_rate": 3.4085193310158695e-07, "loss": 0.0009, "step": 218550 }, { "epoch": 1.7873001594635483, "grad_norm": 0.01989813707768917, "learning_rate": 3.4059300637448614e-07, "loss": 0.0006, "step": 218560 }, { "epoch": 1.7873819356421474, "grad_norm": 0.11352585256099701, "learning_rate": 3.4033417456329266e-07, "loss": 0.0004, "step": 218570 }, { "epoch": 1.7874637118207466, "grad_norm": 0.017409486696124077, "learning_rate": 3.400754376732807e-07, "loss": 0.0004, "step": 218580 }, { "epoch": 1.7875454879993458, "grad_norm": 0.07034614682197571, "learning_rate": 3.398167957097193e-07, "loss": 0.0005, "step": 218590 }, { "epoch": 1.787627264177945, "grad_norm": 0.11962094902992249, "learning_rate": 3.3955824867787814e-07, "loss": 0.0003, "step": 218600 }, { "epoch": 1.787709040356544, "grad_norm": 0.022471880540251732, "learning_rate": 3.392997965830225e-07, "loss": 0.0005, "step": 218610 }, { "epoch": 1.7877908165351433, "grad_norm": 0.04005882143974304, "learning_rate": 3.3904143943041977e-07, "loss": 0.0005, "step": 218620 }, { "epoch": 1.7878725927137424, "grad_norm": 0.01708664745092392, "learning_rate": 3.3878317722533194e-07, "loss": 0.0003, "step": 218630 }, { "epoch": 1.7879543688923416, "grad_norm": 0.030315440148115158, "learning_rate": 3.385250099730186e-07, "loss": 0.0011, "step": 218640 }, { "epoch": 1.788036145070941, "grad_norm": 0.11327879875898361, "learning_rate": 3.3826693767873955e-07, "loss": 0.0088, "step": 218650 }, { "epoch": 1.78811792124954, "grad_norm": 0.06599225103855133, "learning_rate": 3.3800896034775264e-07, "loss": 0.0007, "step": 218660 }, { "epoch": 1.7881996974281393, "grad_norm": 0.04378923773765564, "learning_rate": 3.3775107798531325e-07, "loss": 0.0006, "step": 218670 }, { "epoch": 1.7882814736067383, "grad_norm": 0.00670287199318409, "learning_rate": 3.3749329059667326e-07, "loss": 0.0005, "step": 218680 }, { "epoch": 1.7883632497853377, "grad_norm": 0.04273352026939392, "learning_rate": 3.372355981870845e-07, "loss": 0.0012, "step": 218690 }, { "epoch": 1.7884450259639366, "grad_norm": 0.013976098969578743, "learning_rate": 3.3697800076179733e-07, "loss": 0.0008, "step": 218700 }, { "epoch": 1.788526802142536, "grad_norm": 0.014562558382749557, "learning_rate": 3.367204983260591e-07, "loss": 0.0009, "step": 218710 }, { "epoch": 1.788608578321135, "grad_norm": 0.03600281849503517, "learning_rate": 3.364630908851141e-07, "loss": 0.0006, "step": 218720 }, { "epoch": 1.7886903544997343, "grad_norm": 0.014456599950790405, "learning_rate": 3.362057784442063e-07, "loss": 0.0005, "step": 218730 }, { "epoch": 1.7887721306783333, "grad_norm": 0.009737142361700535, "learning_rate": 3.3594856100857833e-07, "loss": 0.0006, "step": 218740 }, { "epoch": 1.7888539068569327, "grad_norm": 0.06370901316404343, "learning_rate": 3.356914385834703e-07, "loss": 0.0004, "step": 218750 }, { "epoch": 1.7889356830355316, "grad_norm": 0.0007799908053129911, "learning_rate": 3.3543441117411757e-07, "loss": 0.0005, "step": 218760 }, { "epoch": 1.789017459214131, "grad_norm": 0.004372438881546259, "learning_rate": 3.3517747878575755e-07, "loss": 0.0004, "step": 218770 }, { "epoch": 1.78909923539273, "grad_norm": 0.030431099236011505, "learning_rate": 3.349206414236239e-07, "loss": 0.0003, "step": 218780 }, { "epoch": 1.7891810115713294, "grad_norm": 0.02459658868610859, "learning_rate": 3.346638990929496e-07, "loss": 0.0007, "step": 218790 }, { "epoch": 1.7892627877499283, "grad_norm": 0.031341470777988434, "learning_rate": 3.3440725179896327e-07, "loss": 0.0005, "step": 218800 }, { "epoch": 1.7893445639285277, "grad_norm": 0.07729361206293106, "learning_rate": 3.3415069954689403e-07, "loss": 0.0005, "step": 218810 }, { "epoch": 1.7894263401071266, "grad_norm": 0.023830287158489227, "learning_rate": 3.3389424234196665e-07, "loss": 0.0012, "step": 218820 }, { "epoch": 1.789508116285726, "grad_norm": 0.023568255826830864, "learning_rate": 3.336378801894069e-07, "loss": 0.0007, "step": 218830 }, { "epoch": 1.789589892464325, "grad_norm": 0.004477509763091803, "learning_rate": 3.333816130944367e-07, "loss": 0.0004, "step": 218840 }, { "epoch": 1.7896716686429244, "grad_norm": 0.008215200155973434, "learning_rate": 3.331254410622764e-07, "loss": 0.0011, "step": 218850 }, { "epoch": 1.7897534448215235, "grad_norm": 0.05870891362428665, "learning_rate": 3.328693640981434e-07, "loss": 0.0005, "step": 218860 }, { "epoch": 1.7898352210001227, "grad_norm": 0.03478904440999031, "learning_rate": 3.3261338220725694e-07, "loss": 0.0006, "step": 218870 }, { "epoch": 1.7899169971787219, "grad_norm": 0.05904970318078995, "learning_rate": 3.3235749539482886e-07, "loss": 0.0005, "step": 218880 }, { "epoch": 1.789998773357321, "grad_norm": 0.011607005260884762, "learning_rate": 3.321017036660723e-07, "loss": 0.0009, "step": 218890 }, { "epoch": 1.7900805495359202, "grad_norm": 0.013356641866266727, "learning_rate": 3.3184600702619753e-07, "loss": 0.0007, "step": 218900 }, { "epoch": 1.7901623257145194, "grad_norm": 0.01920408196747303, "learning_rate": 3.3159040548041534e-07, "loss": 0.0006, "step": 218910 }, { "epoch": 1.7902441018931186, "grad_norm": 0.03381866589188576, "learning_rate": 3.31334899033931e-07, "loss": 0.0007, "step": 218920 }, { "epoch": 1.7903258780717177, "grad_norm": 0.014988234266638756, "learning_rate": 3.3107948769194933e-07, "loss": 0.0006, "step": 218930 }, { "epoch": 1.790407654250317, "grad_norm": 0.04655390605330467, "learning_rate": 3.308241714596733e-07, "loss": 0.0005, "step": 218940 }, { "epoch": 1.790489430428916, "grad_norm": 0.0010882735950872302, "learning_rate": 3.3056895034230484e-07, "loss": 0.0005, "step": 218950 }, { "epoch": 1.7905712066075152, "grad_norm": 0.016920998692512512, "learning_rate": 3.303138243450421e-07, "loss": 0.0004, "step": 218960 }, { "epoch": 1.7906529827861144, "grad_norm": 0.006381748244166374, "learning_rate": 3.3005879347308256e-07, "loss": 0.0006, "step": 218970 }, { "epoch": 1.7907347589647136, "grad_norm": 0.049073562026023865, "learning_rate": 3.298038577316209e-07, "loss": 0.0005, "step": 218980 }, { "epoch": 1.7908165351433127, "grad_norm": 0.01434137299656868, "learning_rate": 3.2954901712585196e-07, "loss": 0.0006, "step": 218990 }, { "epoch": 1.790898311321912, "grad_norm": 0.0024038664996623993, "learning_rate": 3.2929427166096537e-07, "loss": 0.0006, "step": 219000 }, { "epoch": 1.790980087500511, "grad_norm": 0.07991598546504974, "learning_rate": 3.2903962134215037e-07, "loss": 0.0014, "step": 219010 }, { "epoch": 1.7910618636791102, "grad_norm": 0.03351704403758049, "learning_rate": 3.2878506617459505e-07, "loss": 0.0003, "step": 219020 }, { "epoch": 1.7911436398577094, "grad_norm": 0.014247515238821507, "learning_rate": 3.285306061634852e-07, "loss": 0.0004, "step": 219030 }, { "epoch": 1.7912254160363086, "grad_norm": 0.00495633902028203, "learning_rate": 3.282762413140045e-07, "loss": 0.0006, "step": 219040 }, { "epoch": 1.7913071922149078, "grad_norm": 0.019154896959662437, "learning_rate": 3.280219716313338e-07, "loss": 0.0009, "step": 219050 }, { "epoch": 1.791388968393507, "grad_norm": 0.01755622774362564, "learning_rate": 3.277677971206522e-07, "loss": 0.0014, "step": 219060 }, { "epoch": 1.791470744572106, "grad_norm": 0.05879170075058937, "learning_rate": 3.275137177871396e-07, "loss": 0.0008, "step": 219070 }, { "epoch": 1.7915525207507055, "grad_norm": 0.0971146821975708, "learning_rate": 3.272597336359701e-07, "loss": 0.0007, "step": 219080 }, { "epoch": 1.7916342969293044, "grad_norm": 0.021151460707187653, "learning_rate": 3.2700584467231787e-07, "loss": 0.0009, "step": 219090 }, { "epoch": 1.7917160731079038, "grad_norm": 0.009885299950838089, "learning_rate": 3.26752050901355e-07, "loss": 0.0004, "step": 219100 }, { "epoch": 1.7917978492865028, "grad_norm": 0.036937445402145386, "learning_rate": 3.2649835232825224e-07, "loss": 0.0004, "step": 219110 }, { "epoch": 1.7918796254651022, "grad_norm": 0.0060908375307917595, "learning_rate": 3.262447489581766e-07, "loss": 0.0009, "step": 219120 }, { "epoch": 1.791961401643701, "grad_norm": 0.016567086800932884, "learning_rate": 3.2599124079629395e-07, "loss": 0.0005, "step": 219130 }, { "epoch": 1.7920431778223005, "grad_norm": 0.09700672328472137, "learning_rate": 3.2573782784776855e-07, "loss": 0.0008, "step": 219140 }, { "epoch": 1.7921249540008994, "grad_norm": 0.0769948810338974, "learning_rate": 3.254845101177634e-07, "loss": 0.0004, "step": 219150 }, { "epoch": 1.7922067301794988, "grad_norm": 0.0030950545333325863, "learning_rate": 3.2523128761143885e-07, "loss": 0.0007, "step": 219160 }, { "epoch": 1.7922885063580978, "grad_norm": 0.06850289553403854, "learning_rate": 3.24978160333953e-07, "loss": 0.0008, "step": 219170 }, { "epoch": 1.7923702825366972, "grad_norm": 0.10142664611339569, "learning_rate": 3.2472512829046065e-07, "loss": 0.0004, "step": 219180 }, { "epoch": 1.7924520587152961, "grad_norm": 0.027909137308597565, "learning_rate": 3.2447219148611877e-07, "loss": 0.0003, "step": 219190 }, { "epoch": 1.7925338348938955, "grad_norm": 0.062226615846157074, "learning_rate": 3.2421934992607875e-07, "loss": 0.0013, "step": 219200 }, { "epoch": 1.7926156110724945, "grad_norm": 0.0008066861773841083, "learning_rate": 3.239666036154915e-07, "loss": 0.0004, "step": 219210 }, { "epoch": 1.7926973872510938, "grad_norm": 0.07968220859766006, "learning_rate": 3.2371395255950456e-07, "loss": 0.0009, "step": 219220 }, { "epoch": 1.7927791634296928, "grad_norm": 0.008472763933241367, "learning_rate": 3.2346139676326715e-07, "loss": 0.0004, "step": 219230 }, { "epoch": 1.7928609396082922, "grad_norm": 0.023879004642367363, "learning_rate": 3.2320893623192127e-07, "loss": 0.0009, "step": 219240 }, { "epoch": 1.7929427157868911, "grad_norm": 0.03544822707772255, "learning_rate": 3.2295657097061063e-07, "loss": 0.0004, "step": 219250 }, { "epoch": 1.7930244919654905, "grad_norm": 0.03980501368641853, "learning_rate": 3.2270430098447604e-07, "loss": 0.0004, "step": 219260 }, { "epoch": 1.7931062681440895, "grad_norm": 0.04087410494685173, "learning_rate": 3.224521262786573e-07, "loss": 0.001, "step": 219270 }, { "epoch": 1.7931880443226889, "grad_norm": 0.03540690243244171, "learning_rate": 3.222000468582909e-07, "loss": 0.0002, "step": 219280 }, { "epoch": 1.793269820501288, "grad_norm": 0.05384340137243271, "learning_rate": 3.219480627285121e-07, "loss": 0.0005, "step": 219290 }, { "epoch": 1.7933515966798872, "grad_norm": 0.05706539750099182, "learning_rate": 3.216961738944524e-07, "loss": 0.0015, "step": 219300 }, { "epoch": 1.7934333728584864, "grad_norm": 0.013003986328840256, "learning_rate": 3.214443803612455e-07, "loss": 0.0006, "step": 219310 }, { "epoch": 1.7935151490370855, "grad_norm": 0.047473687678575516, "learning_rate": 3.211926821340194e-07, "loss": 0.0004, "step": 219320 }, { "epoch": 1.7935969252156847, "grad_norm": 0.033667586743831635, "learning_rate": 3.2094107921790183e-07, "loss": 0.0005, "step": 219330 }, { "epoch": 1.7936787013942839, "grad_norm": 0.07704804837703705, "learning_rate": 3.2068957161801697e-07, "loss": 0.0008, "step": 219340 }, { "epoch": 1.793760477572883, "grad_norm": 0.0032954728230834007, "learning_rate": 3.204381593394901e-07, "loss": 0.0002, "step": 219350 }, { "epoch": 1.7938422537514822, "grad_norm": 0.14517797529697418, "learning_rate": 3.201868423874416e-07, "loss": 0.0011, "step": 219360 }, { "epoch": 1.7939240299300814, "grad_norm": 0.024782665073871613, "learning_rate": 3.199356207669901e-07, "loss": 0.0002, "step": 219370 }, { "epoch": 1.7940058061086805, "grad_norm": 0.038974206894636154, "learning_rate": 3.1968449448325444e-07, "loss": 0.0007, "step": 219380 }, { "epoch": 1.7940875822872797, "grad_norm": 0.03711535036563873, "learning_rate": 3.1943346354134983e-07, "loss": 0.0004, "step": 219390 }, { "epoch": 1.7941693584658789, "grad_norm": 0.006365160923451185, "learning_rate": 3.191825279463906e-07, "loss": 0.0005, "step": 219400 }, { "epoch": 1.794251134644478, "grad_norm": 0.005761720705777407, "learning_rate": 3.1893168770348817e-07, "loss": 0.0007, "step": 219410 }, { "epoch": 1.7943329108230772, "grad_norm": 0.0033691113349050283, "learning_rate": 3.186809428177512e-07, "loss": 0.0006, "step": 219420 }, { "epoch": 1.7944146870016764, "grad_norm": 0.002866148017346859, "learning_rate": 3.1843029329428956e-07, "loss": 0.0005, "step": 219430 }, { "epoch": 1.7944964631802756, "grad_norm": 0.040414731949567795, "learning_rate": 3.18179739138208e-07, "loss": 0.0003, "step": 219440 }, { "epoch": 1.7945782393588747, "grad_norm": 0.002274208702147007, "learning_rate": 3.179292803546108e-07, "loss": 0.0004, "step": 219450 }, { "epoch": 1.794660015537474, "grad_norm": 0.08072289824485779, "learning_rate": 3.176789169485994e-07, "loss": 0.0008, "step": 219460 }, { "epoch": 1.794741791716073, "grad_norm": 0.08568271994590759, "learning_rate": 3.174286489252759e-07, "loss": 0.0005, "step": 219470 }, { "epoch": 1.7948235678946722, "grad_norm": 0.04020896181464195, "learning_rate": 3.171784762897362e-07, "loss": 0.001, "step": 219480 }, { "epoch": 1.7949053440732714, "grad_norm": 0.07231288403272629, "learning_rate": 3.1692839904707675e-07, "loss": 0.0007, "step": 219490 }, { "epoch": 1.7949871202518706, "grad_norm": 0.03469209372997284, "learning_rate": 3.1667841720239236e-07, "loss": 0.0008, "step": 219500 }, { "epoch": 1.79506889643047, "grad_norm": 0.07695210725069046, "learning_rate": 3.164285307607762e-07, "loss": 0.0005, "step": 219510 }, { "epoch": 1.795150672609069, "grad_norm": 0.010198927484452724, "learning_rate": 3.161787397273175e-07, "loss": 0.0013, "step": 219520 }, { "epoch": 1.7952324487876683, "grad_norm": 0.028073744848370552, "learning_rate": 3.15929044107105e-07, "loss": 0.0009, "step": 219530 }, { "epoch": 1.7953142249662672, "grad_norm": 0.047243040055036545, "learning_rate": 3.1567944390522466e-07, "loss": 0.0012, "step": 219540 }, { "epoch": 1.7953960011448666, "grad_norm": 0.05639428272843361, "learning_rate": 3.1542993912676235e-07, "loss": 0.0009, "step": 219550 }, { "epoch": 1.7954777773234656, "grad_norm": 0.0026836141478270292, "learning_rate": 3.1518052977680016e-07, "loss": 0.0008, "step": 219560 }, { "epoch": 1.795559553502065, "grad_norm": 0.03476203233003616, "learning_rate": 3.1493121586041897e-07, "loss": 0.0011, "step": 219570 }, { "epoch": 1.795641329680664, "grad_norm": 0.016280148178339005, "learning_rate": 3.146819973826959e-07, "loss": 0.0007, "step": 219580 }, { "epoch": 1.7957231058592633, "grad_norm": 0.006529605016112328, "learning_rate": 3.1443287434871016e-07, "loss": 0.0004, "step": 219590 }, { "epoch": 1.7958048820378623, "grad_norm": 0.0029649848584085703, "learning_rate": 3.14183846763535e-07, "loss": 0.0003, "step": 219600 }, { "epoch": 1.7958866582164617, "grad_norm": 0.008911835961043835, "learning_rate": 3.13934914632244e-07, "loss": 0.0005, "step": 219610 }, { "epoch": 1.7959684343950606, "grad_norm": 0.02074352465569973, "learning_rate": 3.1368607795990655e-07, "loss": 0.001, "step": 219620 }, { "epoch": 1.79605021057366, "grad_norm": 0.11954860389232635, "learning_rate": 3.134373367515936e-07, "loss": 0.0005, "step": 219630 }, { "epoch": 1.796131986752259, "grad_norm": 0.028108173981308937, "learning_rate": 3.1318869101237216e-07, "loss": 0.0005, "step": 219640 }, { "epoch": 1.7962137629308583, "grad_norm": 0.03146040812134743, "learning_rate": 3.129401407473065e-07, "loss": 0.0008, "step": 219650 }, { "epoch": 1.7962955391094573, "grad_norm": 0.02858981490135193, "learning_rate": 3.1269168596145873e-07, "loss": 0.0005, "step": 219660 }, { "epoch": 1.7963773152880567, "grad_norm": 0.06839139759540558, "learning_rate": 3.124433266598925e-07, "loss": 0.0016, "step": 219670 }, { "epoch": 1.7964590914666556, "grad_norm": 0.037010982632637024, "learning_rate": 3.121950628476661e-07, "loss": 0.0005, "step": 219680 }, { "epoch": 1.796540867645255, "grad_norm": 0.038067542016506195, "learning_rate": 3.119468945298365e-07, "loss": 0.0002, "step": 219690 }, { "epoch": 1.796622643823854, "grad_norm": 0.007430005352944136, "learning_rate": 3.116988217114586e-07, "loss": 0.0003, "step": 219700 }, { "epoch": 1.7967044200024533, "grad_norm": 0.03672502562403679, "learning_rate": 3.114508443975883e-07, "loss": 0.0007, "step": 219710 }, { "epoch": 1.7967861961810525, "grad_norm": 0.008291876874864101, "learning_rate": 3.1120296259327387e-07, "loss": 0.0006, "step": 219720 }, { "epoch": 1.7968679723596517, "grad_norm": 0.013598007149994373, "learning_rate": 3.109551763035673e-07, "loss": 0.0004, "step": 219730 }, { "epoch": 1.7969497485382508, "grad_norm": 0.004489898215979338, "learning_rate": 3.107074855335135e-07, "loss": 0.0005, "step": 219740 }, { "epoch": 1.79703152471685, "grad_norm": 0.0007171996403485537, "learning_rate": 3.1045989028816115e-07, "loss": 0.0005, "step": 219750 }, { "epoch": 1.7971133008954492, "grad_norm": 0.06060134246945381, "learning_rate": 3.1021239057255294e-07, "loss": 0.0006, "step": 219760 }, { "epoch": 1.7971950770740484, "grad_norm": 0.029229365289211273, "learning_rate": 3.099649863917298e-07, "loss": 0.0009, "step": 219770 }, { "epoch": 1.7972768532526475, "grad_norm": 0.006422697100788355, "learning_rate": 3.097176777507321e-07, "loss": 0.0006, "step": 219780 }, { "epoch": 1.7973586294312467, "grad_norm": 0.04035434499382973, "learning_rate": 3.094704646545976e-07, "loss": 0.001, "step": 219790 }, { "epoch": 1.7974404056098459, "grad_norm": 0.026944521814584732, "learning_rate": 3.0922334710836334e-07, "loss": 0.0009, "step": 219800 }, { "epoch": 1.797522181788445, "grad_norm": 0.04368811100721359, "learning_rate": 3.0897632511706187e-07, "loss": 0.0005, "step": 219810 }, { "epoch": 1.7976039579670442, "grad_norm": 0.0026093863416463137, "learning_rate": 3.087293986857248e-07, "loss": 0.0007, "step": 219820 }, { "epoch": 1.7976857341456434, "grad_norm": 0.025506947189569473, "learning_rate": 3.084825678193848e-07, "loss": 0.0006, "step": 219830 }, { "epoch": 1.7977675103242425, "grad_norm": 0.05468756705522537, "learning_rate": 3.0823583252306776e-07, "loss": 0.0004, "step": 219840 }, { "epoch": 1.7978492865028417, "grad_norm": 0.008196115493774414, "learning_rate": 3.079891928018003e-07, "loss": 0.001, "step": 219850 }, { "epoch": 1.7979310626814409, "grad_norm": 0.0563795380294323, "learning_rate": 3.077426486606061e-07, "loss": 0.0015, "step": 219860 }, { "epoch": 1.79801283886004, "grad_norm": 0.032547518610954285, "learning_rate": 3.07496200104509e-07, "loss": 0.0004, "step": 219870 }, { "epoch": 1.7980946150386392, "grad_norm": 0.11354563385248184, "learning_rate": 3.0724984713852824e-07, "loss": 0.0006, "step": 219880 }, { "epoch": 1.7981763912172384, "grad_norm": 0.01607944443821907, "learning_rate": 3.070035897676832e-07, "loss": 0.0009, "step": 219890 }, { "epoch": 1.7982581673958375, "grad_norm": 0.009980211965739727, "learning_rate": 3.067574279969887e-07, "loss": 0.0004, "step": 219900 }, { "epoch": 1.7983399435744367, "grad_norm": 0.016565250232815742, "learning_rate": 3.065113618314608e-07, "loss": 0.0006, "step": 219910 }, { "epoch": 1.7984217197530359, "grad_norm": 0.03956102207303047, "learning_rate": 3.062653912761115e-07, "loss": 0.0008, "step": 219920 }, { "epoch": 1.798503495931635, "grad_norm": 0.007920097559690475, "learning_rate": 3.060195163359514e-07, "loss": 0.0007, "step": 219930 }, { "epoch": 1.7985852721102344, "grad_norm": 0.05239963158965111, "learning_rate": 3.0577373701598856e-07, "loss": 0.0005, "step": 219940 }, { "epoch": 1.7986670482888334, "grad_norm": 0.009790288284420967, "learning_rate": 3.0552805332123236e-07, "loss": 0.001, "step": 219950 }, { "epoch": 1.7987488244674328, "grad_norm": 0.05606867000460625, "learning_rate": 3.052824652566844e-07, "loss": 0.0008, "step": 219960 }, { "epoch": 1.7988306006460317, "grad_norm": 0.001431085984222591, "learning_rate": 3.0503697282734834e-07, "loss": 0.0004, "step": 219970 }, { "epoch": 1.7989123768246311, "grad_norm": 0.006614912301301956, "learning_rate": 3.047915760382253e-07, "loss": 0.0006, "step": 219980 }, { "epoch": 1.79899415300323, "grad_norm": 0.0663016140460968, "learning_rate": 3.0454627489431453e-07, "loss": 0.0008, "step": 219990 }, { "epoch": 1.7990759291818295, "grad_norm": 0.046420346945524216, "learning_rate": 3.0430106940061324e-07, "loss": 0.0005, "step": 220000 }, { "epoch": 1.7991577053604284, "grad_norm": 0.022419391199946404, "learning_rate": 3.040559595621156e-07, "loss": 0.0004, "step": 220010 }, { "epoch": 1.7992394815390278, "grad_norm": 0.04419517517089844, "learning_rate": 3.038109453838145e-07, "loss": 0.0005, "step": 220020 }, { "epoch": 1.7993212577176267, "grad_norm": 0.08957263827323914, "learning_rate": 3.035660268707025e-07, "loss": 0.0006, "step": 220030 }, { "epoch": 1.7994030338962261, "grad_norm": 0.0046909223310649395, "learning_rate": 3.033212040277683e-07, "loss": 0.0002, "step": 220040 }, { "epoch": 1.799484810074825, "grad_norm": 0.09113872051239014, "learning_rate": 3.0307647685999807e-07, "loss": 0.0007, "step": 220050 }, { "epoch": 1.7995665862534245, "grad_norm": 0.012146303430199623, "learning_rate": 3.0283184537237775e-07, "loss": 0.0015, "step": 220060 }, { "epoch": 1.7996483624320234, "grad_norm": 0.03613055869936943, "learning_rate": 3.0258730956989223e-07, "loss": 0.0005, "step": 220070 }, { "epoch": 1.7997301386106228, "grad_norm": 0.01873071864247322, "learning_rate": 3.023428694575203e-07, "loss": 0.001, "step": 220080 }, { "epoch": 1.7998119147892218, "grad_norm": 0.05324595049023628, "learning_rate": 3.0209852504024305e-07, "loss": 0.0007, "step": 220090 }, { "epoch": 1.7998936909678211, "grad_norm": 0.0030825017020106316, "learning_rate": 3.018542763230364e-07, "loss": 0.0003, "step": 220100 }, { "epoch": 1.79997546714642, "grad_norm": 0.0215693898499012, "learning_rate": 3.016101233108781e-07, "loss": 0.0003, "step": 220110 }, { "epoch": 1.8000572433250195, "grad_norm": 0.05006064474582672, "learning_rate": 3.013660660087403e-07, "loss": 0.0008, "step": 220120 }, { "epoch": 1.8001390195036184, "grad_norm": 0.059224553406238556, "learning_rate": 3.011221044215951e-07, "loss": 0.0006, "step": 220130 }, { "epoch": 1.8002207956822178, "grad_norm": 0.07969417423009872, "learning_rate": 3.008782385544112e-07, "loss": 0.0005, "step": 220140 }, { "epoch": 1.800302571860817, "grad_norm": 0.016552770510315895, "learning_rate": 3.0063446841215815e-07, "loss": 0.0004, "step": 220150 }, { "epoch": 1.8003843480394162, "grad_norm": 0.0011688394006341696, "learning_rate": 3.0039079399980076e-07, "loss": 0.0006, "step": 220160 }, { "epoch": 1.8004661242180153, "grad_norm": 0.0712483823299408, "learning_rate": 3.001472153223028e-07, "loss": 0.0006, "step": 220170 }, { "epoch": 1.8005479003966145, "grad_norm": 0.027324145659804344, "learning_rate": 2.99903732384626e-07, "loss": 0.0007, "step": 220180 }, { "epoch": 1.8006296765752137, "grad_norm": 0.0026099737733602524, "learning_rate": 2.9966034519173184e-07, "loss": 0.0003, "step": 220190 }, { "epoch": 1.8007114527538128, "grad_norm": 0.0008802400552667677, "learning_rate": 2.9941705374857635e-07, "loss": 0.0003, "step": 220200 }, { "epoch": 1.800793228932412, "grad_norm": 0.02421431988477707, "learning_rate": 2.991738580601161e-07, "loss": 0.0009, "step": 220210 }, { "epoch": 1.8008750051110112, "grad_norm": 0.14579711854457855, "learning_rate": 2.9893075813130444e-07, "loss": 0.0012, "step": 220220 }, { "epoch": 1.8009567812896103, "grad_norm": 0.05278347060084343, "learning_rate": 2.986877539670957e-07, "loss": 0.0006, "step": 220230 }, { "epoch": 1.8010385574682095, "grad_norm": 0.1066022589802742, "learning_rate": 2.984448455724387e-07, "loss": 0.0007, "step": 220240 }, { "epoch": 1.8011203336468087, "grad_norm": 0.011205418966710567, "learning_rate": 2.982020329522817e-07, "loss": 0.0008, "step": 220250 }, { "epoch": 1.8012021098254078, "grad_norm": 0.013645210303366184, "learning_rate": 2.9795931611157124e-07, "loss": 0.0005, "step": 220260 }, { "epoch": 1.801283886004007, "grad_norm": 0.0072691417299211025, "learning_rate": 2.977166950552507e-07, "loss": 0.0003, "step": 220270 }, { "epoch": 1.8013656621826062, "grad_norm": 0.016774341464042664, "learning_rate": 2.974741697882638e-07, "loss": 0.0002, "step": 220280 }, { "epoch": 1.8014474383612054, "grad_norm": 0.07335600256919861, "learning_rate": 2.9723174031555056e-07, "loss": 0.0005, "step": 220290 }, { "epoch": 1.8015292145398045, "grad_norm": 0.05140894651412964, "learning_rate": 2.969894066420498e-07, "loss": 0.0006, "step": 220300 }, { "epoch": 1.8016109907184037, "grad_norm": 0.028888734057545662, "learning_rate": 2.967471687726975e-07, "loss": 0.0003, "step": 220310 }, { "epoch": 1.8016927668970029, "grad_norm": 0.05394211784005165, "learning_rate": 2.965050267124281e-07, "loss": 0.0007, "step": 220320 }, { "epoch": 1.801774543075602, "grad_norm": 0.003895958885550499, "learning_rate": 2.962629804661743e-07, "loss": 0.0003, "step": 220330 }, { "epoch": 1.8018563192542012, "grad_norm": 0.0386381670832634, "learning_rate": 2.9602103003886775e-07, "loss": 0.0005, "step": 220340 }, { "epoch": 1.8019380954328006, "grad_norm": 0.0030621143523603678, "learning_rate": 2.95779175435435e-07, "loss": 0.0006, "step": 220350 }, { "epoch": 1.8020198716113995, "grad_norm": 0.0044906833209097385, "learning_rate": 2.9553741666080493e-07, "loss": 0.0005, "step": 220360 }, { "epoch": 1.802101647789999, "grad_norm": 0.05735335126519203, "learning_rate": 2.9529575371990193e-07, "loss": 0.0009, "step": 220370 }, { "epoch": 1.8021834239685979, "grad_norm": 0.012053114362061024, "learning_rate": 2.950541866176487e-07, "loss": 0.0023, "step": 220380 }, { "epoch": 1.8022652001471973, "grad_norm": 0.0028140079230070114, "learning_rate": 2.948127153589653e-07, "loss": 0.0006, "step": 220390 }, { "epoch": 1.8023469763257962, "grad_norm": 0.004267254378646612, "learning_rate": 2.945713399487721e-07, "loss": 0.0006, "step": 220400 }, { "epoch": 1.8024287525043956, "grad_norm": 0.07862354069948196, "learning_rate": 2.943300603919852e-07, "loss": 0.001, "step": 220410 }, { "epoch": 1.8025105286829946, "grad_norm": 0.020259996876120567, "learning_rate": 2.9408887669352014e-07, "loss": 0.0004, "step": 220420 }, { "epoch": 1.802592304861594, "grad_norm": 0.03842768445611, "learning_rate": 2.9384778885829025e-07, "loss": 0.0007, "step": 220430 }, { "epoch": 1.8026740810401929, "grad_norm": 0.003680616384372115, "learning_rate": 2.9360679689120594e-07, "loss": 0.0003, "step": 220440 }, { "epoch": 1.8027558572187923, "grad_norm": 0.07255673408508301, "learning_rate": 2.9336590079717674e-07, "loss": 0.0007, "step": 220450 }, { "epoch": 1.8028376333973912, "grad_norm": 0.0030144082847982645, "learning_rate": 2.9312510058110975e-07, "loss": 0.0003, "step": 220460 }, { "epoch": 1.8029194095759906, "grad_norm": 0.007062017451971769, "learning_rate": 2.928843962479094e-07, "loss": 0.0003, "step": 220470 }, { "epoch": 1.8030011857545896, "grad_norm": 0.03120657242834568, "learning_rate": 2.9264378780248127e-07, "loss": 0.0015, "step": 220480 }, { "epoch": 1.803082961933189, "grad_norm": 0.07558222115039825, "learning_rate": 2.9240327524972524e-07, "loss": 0.0004, "step": 220490 }, { "epoch": 1.803164738111788, "grad_norm": 0.025729315355420113, "learning_rate": 2.921628585945413e-07, "loss": 0.0009, "step": 220500 }, { "epoch": 1.8032465142903873, "grad_norm": 0.027496490627527237, "learning_rate": 2.919225378418256e-07, "loss": 0.0003, "step": 220510 }, { "epoch": 1.8033282904689862, "grad_norm": 0.0027342550456523895, "learning_rate": 2.9168231299647576e-07, "loss": 0.0002, "step": 220520 }, { "epoch": 1.8034100666475856, "grad_norm": 0.00229770690202713, "learning_rate": 2.914421840633841e-07, "loss": 0.0002, "step": 220530 }, { "epoch": 1.8034918428261846, "grad_norm": 0.04409040883183479, "learning_rate": 2.912021510474422e-07, "loss": 0.0012, "step": 220540 }, { "epoch": 1.803573619004784, "grad_norm": 0.013639931567013264, "learning_rate": 2.909622139535406e-07, "loss": 0.0006, "step": 220550 }, { "epoch": 1.803655395183383, "grad_norm": 0.000635073403827846, "learning_rate": 2.90722372786566e-07, "loss": 0.0005, "step": 220560 }, { "epoch": 1.8037371713619823, "grad_norm": 0.02621871791779995, "learning_rate": 2.90482627551405e-07, "loss": 0.0006, "step": 220570 }, { "epoch": 1.8038189475405815, "grad_norm": 0.03655731678009033, "learning_rate": 2.9024297825294033e-07, "loss": 0.0004, "step": 220580 }, { "epoch": 1.8039007237191806, "grad_norm": 0.03483748435974121, "learning_rate": 2.9000342489605425e-07, "loss": 0.0007, "step": 220590 }, { "epoch": 1.8039824998977798, "grad_norm": 0.01743319258093834, "learning_rate": 2.897639674856273e-07, "loss": 0.0015, "step": 220600 }, { "epoch": 1.804064276076379, "grad_norm": 0.015585883520543575, "learning_rate": 2.895246060265372e-07, "loss": 0.0006, "step": 220610 }, { "epoch": 1.8041460522549781, "grad_norm": 0.03026878461241722, "learning_rate": 2.8928534052365954e-07, "loss": 0.0005, "step": 220620 }, { "epoch": 1.8042278284335773, "grad_norm": 0.0487794354557991, "learning_rate": 2.8904617098186814e-07, "loss": 0.0007, "step": 220630 }, { "epoch": 1.8043096046121765, "grad_norm": 0.14616495370864868, "learning_rate": 2.8880709740603584e-07, "loss": 0.0007, "step": 220640 }, { "epoch": 1.8043913807907757, "grad_norm": 0.045542359352111816, "learning_rate": 2.885681198010326e-07, "loss": 0.0014, "step": 220650 }, { "epoch": 1.8044731569693748, "grad_norm": 0.047267500311136246, "learning_rate": 2.883292381717262e-07, "loss": 0.0001, "step": 220660 }, { "epoch": 1.804554933147974, "grad_norm": 0.00864691473543644, "learning_rate": 2.8809045252298327e-07, "loss": 0.0006, "step": 220670 }, { "epoch": 1.8046367093265732, "grad_norm": 0.0957065150141716, "learning_rate": 2.878517628596678e-07, "loss": 0.0004, "step": 220680 }, { "epoch": 1.8047184855051723, "grad_norm": 0.04611264541745186, "learning_rate": 2.876131691866424e-07, "loss": 0.0005, "step": 220690 }, { "epoch": 1.8048002616837715, "grad_norm": 0.039890773594379425, "learning_rate": 2.873746715087672e-07, "loss": 0.0008, "step": 220700 }, { "epoch": 1.8048820378623707, "grad_norm": 0.045404672622680664, "learning_rate": 2.871362698308994e-07, "loss": 0.0006, "step": 220710 }, { "epoch": 1.8049638140409698, "grad_norm": 0.05437851324677467, "learning_rate": 2.868979641578973e-07, "loss": 0.0006, "step": 220720 }, { "epoch": 1.805045590219569, "grad_norm": 0.011995140463113785, "learning_rate": 2.866597544946148e-07, "loss": 0.0006, "step": 220730 }, { "epoch": 1.8051273663981682, "grad_norm": 0.007843240164220333, "learning_rate": 2.864216408459047e-07, "loss": 0.0003, "step": 220740 }, { "epoch": 1.8052091425767673, "grad_norm": 0.04384930804371834, "learning_rate": 2.8618362321661597e-07, "loss": 0.0018, "step": 220750 }, { "epoch": 1.8052909187553665, "grad_norm": 0.0017302448395639658, "learning_rate": 2.859457016115996e-07, "loss": 0.0003, "step": 220760 }, { "epoch": 1.8053726949339657, "grad_norm": 0.07540151476860046, "learning_rate": 2.857078760357007e-07, "loss": 0.0008, "step": 220770 }, { "epoch": 1.805454471112565, "grad_norm": 0.030873198062181473, "learning_rate": 2.854701464937643e-07, "loss": 0.0013, "step": 220780 }, { "epoch": 1.805536247291164, "grad_norm": 0.015127835795283318, "learning_rate": 2.85232512990633e-07, "loss": 0.0006, "step": 220790 }, { "epoch": 1.8056180234697634, "grad_norm": 0.04882647469639778, "learning_rate": 2.8499497553114816e-07, "loss": 0.0007, "step": 220800 }, { "epoch": 1.8056997996483624, "grad_norm": 0.04048720374703407, "learning_rate": 2.8475753412014804e-07, "loss": 0.0004, "step": 220810 }, { "epoch": 1.8057815758269617, "grad_norm": 0.022032838314771652, "learning_rate": 2.845201887624699e-07, "loss": 0.0005, "step": 220820 }, { "epoch": 1.8058633520055607, "grad_norm": 0.025458352640271187, "learning_rate": 2.8428293946294763e-07, "loss": 0.0003, "step": 220830 }, { "epoch": 1.80594512818416, "grad_norm": 0.0065657030791044235, "learning_rate": 2.8404578622641563e-07, "loss": 0.0006, "step": 220840 }, { "epoch": 1.806026904362759, "grad_norm": 0.036499496549367905, "learning_rate": 2.8380872905770464e-07, "loss": 0.0006, "step": 220850 }, { "epoch": 1.8061086805413584, "grad_norm": 0.008513228967785835, "learning_rate": 2.8357176796164285e-07, "loss": 0.0004, "step": 220860 }, { "epoch": 1.8061904567199574, "grad_norm": 0.05345775559544563, "learning_rate": 2.833349029430571e-07, "loss": 0.0006, "step": 220870 }, { "epoch": 1.8062722328985568, "grad_norm": 0.07680082321166992, "learning_rate": 2.8309813400677456e-07, "loss": 0.0006, "step": 220880 }, { "epoch": 1.8063540090771557, "grad_norm": 0.012041527777910233, "learning_rate": 2.8286146115761635e-07, "loss": 0.0006, "step": 220890 }, { "epoch": 1.806435785255755, "grad_norm": 0.030189184471964836, "learning_rate": 2.8262488440040535e-07, "loss": 0.0005, "step": 220900 }, { "epoch": 1.806517561434354, "grad_norm": 0.0008802986703813076, "learning_rate": 2.8238840373995936e-07, "loss": 0.0007, "step": 220910 }, { "epoch": 1.8065993376129534, "grad_norm": 0.05658363178372383, "learning_rate": 2.8215201918109616e-07, "loss": 0.0009, "step": 220920 }, { "epoch": 1.8066811137915524, "grad_norm": 0.004646191839128733, "learning_rate": 2.8191573072863134e-07, "loss": 0.0006, "step": 220930 }, { "epoch": 1.8067628899701518, "grad_norm": 0.3059048056602478, "learning_rate": 2.8167953838737827e-07, "loss": 0.0011, "step": 220940 }, { "epoch": 1.8068446661487507, "grad_norm": 0.06223892420530319, "learning_rate": 2.8144344216214757e-07, "loss": 0.0008, "step": 220950 }, { "epoch": 1.8069264423273501, "grad_norm": 0.019497625529766083, "learning_rate": 2.8120744205774986e-07, "loss": 0.001, "step": 220960 }, { "epoch": 1.807008218505949, "grad_norm": 0.025805380195379257, "learning_rate": 2.8097153807899236e-07, "loss": 0.001, "step": 220970 }, { "epoch": 1.8070899946845485, "grad_norm": 0.028794053941965103, "learning_rate": 2.8073573023068067e-07, "loss": 0.0006, "step": 220980 }, { "epoch": 1.8071717708631476, "grad_norm": 0.073935866355896, "learning_rate": 2.805000185176171e-07, "loss": 0.001, "step": 220990 }, { "epoch": 1.8072535470417468, "grad_norm": 0.022900421172380447, "learning_rate": 2.80264402944605e-07, "loss": 0.0009, "step": 221000 }, { "epoch": 1.807335323220346, "grad_norm": 0.013434396125376225, "learning_rate": 2.8002888351644385e-07, "loss": 0.0005, "step": 221010 }, { "epoch": 1.8074170993989451, "grad_norm": 0.03841765224933624, "learning_rate": 2.7979346023793044e-07, "loss": 0.0005, "step": 221020 }, { "epoch": 1.8074988755775443, "grad_norm": 0.012399381957948208, "learning_rate": 2.795581331138614e-07, "loss": 0.0008, "step": 221030 }, { "epoch": 1.8075806517561435, "grad_norm": 0.01873626746237278, "learning_rate": 2.7932290214903013e-07, "loss": 0.0007, "step": 221040 }, { "epoch": 1.8076624279347426, "grad_norm": 0.11219997704029083, "learning_rate": 2.7908776734822784e-07, "loss": 0.001, "step": 221050 }, { "epoch": 1.8077442041133418, "grad_norm": 0.04147449508309364, "learning_rate": 2.788527287162457e-07, "loss": 0.0012, "step": 221060 }, { "epoch": 1.807825980291941, "grad_norm": 0.017863698303699493, "learning_rate": 2.786177862578704e-07, "loss": 0.0007, "step": 221070 }, { "epoch": 1.8079077564705401, "grad_norm": 0.010803909972310066, "learning_rate": 2.783829399778892e-07, "loss": 0.0003, "step": 221080 }, { "epoch": 1.8079895326491393, "grad_norm": 0.005266755353659391, "learning_rate": 2.7814818988108503e-07, "loss": 0.0007, "step": 221090 }, { "epoch": 1.8080713088277385, "grad_norm": 0.029561663046479225, "learning_rate": 2.779135359722407e-07, "loss": 0.0007, "step": 221100 }, { "epoch": 1.8081530850063376, "grad_norm": 0.0013238845858722925, "learning_rate": 2.776789782561351e-07, "loss": 0.001, "step": 221110 }, { "epoch": 1.8082348611849368, "grad_norm": 0.08455910533666611, "learning_rate": 2.774445167375478e-07, "loss": 0.0005, "step": 221120 }, { "epoch": 1.808316637363536, "grad_norm": 0.02742130309343338, "learning_rate": 2.7721015142125495e-07, "loss": 0.0004, "step": 221130 }, { "epoch": 1.8083984135421352, "grad_norm": 0.005999924149364233, "learning_rate": 2.7697588231202944e-07, "loss": 0.0007, "step": 221140 }, { "epoch": 1.8084801897207343, "grad_norm": 0.00453942408785224, "learning_rate": 2.7674170941464463e-07, "loss": 0.0006, "step": 221150 }, { "epoch": 1.8085619658993335, "grad_norm": 0.07975797355175018, "learning_rate": 2.765076327338706e-07, "loss": 0.0005, "step": 221160 }, { "epoch": 1.8086437420779327, "grad_norm": 0.021160101518034935, "learning_rate": 2.7627365227447524e-07, "loss": 0.0009, "step": 221170 }, { "epoch": 1.8087255182565318, "grad_norm": 0.01107763685286045, "learning_rate": 2.7603976804122523e-07, "loss": 0.0005, "step": 221180 }, { "epoch": 1.808807294435131, "grad_norm": 0.015866274014115334, "learning_rate": 2.758059800388846e-07, "loss": 0.001, "step": 221190 }, { "epoch": 1.8088890706137302, "grad_norm": 0.029344109818339348, "learning_rate": 2.755722882722167e-07, "loss": 0.0007, "step": 221200 }, { "epoch": 1.8089708467923296, "grad_norm": 0.0495549812912941, "learning_rate": 2.753386927459817e-07, "loss": 0.0004, "step": 221210 }, { "epoch": 1.8090526229709285, "grad_norm": 0.0078316330909729, "learning_rate": 2.7510519346493735e-07, "loss": 0.0004, "step": 221220 }, { "epoch": 1.809134399149528, "grad_norm": 0.0620742030441761, "learning_rate": 2.748717904338405e-07, "loss": 0.0004, "step": 221230 }, { "epoch": 1.8092161753281268, "grad_norm": 0.03869469091296196, "learning_rate": 2.7463848365744673e-07, "loss": 0.0008, "step": 221240 }, { "epoch": 1.8092979515067262, "grad_norm": 0.013108400627970695, "learning_rate": 2.744052731405078e-07, "loss": 0.0004, "step": 221250 }, { "epoch": 1.8093797276853252, "grad_norm": 0.08231793344020844, "learning_rate": 2.7417215888777493e-07, "loss": 0.0027, "step": 221260 }, { "epoch": 1.8094615038639246, "grad_norm": 0.10194797813892365, "learning_rate": 2.7393914090399653e-07, "loss": 0.0007, "step": 221270 }, { "epoch": 1.8095432800425235, "grad_norm": 0.034516822546720505, "learning_rate": 2.737062191939194e-07, "loss": 0.0004, "step": 221280 }, { "epoch": 1.809625056221123, "grad_norm": 0.017679816111922264, "learning_rate": 2.73473393762288e-07, "loss": 0.0005, "step": 221290 }, { "epoch": 1.8097068323997219, "grad_norm": 0.02508816309273243, "learning_rate": 2.732406646138458e-07, "loss": 0.0005, "step": 221300 }, { "epoch": 1.8097886085783212, "grad_norm": 0.01969189941883087, "learning_rate": 2.7300803175333235e-07, "loss": 0.0004, "step": 221310 }, { "epoch": 1.8098703847569202, "grad_norm": 0.031196676194667816, "learning_rate": 2.727754951854883e-07, "loss": 0.0006, "step": 221320 }, { "epoch": 1.8099521609355196, "grad_norm": 0.05321609973907471, "learning_rate": 2.7254305491504984e-07, "loss": 0.0008, "step": 221330 }, { "epoch": 1.8100339371141185, "grad_norm": 0.019133323803544044, "learning_rate": 2.7231071094675267e-07, "loss": 0.0006, "step": 221340 }, { "epoch": 1.810115713292718, "grad_norm": 0.0006297081708908081, "learning_rate": 2.7207846328532794e-07, "loss": 0.0015, "step": 221350 }, { "epoch": 1.8101974894713169, "grad_norm": 0.03530411794781685, "learning_rate": 2.718463119355086e-07, "loss": 0.0007, "step": 221360 }, { "epoch": 1.8102792656499163, "grad_norm": 0.0001493688760092482, "learning_rate": 2.7161425690202303e-07, "loss": 0.0001, "step": 221370 }, { "epoch": 1.8103610418285152, "grad_norm": 0.0064146993681788445, "learning_rate": 2.713822981895992e-07, "loss": 0.0002, "step": 221380 }, { "epoch": 1.8104428180071146, "grad_norm": 0.04201004281640053, "learning_rate": 2.7115043580296096e-07, "loss": 0.0008, "step": 221390 }, { "epoch": 1.8105245941857135, "grad_norm": 0.01357085257768631, "learning_rate": 2.709186697468319e-07, "loss": 0.0005, "step": 221400 }, { "epoch": 1.810606370364313, "grad_norm": 0.041453830897808075, "learning_rate": 2.7068700002593426e-07, "loss": 0.0005, "step": 221410 }, { "epoch": 1.810688146542912, "grad_norm": 0.012236755341291428, "learning_rate": 2.7045542664498603e-07, "loss": 0.0005, "step": 221420 }, { "epoch": 1.8107699227215113, "grad_norm": 0.01184080634266138, "learning_rate": 2.702239496087045e-07, "loss": 0.0006, "step": 221430 }, { "epoch": 1.8108516989001104, "grad_norm": 0.047149889171123505, "learning_rate": 2.699925689218069e-07, "loss": 0.0008, "step": 221440 }, { "epoch": 1.8109334750787096, "grad_norm": 0.046823084354400635, "learning_rate": 2.6976128458900464e-07, "loss": 0.0009, "step": 221450 }, { "epoch": 1.8110152512573088, "grad_norm": 0.036654144525527954, "learning_rate": 2.695300966150105e-07, "loss": 0.0006, "step": 221460 }, { "epoch": 1.811097027435908, "grad_norm": 0.01863069273531437, "learning_rate": 2.692990050045324e-07, "loss": 0.0009, "step": 221470 }, { "epoch": 1.8111788036145071, "grad_norm": 0.05756666138768196, "learning_rate": 2.6906800976228e-07, "loss": 0.0007, "step": 221480 }, { "epoch": 1.8112605797931063, "grad_norm": 0.5063043236732483, "learning_rate": 2.688371108929572e-07, "loss": 0.0007, "step": 221490 }, { "epoch": 1.8113423559717055, "grad_norm": 0.013361981138586998, "learning_rate": 2.6860630840126854e-07, "loss": 0.0007, "step": 221500 }, { "epoch": 1.8114241321503046, "grad_norm": 0.05197790265083313, "learning_rate": 2.683756022919154e-07, "loss": 0.0004, "step": 221510 }, { "epoch": 1.8115059083289038, "grad_norm": 0.03341341391205788, "learning_rate": 2.681449925695967e-07, "loss": 0.0005, "step": 221520 }, { "epoch": 1.811587684507503, "grad_norm": 0.09704045951366425, "learning_rate": 2.679144792390109e-07, "loss": 0.0006, "step": 221530 }, { "epoch": 1.8116694606861021, "grad_norm": 0.061451710760593414, "learning_rate": 2.6768406230485377e-07, "loss": 0.0004, "step": 221540 }, { "epoch": 1.8117512368647013, "grad_norm": 0.04054019972681999, "learning_rate": 2.6745374177181816e-07, "loss": 0.0009, "step": 221550 }, { "epoch": 1.8118330130433005, "grad_norm": 0.03184043988585472, "learning_rate": 2.672235176445975e-07, "loss": 0.0006, "step": 221560 }, { "epoch": 1.8119147892218996, "grad_norm": 0.033627428114414215, "learning_rate": 2.669933899278804e-07, "loss": 0.0006, "step": 221570 }, { "epoch": 1.8119965654004988, "grad_norm": 0.08884217590093613, "learning_rate": 2.667633586263557e-07, "loss": 0.004, "step": 221580 }, { "epoch": 1.812078341579098, "grad_norm": 0.003969213925302029, "learning_rate": 2.665334237447076e-07, "loss": 0.0007, "step": 221590 }, { "epoch": 1.8121601177576971, "grad_norm": 0.01935751363635063, "learning_rate": 2.6630358528762224e-07, "loss": 0.0005, "step": 221600 }, { "epoch": 1.8122418939362963, "grad_norm": 0.06363710016012192, "learning_rate": 2.6607384325978036e-07, "loss": 0.0007, "step": 221610 }, { "epoch": 1.8123236701148955, "grad_norm": 0.0052586691454052925, "learning_rate": 2.658441976658621e-07, "loss": 0.0004, "step": 221620 }, { "epoch": 1.8124054462934946, "grad_norm": 0.03604442998766899, "learning_rate": 2.6561464851054597e-07, "loss": 0.0005, "step": 221630 }, { "epoch": 1.812487222472094, "grad_norm": 0.04289024695754051, "learning_rate": 2.6538519579850766e-07, "loss": 0.0015, "step": 221640 }, { "epoch": 1.812568998650693, "grad_norm": 0.024565361440181732, "learning_rate": 2.6515583953442116e-07, "loss": 0.0007, "step": 221650 }, { "epoch": 1.8126507748292924, "grad_norm": 0.005706515163183212, "learning_rate": 2.649265797229589e-07, "loss": 0.0011, "step": 221660 }, { "epoch": 1.8127325510078913, "grad_norm": 0.05594427511096001, "learning_rate": 2.64697416368791e-07, "loss": 0.0005, "step": 221670 }, { "epoch": 1.8128143271864907, "grad_norm": 0.04572208598256111, "learning_rate": 2.644683494765854e-07, "loss": 0.0008, "step": 221680 }, { "epoch": 1.8128961033650897, "grad_norm": 0.1326899230480194, "learning_rate": 2.642393790510095e-07, "loss": 0.0006, "step": 221690 }, { "epoch": 1.812977879543689, "grad_norm": 0.026490485295653343, "learning_rate": 2.6401050509672674e-07, "loss": 0.0009, "step": 221700 }, { "epoch": 1.813059655722288, "grad_norm": 0.06136564910411835, "learning_rate": 2.637817276183996e-07, "loss": 0.0007, "step": 221710 }, { "epoch": 1.8131414319008874, "grad_norm": 0.011186857707798481, "learning_rate": 2.6355304662068757e-07, "loss": 0.0011, "step": 221720 }, { "epoch": 1.8132232080794863, "grad_norm": 0.019607042893767357, "learning_rate": 2.633244621082509e-07, "loss": 0.0005, "step": 221730 }, { "epoch": 1.8133049842580857, "grad_norm": 0.024292608723044395, "learning_rate": 2.630959740857447e-07, "loss": 0.0009, "step": 221740 }, { "epoch": 1.8133867604366847, "grad_norm": 0.014587829820811749, "learning_rate": 2.6286758255782476e-07, "loss": 0.0024, "step": 221750 }, { "epoch": 1.813468536615284, "grad_norm": 0.0531267486512661, "learning_rate": 2.626392875291417e-07, "loss": 0.0006, "step": 221760 }, { "epoch": 1.813550312793883, "grad_norm": 0.030354609712958336, "learning_rate": 2.624110890043474e-07, "loss": 0.0006, "step": 221770 }, { "epoch": 1.8136320889724824, "grad_norm": 0.1198108047246933, "learning_rate": 2.621829869880904e-07, "loss": 0.001, "step": 221780 }, { "epoch": 1.8137138651510814, "grad_norm": 0.012665470130741596, "learning_rate": 2.6195498148501694e-07, "loss": 0.0006, "step": 221790 }, { "epoch": 1.8137956413296807, "grad_norm": 0.15175272524356842, "learning_rate": 2.6172707249977117e-07, "loss": 0.0018, "step": 221800 }, { "epoch": 1.8138774175082797, "grad_norm": 0.004442194942384958, "learning_rate": 2.61499260036997e-07, "loss": 0.0009, "step": 221810 }, { "epoch": 1.813959193686879, "grad_norm": 0.004202213604003191, "learning_rate": 2.6127154410133415e-07, "loss": 0.0004, "step": 221820 }, { "epoch": 1.814040969865478, "grad_norm": 0.054920196533203125, "learning_rate": 2.610439246974222e-07, "loss": 0.0006, "step": 221830 }, { "epoch": 1.8141227460440774, "grad_norm": 0.052330635488033295, "learning_rate": 2.60816401829897e-07, "loss": 0.0005, "step": 221840 }, { "epoch": 1.8142045222226766, "grad_norm": 0.10292995721101761, "learning_rate": 2.6058897550339414e-07, "loss": 0.0006, "step": 221850 }, { "epoch": 1.8142862984012758, "grad_norm": 0.0013779513537883759, "learning_rate": 2.6036164572254607e-07, "loss": 0.0006, "step": 221860 }, { "epoch": 1.814368074579875, "grad_norm": 0.027277400717139244, "learning_rate": 2.601344124919847e-07, "loss": 0.0004, "step": 221870 }, { "epoch": 1.814449850758474, "grad_norm": 0.00093324005138129, "learning_rate": 2.5990727581633734e-07, "loss": 0.0004, "step": 221880 }, { "epoch": 1.8145316269370733, "grad_norm": 0.05647073686122894, "learning_rate": 2.5968023570023204e-07, "loss": 0.0007, "step": 221890 }, { "epoch": 1.8146134031156724, "grad_norm": 0.06987032294273376, "learning_rate": 2.594532921482934e-07, "loss": 0.0005, "step": 221900 }, { "epoch": 1.8146951792942716, "grad_norm": 0.03461264818906784, "learning_rate": 2.5922644516514437e-07, "loss": 0.0003, "step": 221910 }, { "epoch": 1.8147769554728708, "grad_norm": 0.026090707629919052, "learning_rate": 2.5899969475540577e-07, "loss": 0.0005, "step": 221920 }, { "epoch": 1.81485873165147, "grad_norm": 0.02739003859460354, "learning_rate": 2.587730409236977e-07, "loss": 0.0008, "step": 221930 }, { "epoch": 1.814940507830069, "grad_norm": 0.01537057664245367, "learning_rate": 2.5854648367463654e-07, "loss": 0.0003, "step": 221940 }, { "epoch": 1.8150222840086683, "grad_norm": 0.04442349448800087, "learning_rate": 2.5832002301283745e-07, "loss": 0.0005, "step": 221950 }, { "epoch": 1.8151040601872674, "grad_norm": 0.03235325962305069, "learning_rate": 2.580936589429134e-07, "loss": 0.0006, "step": 221960 }, { "epoch": 1.8151858363658666, "grad_norm": 0.06749866902828217, "learning_rate": 2.5786739146947625e-07, "loss": 0.0006, "step": 221970 }, { "epoch": 1.8152676125444658, "grad_norm": 0.06270687282085419, "learning_rate": 2.5764122059713515e-07, "loss": 0.0005, "step": 221980 }, { "epoch": 1.815349388723065, "grad_norm": 0.03944765031337738, "learning_rate": 2.574151463304969e-07, "loss": 0.0006, "step": 221990 }, { "epoch": 1.8154311649016641, "grad_norm": 0.010926960036158562, "learning_rate": 2.5718916867416733e-07, "loss": 0.0007, "step": 222000 }, { "epoch": 1.8155129410802633, "grad_norm": 0.024378353729844093, "learning_rate": 2.5696328763274934e-07, "loss": 0.0006, "step": 222010 }, { "epoch": 1.8155947172588625, "grad_norm": 0.009725633077323437, "learning_rate": 2.5673750321084434e-07, "loss": 0.0012, "step": 222020 }, { "epoch": 1.8156764934374616, "grad_norm": 0.002887445967644453, "learning_rate": 2.5651181541305247e-07, "loss": 0.0007, "step": 222030 }, { "epoch": 1.8157582696160608, "grad_norm": 0.06973424553871155, "learning_rate": 2.5628622424396956e-07, "loss": 0.0008, "step": 222040 }, { "epoch": 1.81584004579466, "grad_norm": 0.03414631262421608, "learning_rate": 2.5606072970819295e-07, "loss": 0.0007, "step": 222050 }, { "epoch": 1.8159218219732591, "grad_norm": 0.03744681924581528, "learning_rate": 2.558353318103157e-07, "loss": 0.0003, "step": 222060 }, { "epoch": 1.8160035981518585, "grad_norm": 0.06887760758399963, "learning_rate": 2.556100305549286e-07, "loss": 0.0009, "step": 222070 }, { "epoch": 1.8160853743304575, "grad_norm": 0.12618505954742432, "learning_rate": 2.553848259466207e-07, "loss": 0.0006, "step": 222080 }, { "epoch": 1.8161671505090569, "grad_norm": 0.01495280023664236, "learning_rate": 2.551597179899817e-07, "loss": 0.0006, "step": 222090 }, { "epoch": 1.8162489266876558, "grad_norm": 0.00790338683873415, "learning_rate": 2.549347066895957e-07, "loss": 0.0004, "step": 222100 }, { "epoch": 1.8163307028662552, "grad_norm": 0.03355519473552704, "learning_rate": 2.5470979205004686e-07, "loss": 0.0014, "step": 222110 }, { "epoch": 1.8164124790448541, "grad_norm": 0.05800078064203262, "learning_rate": 2.544849740759164e-07, "loss": 0.0004, "step": 222120 }, { "epoch": 1.8164942552234535, "grad_norm": 0.06357967108488083, "learning_rate": 2.5426025277178526e-07, "loss": 0.0012, "step": 222130 }, { "epoch": 1.8165760314020525, "grad_norm": 0.022279715165495872, "learning_rate": 2.540356281422296e-07, "loss": 0.0007, "step": 222140 }, { "epoch": 1.8166578075806519, "grad_norm": 0.0062728989869356155, "learning_rate": 2.5381110019182585e-07, "loss": 0.0003, "step": 222150 }, { "epoch": 1.8167395837592508, "grad_norm": 0.08418252319097519, "learning_rate": 2.5358666892514703e-07, "loss": 0.0007, "step": 222160 }, { "epoch": 1.8168213599378502, "grad_norm": 0.03250589221715927, "learning_rate": 2.533623343467673e-07, "loss": 0.0003, "step": 222170 }, { "epoch": 1.8169031361164492, "grad_norm": 0.26296916604042053, "learning_rate": 2.531380964612545e-07, "loss": 0.0008, "step": 222180 }, { "epoch": 1.8169849122950485, "grad_norm": 0.033563584089279175, "learning_rate": 2.529139552731774e-07, "loss": 0.0005, "step": 222190 }, { "epoch": 1.8170666884736475, "grad_norm": 0.024263864383101463, "learning_rate": 2.526899107871006e-07, "loss": 0.0004, "step": 222200 }, { "epoch": 1.8171484646522469, "grad_norm": 0.09870311617851257, "learning_rate": 2.5246596300758984e-07, "loss": 0.0008, "step": 222210 }, { "epoch": 1.8172302408308458, "grad_norm": 0.02356145530939102, "learning_rate": 2.522421119392071e-07, "loss": 0.0009, "step": 222220 }, { "epoch": 1.8173120170094452, "grad_norm": 0.062294699251651764, "learning_rate": 2.5201835758651094e-07, "loss": 0.001, "step": 222230 }, { "epoch": 1.8173937931880442, "grad_norm": 0.034398727118968964, "learning_rate": 2.517946999540605e-07, "loss": 0.0003, "step": 222240 }, { "epoch": 1.8174755693666436, "grad_norm": 0.087217316031456, "learning_rate": 2.515711390464115e-07, "loss": 0.0015, "step": 222250 }, { "epoch": 1.8175573455452425, "grad_norm": 0.021862614899873734, "learning_rate": 2.5134767486811816e-07, "loss": 0.0009, "step": 222260 }, { "epoch": 1.817639121723842, "grad_norm": 0.05158715695142746, "learning_rate": 2.511243074237324e-07, "loss": 0.0005, "step": 222270 }, { "epoch": 1.817720897902441, "grad_norm": 0.020590918138623238, "learning_rate": 2.509010367178044e-07, "loss": 0.0006, "step": 222280 }, { "epoch": 1.8178026740810402, "grad_norm": 0.002139439107850194, "learning_rate": 2.506778627548828e-07, "loss": 0.0004, "step": 222290 }, { "epoch": 1.8178844502596394, "grad_norm": 0.05269503593444824, "learning_rate": 2.504547855395134e-07, "loss": 0.0004, "step": 222300 }, { "epoch": 1.8179662264382386, "grad_norm": 0.005321343895047903, "learning_rate": 2.502318050762409e-07, "loss": 0.0002, "step": 222310 }, { "epoch": 1.8180480026168377, "grad_norm": 0.11333346366882324, "learning_rate": 2.5000892136960664e-07, "loss": 0.0005, "step": 222320 }, { "epoch": 1.818129778795437, "grad_norm": 0.0331348180770874, "learning_rate": 2.4978613442415256e-07, "loss": 0.0004, "step": 222330 }, { "epoch": 1.818211554974036, "grad_norm": 0.13895320892333984, "learning_rate": 2.4956344424441614e-07, "loss": 0.0004, "step": 222340 }, { "epoch": 1.8182933311526353, "grad_norm": 0.0662633404135704, "learning_rate": 2.493408508349332e-07, "loss": 0.0008, "step": 222350 }, { "epoch": 1.8183751073312344, "grad_norm": 0.09185077995061874, "learning_rate": 2.49118354200239e-07, "loss": 0.0018, "step": 222360 }, { "epoch": 1.8184568835098336, "grad_norm": 0.05327330902218819, "learning_rate": 2.4889595434486544e-07, "loss": 0.0005, "step": 222370 }, { "epoch": 1.8185386596884328, "grad_norm": 0.06469784677028656, "learning_rate": 2.4867365127334343e-07, "loss": 0.0005, "step": 222380 }, { "epoch": 1.818620435867032, "grad_norm": 0.007665196433663368, "learning_rate": 2.484514449902009e-07, "loss": 0.0006, "step": 222390 }, { "epoch": 1.818702212045631, "grad_norm": 0.017276933416724205, "learning_rate": 2.482293354999643e-07, "loss": 0.0006, "step": 222400 }, { "epoch": 1.8187839882242303, "grad_norm": 0.013937367126345634, "learning_rate": 2.480073228071589e-07, "loss": 0.0003, "step": 222410 }, { "epoch": 1.8188657644028294, "grad_norm": 0.008290043100714684, "learning_rate": 2.4778540691630715e-07, "loss": 0.0017, "step": 222420 }, { "epoch": 1.8189475405814286, "grad_norm": 0.14077171683311462, "learning_rate": 2.475635878319299e-07, "loss": 0.0007, "step": 222430 }, { "epoch": 1.8190293167600278, "grad_norm": 0.014797616750001907, "learning_rate": 2.473418655585441e-07, "loss": 0.0003, "step": 222440 }, { "epoch": 1.819111092938627, "grad_norm": 0.013497543521225452, "learning_rate": 2.4712024010066836e-07, "loss": 0.0003, "step": 222450 }, { "epoch": 1.819192869117226, "grad_norm": 0.03136671334505081, "learning_rate": 2.4689871146281685e-07, "loss": 0.0003, "step": 222460 }, { "epoch": 1.8192746452958253, "grad_norm": 0.04103109985589981, "learning_rate": 2.4667727964950207e-07, "loss": 0.0007, "step": 222470 }, { "epoch": 1.8193564214744244, "grad_norm": 0.027183083817362785, "learning_rate": 2.464559446652348e-07, "loss": 0.0005, "step": 222480 }, { "epoch": 1.8194381976530236, "grad_norm": 0.16575156152248383, "learning_rate": 2.4623470651452373e-07, "loss": 0.003, "step": 222490 }, { "epoch": 1.819519973831623, "grad_norm": 0.00827082246541977, "learning_rate": 2.4601356520187583e-07, "loss": 0.0005, "step": 222500 }, { "epoch": 1.819601750010222, "grad_norm": 0.06234793737530708, "learning_rate": 2.457925207317957e-07, "loss": 0.0005, "step": 222510 }, { "epoch": 1.8196835261888213, "grad_norm": 0.061908140778541565, "learning_rate": 2.45571573108786e-07, "loss": 0.0004, "step": 222520 }, { "epoch": 1.8197653023674203, "grad_norm": 0.010297559201717377, "learning_rate": 2.45350722337348e-07, "loss": 0.0004, "step": 222530 }, { "epoch": 1.8198470785460197, "grad_norm": 0.03915159031748772, "learning_rate": 2.45129968421981e-07, "loss": 0.0007, "step": 222540 }, { "epoch": 1.8199288547246186, "grad_norm": 0.06739859282970428, "learning_rate": 2.4490931136718135e-07, "loss": 0.0005, "step": 222550 }, { "epoch": 1.820010630903218, "grad_norm": 0.007495244964957237, "learning_rate": 2.4468875117744374e-07, "loss": 0.0012, "step": 222560 }, { "epoch": 1.820092407081817, "grad_norm": 0.02665242739021778, "learning_rate": 2.444682878572624e-07, "loss": 0.0004, "step": 222570 }, { "epoch": 1.8201741832604164, "grad_norm": 0.10745077580213547, "learning_rate": 2.442479214111271e-07, "loss": 0.0008, "step": 222580 }, { "epoch": 1.8202559594390153, "grad_norm": 0.08160454034805298, "learning_rate": 2.440276518435275e-07, "loss": 0.0008, "step": 222590 }, { "epoch": 1.8203377356176147, "grad_norm": 0.0008533938089385629, "learning_rate": 2.438074791589501e-07, "loss": 0.0003, "step": 222600 }, { "epoch": 1.8204195117962136, "grad_norm": 0.013555587269365788, "learning_rate": 2.4358740336188067e-07, "loss": 0.0005, "step": 222610 }, { "epoch": 1.820501287974813, "grad_norm": 0.001631340361200273, "learning_rate": 2.433674244568024e-07, "loss": 0.0006, "step": 222620 }, { "epoch": 1.820583064153412, "grad_norm": 0.11925913393497467, "learning_rate": 2.431475424481955e-07, "loss": 0.0005, "step": 222630 }, { "epoch": 1.8206648403320114, "grad_norm": 0.030806709080934525, "learning_rate": 2.429277573405392e-07, "loss": 0.0004, "step": 222640 }, { "epoch": 1.8207466165106103, "grad_norm": 0.02161685936152935, "learning_rate": 2.4270806913831213e-07, "loss": 0.0007, "step": 222650 }, { "epoch": 1.8208283926892097, "grad_norm": 0.03754531964659691, "learning_rate": 2.424884778459885e-07, "loss": 0.0007, "step": 222660 }, { "epoch": 1.8209101688678087, "grad_norm": 0.1046556681394577, "learning_rate": 2.4226898346804137e-07, "loss": 0.0008, "step": 222670 }, { "epoch": 1.820991945046408, "grad_norm": 0.03747299686074257, "learning_rate": 2.4204958600894224e-07, "loss": 0.0006, "step": 222680 }, { "epoch": 1.821073721225007, "grad_norm": 0.0406935028731823, "learning_rate": 2.418302854731608e-07, "loss": 0.0012, "step": 222690 }, { "epoch": 1.8211554974036064, "grad_norm": 0.007072020322084427, "learning_rate": 2.41611081865164e-07, "loss": 0.001, "step": 222700 }, { "epoch": 1.8212372735822056, "grad_norm": 0.10130215436220169, "learning_rate": 2.4139197518941783e-07, "loss": 0.0012, "step": 222710 }, { "epoch": 1.8213190497608047, "grad_norm": 0.030500460416078568, "learning_rate": 2.411729654503847e-07, "loss": 0.0011, "step": 222720 }, { "epoch": 1.821400825939404, "grad_norm": 0.018405063077807426, "learning_rate": 2.4095405265252616e-07, "loss": 0.0006, "step": 222730 }, { "epoch": 1.821482602118003, "grad_norm": 0.005849611014127731, "learning_rate": 2.407352368003024e-07, "loss": 0.0004, "step": 222740 }, { "epoch": 1.8215643782966022, "grad_norm": 0.02808460220694542, "learning_rate": 2.4051651789816997e-07, "loss": 0.0005, "step": 222750 }, { "epoch": 1.8216461544752014, "grad_norm": 0.07484619319438934, "learning_rate": 2.402978959505847e-07, "loss": 0.0007, "step": 222760 }, { "epoch": 1.8217279306538006, "grad_norm": 0.05713588744401932, "learning_rate": 2.400793709620003e-07, "loss": 0.0004, "step": 222770 }, { "epoch": 1.8218097068323997, "grad_norm": 0.028443876653909683, "learning_rate": 2.398609429368681e-07, "loss": 0.0008, "step": 222780 }, { "epoch": 1.821891483010999, "grad_norm": 0.05396855250000954, "learning_rate": 2.39642611879638e-07, "loss": 0.0004, "step": 222790 }, { "epoch": 1.821973259189598, "grad_norm": 0.07364121079444885, "learning_rate": 2.3942437779475694e-07, "loss": 0.0007, "step": 222800 }, { "epoch": 1.8220550353681972, "grad_norm": 0.0022971101570874453, "learning_rate": 2.392062406866713e-07, "loss": 0.0005, "step": 222810 }, { "epoch": 1.8221368115467964, "grad_norm": 0.002178595867007971, "learning_rate": 2.3898820055982375e-07, "loss": 0.0017, "step": 222820 }, { "epoch": 1.8222185877253956, "grad_norm": 0.019509047269821167, "learning_rate": 2.3877025741865736e-07, "loss": 0.0003, "step": 222830 }, { "epoch": 1.8223003639039947, "grad_norm": 0.001647191122174263, "learning_rate": 2.385524112676102e-07, "loss": 0.0005, "step": 222840 }, { "epoch": 1.822382140082594, "grad_norm": 0.1306672990322113, "learning_rate": 2.3833466211112155e-07, "loss": 0.0015, "step": 222850 }, { "epoch": 1.822463916261193, "grad_norm": 0.07153663784265518, "learning_rate": 2.3811700995362563e-07, "loss": 0.0004, "step": 222860 }, { "epoch": 1.8225456924397923, "grad_norm": 0.0441502183675766, "learning_rate": 2.3789945479955724e-07, "loss": 0.0003, "step": 222870 }, { "epoch": 1.8226274686183914, "grad_norm": 0.00044829881517216563, "learning_rate": 2.3768199665334667e-07, "loss": 0.0012, "step": 222880 }, { "epoch": 1.8227092447969906, "grad_norm": 0.034063056111335754, "learning_rate": 2.37464635519426e-07, "loss": 0.0004, "step": 222890 }, { "epoch": 1.8227910209755898, "grad_norm": 0.027024630457162857, "learning_rate": 2.3724737140222164e-07, "loss": 0.0004, "step": 222900 }, { "epoch": 1.8228727971541892, "grad_norm": 0.04694882780313492, "learning_rate": 2.3703020430616008e-07, "loss": 0.0006, "step": 222910 }, { "epoch": 1.822954573332788, "grad_norm": 0.048908114433288574, "learning_rate": 2.3681313423566333e-07, "loss": 0.0005, "step": 222920 }, { "epoch": 1.8230363495113875, "grad_norm": 0.08951375633478165, "learning_rate": 2.3659616119515616e-07, "loss": 0.001, "step": 222930 }, { "epoch": 1.8231181256899864, "grad_norm": 0.14849568903446198, "learning_rate": 2.3637928518905672e-07, "loss": 0.0009, "step": 222940 }, { "epoch": 1.8231999018685858, "grad_norm": 0.0290417168289423, "learning_rate": 2.3616250622178317e-07, "loss": 0.0003, "step": 222950 }, { "epoch": 1.8232816780471848, "grad_norm": 0.010637759231030941, "learning_rate": 2.3594582429775192e-07, "loss": 0.0009, "step": 222960 }, { "epoch": 1.8233634542257842, "grad_norm": 0.0121842036023736, "learning_rate": 2.357292394213767e-07, "loss": 0.0005, "step": 222970 }, { "epoch": 1.823445230404383, "grad_norm": 0.04579413682222366, "learning_rate": 2.3551275159706955e-07, "loss": 0.0004, "step": 222980 }, { "epoch": 1.8235270065829825, "grad_norm": 0.04316267743706703, "learning_rate": 2.3529636082923967e-07, "loss": 0.0008, "step": 222990 }, { "epoch": 1.8236087827615814, "grad_norm": 0.08586090058088303, "learning_rate": 2.350800671222958e-07, "loss": 0.0007, "step": 223000 }, { "epoch": 1.8236905589401808, "grad_norm": 0.029855499044060707, "learning_rate": 2.3486387048064496e-07, "loss": 0.0002, "step": 223010 }, { "epoch": 1.8237723351187798, "grad_norm": 0.020673882216215134, "learning_rate": 2.3464777090868973e-07, "loss": 0.0005, "step": 223020 }, { "epoch": 1.8238541112973792, "grad_norm": 0.011445059441030025, "learning_rate": 2.3443176841083326e-07, "loss": 0.0007, "step": 223030 }, { "epoch": 1.8239358874759781, "grad_norm": 0.007172141224145889, "learning_rate": 2.3421586299147482e-07, "loss": 0.0007, "step": 223040 }, { "epoch": 1.8240176636545775, "grad_norm": 0.02177063189446926, "learning_rate": 2.3400005465501364e-07, "loss": 0.0004, "step": 223050 }, { "epoch": 1.8240994398331765, "grad_norm": 0.07092982530593872, "learning_rate": 2.3378434340584511e-07, "loss": 0.0013, "step": 223060 }, { "epoch": 1.8241812160117759, "grad_norm": 0.06143895909190178, "learning_rate": 2.3356872924836405e-07, "loss": 0.0007, "step": 223070 }, { "epoch": 1.8242629921903748, "grad_norm": 0.01611996442079544, "learning_rate": 2.3335321218696194e-07, "loss": 0.0005, "step": 223080 }, { "epoch": 1.8243447683689742, "grad_norm": 0.01158799696713686, "learning_rate": 2.3313779222602973e-07, "loss": 0.0004, "step": 223090 }, { "epoch": 1.8244265445475731, "grad_norm": 0.011838343925774097, "learning_rate": 2.32922469369955e-07, "loss": 0.0002, "step": 223100 }, { "epoch": 1.8245083207261725, "grad_norm": 0.006810935214161873, "learning_rate": 2.327072436231248e-07, "loss": 0.0003, "step": 223110 }, { "epoch": 1.8245900969047715, "grad_norm": 0.05818822234869003, "learning_rate": 2.3249211498992286e-07, "loss": 0.0004, "step": 223120 }, { "epoch": 1.8246718730833709, "grad_norm": 0.09980714321136475, "learning_rate": 2.322770834747312e-07, "loss": 0.0004, "step": 223130 }, { "epoch": 1.82475364926197, "grad_norm": 0.001963488757610321, "learning_rate": 2.3206214908193192e-07, "loss": 0.0011, "step": 223140 }, { "epoch": 1.8248354254405692, "grad_norm": 0.06549465656280518, "learning_rate": 2.3184731181590148e-07, "loss": 0.0006, "step": 223150 }, { "epoch": 1.8249172016191684, "grad_norm": 0.026547273620963097, "learning_rate": 2.3163257168101748e-07, "loss": 0.0003, "step": 223160 }, { "epoch": 1.8249989777977675, "grad_norm": 0.052275434136390686, "learning_rate": 2.3141792868165314e-07, "loss": 0.0002, "step": 223170 }, { "epoch": 1.8250807539763667, "grad_norm": 0.052439380437135696, "learning_rate": 2.3120338282218213e-07, "loss": 0.0004, "step": 223180 }, { "epoch": 1.8251625301549659, "grad_norm": 0.07274375110864639, "learning_rate": 2.3098893410697486e-07, "loss": 0.0008, "step": 223190 }, { "epoch": 1.825244306333565, "grad_norm": 0.17155607044696808, "learning_rate": 2.3077458254039897e-07, "loss": 0.0016, "step": 223200 }, { "epoch": 1.8253260825121642, "grad_norm": 0.006276182364672422, "learning_rate": 2.3056032812682205e-07, "loss": 0.0004, "step": 223210 }, { "epoch": 1.8254078586907634, "grad_norm": 0.02027209848165512, "learning_rate": 2.3034617087060783e-07, "loss": 0.0005, "step": 223220 }, { "epoch": 1.8254896348693626, "grad_norm": 0.0397968627512455, "learning_rate": 2.3013211077611897e-07, "loss": 0.0008, "step": 223230 }, { "epoch": 1.8255714110479617, "grad_norm": 0.014777883887290955, "learning_rate": 2.2991814784771583e-07, "loss": 0.0003, "step": 223240 }, { "epoch": 1.825653187226561, "grad_norm": 0.005996414925903082, "learning_rate": 2.2970428208975714e-07, "loss": 0.0003, "step": 223250 }, { "epoch": 1.82573496340516, "grad_norm": 0.01381529588252306, "learning_rate": 2.2949051350660057e-07, "loss": 0.0002, "step": 223260 }, { "epoch": 1.8258167395837592, "grad_norm": 0.0015537567669525743, "learning_rate": 2.2927684210259925e-07, "loss": 0.0006, "step": 223270 }, { "epoch": 1.8258985157623584, "grad_norm": 0.05296730995178223, "learning_rate": 2.2906326788210697e-07, "loss": 0.0005, "step": 223280 }, { "epoch": 1.8259802919409576, "grad_norm": 0.009064974263310432, "learning_rate": 2.28849790849473e-07, "loss": 0.0006, "step": 223290 }, { "epoch": 1.8260620681195567, "grad_norm": 0.12273550778627396, "learning_rate": 2.2863641100904776e-07, "loss": 0.0007, "step": 223300 }, { "epoch": 1.826143844298156, "grad_norm": 0.02305952087044716, "learning_rate": 2.284231283651772e-07, "loss": 0.0004, "step": 223310 }, { "epoch": 1.826225620476755, "grad_norm": 0.044111963361501694, "learning_rate": 2.2820994292220623e-07, "loss": 0.0006, "step": 223320 }, { "epoch": 1.8263073966553542, "grad_norm": 0.002856162842363119, "learning_rate": 2.279968546844774e-07, "loss": 0.0007, "step": 223330 }, { "epoch": 1.8263891728339536, "grad_norm": 0.016147656366229057, "learning_rate": 2.277838636563312e-07, "loss": 0.0005, "step": 223340 }, { "epoch": 1.8264709490125526, "grad_norm": 0.02335227280855179, "learning_rate": 2.275709698421069e-07, "loss": 0.0003, "step": 223350 }, { "epoch": 1.826552725191152, "grad_norm": 0.04385147988796234, "learning_rate": 2.2735817324614162e-07, "loss": 0.0005, "step": 223360 }, { "epoch": 1.826634501369751, "grad_norm": 0.008441686630249023, "learning_rate": 2.2714547387276852e-07, "loss": 0.0004, "step": 223370 }, { "epoch": 1.8267162775483503, "grad_norm": 0.05700792744755745, "learning_rate": 2.2693287172632251e-07, "loss": 0.0008, "step": 223380 }, { "epoch": 1.8267980537269493, "grad_norm": 0.0375387966632843, "learning_rate": 2.267203668111334e-07, "loss": 0.0006, "step": 223390 }, { "epoch": 1.8268798299055486, "grad_norm": 0.029704861342906952, "learning_rate": 2.2650795913153057e-07, "loss": 0.0006, "step": 223400 }, { "epoch": 1.8269616060841476, "grad_norm": 0.014283912256360054, "learning_rate": 2.2629564869183996e-07, "loss": 0.0007, "step": 223410 }, { "epoch": 1.827043382262747, "grad_norm": 0.020577700808644295, "learning_rate": 2.260834354963881e-07, "loss": 0.0009, "step": 223420 }, { "epoch": 1.827125158441346, "grad_norm": 0.01865507662296295, "learning_rate": 2.2587131954949716e-07, "loss": 0.0003, "step": 223430 }, { "epoch": 1.8272069346199453, "grad_norm": 0.007291827350854874, "learning_rate": 2.2565930085548803e-07, "loss": 0.0004, "step": 223440 }, { "epoch": 1.8272887107985443, "grad_norm": 0.06248227134346962, "learning_rate": 2.2544737941867956e-07, "loss": 0.0007, "step": 223450 }, { "epoch": 1.8273704869771437, "grad_norm": 0.03095613233745098, "learning_rate": 2.252355552433888e-07, "loss": 0.0008, "step": 223460 }, { "epoch": 1.8274522631557426, "grad_norm": 0.03562070056796074, "learning_rate": 2.2502382833393123e-07, "loss": 0.0007, "step": 223470 }, { "epoch": 1.827534039334342, "grad_norm": 0.03956684470176697, "learning_rate": 2.2481219869461945e-07, "loss": 0.0004, "step": 223480 }, { "epoch": 1.827615815512941, "grad_norm": 0.008490342646837234, "learning_rate": 2.246006663297634e-07, "loss": 0.0004, "step": 223490 }, { "epoch": 1.8276975916915403, "grad_norm": 0.03961949795484543, "learning_rate": 2.2438923124367463e-07, "loss": 0.001, "step": 223500 }, { "epoch": 1.8277793678701393, "grad_norm": 0.03552786633372307, "learning_rate": 2.241778934406591e-07, "loss": 0.0004, "step": 223510 }, { "epoch": 1.8278611440487387, "grad_norm": 0.0024412008933722973, "learning_rate": 2.2396665292502172e-07, "loss": 0.0005, "step": 223520 }, { "epoch": 1.8279429202273376, "grad_norm": 0.057489216327667236, "learning_rate": 2.237555097010652e-07, "loss": 0.0011, "step": 223530 }, { "epoch": 1.828024696405937, "grad_norm": 0.06382696330547333, "learning_rate": 2.2354446377309212e-07, "loss": 0.001, "step": 223540 }, { "epoch": 1.8281064725845362, "grad_norm": 0.019938958808779716, "learning_rate": 2.233335151454008e-07, "loss": 0.0004, "step": 223550 }, { "epoch": 1.8281882487631353, "grad_norm": 0.02673814259469509, "learning_rate": 2.2312266382228775e-07, "loss": 0.0006, "step": 223560 }, { "epoch": 1.8282700249417345, "grad_norm": 0.006858055014163256, "learning_rate": 2.2291190980804954e-07, "loss": 0.0003, "step": 223570 }, { "epoch": 1.8283518011203337, "grad_norm": 0.017229732125997543, "learning_rate": 2.2270125310697888e-07, "loss": 0.0005, "step": 223580 }, { "epoch": 1.8284335772989329, "grad_norm": 0.06193074956536293, "learning_rate": 2.224906937233662e-07, "loss": 0.0006, "step": 223590 }, { "epoch": 1.828515353477532, "grad_norm": 0.002709002932533622, "learning_rate": 2.2228023166150191e-07, "loss": 0.0005, "step": 223600 }, { "epoch": 1.8285971296561312, "grad_norm": 0.04080851376056671, "learning_rate": 2.2206986692567211e-07, "loss": 0.0007, "step": 223610 }, { "epoch": 1.8286789058347304, "grad_norm": 0.0035201148129999638, "learning_rate": 2.2185959952016333e-07, "loss": 0.001, "step": 223620 }, { "epoch": 1.8287606820133295, "grad_norm": 0.02138092741370201, "learning_rate": 2.2164942944925883e-07, "loss": 0.0003, "step": 223630 }, { "epoch": 1.8288424581919287, "grad_norm": 0.029193155467510223, "learning_rate": 2.2143935671723904e-07, "loss": 0.0004, "step": 223640 }, { "epoch": 1.8289242343705279, "grad_norm": 0.02395486645400524, "learning_rate": 2.2122938132838335e-07, "loss": 0.0004, "step": 223650 }, { "epoch": 1.829006010549127, "grad_norm": 0.06118417903780937, "learning_rate": 2.2101950328696997e-07, "loss": 0.0007, "step": 223660 }, { "epoch": 1.8290877867277262, "grad_norm": 0.03538860008120537, "learning_rate": 2.2080972259727385e-07, "loss": 0.0006, "step": 223670 }, { "epoch": 1.8291695629063254, "grad_norm": 0.0011148598277941346, "learning_rate": 2.206000392635682e-07, "loss": 0.0003, "step": 223680 }, { "epoch": 1.8292513390849245, "grad_norm": 0.031242724508047104, "learning_rate": 2.203904532901252e-07, "loss": 0.0007, "step": 223690 }, { "epoch": 1.8293331152635237, "grad_norm": 0.016310837119817734, "learning_rate": 2.2018096468121363e-07, "loss": 0.001, "step": 223700 }, { "epoch": 1.8294148914421229, "grad_norm": 0.0545024611055851, "learning_rate": 2.1997157344110066e-07, "loss": 0.0005, "step": 223710 }, { "epoch": 1.829496667620722, "grad_norm": 0.0010380559833720326, "learning_rate": 2.1976227957405227e-07, "loss": 0.0006, "step": 223720 }, { "epoch": 1.8295784437993212, "grad_norm": 0.026691196486353874, "learning_rate": 2.195530830843312e-07, "loss": 0.0004, "step": 223730 }, { "epoch": 1.8296602199779204, "grad_norm": 0.0014884219272062182, "learning_rate": 2.193439839762007e-07, "loss": 0.0005, "step": 223740 }, { "epoch": 1.8297419961565196, "grad_norm": 0.02361379750072956, "learning_rate": 2.191349822539185e-07, "loss": 0.0004, "step": 223750 }, { "epoch": 1.8298237723351187, "grad_norm": 0.037003740668296814, "learning_rate": 2.1892607792174281e-07, "loss": 0.0008, "step": 223760 }, { "epoch": 1.8299055485137181, "grad_norm": 0.012966552749276161, "learning_rate": 2.1871727098392915e-07, "loss": 0.0005, "step": 223770 }, { "epoch": 1.829987324692317, "grad_norm": 0.005966912489384413, "learning_rate": 2.1850856144473132e-07, "loss": 0.0007, "step": 223780 }, { "epoch": 1.8300691008709165, "grad_norm": 0.024666802957654, "learning_rate": 2.1829994930840037e-07, "loss": 0.0005, "step": 223790 }, { "epoch": 1.8301508770495154, "grad_norm": 0.015113738365471363, "learning_rate": 2.180914345791868e-07, "loss": 0.0003, "step": 223800 }, { "epoch": 1.8302326532281148, "grad_norm": 0.07619687169790268, "learning_rate": 2.178830172613372e-07, "loss": 0.0006, "step": 223810 }, { "epoch": 1.8303144294067137, "grad_norm": 0.005354163236916065, "learning_rate": 2.176746973590982e-07, "loss": 0.0009, "step": 223820 }, { "epoch": 1.8303962055853131, "grad_norm": 0.02721480280160904, "learning_rate": 2.1746647487671247e-07, "loss": 0.0006, "step": 223830 }, { "epoch": 1.830477981763912, "grad_norm": 0.04960382357239723, "learning_rate": 2.172583498184222e-07, "loss": 0.0008, "step": 223840 }, { "epoch": 1.8305597579425115, "grad_norm": 0.07483221590518951, "learning_rate": 2.170503221884662e-07, "loss": 0.0009, "step": 223850 }, { "epoch": 1.8306415341211104, "grad_norm": 0.11576659977436066, "learning_rate": 2.1684239199108336e-07, "loss": 0.0008, "step": 223860 }, { "epoch": 1.8307233102997098, "grad_norm": 0.00019340617291163653, "learning_rate": 2.1663455923050913e-07, "loss": 0.0009, "step": 223870 }, { "epoch": 1.8308050864783088, "grad_norm": 0.0012273177271708846, "learning_rate": 2.1642682391097735e-07, "loss": 0.0018, "step": 223880 }, { "epoch": 1.8308868626569081, "grad_norm": 0.028909150511026382, "learning_rate": 2.1621918603671853e-07, "loss": 0.0008, "step": 223890 }, { "epoch": 1.830968638835507, "grad_norm": 0.02298525534570217, "learning_rate": 2.1601164561196375e-07, "loss": 0.0007, "step": 223900 }, { "epoch": 1.8310504150141065, "grad_norm": 0.020356088876724243, "learning_rate": 2.158042026409407e-07, "loss": 0.0005, "step": 223910 }, { "epoch": 1.8311321911927054, "grad_norm": 0.0013025130610913038, "learning_rate": 2.1559685712787493e-07, "loss": 0.0004, "step": 223920 }, { "epoch": 1.8312139673713048, "grad_norm": 0.1345718502998352, "learning_rate": 2.153896090769897e-07, "loss": 0.0008, "step": 223930 }, { "epoch": 1.8312957435499038, "grad_norm": 0.04345100000500679, "learning_rate": 2.1518245849250775e-07, "loss": 0.0008, "step": 223940 }, { "epoch": 1.8313775197285032, "grad_norm": 0.016613062471151352, "learning_rate": 2.1497540537864847e-07, "loss": 0.0005, "step": 223950 }, { "epoch": 1.831459295907102, "grad_norm": 0.0019227751763537526, "learning_rate": 2.1476844973962906e-07, "loss": 0.0002, "step": 223960 }, { "epoch": 1.8315410720857015, "grad_norm": 0.0706542506814003, "learning_rate": 2.145615915796656e-07, "loss": 0.0006, "step": 223970 }, { "epoch": 1.8316228482643007, "grad_norm": 0.09118485450744629, "learning_rate": 2.1435483090297305e-07, "loss": 0.001, "step": 223980 }, { "epoch": 1.8317046244428998, "grad_norm": 0.0414654016494751, "learning_rate": 2.1414816771376247e-07, "loss": 0.0008, "step": 223990 }, { "epoch": 1.831786400621499, "grad_norm": 0.002497491892427206, "learning_rate": 2.139416020162438e-07, "loss": 0.0006, "step": 224000 }, { "epoch": 1.8318681768000982, "grad_norm": 0.013049746863543987, "learning_rate": 2.1373513381462484e-07, "loss": 0.0009, "step": 224010 }, { "epoch": 1.8319499529786973, "grad_norm": 0.031094538047909737, "learning_rate": 2.1352876311311166e-07, "loss": 0.0005, "step": 224020 }, { "epoch": 1.8320317291572965, "grad_norm": 0.00016852667613420635, "learning_rate": 2.1332248991590865e-07, "loss": 0.0006, "step": 224030 }, { "epoch": 1.8321135053358957, "grad_norm": 0.028462793678045273, "learning_rate": 2.131163142272169e-07, "loss": 0.0004, "step": 224040 }, { "epoch": 1.8321952815144948, "grad_norm": 0.0006176313618198037, "learning_rate": 2.129102360512375e-07, "loss": 0.0002, "step": 224050 }, { "epoch": 1.832277057693094, "grad_norm": 0.0017153777880594134, "learning_rate": 2.127042553921671e-07, "loss": 0.0008, "step": 224060 }, { "epoch": 1.8323588338716932, "grad_norm": 0.020788496360182762, "learning_rate": 2.1249837225420234e-07, "loss": 0.0004, "step": 224070 }, { "epoch": 1.8324406100502924, "grad_norm": 0.023557551205158234, "learning_rate": 2.1229258664153763e-07, "loss": 0.0004, "step": 224080 }, { "epoch": 1.8325223862288915, "grad_norm": 0.015288212336599827, "learning_rate": 2.120868985583635e-07, "loss": 0.001, "step": 224090 }, { "epoch": 1.8326041624074907, "grad_norm": 0.06325872987508774, "learning_rate": 2.118813080088722e-07, "loss": 0.0004, "step": 224100 }, { "epoch": 1.8326859385860899, "grad_norm": 0.02540394850075245, "learning_rate": 2.116758149972503e-07, "loss": 0.0006, "step": 224110 }, { "epoch": 1.832767714764689, "grad_norm": 0.008846336044371128, "learning_rate": 2.11470419527684e-07, "loss": 0.0008, "step": 224120 }, { "epoch": 1.8328494909432882, "grad_norm": 0.00435716612264514, "learning_rate": 2.1126512160435708e-07, "loss": 0.0008, "step": 224130 }, { "epoch": 1.8329312671218874, "grad_norm": 0.005760580766946077, "learning_rate": 2.110599212314529e-07, "loss": 0.0005, "step": 224140 }, { "epoch": 1.8330130433004865, "grad_norm": 0.008241573348641396, "learning_rate": 2.1085481841315093e-07, "loss": 0.0007, "step": 224150 }, { "epoch": 1.8330948194790857, "grad_norm": 0.18773838877677917, "learning_rate": 2.1064981315362886e-07, "loss": 0.001, "step": 224160 }, { "epoch": 1.8331765956576849, "grad_norm": 0.036599721759557724, "learning_rate": 2.1044490545706343e-07, "loss": 0.0009, "step": 224170 }, { "epoch": 1.833258371836284, "grad_norm": 0.027949532493948936, "learning_rate": 2.102400953276279e-07, "loss": 0.0007, "step": 224180 }, { "epoch": 1.8333401480148832, "grad_norm": 0.01708790473639965, "learning_rate": 2.100353827694951e-07, "loss": 0.0007, "step": 224190 }, { "epoch": 1.8334219241934826, "grad_norm": 0.02567666955292225, "learning_rate": 2.0983076778683497e-07, "loss": 0.0006, "step": 224200 }, { "epoch": 1.8335037003720815, "grad_norm": 0.017017554491758347, "learning_rate": 2.0962625038381533e-07, "loss": 0.0006, "step": 224210 }, { "epoch": 1.833585476550681, "grad_norm": 0.14451353251934052, "learning_rate": 2.0942183056460342e-07, "loss": 0.0008, "step": 224220 }, { "epoch": 1.8336672527292799, "grad_norm": 0.0011340032797306776, "learning_rate": 2.0921750833336252e-07, "loss": 0.0009, "step": 224230 }, { "epoch": 1.8337490289078793, "grad_norm": 0.005546122789382935, "learning_rate": 2.0901328369425546e-07, "loss": 0.0012, "step": 224240 }, { "epoch": 1.8338308050864782, "grad_norm": 0.02067362330853939, "learning_rate": 2.0880915665144109e-07, "loss": 0.0012, "step": 224250 }, { "epoch": 1.8339125812650776, "grad_norm": 0.07069700211286545, "learning_rate": 2.0860512720907943e-07, "loss": 0.001, "step": 224260 }, { "epoch": 1.8339943574436766, "grad_norm": 0.062176719307899475, "learning_rate": 2.0840119537132607e-07, "loss": 0.001, "step": 224270 }, { "epoch": 1.834076133622276, "grad_norm": 0.039699334651231766, "learning_rate": 2.0819736114233548e-07, "loss": 0.0004, "step": 224280 }, { "epoch": 1.834157909800875, "grad_norm": 0.05554681643843651, "learning_rate": 2.0799362452625926e-07, "loss": 0.0007, "step": 224290 }, { "epoch": 1.8342396859794743, "grad_norm": 0.0286127720028162, "learning_rate": 2.0778998552724805e-07, "loss": 0.0009, "step": 224300 }, { "epoch": 1.8343214621580732, "grad_norm": 0.05131607502698898, "learning_rate": 2.0758644414945016e-07, "loss": 0.0005, "step": 224310 }, { "epoch": 1.8344032383366726, "grad_norm": 0.06784719973802567, "learning_rate": 2.0738300039701175e-07, "loss": 0.0008, "step": 224320 }, { "epoch": 1.8344850145152716, "grad_norm": 0.023556174710392952, "learning_rate": 2.0717965427407728e-07, "loss": 0.0004, "step": 224330 }, { "epoch": 1.834566790693871, "grad_norm": 0.0035034827888011932, "learning_rate": 2.0697640578478894e-07, "loss": 0.0005, "step": 224340 }, { "epoch": 1.83464856687247, "grad_norm": 0.25193318724632263, "learning_rate": 2.0677325493328736e-07, "loss": 0.0008, "step": 224350 }, { "epoch": 1.8347303430510693, "grad_norm": 0.0417800173163414, "learning_rate": 2.065702017237109e-07, "loss": 0.0005, "step": 224360 }, { "epoch": 1.8348121192296682, "grad_norm": 0.04206787049770355, "learning_rate": 2.063672461601951e-07, "loss": 0.0005, "step": 224370 }, { "epoch": 1.8348938954082676, "grad_norm": 0.01631276309490204, "learning_rate": 2.0616438824687556e-07, "loss": 0.0006, "step": 224380 }, { "epoch": 1.8349756715868666, "grad_norm": 0.021393749862909317, "learning_rate": 2.0596162798788343e-07, "loss": 0.0025, "step": 224390 }, { "epoch": 1.835057447765466, "grad_norm": 0.001601808238774538, "learning_rate": 2.057589653873504e-07, "loss": 0.001, "step": 224400 }, { "epoch": 1.8351392239440651, "grad_norm": 0.00514267897233367, "learning_rate": 2.0555640044940428e-07, "loss": 0.0005, "step": 224410 }, { "epoch": 1.8352210001226643, "grad_norm": 0.007388933561742306, "learning_rate": 2.053539331781712e-07, "loss": 0.0007, "step": 224420 }, { "epoch": 1.8353027763012635, "grad_norm": 0.006755664478987455, "learning_rate": 2.0515156357777566e-07, "loss": 0.0003, "step": 224430 }, { "epoch": 1.8353845524798627, "grad_norm": 0.05478652939200401, "learning_rate": 2.0494929165234045e-07, "loss": 0.0007, "step": 224440 }, { "epoch": 1.8354663286584618, "grad_norm": 0.05912945792078972, "learning_rate": 2.0474711740598506e-07, "loss": 0.0011, "step": 224450 }, { "epoch": 1.835548104837061, "grad_norm": 0.01652568392455578, "learning_rate": 2.04545040842829e-07, "loss": 0.0005, "step": 224460 }, { "epoch": 1.8356298810156602, "grad_norm": 0.12524062395095825, "learning_rate": 2.0434306196698893e-07, "loss": 0.0006, "step": 224470 }, { "epoch": 1.8357116571942593, "grad_norm": 0.05719344690442085, "learning_rate": 2.0414118078257829e-07, "loss": 0.0006, "step": 224480 }, { "epoch": 1.8357934333728585, "grad_norm": 0.012696058489382267, "learning_rate": 2.039393972937098e-07, "loss": 0.0002, "step": 224490 }, { "epoch": 1.8358752095514577, "grad_norm": 0.06437141448259354, "learning_rate": 2.0373771150449473e-07, "loss": 0.0009, "step": 224500 }, { "epoch": 1.8359569857300568, "grad_norm": 0.020458174869418144, "learning_rate": 2.0353612341904083e-07, "loss": 0.0008, "step": 224510 }, { "epoch": 1.836038761908656, "grad_norm": 0.012069587595760822, "learning_rate": 2.0333463304145539e-07, "loss": 0.0003, "step": 224520 }, { "epoch": 1.8361205380872552, "grad_norm": 0.02882591262459755, "learning_rate": 2.031332403758418e-07, "loss": 0.0005, "step": 224530 }, { "epoch": 1.8362023142658543, "grad_norm": 0.038931943476200104, "learning_rate": 2.02931945426304e-07, "loss": 0.0009, "step": 224540 }, { "epoch": 1.8362840904444535, "grad_norm": 0.01530552189797163, "learning_rate": 2.0273074819694093e-07, "loss": 0.0004, "step": 224550 }, { "epoch": 1.8363658666230527, "grad_norm": 0.048519134521484375, "learning_rate": 2.0252964869185209e-07, "loss": 0.0005, "step": 224560 }, { "epoch": 1.8364476428016518, "grad_norm": 0.005803279113024473, "learning_rate": 2.023286469151342e-07, "loss": 0.0005, "step": 224570 }, { "epoch": 1.836529418980251, "grad_norm": 0.02633345127105713, "learning_rate": 2.0212774287088067e-07, "loss": 0.0005, "step": 224580 }, { "epoch": 1.8366111951588502, "grad_norm": 0.026030343025922775, "learning_rate": 2.0192693656318597e-07, "loss": 0.0011, "step": 224590 }, { "epoch": 1.8366929713374494, "grad_norm": 0.0060910857282578945, "learning_rate": 2.0172622799613906e-07, "loss": 0.0006, "step": 224600 }, { "epoch": 1.8367747475160485, "grad_norm": 0.03361717239022255, "learning_rate": 2.0152561717382947e-07, "loss": 0.0007, "step": 224610 }, { "epoch": 1.8368565236946477, "grad_norm": 0.060593146830797195, "learning_rate": 2.0132510410034277e-07, "loss": 0.0003, "step": 224620 }, { "epoch": 1.836938299873247, "grad_norm": 0.02529493160545826, "learning_rate": 2.0112468877976466e-07, "loss": 0.0003, "step": 224630 }, { "epoch": 1.837020076051846, "grad_norm": 0.00828140415251255, "learning_rate": 2.0092437121617737e-07, "loss": 0.0004, "step": 224640 }, { "epoch": 1.8371018522304454, "grad_norm": 0.043055031448602676, "learning_rate": 2.0072415141366152e-07, "loss": 0.0006, "step": 224650 }, { "epoch": 1.8371836284090444, "grad_norm": 0.015812888741493225, "learning_rate": 2.005240293762961e-07, "loss": 0.0009, "step": 224660 }, { "epoch": 1.8372654045876438, "grad_norm": 0.01271140668541193, "learning_rate": 2.003240051081573e-07, "loss": 0.0006, "step": 224670 }, { "epoch": 1.8373471807662427, "grad_norm": 0.0008827935089357197, "learning_rate": 2.001240786133196e-07, "loss": 0.0004, "step": 224680 }, { "epoch": 1.837428956944842, "grad_norm": 0.0071218302473425865, "learning_rate": 1.9992424989585647e-07, "loss": 0.0003, "step": 224690 }, { "epoch": 1.837510733123441, "grad_norm": 0.08011453598737717, "learning_rate": 1.997245189598368e-07, "loss": 0.0008, "step": 224700 }, { "epoch": 1.8375925093020404, "grad_norm": 0.10421790182590485, "learning_rate": 1.9952488580933182e-07, "loss": 0.0009, "step": 224710 }, { "epoch": 1.8376742854806394, "grad_norm": 0.0016952988225966692, "learning_rate": 1.993253504484066e-07, "loss": 0.0008, "step": 224720 }, { "epoch": 1.8377560616592388, "grad_norm": 0.0370127372443676, "learning_rate": 1.9912591288112627e-07, "loss": 0.0004, "step": 224730 }, { "epoch": 1.8378378378378377, "grad_norm": 0.03961293771862984, "learning_rate": 1.989265731115525e-07, "loss": 0.0007, "step": 224740 }, { "epoch": 1.837919614016437, "grad_norm": 0.018202131614089012, "learning_rate": 1.9872733114374765e-07, "loss": 0.0006, "step": 224750 }, { "epoch": 1.838001390195036, "grad_norm": 0.005362923257052898, "learning_rate": 1.985281869817701e-07, "loss": 0.0013, "step": 224760 }, { "epoch": 1.8380831663736354, "grad_norm": 0.044460706412792206, "learning_rate": 1.9832914062967612e-07, "loss": 0.0008, "step": 224770 }, { "epoch": 1.8381649425522344, "grad_norm": 0.03538184612989426, "learning_rate": 1.9813019209152019e-07, "loss": 0.001, "step": 224780 }, { "epoch": 1.8382467187308338, "grad_norm": 0.015247258357703686, "learning_rate": 1.9793134137135573e-07, "loss": 0.0004, "step": 224790 }, { "epoch": 1.8383284949094327, "grad_norm": 0.015581551007926464, "learning_rate": 1.9773258847323284e-07, "loss": 0.0013, "step": 224800 }, { "epoch": 1.8384102710880321, "grad_norm": 0.012744192034006119, "learning_rate": 1.975339334012011e-07, "loss": 0.0004, "step": 224810 }, { "epoch": 1.838492047266631, "grad_norm": 0.012441924773156643, "learning_rate": 1.973353761593061e-07, "loss": 0.0005, "step": 224820 }, { "epoch": 1.8385738234452305, "grad_norm": 0.026170728728175163, "learning_rate": 1.9713691675159352e-07, "loss": 0.0005, "step": 224830 }, { "epoch": 1.8386555996238296, "grad_norm": 0.006638129707425833, "learning_rate": 1.9693855518210625e-07, "loss": 0.0003, "step": 224840 }, { "epoch": 1.8387373758024288, "grad_norm": 0.0054933009669184685, "learning_rate": 1.9674029145488438e-07, "loss": 0.0004, "step": 224850 }, { "epoch": 1.838819151981028, "grad_norm": 0.044265441596508026, "learning_rate": 1.965421255739669e-07, "loss": 0.001, "step": 224860 }, { "epoch": 1.8389009281596271, "grad_norm": 0.024649137631058693, "learning_rate": 1.9634405754339114e-07, "loss": 0.0007, "step": 224870 }, { "epoch": 1.8389827043382263, "grad_norm": 0.008410508744418621, "learning_rate": 1.961460873671911e-07, "loss": 0.0005, "step": 224880 }, { "epoch": 1.8390644805168255, "grad_norm": 0.015148889273405075, "learning_rate": 1.9594821504940075e-07, "loss": 0.0005, "step": 224890 }, { "epoch": 1.8391462566954246, "grad_norm": 0.018666554242372513, "learning_rate": 1.957504405940497e-07, "loss": 0.0006, "step": 224900 }, { "epoch": 1.8392280328740238, "grad_norm": 0.020967604592442513, "learning_rate": 1.9555276400516743e-07, "loss": 0.0003, "step": 224910 }, { "epoch": 1.839309809052623, "grad_norm": 0.01578698866069317, "learning_rate": 1.9535518528678077e-07, "loss": 0.0006, "step": 224920 }, { "epoch": 1.8393915852312221, "grad_norm": 0.003972872160375118, "learning_rate": 1.9515770444291426e-07, "loss": 0.0005, "step": 224930 }, { "epoch": 1.8394733614098213, "grad_norm": 0.02216510847210884, "learning_rate": 1.9496032147759026e-07, "loss": 0.0006, "step": 224940 }, { "epoch": 1.8395551375884205, "grad_norm": 0.030344290658831596, "learning_rate": 1.9476303639483107e-07, "loss": 0.0008, "step": 224950 }, { "epoch": 1.8396369137670197, "grad_norm": 0.03015838749706745, "learning_rate": 1.9456584919865406e-07, "loss": 0.0006, "step": 224960 }, { "epoch": 1.8397186899456188, "grad_norm": 0.0004776825662702322, "learning_rate": 1.943687598930777e-07, "loss": 0.0005, "step": 224970 }, { "epoch": 1.839800466124218, "grad_norm": 0.0014097706880420446, "learning_rate": 1.9417176848211483e-07, "loss": 0.0006, "step": 224980 }, { "epoch": 1.8398822423028172, "grad_norm": 0.03608717769384384, "learning_rate": 1.9397487496978007e-07, "loss": 0.0008, "step": 224990 }, { "epoch": 1.8399640184814163, "grad_norm": 0.003472884651273489, "learning_rate": 1.9377807936008352e-07, "loss": 0.0004, "step": 225000 }, { "epoch": 1.8400457946600155, "grad_norm": 0.08458980172872543, "learning_rate": 1.9358138165703422e-07, "loss": 0.0006, "step": 225010 }, { "epoch": 1.8401275708386147, "grad_norm": 0.06124509871006012, "learning_rate": 1.9338478186463893e-07, "loss": 0.0011, "step": 225020 }, { "epoch": 1.8402093470172138, "grad_norm": 0.014775105752050877, "learning_rate": 1.931882799869028e-07, "loss": 0.0006, "step": 225030 }, { "epoch": 1.840291123195813, "grad_norm": 0.01910415105521679, "learning_rate": 1.9299187602782875e-07, "loss": 0.0006, "step": 225040 }, { "epoch": 1.8403728993744122, "grad_norm": 0.05917688459157944, "learning_rate": 1.927955699914169e-07, "loss": 0.0003, "step": 225050 }, { "epoch": 1.8404546755530116, "grad_norm": 0.04058270901441574, "learning_rate": 1.9259936188166683e-07, "loss": 0.0003, "step": 225060 }, { "epoch": 1.8405364517316105, "grad_norm": 0.03489559143781662, "learning_rate": 1.924032517025759e-07, "loss": 0.0002, "step": 225070 }, { "epoch": 1.84061822791021, "grad_norm": 0.047386907041072845, "learning_rate": 1.9220723945813814e-07, "loss": 0.0007, "step": 225080 }, { "epoch": 1.8407000040888088, "grad_norm": 0.058224502950906754, "learning_rate": 1.9201132515234756e-07, "loss": 0.0003, "step": 225090 }, { "epoch": 1.8407817802674082, "grad_norm": 0.0622512511909008, "learning_rate": 1.918155087891932e-07, "loss": 0.0011, "step": 225100 }, { "epoch": 1.8408635564460072, "grad_norm": 0.018238089978694916, "learning_rate": 1.916197903726663e-07, "loss": 0.0005, "step": 225110 }, { "epoch": 1.8409453326246066, "grad_norm": 0.02824217453598976, "learning_rate": 1.9142416990675206e-07, "loss": 0.001, "step": 225120 }, { "epoch": 1.8410271088032055, "grad_norm": 0.00958612933754921, "learning_rate": 1.9122864739543668e-07, "loss": 0.0004, "step": 225130 }, { "epoch": 1.841108884981805, "grad_norm": 0.04556788504123688, "learning_rate": 1.9103322284270254e-07, "loss": 0.0008, "step": 225140 }, { "epoch": 1.8411906611604039, "grad_norm": 0.03823472186923027, "learning_rate": 1.9083789625253035e-07, "loss": 0.0004, "step": 225150 }, { "epoch": 1.8412724373390033, "grad_norm": 0.047556642442941666, "learning_rate": 1.9064266762889915e-07, "loss": 0.0007, "step": 225160 }, { "epoch": 1.8413542135176022, "grad_norm": 0.0009877970442175865, "learning_rate": 1.9044753697578632e-07, "loss": 0.001, "step": 225170 }, { "epoch": 1.8414359896962016, "grad_norm": 0.0059699490666389465, "learning_rate": 1.902525042971659e-07, "loss": 0.0007, "step": 225180 }, { "epoch": 1.8415177658748005, "grad_norm": 0.07463619112968445, "learning_rate": 1.9005756959701195e-07, "loss": 0.0005, "step": 225190 }, { "epoch": 1.8415995420534, "grad_norm": 0.011975791305303574, "learning_rate": 1.8986273287929513e-07, "loss": 0.0005, "step": 225200 }, { "epoch": 1.8416813182319989, "grad_norm": 0.0211926382035017, "learning_rate": 1.8966799414798452e-07, "loss": 0.0018, "step": 225210 }, { "epoch": 1.8417630944105983, "grad_norm": 0.04591595008969307, "learning_rate": 1.8947335340704642e-07, "loss": 0.0005, "step": 225220 }, { "epoch": 1.8418448705891972, "grad_norm": 0.06746690720319748, "learning_rate": 1.892788106604465e-07, "loss": 0.0008, "step": 225230 }, { "epoch": 1.8419266467677966, "grad_norm": 0.03469487652182579, "learning_rate": 1.8908436591214773e-07, "loss": 0.0005, "step": 225240 }, { "epoch": 1.8420084229463956, "grad_norm": 0.03407334163784981, "learning_rate": 1.8889001916611082e-07, "loss": 0.0011, "step": 225250 }, { "epoch": 1.842090199124995, "grad_norm": 0.05684881657361984, "learning_rate": 1.8869577042629484e-07, "loss": 0.0007, "step": 225260 }, { "epoch": 1.8421719753035941, "grad_norm": 0.04069361463189125, "learning_rate": 1.8850161969665715e-07, "loss": 0.0005, "step": 225270 }, { "epoch": 1.8422537514821933, "grad_norm": 0.0012160349870100617, "learning_rate": 1.883075669811524e-07, "loss": 0.0005, "step": 225280 }, { "epoch": 1.8423355276607924, "grad_norm": 0.018129725009202957, "learning_rate": 1.881136122837335e-07, "loss": 0.0004, "step": 225290 }, { "epoch": 1.8424173038393916, "grad_norm": 0.00872290600091219, "learning_rate": 1.8791975560835062e-07, "loss": 0.0008, "step": 225300 }, { "epoch": 1.8424990800179908, "grad_norm": 0.1477011889219284, "learning_rate": 1.877259969589551e-07, "loss": 0.0022, "step": 225310 }, { "epoch": 1.84258085619659, "grad_norm": 0.01655610464513302, "learning_rate": 1.875323363394921e-07, "loss": 0.0006, "step": 225320 }, { "epoch": 1.8426626323751891, "grad_norm": 0.0008342995424754918, "learning_rate": 1.873387737539073e-07, "loss": 0.0007, "step": 225330 }, { "epoch": 1.8427444085537883, "grad_norm": 0.017878811806440353, "learning_rate": 1.8714530920614316e-07, "loss": 0.0007, "step": 225340 }, { "epoch": 1.8428261847323875, "grad_norm": 0.002939433790743351, "learning_rate": 1.8695194270014205e-07, "loss": 0.0008, "step": 225350 }, { "epoch": 1.8429079609109866, "grad_norm": 0.04856647178530693, "learning_rate": 1.867586742398414e-07, "loss": 0.0006, "step": 225360 }, { "epoch": 1.8429897370895858, "grad_norm": 0.09565536677837372, "learning_rate": 1.8656550382917916e-07, "loss": 0.001, "step": 225370 }, { "epoch": 1.843071513268185, "grad_norm": 0.07942798733711243, "learning_rate": 1.863724314720905e-07, "loss": 0.0004, "step": 225380 }, { "epoch": 1.8431532894467841, "grad_norm": 0.017215939238667488, "learning_rate": 1.8617945717250785e-07, "loss": 0.0006, "step": 225390 }, { "epoch": 1.8432350656253833, "grad_norm": 0.04161930829286575, "learning_rate": 1.8598658093436307e-07, "loss": 0.0005, "step": 225400 }, { "epoch": 1.8433168418039825, "grad_norm": 0.040772102773189545, "learning_rate": 1.857938027615841e-07, "loss": 0.0008, "step": 225410 }, { "epoch": 1.8433986179825816, "grad_norm": 0.05380531772971153, "learning_rate": 1.8560112265809782e-07, "loss": 0.0005, "step": 225420 }, { "epoch": 1.8434803941611808, "grad_norm": 0.11278235912322998, "learning_rate": 1.8540854062783108e-07, "loss": 0.0005, "step": 225430 }, { "epoch": 1.84356217033978, "grad_norm": 0.003420865163207054, "learning_rate": 1.8521605667470576e-07, "loss": 0.0006, "step": 225440 }, { "epoch": 1.8436439465183792, "grad_norm": 0.06382130086421967, "learning_rate": 1.850236708026426e-07, "loss": 0.0003, "step": 225450 }, { "epoch": 1.8437257226969783, "grad_norm": 0.03518229350447655, "learning_rate": 1.848313830155607e-07, "loss": 0.001, "step": 225460 }, { "epoch": 1.8438074988755777, "grad_norm": 0.01568165235221386, "learning_rate": 1.8463919331737856e-07, "loss": 0.0005, "step": 225470 }, { "epoch": 1.8438892750541767, "grad_norm": 0.02875322662293911, "learning_rate": 1.8444710171200973e-07, "loss": 0.0004, "step": 225480 }, { "epoch": 1.843971051232776, "grad_norm": 0.020297734066843987, "learning_rate": 1.842551082033678e-07, "loss": 0.0007, "step": 225490 }, { "epoch": 1.844052827411375, "grad_norm": 0.031636547297239304, "learning_rate": 1.8406321279536343e-07, "loss": 0.0005, "step": 225500 }, { "epoch": 1.8441346035899744, "grad_norm": 0.024495815858244896, "learning_rate": 1.8387141549190635e-07, "loss": 0.0007, "step": 225510 }, { "epoch": 1.8442163797685733, "grad_norm": 0.027761543169617653, "learning_rate": 1.836797162969034e-07, "loss": 0.0008, "step": 225520 }, { "epoch": 1.8442981559471727, "grad_norm": 0.024109916761517525, "learning_rate": 1.8348811521425924e-07, "loss": 0.0008, "step": 225530 }, { "epoch": 1.8443799321257717, "grad_norm": 0.0355110839009285, "learning_rate": 1.8329661224787686e-07, "loss": 0.0004, "step": 225540 }, { "epoch": 1.844461708304371, "grad_norm": 0.04042007401585579, "learning_rate": 1.8310520740165816e-07, "loss": 0.0007, "step": 225550 }, { "epoch": 1.84454348448297, "grad_norm": 0.007707465440034866, "learning_rate": 1.8291390067950166e-07, "loss": 0.0006, "step": 225560 }, { "epoch": 1.8446252606615694, "grad_norm": 0.009259575046598911, "learning_rate": 1.827226920853048e-07, "loss": 0.0005, "step": 225570 }, { "epoch": 1.8447070368401683, "grad_norm": 0.013869239948689938, "learning_rate": 1.825315816229617e-07, "loss": 0.0007, "step": 225580 }, { "epoch": 1.8447888130187677, "grad_norm": 0.029631951823830605, "learning_rate": 1.8234056929636646e-07, "loss": 0.0009, "step": 225590 }, { "epoch": 1.8448705891973667, "grad_norm": 0.004873931407928467, "learning_rate": 1.8214965510941042e-07, "loss": 0.0024, "step": 225600 }, { "epoch": 1.844952365375966, "grad_norm": 0.010795148089528084, "learning_rate": 1.8195883906598156e-07, "loss": 0.0008, "step": 225610 }, { "epoch": 1.845034141554565, "grad_norm": 0.02147175930440426, "learning_rate": 1.8176812116996733e-07, "loss": 0.0007, "step": 225620 }, { "epoch": 1.8451159177331644, "grad_norm": 0.19794371724128723, "learning_rate": 1.8157750142525356e-07, "loss": 0.0012, "step": 225630 }, { "epoch": 1.8451976939117634, "grad_norm": 0.0014758040197193623, "learning_rate": 1.8138697983572208e-07, "loss": 0.0003, "step": 225640 }, { "epoch": 1.8452794700903628, "grad_norm": 0.00326057942584157, "learning_rate": 1.811965564052548e-07, "loss": 0.0012, "step": 225650 }, { "epoch": 1.8453612462689617, "grad_norm": 0.11636722087860107, "learning_rate": 1.8100623113772974e-07, "loss": 0.0009, "step": 225660 }, { "epoch": 1.845443022447561, "grad_norm": 0.025923049077391624, "learning_rate": 1.8081600403702604e-07, "loss": 0.0004, "step": 225670 }, { "epoch": 1.84552479862616, "grad_norm": 0.0033233698923140764, "learning_rate": 1.8062587510701723e-07, "loss": 0.0003, "step": 225680 }, { "epoch": 1.8456065748047594, "grad_norm": 0.030204031616449356, "learning_rate": 1.8043584435157636e-07, "loss": 0.0007, "step": 225690 }, { "epoch": 1.8456883509833586, "grad_norm": 0.04274730756878853, "learning_rate": 1.8024591177457474e-07, "loss": 0.0005, "step": 225700 }, { "epoch": 1.8457701271619578, "grad_norm": 0.025948110967874527, "learning_rate": 1.8005607737988207e-07, "loss": 0.0006, "step": 225710 }, { "epoch": 1.845851903340557, "grad_norm": 0.14284747838974, "learning_rate": 1.798663411713647e-07, "loss": 0.0007, "step": 225720 }, { "epoch": 1.845933679519156, "grad_norm": 0.0762646496295929, "learning_rate": 1.7967670315288842e-07, "loss": 0.0004, "step": 225730 }, { "epoch": 1.8460154556977553, "grad_norm": 0.015280092135071754, "learning_rate": 1.7948716332831572e-07, "loss": 0.0004, "step": 225740 }, { "epoch": 1.8460972318763544, "grad_norm": 0.08862560242414474, "learning_rate": 1.7929772170150738e-07, "loss": 0.0006, "step": 225750 }, { "epoch": 1.8461790080549536, "grad_norm": 0.007542889565229416, "learning_rate": 1.7910837827632365e-07, "loss": 0.0006, "step": 225760 }, { "epoch": 1.8462607842335528, "grad_norm": 0.00216150120832026, "learning_rate": 1.7891913305662034e-07, "loss": 0.0009, "step": 225770 }, { "epoch": 1.846342560412152, "grad_norm": 0.04901566356420517, "learning_rate": 1.787299860462527e-07, "loss": 0.0005, "step": 225780 }, { "epoch": 1.8464243365907511, "grad_norm": 0.047655895352363586, "learning_rate": 1.7854093724907485e-07, "loss": 0.0005, "step": 225790 }, { "epoch": 1.8465061127693503, "grad_norm": 0.026770753785967827, "learning_rate": 1.783519866689365e-07, "loss": 0.0008, "step": 225800 }, { "epoch": 1.8465878889479495, "grad_norm": 0.19838933646678925, "learning_rate": 1.7816313430968847e-07, "loss": 0.0007, "step": 225810 }, { "epoch": 1.8466696651265486, "grad_norm": 0.054881516844034195, "learning_rate": 1.7797438017517544e-07, "loss": 0.0005, "step": 225820 }, { "epoch": 1.8467514413051478, "grad_norm": 0.025375956669449806, "learning_rate": 1.7778572426924433e-07, "loss": 0.0003, "step": 225830 }, { "epoch": 1.846833217483747, "grad_norm": 0.007591740228235722, "learning_rate": 1.7759716659573822e-07, "loss": 0.0003, "step": 225840 }, { "epoch": 1.8469149936623461, "grad_norm": 0.04036619886755943, "learning_rate": 1.7740870715849733e-07, "loss": 0.001, "step": 225850 }, { "epoch": 1.8469967698409453, "grad_norm": 0.005296566057950258, "learning_rate": 1.772203459613614e-07, "loss": 0.0007, "step": 225860 }, { "epoch": 1.8470785460195445, "grad_norm": 0.03887464851140976, "learning_rate": 1.7703208300816678e-07, "loss": 0.0004, "step": 225870 }, { "epoch": 1.8471603221981436, "grad_norm": 0.00047705788165330887, "learning_rate": 1.7684391830274928e-07, "loss": 0.0005, "step": 225880 }, { "epoch": 1.8472420983767428, "grad_norm": 0.05846313014626503, "learning_rate": 1.766558518489414e-07, "loss": 0.0009, "step": 225890 }, { "epoch": 1.8473238745553422, "grad_norm": 0.023995080962777138, "learning_rate": 1.76467883650574e-07, "loss": 0.0002, "step": 225900 }, { "epoch": 1.8474056507339411, "grad_norm": 0.02693348191678524, "learning_rate": 1.762800137114773e-07, "loss": 0.0019, "step": 225910 }, { "epoch": 1.8474874269125405, "grad_norm": 0.023907702416181564, "learning_rate": 1.7609224203547715e-07, "loss": 0.0004, "step": 225920 }, { "epoch": 1.8475692030911395, "grad_norm": 0.012244616635143757, "learning_rate": 1.7590456862639937e-07, "loss": 0.0005, "step": 225930 }, { "epoch": 1.8476509792697389, "grad_norm": 0.028716327622532845, "learning_rate": 1.7571699348806648e-07, "loss": 0.0004, "step": 225940 }, { "epoch": 1.8477327554483378, "grad_norm": 0.024533981457352638, "learning_rate": 1.7552951662429984e-07, "loss": 0.0007, "step": 225950 }, { "epoch": 1.8478145316269372, "grad_norm": 0.022654827684164047, "learning_rate": 1.7534213803891863e-07, "loss": 0.0011, "step": 225960 }, { "epoch": 1.8478963078055362, "grad_norm": 0.03523814305663109, "learning_rate": 1.7515485773573983e-07, "loss": 0.0007, "step": 225970 }, { "epoch": 1.8479780839841355, "grad_norm": 0.035204581916332245, "learning_rate": 1.7496767571857865e-07, "loss": 0.0012, "step": 225980 }, { "epoch": 1.8480598601627345, "grad_norm": 0.014381462708115578, "learning_rate": 1.7478059199124765e-07, "loss": 0.0004, "step": 225990 }, { "epoch": 1.8481416363413339, "grad_norm": 0.04326065257191658, "learning_rate": 1.745936065575582e-07, "loss": 0.0009, "step": 226000 }, { "epoch": 1.8482234125199328, "grad_norm": 0.04913240671157837, "learning_rate": 1.7440671942131892e-07, "loss": 0.001, "step": 226010 }, { "epoch": 1.8483051886985322, "grad_norm": 0.026037901639938354, "learning_rate": 1.7421993058633735e-07, "loss": 0.0007, "step": 226020 }, { "epoch": 1.8483869648771312, "grad_norm": 0.001630315207876265, "learning_rate": 1.740332400564182e-07, "loss": 0.0003, "step": 226030 }, { "epoch": 1.8484687410557306, "grad_norm": 0.024562925100326538, "learning_rate": 1.738466478353651e-07, "loss": 0.0004, "step": 226040 }, { "epoch": 1.8485505172343295, "grad_norm": 0.05403950437903404, "learning_rate": 1.7366015392697833e-07, "loss": 0.0009, "step": 226050 }, { "epoch": 1.848632293412929, "grad_norm": 0.009756031446158886, "learning_rate": 1.7347375833505765e-07, "loss": 0.001, "step": 226060 }, { "epoch": 1.8487140695915278, "grad_norm": 0.06879972666501999, "learning_rate": 1.732874610633989e-07, "loss": 0.0006, "step": 226070 }, { "epoch": 1.8487958457701272, "grad_norm": 0.0848793089389801, "learning_rate": 1.7310126211579902e-07, "loss": 0.0004, "step": 226080 }, { "epoch": 1.8488776219487262, "grad_norm": 0.001608008984476328, "learning_rate": 1.7291516149604948e-07, "loss": 0.0003, "step": 226090 }, { "epoch": 1.8489593981273256, "grad_norm": 0.040328193455934525, "learning_rate": 1.7272915920794164e-07, "loss": 0.0006, "step": 226100 }, { "epoch": 1.8490411743059247, "grad_norm": 0.03524281084537506, "learning_rate": 1.7254325525526472e-07, "loss": 0.0004, "step": 226110 }, { "epoch": 1.849122950484524, "grad_norm": 0.06982745975255966, "learning_rate": 1.7235744964180568e-07, "loss": 0.0004, "step": 226120 }, { "epoch": 1.849204726663123, "grad_norm": 0.014470224268734455, "learning_rate": 1.721717423713498e-07, "loss": 0.0037, "step": 226130 }, { "epoch": 1.8492865028417222, "grad_norm": 0.05800164118409157, "learning_rate": 1.7198613344767967e-07, "loss": 0.0004, "step": 226140 }, { "epoch": 1.8493682790203214, "grad_norm": 0.07815052568912506, "learning_rate": 1.7180062287457554e-07, "loss": 0.0008, "step": 226150 }, { "epoch": 1.8494500551989206, "grad_norm": 0.05423779413104057, "learning_rate": 1.7161521065581776e-07, "loss": 0.0006, "step": 226160 }, { "epoch": 1.8495318313775198, "grad_norm": 0.02752099744975567, "learning_rate": 1.714298967951833e-07, "loss": 0.0006, "step": 226170 }, { "epoch": 1.849613607556119, "grad_norm": 0.006569261662662029, "learning_rate": 1.7124468129644634e-07, "loss": 0.0006, "step": 226180 }, { "epoch": 1.849695383734718, "grad_norm": 0.025438975542783737, "learning_rate": 1.7105956416337998e-07, "loss": 0.0013, "step": 226190 }, { "epoch": 1.8497771599133173, "grad_norm": 0.014192786067724228, "learning_rate": 1.7087454539975568e-07, "loss": 0.0006, "step": 226200 }, { "epoch": 1.8498589360919164, "grad_norm": 0.02582889050245285, "learning_rate": 1.706896250093426e-07, "loss": 0.0007, "step": 226210 }, { "epoch": 1.8499407122705156, "grad_norm": 0.006800341885536909, "learning_rate": 1.7050480299590722e-07, "loss": 0.0003, "step": 226220 }, { "epoch": 1.8500224884491148, "grad_norm": 0.030864648520946503, "learning_rate": 1.7032007936321427e-07, "loss": 0.0011, "step": 226230 }, { "epoch": 1.850104264627714, "grad_norm": 0.031634531915187836, "learning_rate": 1.701354541150274e-07, "loss": 0.0019, "step": 226240 }, { "epoch": 1.850186040806313, "grad_norm": 0.07161308079957962, "learning_rate": 1.6995092725510699e-07, "loss": 0.0006, "step": 226250 }, { "epoch": 1.8502678169849123, "grad_norm": 0.07807035744190216, "learning_rate": 1.697664987872122e-07, "loss": 0.0005, "step": 226260 }, { "epoch": 1.8503495931635114, "grad_norm": 0.04831162467598915, "learning_rate": 1.6958216871510004e-07, "loss": 0.0009, "step": 226270 }, { "epoch": 1.8504313693421106, "grad_norm": 0.018848346546292305, "learning_rate": 1.6939793704252526e-07, "loss": 0.0003, "step": 226280 }, { "epoch": 1.8505131455207098, "grad_norm": 0.09000514447689056, "learning_rate": 1.692138037732416e-07, "loss": 0.001, "step": 226290 }, { "epoch": 1.850594921699309, "grad_norm": 0.004619669169187546, "learning_rate": 1.6902976891099932e-07, "loss": 0.0005, "step": 226300 }, { "epoch": 1.8506766978779081, "grad_norm": 0.003872945439070463, "learning_rate": 1.6884583245954656e-07, "loss": 0.0003, "step": 226310 }, { "epoch": 1.8507584740565073, "grad_norm": 0.056028857827186584, "learning_rate": 1.6866199442263197e-07, "loss": 0.0004, "step": 226320 }, { "epoch": 1.8508402502351067, "grad_norm": 0.05009712651371956, "learning_rate": 1.684782548039998e-07, "loss": 0.0006, "step": 226330 }, { "epoch": 1.8509220264137056, "grad_norm": 0.1642143279314041, "learning_rate": 1.6829461360739262e-07, "loss": 0.0012, "step": 226340 }, { "epoch": 1.851003802592305, "grad_norm": 0.012111461721360683, "learning_rate": 1.6811107083655187e-07, "loss": 0.001, "step": 226350 }, { "epoch": 1.851085578770904, "grad_norm": 0.003793521085754037, "learning_rate": 1.6792762649521566e-07, "loss": 0.0003, "step": 226360 }, { "epoch": 1.8511673549495034, "grad_norm": 0.045842982828617096, "learning_rate": 1.677442805871221e-07, "loss": 0.0006, "step": 226370 }, { "epoch": 1.8512491311281023, "grad_norm": 0.041267309337854385, "learning_rate": 1.675610331160049e-07, "loss": 0.0008, "step": 226380 }, { "epoch": 1.8513309073067017, "grad_norm": 0.0011618860298767686, "learning_rate": 1.673778840855972e-07, "loss": 0.0004, "step": 226390 }, { "epoch": 1.8514126834853006, "grad_norm": 0.02375415340065956, "learning_rate": 1.671948334996304e-07, "loss": 0.001, "step": 226400 }, { "epoch": 1.8514944596639, "grad_norm": 0.03293629735708237, "learning_rate": 1.670118813618332e-07, "loss": 0.0014, "step": 226410 }, { "epoch": 1.851576235842499, "grad_norm": 0.0914607048034668, "learning_rate": 1.6682902767593212e-07, "loss": 0.0006, "step": 226420 }, { "epoch": 1.8516580120210984, "grad_norm": 0.02825411781668663, "learning_rate": 1.6664627244565246e-07, "loss": 0.001, "step": 226430 }, { "epoch": 1.8517397881996973, "grad_norm": 0.028889453038573265, "learning_rate": 1.6646361567471736e-07, "loss": 0.0005, "step": 226440 }, { "epoch": 1.8518215643782967, "grad_norm": 0.0913810208439827, "learning_rate": 1.662810573668472e-07, "loss": 0.0003, "step": 226450 }, { "epoch": 1.8519033405568956, "grad_norm": 0.021817676723003387, "learning_rate": 1.6609859752576063e-07, "loss": 0.0008, "step": 226460 }, { "epoch": 1.851985116735495, "grad_norm": 0.001196600729599595, "learning_rate": 1.6591623615517527e-07, "loss": 0.0005, "step": 226470 }, { "epoch": 1.852066892914094, "grad_norm": 0.039872631430625916, "learning_rate": 1.6573397325880536e-07, "loss": 0.001, "step": 226480 }, { "epoch": 1.8521486690926934, "grad_norm": 0.007558146957308054, "learning_rate": 1.6555180884036348e-07, "loss": 0.0005, "step": 226490 }, { "epoch": 1.8522304452712923, "grad_norm": 0.036587875336408615, "learning_rate": 1.6536974290356167e-07, "loss": 0.0004, "step": 226500 }, { "epoch": 1.8523122214498917, "grad_norm": 0.008757183328270912, "learning_rate": 1.6518777545210695e-07, "loss": 0.0002, "step": 226510 }, { "epoch": 1.8523939976284907, "grad_norm": 0.01807362027466297, "learning_rate": 1.6500590648970804e-07, "loss": 0.0003, "step": 226520 }, { "epoch": 1.85247577380709, "grad_norm": 0.06843218207359314, "learning_rate": 1.6482413602006863e-07, "loss": 0.0003, "step": 226530 }, { "epoch": 1.8525575499856892, "grad_norm": 0.043568968772888184, "learning_rate": 1.646424640468919e-07, "loss": 0.0019, "step": 226540 }, { "epoch": 1.8526393261642884, "grad_norm": 0.005960037931799889, "learning_rate": 1.6446089057387758e-07, "loss": 0.0005, "step": 226550 }, { "epoch": 1.8527211023428876, "grad_norm": 0.032741788774728775, "learning_rate": 1.6427941560472671e-07, "loss": 0.0007, "step": 226560 }, { "epoch": 1.8528028785214867, "grad_norm": 0.030708888545632362, "learning_rate": 1.640980391431346e-07, "loss": 0.0008, "step": 226570 }, { "epoch": 1.852884654700086, "grad_norm": 0.05900076776742935, "learning_rate": 1.6391676119279609e-07, "loss": 0.0011, "step": 226580 }, { "epoch": 1.852966430878685, "grad_norm": 0.03014143742620945, "learning_rate": 1.6373558175740433e-07, "loss": 0.0005, "step": 226590 }, { "epoch": 1.8530482070572842, "grad_norm": 0.04273506999015808, "learning_rate": 1.6355450084064973e-07, "loss": 0.0004, "step": 226600 }, { "epoch": 1.8531299832358834, "grad_norm": 0.03405267000198364, "learning_rate": 1.6337351844622097e-07, "loss": 0.0004, "step": 226610 }, { "epoch": 1.8532117594144826, "grad_norm": 0.033382803201675415, "learning_rate": 1.631926345778051e-07, "loss": 0.0005, "step": 226620 }, { "epoch": 1.8532935355930817, "grad_norm": 0.08013173937797546, "learning_rate": 1.6301184923908642e-07, "loss": 0.001, "step": 226630 }, { "epoch": 1.853375311771681, "grad_norm": 0.07197850197553635, "learning_rate": 1.6283116243374865e-07, "loss": 0.0008, "step": 226640 }, { "epoch": 1.85345708795028, "grad_norm": 0.028556397184729576, "learning_rate": 1.6265057416547213e-07, "loss": 0.0009, "step": 226650 }, { "epoch": 1.8535388641288792, "grad_norm": 0.0028244040440768003, "learning_rate": 1.624700844379351e-07, "loss": 0.0004, "step": 226660 }, { "epoch": 1.8536206403074784, "grad_norm": 0.061669718474149704, "learning_rate": 1.622896932548146e-07, "loss": 0.0017, "step": 226670 }, { "epoch": 1.8537024164860776, "grad_norm": 0.0015811050543561578, "learning_rate": 1.6210940061978542e-07, "loss": 0.0005, "step": 226680 }, { "epoch": 1.8537841926646768, "grad_norm": 0.00985355768352747, "learning_rate": 1.6192920653652022e-07, "loss": 0.0004, "step": 226690 }, { "epoch": 1.853865968843276, "grad_norm": 0.033348824828863144, "learning_rate": 1.617491110086894e-07, "loss": 0.0005, "step": 226700 }, { "epoch": 1.853947745021875, "grad_norm": 0.033837832510471344, "learning_rate": 1.6156911403996222e-07, "loss": 0.0005, "step": 226710 }, { "epoch": 1.8540295212004743, "grad_norm": 0.0044839815236628056, "learning_rate": 1.6138921563400522e-07, "loss": 0.0003, "step": 226720 }, { "epoch": 1.8541112973790734, "grad_norm": 0.001775632961653173, "learning_rate": 1.6120941579448267e-07, "loss": 0.0003, "step": 226730 }, { "epoch": 1.8541930735576726, "grad_norm": 0.0017704369965940714, "learning_rate": 1.6102971452505778e-07, "loss": 0.0006, "step": 226740 }, { "epoch": 1.8542748497362718, "grad_norm": 0.03489934653043747, "learning_rate": 1.608501118293898e-07, "loss": 0.0006, "step": 226750 }, { "epoch": 1.8543566259148712, "grad_norm": 0.009109973907470703, "learning_rate": 1.6067060771113975e-07, "loss": 0.0006, "step": 226760 }, { "epoch": 1.85443840209347, "grad_norm": 0.03402700647711754, "learning_rate": 1.604912021739624e-07, "loss": 0.0003, "step": 226770 }, { "epoch": 1.8545201782720695, "grad_norm": 0.015223846770823002, "learning_rate": 1.6031189522151325e-07, "loss": 0.0003, "step": 226780 }, { "epoch": 1.8546019544506684, "grad_norm": 0.015438609756529331, "learning_rate": 1.6013268685744433e-07, "loss": 0.0004, "step": 226790 }, { "epoch": 1.8546837306292678, "grad_norm": 0.046526696532964706, "learning_rate": 1.5995357708540716e-07, "loss": 0.0004, "step": 226800 }, { "epoch": 1.8547655068078668, "grad_norm": 0.0006564372451975942, "learning_rate": 1.597745659090494e-07, "loss": 0.0003, "step": 226810 }, { "epoch": 1.8548472829864662, "grad_norm": 0.03171764686703682, "learning_rate": 1.5959565333201866e-07, "loss": 0.0006, "step": 226820 }, { "epoch": 1.8549290591650651, "grad_norm": 0.042118266224861145, "learning_rate": 1.5941683935795816e-07, "loss": 0.0013, "step": 226830 }, { "epoch": 1.8550108353436645, "grad_norm": 0.040214505046606064, "learning_rate": 1.5923812399051218e-07, "loss": 0.0006, "step": 226840 }, { "epoch": 1.8550926115222635, "grad_norm": 0.05799996852874756, "learning_rate": 1.5905950723331952e-07, "loss": 0.0003, "step": 226850 }, { "epoch": 1.8551743877008628, "grad_norm": 0.028024306520819664, "learning_rate": 1.5888098909002003e-07, "loss": 0.0008, "step": 226860 }, { "epoch": 1.8552561638794618, "grad_norm": 0.008338182233273983, "learning_rate": 1.587025695642497e-07, "loss": 0.0002, "step": 226870 }, { "epoch": 1.8553379400580612, "grad_norm": 0.07180772721767426, "learning_rate": 1.5852424865964334e-07, "loss": 0.0007, "step": 226880 }, { "epoch": 1.8554197162366601, "grad_norm": 0.02321348711848259, "learning_rate": 1.583460263798331e-07, "loss": 0.0005, "step": 226890 }, { "epoch": 1.8555014924152595, "grad_norm": 0.0036942570004612207, "learning_rate": 1.5816790272844996e-07, "loss": 0.0006, "step": 226900 }, { "epoch": 1.8555832685938585, "grad_norm": 0.019258754327893257, "learning_rate": 1.5798987770912212e-07, "loss": 0.0005, "step": 226910 }, { "epoch": 1.8556650447724579, "grad_norm": 0.033839963376522064, "learning_rate": 1.5781195132547666e-07, "loss": 0.0008, "step": 226920 }, { "epoch": 1.8557468209510568, "grad_norm": 0.022204507142305374, "learning_rate": 1.5763412358113738e-07, "loss": 0.0007, "step": 226930 }, { "epoch": 1.8558285971296562, "grad_norm": 0.09094826877117157, "learning_rate": 1.574563944797275e-07, "loss": 0.0005, "step": 226940 }, { "epoch": 1.8559103733082551, "grad_norm": 0.0012085286434739828, "learning_rate": 1.5727876402486686e-07, "loss": 0.0011, "step": 226950 }, { "epoch": 1.8559921494868545, "grad_norm": 0.02124946378171444, "learning_rate": 1.5710123222017427e-07, "loss": 0.001, "step": 226960 }, { "epoch": 1.8560739256654537, "grad_norm": 0.019067438319325447, "learning_rate": 1.569237990692657e-07, "loss": 0.0008, "step": 226970 }, { "epoch": 1.8561557018440529, "grad_norm": 0.03475067764520645, "learning_rate": 1.567464645757566e-07, "loss": 0.0008, "step": 226980 }, { "epoch": 1.856237478022652, "grad_norm": 0.005620873533189297, "learning_rate": 1.5656922874325797e-07, "loss": 0.0014, "step": 226990 }, { "epoch": 1.8563192542012512, "grad_norm": 0.05329003185033798, "learning_rate": 1.5639209157538138e-07, "loss": 0.0007, "step": 227000 }, { "epoch": 1.8564010303798504, "grad_norm": 0.11004865914583206, "learning_rate": 1.5621505307573502e-07, "loss": 0.0005, "step": 227010 }, { "epoch": 1.8564828065584495, "grad_norm": 0.03190276399254799, "learning_rate": 1.560381132479255e-07, "loss": 0.0008, "step": 227020 }, { "epoch": 1.8565645827370487, "grad_norm": 0.1306363046169281, "learning_rate": 1.5586127209555657e-07, "loss": 0.0005, "step": 227030 }, { "epoch": 1.8566463589156479, "grad_norm": 0.011607527732849121, "learning_rate": 1.5568452962223145e-07, "loss": 0.0004, "step": 227040 }, { "epoch": 1.856728135094247, "grad_norm": 0.054325830191373825, "learning_rate": 1.5550788583155008e-07, "loss": 0.0003, "step": 227050 }, { "epoch": 1.8568099112728462, "grad_norm": 0.02207214944064617, "learning_rate": 1.5533134072711065e-07, "loss": 0.0003, "step": 227060 }, { "epoch": 1.8568916874514454, "grad_norm": 0.0017386015970259905, "learning_rate": 1.551548943125103e-07, "loss": 0.0004, "step": 227070 }, { "epoch": 1.8569734636300446, "grad_norm": 0.0197144728153944, "learning_rate": 1.5497854659134226e-07, "loss": 0.0014, "step": 227080 }, { "epoch": 1.8570552398086437, "grad_norm": 0.001218849909491837, "learning_rate": 1.548022975671998e-07, "loss": 0.0006, "step": 227090 }, { "epoch": 1.857137015987243, "grad_norm": 0.043762825429439545, "learning_rate": 1.5462614724367275e-07, "loss": 0.0003, "step": 227100 }, { "epoch": 1.857218792165842, "grad_norm": 0.0032306171488016844, "learning_rate": 1.5445009562434887e-07, "loss": 0.0009, "step": 227110 }, { "epoch": 1.8573005683444412, "grad_norm": 0.029830146580934525, "learning_rate": 1.5427414271281638e-07, "loss": 0.0003, "step": 227120 }, { "epoch": 1.8573823445230404, "grad_norm": 0.06292533874511719, "learning_rate": 1.5409828851265795e-07, "loss": 0.0006, "step": 227130 }, { "epoch": 1.8574641207016396, "grad_norm": 0.02969709224998951, "learning_rate": 1.539225330274563e-07, "loss": 0.0002, "step": 227140 }, { "epoch": 1.8575458968802387, "grad_norm": 0.044328656047582626, "learning_rate": 1.5374687626079131e-07, "loss": 0.0008, "step": 227150 }, { "epoch": 1.857627673058838, "grad_norm": 0.07345128059387207, "learning_rate": 1.5357131821624238e-07, "loss": 0.0005, "step": 227160 }, { "epoch": 1.857709449237437, "grad_norm": 0.020413760095834732, "learning_rate": 1.5339585889738495e-07, "loss": 0.0006, "step": 227170 }, { "epoch": 1.8577912254160363, "grad_norm": 0.010051495395600796, "learning_rate": 1.532204983077934e-07, "loss": 0.0008, "step": 227180 }, { "epoch": 1.8578730015946356, "grad_norm": 0.10438222438097, "learning_rate": 1.5304523645103987e-07, "loss": 0.0006, "step": 227190 }, { "epoch": 1.8579547777732346, "grad_norm": 0.00402658898383379, "learning_rate": 1.528700733306948e-07, "loss": 0.0003, "step": 227200 }, { "epoch": 1.858036553951834, "grad_norm": 0.062094707041978836, "learning_rate": 1.5269500895032595e-07, "loss": 0.0006, "step": 227210 }, { "epoch": 1.858118330130433, "grad_norm": 0.05146142840385437, "learning_rate": 1.5252004331350046e-07, "loss": 0.0007, "step": 227220 }, { "epoch": 1.8582001063090323, "grad_norm": 0.0398557148873806, "learning_rate": 1.52345176423781e-07, "loss": 0.0004, "step": 227230 }, { "epoch": 1.8582818824876313, "grad_norm": 0.18460200726985931, "learning_rate": 1.5217040828473083e-07, "loss": 0.0014, "step": 227240 }, { "epoch": 1.8583636586662307, "grad_norm": 0.02469058148562908, "learning_rate": 1.5199573889991047e-07, "loss": 0.0003, "step": 227250 }, { "epoch": 1.8584454348448296, "grad_norm": 0.02157699130475521, "learning_rate": 1.5182116827287764e-07, "loss": 0.0006, "step": 227260 }, { "epoch": 1.858527211023429, "grad_norm": 0.08462762087583542, "learning_rate": 1.5164669640718776e-07, "loss": 0.0004, "step": 227270 }, { "epoch": 1.858608987202028, "grad_norm": 0.017619220539927483, "learning_rate": 1.5147232330639583e-07, "loss": 0.0005, "step": 227280 }, { "epoch": 1.8586907633806273, "grad_norm": 0.005845183506608009, "learning_rate": 1.5129804897405398e-07, "loss": 0.0006, "step": 227290 }, { "epoch": 1.8587725395592263, "grad_norm": 0.06423348933458328, "learning_rate": 1.5112387341371216e-07, "loss": 0.0008, "step": 227300 }, { "epoch": 1.8588543157378257, "grad_norm": 0.039837971329689026, "learning_rate": 1.5094979662891861e-07, "loss": 0.0006, "step": 227310 }, { "epoch": 1.8589360919164246, "grad_norm": 0.04745638370513916, "learning_rate": 1.507758186232189e-07, "loss": 0.0006, "step": 227320 }, { "epoch": 1.859017868095024, "grad_norm": 0.014440684579312801, "learning_rate": 1.5060193940015732e-07, "loss": 0.0008, "step": 227330 }, { "epoch": 1.859099644273623, "grad_norm": 0.028513099998235703, "learning_rate": 1.5042815896327667e-07, "loss": 0.0005, "step": 227340 }, { "epoch": 1.8591814204522223, "grad_norm": 0.05276959016919136, "learning_rate": 1.502544773161152e-07, "loss": 0.0012, "step": 227350 }, { "epoch": 1.8592631966308213, "grad_norm": 0.012533590197563171, "learning_rate": 1.5008089446221285e-07, "loss": 0.0007, "step": 227360 }, { "epoch": 1.8593449728094207, "grad_norm": 0.009611434303224087, "learning_rate": 1.499074104051046e-07, "loss": 0.0002, "step": 227370 }, { "epoch": 1.8594267489880196, "grad_norm": 0.0008444832637906075, "learning_rate": 1.497340251483248e-07, "loss": 0.0005, "step": 227380 }, { "epoch": 1.859508525166619, "grad_norm": 0.02264103665947914, "learning_rate": 1.4956073869540566e-07, "loss": 0.0005, "step": 227390 }, { "epoch": 1.8595903013452182, "grad_norm": 0.06005406007170677, "learning_rate": 1.4938755104987656e-07, "loss": 0.0005, "step": 227400 }, { "epoch": 1.8596720775238174, "grad_norm": 0.053770631551742554, "learning_rate": 1.492144622152658e-07, "loss": 0.0017, "step": 227410 }, { "epoch": 1.8597538537024165, "grad_norm": 0.021278243511915207, "learning_rate": 1.4904147219510002e-07, "loss": 0.0005, "step": 227420 }, { "epoch": 1.8598356298810157, "grad_norm": 0.013481919653713703, "learning_rate": 1.4886858099290247e-07, "loss": 0.0004, "step": 227430 }, { "epoch": 1.8599174060596149, "grad_norm": 0.04413837566971779, "learning_rate": 1.4869578861219425e-07, "loss": 0.0011, "step": 227440 }, { "epoch": 1.859999182238214, "grad_norm": 0.0024046620819717646, "learning_rate": 1.4852309505649642e-07, "loss": 0.0011, "step": 227450 }, { "epoch": 1.8600809584168132, "grad_norm": 0.001527015003375709, "learning_rate": 1.483505003293273e-07, "loss": 0.0007, "step": 227460 }, { "epoch": 1.8601627345954124, "grad_norm": 0.011311057023704052, "learning_rate": 1.4817800443420183e-07, "loss": 0.0004, "step": 227470 }, { "epoch": 1.8602445107740115, "grad_norm": 0.085038922727108, "learning_rate": 1.4800560737463331e-07, "loss": 0.0008, "step": 227480 }, { "epoch": 1.8603262869526107, "grad_norm": 0.09034647047519684, "learning_rate": 1.4783330915413507e-07, "loss": 0.0003, "step": 227490 }, { "epoch": 1.8604080631312099, "grad_norm": 0.033000774681568146, "learning_rate": 1.476611097762165e-07, "loss": 0.0006, "step": 227500 }, { "epoch": 1.860489839309809, "grad_norm": 0.012351510114967823, "learning_rate": 1.4748900924438537e-07, "loss": 0.0005, "step": 227510 }, { "epoch": 1.8605716154884082, "grad_norm": 0.03721030056476593, "learning_rate": 1.4731700756214662e-07, "loss": 0.0008, "step": 227520 }, { "epoch": 1.8606533916670074, "grad_norm": 0.0010815240675583482, "learning_rate": 1.4714510473300581e-07, "loss": 0.0003, "step": 227530 }, { "epoch": 1.8607351678456066, "grad_norm": 0.026592114940285683, "learning_rate": 1.4697330076046346e-07, "loss": 0.0007, "step": 227540 }, { "epoch": 1.8608169440242057, "grad_norm": 0.015884006395936012, "learning_rate": 1.4680159564802066e-07, "loss": 0.0005, "step": 227550 }, { "epoch": 1.860898720202805, "grad_norm": 0.04803968593478203, "learning_rate": 1.4662998939917296e-07, "loss": 0.0009, "step": 227560 }, { "epoch": 1.860980496381404, "grad_norm": 0.056526996195316315, "learning_rate": 1.4645848201741807e-07, "loss": 0.0007, "step": 227570 }, { "epoch": 1.8610622725600032, "grad_norm": 0.0024935132823884487, "learning_rate": 1.4628707350624882e-07, "loss": 0.0006, "step": 227580 }, { "epoch": 1.8611440487386024, "grad_norm": 0.010281449183821678, "learning_rate": 1.461157638691574e-07, "loss": 0.001, "step": 227590 }, { "epoch": 1.8612258249172016, "grad_norm": 0.06158187612891197, "learning_rate": 1.4594455310963263e-07, "loss": 0.0009, "step": 227600 }, { "epoch": 1.8613076010958007, "grad_norm": 0.03390331566333771, "learning_rate": 1.4577344123116345e-07, "loss": 0.0005, "step": 227610 }, { "epoch": 1.8613893772744001, "grad_norm": 0.05456521734595299, "learning_rate": 1.4560242823723482e-07, "loss": 0.0016, "step": 227620 }, { "epoch": 1.861471153452999, "grad_norm": 0.009872285649180412, "learning_rate": 1.4543151413133062e-07, "loss": 0.0005, "step": 227630 }, { "epoch": 1.8615529296315985, "grad_norm": 0.022111015394330025, "learning_rate": 1.4526069891693194e-07, "loss": 0.0005, "step": 227640 }, { "epoch": 1.8616347058101974, "grad_norm": 0.002562063280493021, "learning_rate": 1.4508998259751938e-07, "loss": 0.0002, "step": 227650 }, { "epoch": 1.8617164819887968, "grad_norm": 0.04677314683794975, "learning_rate": 1.449193651765707e-07, "loss": 0.0004, "step": 227660 }, { "epoch": 1.8617982581673957, "grad_norm": 0.0126683684065938, "learning_rate": 1.4474884665756083e-07, "loss": 0.0007, "step": 227670 }, { "epoch": 1.8618800343459951, "grad_norm": 0.044057559221982956, "learning_rate": 1.4457842704396318e-07, "loss": 0.0007, "step": 227680 }, { "epoch": 1.861961810524594, "grad_norm": 0.0485733300447464, "learning_rate": 1.444081063392494e-07, "loss": 0.0004, "step": 227690 }, { "epoch": 1.8620435867031935, "grad_norm": 0.0311452466994524, "learning_rate": 1.4423788454689003e-07, "loss": 0.0004, "step": 227700 }, { "epoch": 1.8621253628817924, "grad_norm": 0.014091229997575283, "learning_rate": 1.440677616703512e-07, "loss": 0.0003, "step": 227710 }, { "epoch": 1.8622071390603918, "grad_norm": 0.003967693075537682, "learning_rate": 1.43897737713099e-07, "loss": 0.0002, "step": 227720 }, { "epoch": 1.8622889152389908, "grad_norm": 0.0006409445777535439, "learning_rate": 1.437278126785979e-07, "loss": 0.0007, "step": 227730 }, { "epoch": 1.8623706914175902, "grad_norm": 0.020807385444641113, "learning_rate": 1.4355798657030794e-07, "loss": 0.0007, "step": 227740 }, { "epoch": 1.862452467596189, "grad_norm": 0.00487182941287756, "learning_rate": 1.4338825939169022e-07, "loss": 0.001, "step": 227750 }, { "epoch": 1.8625342437747885, "grad_norm": 0.032013051211833954, "learning_rate": 1.4321863114620026e-07, "loss": 0.0007, "step": 227760 }, { "epoch": 1.8626160199533874, "grad_norm": 0.050221458077430725, "learning_rate": 1.4304910183729482e-07, "loss": 0.0005, "step": 227770 }, { "epoch": 1.8626977961319868, "grad_norm": 0.06686025857925415, "learning_rate": 1.4287967146842774e-07, "loss": 0.0006, "step": 227780 }, { "epoch": 1.8627795723105858, "grad_norm": 0.004726487677544355, "learning_rate": 1.4271034004304963e-07, "loss": 0.0004, "step": 227790 }, { "epoch": 1.8628613484891852, "grad_norm": 0.06681759655475616, "learning_rate": 1.4254110756460992e-07, "loss": 0.0006, "step": 227800 }, { "epoch": 1.8629431246677841, "grad_norm": 0.09278959035873413, "learning_rate": 1.423719740365559e-07, "loss": 0.001, "step": 227810 }, { "epoch": 1.8630249008463835, "grad_norm": 0.0583469495177269, "learning_rate": 1.4220293946233365e-07, "loss": 0.0008, "step": 227820 }, { "epoch": 1.8631066770249827, "grad_norm": 0.0738438069820404, "learning_rate": 1.4203400384538602e-07, "loss": 0.001, "step": 227830 }, { "epoch": 1.8631884532035818, "grad_norm": 0.07938062399625778, "learning_rate": 1.418651671891541e-07, "loss": 0.0006, "step": 227840 }, { "epoch": 1.863270229382181, "grad_norm": 0.0329219251871109, "learning_rate": 1.4169642949707797e-07, "loss": 0.0004, "step": 227850 }, { "epoch": 1.8633520055607802, "grad_norm": 0.004536167252808809, "learning_rate": 1.4152779077259482e-07, "loss": 0.0006, "step": 227860 }, { "epoch": 1.8634337817393793, "grad_norm": 0.006088636815547943, "learning_rate": 1.4135925101913971e-07, "loss": 0.0006, "step": 227870 }, { "epoch": 1.8635155579179785, "grad_norm": 0.03913304582238197, "learning_rate": 1.4119081024014547e-07, "loss": 0.0004, "step": 227880 }, { "epoch": 1.8635973340965777, "grad_norm": 0.058996476233005524, "learning_rate": 1.4102246843904432e-07, "loss": 0.0013, "step": 227890 }, { "epoch": 1.8636791102751769, "grad_norm": 0.09749022871255875, "learning_rate": 1.4085422561926464e-07, "loss": 0.0009, "step": 227900 }, { "epoch": 1.863760886453776, "grad_norm": 0.011058688163757324, "learning_rate": 1.4068608178423482e-07, "loss": 0.0004, "step": 227910 }, { "epoch": 1.8638426626323752, "grad_norm": 0.0594068206846714, "learning_rate": 1.4051803693737876e-07, "loss": 0.0005, "step": 227920 }, { "epoch": 1.8639244388109744, "grad_norm": 0.06025589630007744, "learning_rate": 1.403500910821204e-07, "loss": 0.0006, "step": 227930 }, { "epoch": 1.8640062149895735, "grad_norm": 0.009189493022859097, "learning_rate": 1.4018224422188088e-07, "loss": 0.0007, "step": 227940 }, { "epoch": 1.8640879911681727, "grad_norm": 0.050091881304979324, "learning_rate": 1.4001449636007969e-07, "loss": 0.0016, "step": 227950 }, { "epoch": 1.8641697673467719, "grad_norm": 0.0013579934602603316, "learning_rate": 1.3984684750013245e-07, "loss": 0.0006, "step": 227960 }, { "epoch": 1.864251543525371, "grad_norm": 0.004972513765096664, "learning_rate": 1.396792976454564e-07, "loss": 0.0004, "step": 227970 }, { "epoch": 1.8643333197039702, "grad_norm": 0.0006425020983442664, "learning_rate": 1.3951184679946327e-07, "loss": 0.0004, "step": 227980 }, { "epoch": 1.8644150958825694, "grad_norm": 0.09058202803134918, "learning_rate": 1.3934449496556533e-07, "loss": 0.0012, "step": 227990 }, { "epoch": 1.8644968720611685, "grad_norm": 0.05992789566516876, "learning_rate": 1.391772421471699e-07, "loss": 0.0005, "step": 228000 }, { "epoch": 1.8645786482397677, "grad_norm": 0.02882593497633934, "learning_rate": 1.3901008834768582e-07, "loss": 0.0007, "step": 228010 }, { "epoch": 1.8646604244183669, "grad_norm": 0.04236524924635887, "learning_rate": 1.3884303357051765e-07, "loss": 0.0004, "step": 228020 }, { "epoch": 1.864742200596966, "grad_norm": 0.047589901834726334, "learning_rate": 1.386760778190682e-07, "loss": 0.0005, "step": 228030 }, { "epoch": 1.8648239767755652, "grad_norm": 0.011753750033676624, "learning_rate": 1.3850922109673814e-07, "loss": 0.0003, "step": 228040 }, { "epoch": 1.8649057529541646, "grad_norm": 0.05763338878750801, "learning_rate": 1.3834246340692693e-07, "loss": 0.0004, "step": 228050 }, { "epoch": 1.8649875291327636, "grad_norm": 0.0352250374853611, "learning_rate": 1.3817580475303183e-07, "loss": 0.001, "step": 228060 }, { "epoch": 1.865069305311363, "grad_norm": 0.0209959726780653, "learning_rate": 1.3800924513844737e-07, "loss": 0.0006, "step": 228070 }, { "epoch": 1.865151081489962, "grad_norm": 0.13822411000728607, "learning_rate": 1.3784278456656587e-07, "loss": 0.0004, "step": 228080 }, { "epoch": 1.8652328576685613, "grad_norm": 0.059810515493154526, "learning_rate": 1.3767642304077955e-07, "loss": 0.0005, "step": 228090 }, { "epoch": 1.8653146338471602, "grad_norm": 0.046445827931165695, "learning_rate": 1.3751016056447685e-07, "loss": 0.0005, "step": 228100 }, { "epoch": 1.8653964100257596, "grad_norm": 0.026750298216938972, "learning_rate": 1.3734399714104452e-07, "loss": 0.0006, "step": 228110 }, { "epoch": 1.8654781862043586, "grad_norm": 0.0612202063202858, "learning_rate": 1.371779327738676e-07, "loss": 0.0012, "step": 228120 }, { "epoch": 1.865559962382958, "grad_norm": 0.0015268026618286967, "learning_rate": 1.3701196746632896e-07, "loss": 0.0013, "step": 228130 }, { "epoch": 1.865641738561557, "grad_norm": 0.032491203397512436, "learning_rate": 1.3684610122180919e-07, "loss": 0.0003, "step": 228140 }, { "epoch": 1.8657235147401563, "grad_norm": 0.07050243020057678, "learning_rate": 1.3668033404368787e-07, "loss": 0.0007, "step": 228150 }, { "epoch": 1.8658052909187552, "grad_norm": 0.049014944583177567, "learning_rate": 1.3651466593534058e-07, "loss": 0.0006, "step": 228160 }, { "epoch": 1.8658870670973546, "grad_norm": 0.016245022416114807, "learning_rate": 1.3634909690014297e-07, "loss": 0.0006, "step": 228170 }, { "epoch": 1.8659688432759536, "grad_norm": 0.0010739301797002554, "learning_rate": 1.3618362694146793e-07, "loss": 0.0006, "step": 228180 }, { "epoch": 1.866050619454553, "grad_norm": 0.03799813985824585, "learning_rate": 1.360182560626855e-07, "loss": 0.0005, "step": 228190 }, { "epoch": 1.866132395633152, "grad_norm": 0.0014964916044846177, "learning_rate": 1.3585298426716463e-07, "loss": 0.0002, "step": 228200 }, { "epoch": 1.8662141718117513, "grad_norm": 0.05521903932094574, "learning_rate": 1.356878115582727e-07, "loss": 0.0013, "step": 228210 }, { "epoch": 1.8662959479903503, "grad_norm": 0.021778380498290062, "learning_rate": 1.3552273793937422e-07, "loss": 0.0007, "step": 228220 }, { "epoch": 1.8663777241689496, "grad_norm": 0.011947139166295528, "learning_rate": 1.3535776341383145e-07, "loss": 0.001, "step": 228230 }, { "epoch": 1.8664595003475486, "grad_norm": 0.0009581033955328166, "learning_rate": 1.3519288798500453e-07, "loss": 0.0009, "step": 228240 }, { "epoch": 1.866541276526148, "grad_norm": 0.019659889861941338, "learning_rate": 1.350281116562535e-07, "loss": 0.0006, "step": 228250 }, { "epoch": 1.8666230527047472, "grad_norm": 0.05250069871544838, "learning_rate": 1.348634344309341e-07, "loss": 0.0006, "step": 228260 }, { "epoch": 1.8667048288833463, "grad_norm": 0.0010958813363686204, "learning_rate": 1.346988563124013e-07, "loss": 0.0005, "step": 228270 }, { "epoch": 1.8667866050619455, "grad_norm": 0.015147345140576363, "learning_rate": 1.3453437730400698e-07, "loss": 0.0006, "step": 228280 }, { "epoch": 1.8668683812405447, "grad_norm": 0.004139657132327557, "learning_rate": 1.3436999740910283e-07, "loss": 0.0005, "step": 228290 }, { "epoch": 1.8669501574191438, "grad_norm": 0.028631990775465965, "learning_rate": 1.3420571663103677e-07, "loss": 0.0008, "step": 228300 }, { "epoch": 1.867031933597743, "grad_norm": 0.012470033019781113, "learning_rate": 1.3404153497315497e-07, "loss": 0.0004, "step": 228310 }, { "epoch": 1.8671137097763422, "grad_norm": 0.054498132318258286, "learning_rate": 1.3387745243880256e-07, "loss": 0.0006, "step": 228320 }, { "epoch": 1.8671954859549413, "grad_norm": 0.036481283605098724, "learning_rate": 1.3371346903132188e-07, "loss": 0.0003, "step": 228330 }, { "epoch": 1.8672772621335405, "grad_norm": 0.057532865554094315, "learning_rate": 1.33549584754053e-07, "loss": 0.0006, "step": 228340 }, { "epoch": 1.8673590383121397, "grad_norm": 0.0011939797550439835, "learning_rate": 1.3338579961033548e-07, "loss": 0.0001, "step": 228350 }, { "epoch": 1.8674408144907388, "grad_norm": 0.011466212570667267, "learning_rate": 1.332221136035039e-07, "loss": 0.0015, "step": 228360 }, { "epoch": 1.867522590669338, "grad_norm": 0.01115555502474308, "learning_rate": 1.3305852673689445e-07, "loss": 0.0038, "step": 228370 }, { "epoch": 1.8676043668479372, "grad_norm": 0.0428534671664238, "learning_rate": 1.3289503901383894e-07, "loss": 0.0005, "step": 228380 }, { "epoch": 1.8676861430265363, "grad_norm": 0.017937468364834785, "learning_rate": 1.3273165043766857e-07, "loss": 0.0003, "step": 228390 }, { "epoch": 1.8677679192051355, "grad_norm": 0.04172332212328911, "learning_rate": 1.32568361011709e-07, "loss": 0.0004, "step": 228400 }, { "epoch": 1.8678496953837347, "grad_norm": 0.049569230526685715, "learning_rate": 1.3240517073928928e-07, "loss": 0.0003, "step": 228410 }, { "epoch": 1.8679314715623339, "grad_norm": 0.05376395210623741, "learning_rate": 1.3224207962373282e-07, "loss": 0.0007, "step": 228420 }, { "epoch": 1.868013247740933, "grad_norm": 0.05396917834877968, "learning_rate": 1.3207908766836196e-07, "loss": 0.0009, "step": 228430 }, { "epoch": 1.8680950239195322, "grad_norm": 0.025979869067668915, "learning_rate": 1.3191619487649632e-07, "loss": 0.0005, "step": 228440 }, { "epoch": 1.8681768000981314, "grad_norm": 0.1384085863828659, "learning_rate": 1.3175340125145542e-07, "loss": 0.0014, "step": 228450 }, { "epoch": 1.8682585762767308, "grad_norm": 0.004443867597728968, "learning_rate": 1.3159070679655494e-07, "loss": 0.0005, "step": 228460 }, { "epoch": 1.8683403524553297, "grad_norm": 0.0044786641374230385, "learning_rate": 1.3142811151510838e-07, "loss": 0.0009, "step": 228470 }, { "epoch": 1.868422128633929, "grad_norm": 0.01139459852129221, "learning_rate": 1.3126561541042858e-07, "loss": 0.0006, "step": 228480 }, { "epoch": 1.868503904812528, "grad_norm": 0.0019405931234359741, "learning_rate": 1.311032184858263e-07, "loss": 0.0006, "step": 228490 }, { "epoch": 1.8685856809911274, "grad_norm": 0.012516845017671585, "learning_rate": 1.3094092074460886e-07, "loss": 0.0006, "step": 228500 }, { "epoch": 1.8686674571697264, "grad_norm": 0.01668911799788475, "learning_rate": 1.3077872219008304e-07, "loss": 0.0006, "step": 228510 }, { "epoch": 1.8687492333483258, "grad_norm": 0.03517387434840202, "learning_rate": 1.3061662282555176e-07, "loss": 0.0029, "step": 228520 }, { "epoch": 1.8688310095269247, "grad_norm": 0.02497388981282711, "learning_rate": 1.3045462265431852e-07, "loss": 0.0008, "step": 228530 }, { "epoch": 1.868912785705524, "grad_norm": 0.00212291581556201, "learning_rate": 1.3029272167968233e-07, "loss": 0.0002, "step": 228540 }, { "epoch": 1.868994561884123, "grad_norm": 0.02660025656223297, "learning_rate": 1.301309199049422e-07, "loss": 0.0006, "step": 228550 }, { "epoch": 1.8690763380627224, "grad_norm": 0.06333982944488525, "learning_rate": 1.2996921733339273e-07, "loss": 0.0004, "step": 228560 }, { "epoch": 1.8691581142413214, "grad_norm": 0.024580691009759903, "learning_rate": 1.2980761396832965e-07, "loss": 0.0004, "step": 228570 }, { "epoch": 1.8692398904199208, "grad_norm": 0.030595121905207634, "learning_rate": 1.2964610981304416e-07, "loss": 0.0005, "step": 228580 }, { "epoch": 1.8693216665985197, "grad_norm": 0.009366142563521862, "learning_rate": 1.2948470487082587e-07, "loss": 0.0003, "step": 228590 }, { "epoch": 1.8694034427771191, "grad_norm": 0.020605556666851044, "learning_rate": 1.2932339914496327e-07, "loss": 0.0003, "step": 228600 }, { "epoch": 1.869485218955718, "grad_norm": 0.021191300824284554, "learning_rate": 1.2916219263874208e-07, "loss": 0.0008, "step": 228610 }, { "epoch": 1.8695669951343175, "grad_norm": 0.009535684250295162, "learning_rate": 1.290010853554463e-07, "loss": 0.0006, "step": 228620 }, { "epoch": 1.8696487713129164, "grad_norm": 0.04853953793644905, "learning_rate": 1.2884007729835835e-07, "loss": 0.0004, "step": 228630 }, { "epoch": 1.8697305474915158, "grad_norm": 0.0016315410612151027, "learning_rate": 1.2867916847075668e-07, "loss": 0.0004, "step": 228640 }, { "epoch": 1.8698123236701147, "grad_norm": 0.007390828337520361, "learning_rate": 1.2851835887592036e-07, "loss": 0.0009, "step": 228650 }, { "epoch": 1.8698940998487141, "grad_norm": 0.06462987512350082, "learning_rate": 1.283576485171245e-07, "loss": 0.0008, "step": 228660 }, { "epoch": 1.869975876027313, "grad_norm": 0.025950221344828606, "learning_rate": 1.281970373976432e-07, "loss": 0.0005, "step": 228670 }, { "epoch": 1.8700576522059125, "grad_norm": 0.08252761512994766, "learning_rate": 1.280365255207483e-07, "loss": 0.0004, "step": 228680 }, { "epoch": 1.8701394283845116, "grad_norm": 0.04276693984866142, "learning_rate": 1.2787611288970936e-07, "loss": 0.0005, "step": 228690 }, { "epoch": 1.8702212045631108, "grad_norm": 0.1173354759812355, "learning_rate": 1.277157995077949e-07, "loss": 0.0009, "step": 228700 }, { "epoch": 1.87030298074171, "grad_norm": 0.18441762030124664, "learning_rate": 1.2755558537826952e-07, "loss": 0.0014, "step": 228710 }, { "epoch": 1.8703847569203091, "grad_norm": 0.004917441867291927, "learning_rate": 1.2739547050439726e-07, "loss": 0.0004, "step": 228720 }, { "epoch": 1.8704665330989083, "grad_norm": 0.008638069033622742, "learning_rate": 1.2723545488944e-07, "loss": 0.0009, "step": 228730 }, { "epoch": 1.8705483092775075, "grad_norm": 0.009003190323710442, "learning_rate": 1.2707553853665788e-07, "loss": 0.0006, "step": 228740 }, { "epoch": 1.8706300854561067, "grad_norm": 0.06026371195912361, "learning_rate": 1.2691572144930774e-07, "loss": 0.0007, "step": 228750 }, { "epoch": 1.8707118616347058, "grad_norm": 0.030261244624853134, "learning_rate": 1.2675600363064479e-07, "loss": 0.0007, "step": 228760 }, { "epoch": 1.870793637813305, "grad_norm": 0.0632241815328598, "learning_rate": 1.2659638508392357e-07, "loss": 0.0005, "step": 228770 }, { "epoch": 1.8708754139919042, "grad_norm": 0.0013911790447309613, "learning_rate": 1.2643686581239544e-07, "loss": 0.0005, "step": 228780 }, { "epoch": 1.8709571901705033, "grad_norm": 0.02139207348227501, "learning_rate": 1.2627744581931e-07, "loss": 0.0004, "step": 228790 }, { "epoch": 1.8710389663491025, "grad_norm": 0.028999460861086845, "learning_rate": 1.2611812510791355e-07, "loss": 0.0005, "step": 228800 }, { "epoch": 1.8711207425277017, "grad_norm": 0.014747433364391327, "learning_rate": 1.2595890368145348e-07, "loss": 0.0008, "step": 228810 }, { "epoch": 1.8712025187063008, "grad_norm": 0.12083712220191956, "learning_rate": 1.257997815431722e-07, "loss": 0.0004, "step": 228820 }, { "epoch": 1.8712842948849, "grad_norm": 0.06766447424888611, "learning_rate": 1.2564075869631154e-07, "loss": 0.0007, "step": 228830 }, { "epoch": 1.8713660710634992, "grad_norm": 0.018886523321270943, "learning_rate": 1.2548183514411005e-07, "loss": 0.0003, "step": 228840 }, { "epoch": 1.8714478472420983, "grad_norm": 0.003646847093477845, "learning_rate": 1.253230108898057e-07, "loss": 0.0008, "step": 228850 }, { "epoch": 1.8715296234206975, "grad_norm": 0.02635737881064415, "learning_rate": 1.2516428593663476e-07, "loss": 0.0008, "step": 228860 }, { "epoch": 1.8716113995992967, "grad_norm": 0.06466034054756165, "learning_rate": 1.2500566028782967e-07, "loss": 0.0006, "step": 228870 }, { "epoch": 1.8716931757778958, "grad_norm": 0.022763967514038086, "learning_rate": 1.2484713394662175e-07, "loss": 0.0017, "step": 228880 }, { "epoch": 1.8717749519564952, "grad_norm": 0.0005428297445178032, "learning_rate": 1.2468870691624002e-07, "loss": 0.0002, "step": 228890 }, { "epoch": 1.8718567281350942, "grad_norm": 0.07420367747545242, "learning_rate": 1.2453037919991252e-07, "loss": 0.001, "step": 228900 }, { "epoch": 1.8719385043136936, "grad_norm": 0.13569045066833496, "learning_rate": 1.243721508008644e-07, "loss": 0.0007, "step": 228910 }, { "epoch": 1.8720202804922925, "grad_norm": 0.030914699658751488, "learning_rate": 1.2421402172231867e-07, "loss": 0.0006, "step": 228920 }, { "epoch": 1.872102056670892, "grad_norm": 0.010022389702498913, "learning_rate": 1.2405599196749551e-07, "loss": 0.0011, "step": 228930 }, { "epoch": 1.8721838328494909, "grad_norm": 0.010635248385369778, "learning_rate": 1.2389806153961626e-07, "loss": 0.0007, "step": 228940 }, { "epoch": 1.8722656090280902, "grad_norm": 0.016593381762504578, "learning_rate": 1.2374023044189665e-07, "loss": 0.0004, "step": 228950 }, { "epoch": 1.8723473852066892, "grad_norm": 0.06511256098747253, "learning_rate": 1.2358249867755246e-07, "loss": 0.0007, "step": 228960 }, { "epoch": 1.8724291613852886, "grad_norm": 0.08801443129777908, "learning_rate": 1.2342486624979555e-07, "loss": 0.0004, "step": 228970 }, { "epoch": 1.8725109375638875, "grad_norm": 0.014986740425229073, "learning_rate": 1.232673331618389e-07, "loss": 0.0006, "step": 228980 }, { "epoch": 1.872592713742487, "grad_norm": 0.04325849562883377, "learning_rate": 1.2310989941689111e-07, "loss": 0.0004, "step": 228990 }, { "epoch": 1.8726744899210859, "grad_norm": 0.019342433661222458, "learning_rate": 1.2295256501815844e-07, "loss": 0.0008, "step": 229000 }, { "epoch": 1.8727562660996853, "grad_norm": 0.021424150094389915, "learning_rate": 1.227953299688456e-07, "loss": 0.0003, "step": 229010 }, { "epoch": 1.8728380422782842, "grad_norm": 0.02964274398982525, "learning_rate": 1.2263819427215661e-07, "loss": 0.0005, "step": 229020 }, { "epoch": 1.8729198184568836, "grad_norm": 0.0019457318121567369, "learning_rate": 1.2248115793129233e-07, "loss": 0.0005, "step": 229030 }, { "epoch": 1.8730015946354825, "grad_norm": 0.058745838701725006, "learning_rate": 1.2232422094945128e-07, "loss": 0.0004, "step": 229040 }, { "epoch": 1.873083370814082, "grad_norm": 0.012084081768989563, "learning_rate": 1.2216738332983037e-07, "loss": 0.0013, "step": 229050 }, { "epoch": 1.8731651469926809, "grad_norm": 0.0756579264998436, "learning_rate": 1.2201064507562533e-07, "loss": 0.0007, "step": 229060 }, { "epoch": 1.8732469231712803, "grad_norm": 0.020668691024184227, "learning_rate": 1.2185400619002863e-07, "loss": 0.0007, "step": 229070 }, { "epoch": 1.8733286993498792, "grad_norm": 0.06140111759305, "learning_rate": 1.216974666762305e-07, "loss": 0.0006, "step": 229080 }, { "epoch": 1.8734104755284786, "grad_norm": 0.01994437538087368, "learning_rate": 1.2154102653742007e-07, "loss": 0.0006, "step": 229090 }, { "epoch": 1.8734922517070778, "grad_norm": 0.03519674390554428, "learning_rate": 1.2138468577678474e-07, "loss": 0.0007, "step": 229100 }, { "epoch": 1.873574027885677, "grad_norm": 0.029817743226885796, "learning_rate": 1.2122844439750925e-07, "loss": 0.0006, "step": 229110 }, { "epoch": 1.8736558040642761, "grad_norm": 0.009098067879676819, "learning_rate": 1.2107230240277602e-07, "loss": 0.0008, "step": 229120 }, { "epoch": 1.8737375802428753, "grad_norm": 0.00811859779059887, "learning_rate": 1.2091625979576473e-07, "loss": 0.0006, "step": 229130 }, { "epoch": 1.8738193564214745, "grad_norm": 0.021731125190854073, "learning_rate": 1.2076031657965616e-07, "loss": 0.0004, "step": 229140 }, { "epoch": 1.8739011326000736, "grad_norm": 0.04462780803442001, "learning_rate": 1.2060447275762555e-07, "loss": 0.0009, "step": 229150 }, { "epoch": 1.8739829087786728, "grad_norm": 0.02209324575960636, "learning_rate": 1.2044872833284815e-07, "loss": 0.001, "step": 229160 }, { "epoch": 1.874064684957272, "grad_norm": 0.006031054072082043, "learning_rate": 1.2029308330849587e-07, "loss": 0.0005, "step": 229170 }, { "epoch": 1.8741464611358711, "grad_norm": 0.028187958523631096, "learning_rate": 1.2013753768774062e-07, "loss": 0.0006, "step": 229180 }, { "epoch": 1.8742282373144703, "grad_norm": 0.013163779862225056, "learning_rate": 1.1998209147374985e-07, "loss": 0.0009, "step": 229190 }, { "epoch": 1.8743100134930695, "grad_norm": 0.008720185607671738, "learning_rate": 1.19826744669691e-07, "loss": 0.001, "step": 229200 }, { "epoch": 1.8743917896716686, "grad_norm": 0.09828802943229675, "learning_rate": 1.1967149727872772e-07, "loss": 0.0015, "step": 229210 }, { "epoch": 1.8744735658502678, "grad_norm": 0.004995330236852169, "learning_rate": 1.1951634930402357e-07, "loss": 0.0005, "step": 229220 }, { "epoch": 1.874555342028867, "grad_norm": 0.018740233033895493, "learning_rate": 1.193613007487382e-07, "loss": 0.0005, "step": 229230 }, { "epoch": 1.8746371182074661, "grad_norm": 0.007827699184417725, "learning_rate": 1.1920635161603022e-07, "loss": 0.0007, "step": 229240 }, { "epoch": 1.8747188943860653, "grad_norm": 0.04751534387469292, "learning_rate": 1.1905150190905546e-07, "loss": 0.0012, "step": 229250 }, { "epoch": 1.8748006705646645, "grad_norm": 0.05886278674006462, "learning_rate": 1.188967516309697e-07, "loss": 0.0008, "step": 229260 }, { "epoch": 1.8748824467432637, "grad_norm": 0.06099040433764458, "learning_rate": 1.1874210078492488e-07, "loss": 0.0007, "step": 229270 }, { "epoch": 1.8749642229218628, "grad_norm": 0.020062820985913277, "learning_rate": 1.1858754937407124e-07, "loss": 0.0005, "step": 229280 }, { "epoch": 1.875045999100462, "grad_norm": 0.04061585292220116, "learning_rate": 1.1843309740155629e-07, "loss": 0.0013, "step": 229290 }, { "epoch": 1.8751277752790612, "grad_norm": 0.020731912925839424, "learning_rate": 1.1827874487052748e-07, "loss": 0.0013, "step": 229300 }, { "epoch": 1.8752095514576603, "grad_norm": 0.029523570090532303, "learning_rate": 1.1812449178412899e-07, "loss": 0.0003, "step": 229310 }, { "epoch": 1.8752913276362597, "grad_norm": 0.032313354313373566, "learning_rate": 1.1797033814550274e-07, "loss": 0.0003, "step": 229320 }, { "epoch": 1.8753731038148587, "grad_norm": 0.03411746770143509, "learning_rate": 1.1781628395778787e-07, "loss": 0.0005, "step": 229330 }, { "epoch": 1.875454879993458, "grad_norm": 0.03603750839829445, "learning_rate": 1.1766232922412579e-07, "loss": 0.0006, "step": 229340 }, { "epoch": 1.875536656172057, "grad_norm": 0.0005960845737718046, "learning_rate": 1.1750847394764952e-07, "loss": 0.0005, "step": 229350 }, { "epoch": 1.8756184323506564, "grad_norm": 0.05878526717424393, "learning_rate": 1.1735471813149435e-07, "loss": 0.0009, "step": 229360 }, { "epoch": 1.8757002085292553, "grad_norm": 0.004880737513303757, "learning_rate": 1.1720106177879165e-07, "loss": 0.0006, "step": 229370 }, { "epoch": 1.8757819847078547, "grad_norm": 0.03858184814453125, "learning_rate": 1.1704750489267336e-07, "loss": 0.0007, "step": 229380 }, { "epoch": 1.8758637608864537, "grad_norm": 0.04580087214708328, "learning_rate": 1.1689404747626587e-07, "loss": 0.0004, "step": 229390 }, { "epoch": 1.875945537065053, "grad_norm": 0.03621034324169159, "learning_rate": 1.1674068953269613e-07, "loss": 0.0004, "step": 229400 }, { "epoch": 1.876027313243652, "grad_norm": 0.07248865067958832, "learning_rate": 1.1658743106508774e-07, "loss": 0.0008, "step": 229410 }, { "epoch": 1.8761090894222514, "grad_norm": 0.10825453698635101, "learning_rate": 1.1643427207656266e-07, "loss": 0.0006, "step": 229420 }, { "epoch": 1.8761908656008504, "grad_norm": 0.00853306520730257, "learning_rate": 1.1628121257024117e-07, "loss": 0.0007, "step": 229430 }, { "epoch": 1.8762726417794497, "grad_norm": 0.032044488936662674, "learning_rate": 1.1612825254924132e-07, "loss": 0.0005, "step": 229440 }, { "epoch": 1.8763544179580487, "grad_norm": 0.005627558566629887, "learning_rate": 1.1597539201667785e-07, "loss": 0.0004, "step": 229450 }, { "epoch": 1.876436194136648, "grad_norm": 0.02243676222860813, "learning_rate": 1.1582263097566715e-07, "loss": 0.0008, "step": 229460 }, { "epoch": 1.876517970315247, "grad_norm": 0.04476073011755943, "learning_rate": 1.1566996942931841e-07, "loss": 0.0005, "step": 229470 }, { "epoch": 1.8765997464938464, "grad_norm": 0.0029791505075991154, "learning_rate": 1.1551740738074301e-07, "loss": 0.0014, "step": 229480 }, { "epoch": 1.8766815226724454, "grad_norm": 0.05947304144501686, "learning_rate": 1.1536494483304794e-07, "loss": 0.002, "step": 229490 }, { "epoch": 1.8767632988510448, "grad_norm": 0.09684742242097855, "learning_rate": 1.1521258178933958e-07, "loss": 0.0005, "step": 229500 }, { "epoch": 1.8768450750296437, "grad_norm": 0.0028747650794684887, "learning_rate": 1.1506031825272157e-07, "loss": 0.0018, "step": 229510 }, { "epoch": 1.876926851208243, "grad_norm": 0.015895914286375046, "learning_rate": 1.1490815422629531e-07, "loss": 0.0021, "step": 229520 }, { "epoch": 1.8770086273868423, "grad_norm": 0.03467410057783127, "learning_rate": 1.1475608971316055e-07, "loss": 0.0003, "step": 229530 }, { "epoch": 1.8770904035654414, "grad_norm": 0.06989820301532745, "learning_rate": 1.1460412471641591e-07, "loss": 0.0006, "step": 229540 }, { "epoch": 1.8771721797440406, "grad_norm": 0.0019281937275081873, "learning_rate": 1.144522592391556e-07, "loss": 0.0003, "step": 229550 }, { "epoch": 1.8772539559226398, "grad_norm": 0.3197786509990692, "learning_rate": 1.1430049328447435e-07, "loss": 0.0006, "step": 229560 }, { "epoch": 1.877335732101239, "grad_norm": 0.044761039316654205, "learning_rate": 1.1414882685546302e-07, "loss": 0.0011, "step": 229570 }, { "epoch": 1.877417508279838, "grad_norm": 0.0577850304543972, "learning_rate": 1.1399725995521249e-07, "loss": 0.0004, "step": 229580 }, { "epoch": 1.8774992844584373, "grad_norm": 0.12222521007061005, "learning_rate": 1.1384579258680861e-07, "loss": 0.0005, "step": 229590 }, { "epoch": 1.8775810606370364, "grad_norm": 0.079300656914711, "learning_rate": 1.1369442475333725e-07, "loss": 0.0011, "step": 229600 }, { "epoch": 1.8776628368156356, "grad_norm": 0.0052271643653512, "learning_rate": 1.1354315645788261e-07, "loss": 0.0005, "step": 229610 }, { "epoch": 1.8777446129942348, "grad_norm": 0.006705006118863821, "learning_rate": 1.1339198770352555e-07, "loss": 0.0005, "step": 229620 }, { "epoch": 1.877826389172834, "grad_norm": 0.0861557349562645, "learning_rate": 1.1324091849334584e-07, "loss": 0.0047, "step": 229630 }, { "epoch": 1.8779081653514331, "grad_norm": 0.02905411832034588, "learning_rate": 1.1308994883042102e-07, "loss": 0.0003, "step": 229640 }, { "epoch": 1.8779899415300323, "grad_norm": 0.006328410003334284, "learning_rate": 1.1293907871782583e-07, "loss": 0.0004, "step": 229650 }, { "epoch": 1.8780717177086315, "grad_norm": 0.08755344152450562, "learning_rate": 1.1278830815863395e-07, "loss": 0.0006, "step": 229660 }, { "epoch": 1.8781534938872306, "grad_norm": 0.01723308302462101, "learning_rate": 1.1263763715591735e-07, "loss": 0.0005, "step": 229670 }, { "epoch": 1.8782352700658298, "grad_norm": 0.019835826009511948, "learning_rate": 1.124870657127447e-07, "loss": 0.0008, "step": 229680 }, { "epoch": 1.878317046244429, "grad_norm": 0.03371819853782654, "learning_rate": 1.1233659383218242e-07, "loss": 0.0005, "step": 229690 }, { "epoch": 1.8783988224230281, "grad_norm": 0.0224899984896183, "learning_rate": 1.1218622151729752e-07, "loss": 0.0008, "step": 229700 }, { "epoch": 1.8784805986016273, "grad_norm": 0.04698297008872032, "learning_rate": 1.1203594877115254e-07, "loss": 0.0006, "step": 229710 }, { "epoch": 1.8785623747802265, "grad_norm": 0.03992336243391037, "learning_rate": 1.118857755968078e-07, "loss": 0.0004, "step": 229720 }, { "epoch": 1.8786441509588256, "grad_norm": 0.01115640252828598, "learning_rate": 1.1173570199732254e-07, "loss": 0.0008, "step": 229730 }, { "epoch": 1.8787259271374248, "grad_norm": 0.012863955460488796, "learning_rate": 1.115857279757554e-07, "loss": 0.001, "step": 229740 }, { "epoch": 1.8788077033160242, "grad_norm": 0.004987155552953482, "learning_rate": 1.1143585353516006e-07, "loss": 0.0009, "step": 229750 }, { "epoch": 1.8788894794946231, "grad_norm": 0.028847811743617058, "learning_rate": 1.112860786785902e-07, "loss": 0.0005, "step": 229760 }, { "epoch": 1.8789712556732225, "grad_norm": 0.11004739999771118, "learning_rate": 1.1113640340909615e-07, "loss": 0.0007, "step": 229770 }, { "epoch": 1.8790530318518215, "grad_norm": 0.01113541703671217, "learning_rate": 1.1098682772972769e-07, "loss": 0.0002, "step": 229780 }, { "epoch": 1.8791348080304209, "grad_norm": 0.02072170004248619, "learning_rate": 1.1083735164353182e-07, "loss": 0.0005, "step": 229790 }, { "epoch": 1.8792165842090198, "grad_norm": 0.031780291348695755, "learning_rate": 1.1068797515355279e-07, "loss": 0.0008, "step": 229800 }, { "epoch": 1.8792983603876192, "grad_norm": 0.009067900478839874, "learning_rate": 1.1053869826283426e-07, "loss": 0.0006, "step": 229810 }, { "epoch": 1.8793801365662182, "grad_norm": 0.030291341245174408, "learning_rate": 1.1038952097441712e-07, "loss": 0.0002, "step": 229820 }, { "epoch": 1.8794619127448176, "grad_norm": 0.04371579736471176, "learning_rate": 1.1024044329133954e-07, "loss": 0.0011, "step": 229830 }, { "epoch": 1.8795436889234165, "grad_norm": 0.025866838172078133, "learning_rate": 1.1009146521663849e-07, "loss": 0.0004, "step": 229840 }, { "epoch": 1.879625465102016, "grad_norm": 0.026514016091823578, "learning_rate": 1.0994258675334934e-07, "loss": 0.0006, "step": 229850 }, { "epoch": 1.8797072412806148, "grad_norm": 0.046638187021017075, "learning_rate": 1.0979380790450412e-07, "loss": 0.0004, "step": 229860 }, { "epoch": 1.8797890174592142, "grad_norm": 0.0525263249874115, "learning_rate": 1.0964512867313427e-07, "loss": 0.0005, "step": 229870 }, { "epoch": 1.8798707936378132, "grad_norm": 0.02233661152422428, "learning_rate": 1.0949654906226847e-07, "loss": 0.0006, "step": 229880 }, { "epoch": 1.8799525698164126, "grad_norm": 0.049237534403800964, "learning_rate": 1.0934806907493267e-07, "loss": 0.0007, "step": 229890 }, { "epoch": 1.8800343459950115, "grad_norm": 0.05819567292928696, "learning_rate": 1.0919968871415221e-07, "loss": 0.0009, "step": 229900 }, { "epoch": 1.880116122173611, "grad_norm": 0.04717645421624184, "learning_rate": 1.0905140798294966e-07, "loss": 0.0014, "step": 229910 }, { "epoch": 1.8801978983522099, "grad_norm": 0.0487217977643013, "learning_rate": 1.0890322688434596e-07, "loss": 0.0012, "step": 229920 }, { "epoch": 1.8802796745308092, "grad_norm": 0.07730253785848618, "learning_rate": 1.0875514542135812e-07, "loss": 0.0003, "step": 229930 }, { "epoch": 1.8803614507094082, "grad_norm": 0.027862098067998886, "learning_rate": 1.0860716359700485e-07, "loss": 0.0007, "step": 229940 }, { "epoch": 1.8804432268880076, "grad_norm": 0.055338405072689056, "learning_rate": 1.0845928141429929e-07, "loss": 0.0009, "step": 229950 }, { "epoch": 1.8805250030666067, "grad_norm": 0.04031333699822426, "learning_rate": 1.0831149887625403e-07, "loss": 0.0004, "step": 229960 }, { "epoch": 1.880606779245206, "grad_norm": 0.10332277417182922, "learning_rate": 1.0816381598587944e-07, "loss": 0.0006, "step": 229970 }, { "epoch": 1.880688555423805, "grad_norm": 0.05293172225356102, "learning_rate": 1.0801623274618423e-07, "loss": 0.0005, "step": 229980 }, { "epoch": 1.8807703316024043, "grad_norm": 0.007900490425527096, "learning_rate": 1.0786874916017543e-07, "loss": 0.0002, "step": 229990 }, { "epoch": 1.8808521077810034, "grad_norm": 0.009434463456273079, "learning_rate": 1.0772136523085619e-07, "loss": 0.0007, "step": 230000 }, { "epoch": 1.8809338839596026, "grad_norm": 0.006534066051244736, "learning_rate": 1.0757408096122912e-07, "loss": 0.0014, "step": 230010 }, { "epoch": 1.8810156601382018, "grad_norm": 0.08495958149433136, "learning_rate": 1.0742689635429515e-07, "loss": 0.0009, "step": 230020 }, { "epoch": 1.881097436316801, "grad_norm": 0.010371104814112186, "learning_rate": 1.0727981141305243e-07, "loss": 0.0006, "step": 230030 }, { "epoch": 1.8811792124954, "grad_norm": 0.019365601241588593, "learning_rate": 1.0713282614049692e-07, "loss": 0.0006, "step": 230040 }, { "epoch": 1.8812609886739993, "grad_norm": 0.02331814356148243, "learning_rate": 1.0698594053962231e-07, "loss": 0.0004, "step": 230050 }, { "epoch": 1.8813427648525984, "grad_norm": 0.06942804157733917, "learning_rate": 1.0683915461342176e-07, "loss": 0.0008, "step": 230060 }, { "epoch": 1.8814245410311976, "grad_norm": 0.03449535369873047, "learning_rate": 1.0669246836488512e-07, "loss": 0.0005, "step": 230070 }, { "epoch": 1.8815063172097968, "grad_norm": 0.0031930278055369854, "learning_rate": 1.0654588179700054e-07, "loss": 0.0005, "step": 230080 }, { "epoch": 1.881588093388396, "grad_norm": 0.023832283914089203, "learning_rate": 1.0639939491275287e-07, "loss": 0.0008, "step": 230090 }, { "epoch": 1.8816698695669951, "grad_norm": 0.060288600623607635, "learning_rate": 1.0625300771512803e-07, "loss": 0.0004, "step": 230100 }, { "epoch": 1.8817516457455943, "grad_norm": 0.017999334260821342, "learning_rate": 1.0610672020710755e-07, "loss": 0.0005, "step": 230110 }, { "epoch": 1.8818334219241935, "grad_norm": 0.17800229787826538, "learning_rate": 1.059605323916707e-07, "loss": 0.0006, "step": 230120 }, { "epoch": 1.8819151981027926, "grad_norm": 0.012906912714242935, "learning_rate": 1.0581444427179566e-07, "loss": 0.0003, "step": 230130 }, { "epoch": 1.8819969742813918, "grad_norm": 0.01597006805241108, "learning_rate": 1.0566845585045893e-07, "loss": 0.0014, "step": 230140 }, { "epoch": 1.882078750459991, "grad_norm": 0.06107866391539574, "learning_rate": 1.0552256713063424e-07, "loss": 0.0007, "step": 230150 }, { "epoch": 1.8821605266385901, "grad_norm": 0.051795296370983124, "learning_rate": 1.0537677811529312e-07, "loss": 0.0005, "step": 230160 }, { "epoch": 1.8822423028171893, "grad_norm": 0.004495596047490835, "learning_rate": 1.0523108880740484e-07, "loss": 0.0002, "step": 230170 }, { "epoch": 1.8823240789957887, "grad_norm": 0.048482514917850494, "learning_rate": 1.0508549920993927e-07, "loss": 0.0005, "step": 230180 }, { "epoch": 1.8824058551743876, "grad_norm": 0.061413299292325974, "learning_rate": 1.0494000932586069e-07, "loss": 0.0007, "step": 230190 }, { "epoch": 1.882487631352987, "grad_norm": 0.028263777494430542, "learning_rate": 1.0479461915813228e-07, "loss": 0.0004, "step": 230200 }, { "epoch": 1.882569407531586, "grad_norm": 0.032110795378685, "learning_rate": 1.0464932870971666e-07, "loss": 0.0008, "step": 230210 }, { "epoch": 1.8826511837101854, "grad_norm": 0.030229389667510986, "learning_rate": 1.0450413798357373e-07, "loss": 0.0004, "step": 230220 }, { "epoch": 1.8827329598887843, "grad_norm": 0.014568469487130642, "learning_rate": 1.0435904698266053e-07, "loss": 0.0003, "step": 230230 }, { "epoch": 1.8828147360673837, "grad_norm": 0.04646101966500282, "learning_rate": 1.0421405570993304e-07, "loss": 0.0008, "step": 230240 }, { "epoch": 1.8828965122459826, "grad_norm": 0.03660362958908081, "learning_rate": 1.0406916416834445e-07, "loss": 0.0005, "step": 230250 }, { "epoch": 1.882978288424582, "grad_norm": 0.030216284096240997, "learning_rate": 1.0392437236084629e-07, "loss": 0.0007, "step": 230260 }, { "epoch": 1.883060064603181, "grad_norm": 0.0012694080360233784, "learning_rate": 1.0377968029038899e-07, "loss": 0.0007, "step": 230270 }, { "epoch": 1.8831418407817804, "grad_norm": 0.10980567336082458, "learning_rate": 1.0363508795991961e-07, "loss": 0.0005, "step": 230280 }, { "epoch": 1.8832236169603793, "grad_norm": 0.09284251183271408, "learning_rate": 1.0349059537238304e-07, "loss": 0.0006, "step": 230290 }, { "epoch": 1.8833053931389787, "grad_norm": 0.018336504697799683, "learning_rate": 1.0334620253072358e-07, "loss": 0.001, "step": 230300 }, { "epoch": 1.8833871693175777, "grad_norm": 0.03628650680184364, "learning_rate": 1.0320190943788166e-07, "loss": 0.0018, "step": 230310 }, { "epoch": 1.883468945496177, "grad_norm": 0.0035306867212057114, "learning_rate": 1.030577160967977e-07, "loss": 0.0003, "step": 230320 }, { "epoch": 1.883550721674776, "grad_norm": 0.05684060603380203, "learning_rate": 1.0291362251040826e-07, "loss": 0.001, "step": 230330 }, { "epoch": 1.8836324978533754, "grad_norm": 0.003482130356132984, "learning_rate": 1.0276962868164819e-07, "loss": 0.0003, "step": 230340 }, { "epoch": 1.8837142740319743, "grad_norm": 0.017428962513804436, "learning_rate": 1.0262573461345237e-07, "loss": 0.0001, "step": 230350 }, { "epoch": 1.8837960502105737, "grad_norm": 0.1594812273979187, "learning_rate": 1.024819403087507e-07, "loss": 0.0005, "step": 230360 }, { "epoch": 1.8838778263891727, "grad_norm": 0.029864899814128876, "learning_rate": 1.0233824577047302e-07, "loss": 0.0018, "step": 230370 }, { "epoch": 1.883959602567772, "grad_norm": 0.020274581387639046, "learning_rate": 1.021946510015459e-07, "loss": 0.0009, "step": 230380 }, { "epoch": 1.8840413787463712, "grad_norm": 0.01853015460073948, "learning_rate": 1.020511560048948e-07, "loss": 0.0008, "step": 230390 }, { "epoch": 1.8841231549249704, "grad_norm": 0.014274396002292633, "learning_rate": 1.0190776078344345e-07, "loss": 0.001, "step": 230400 }, { "epoch": 1.8842049311035696, "grad_norm": 0.026488477364182472, "learning_rate": 1.017644653401123e-07, "loss": 0.0002, "step": 230410 }, { "epoch": 1.8842867072821687, "grad_norm": 0.06856279075145721, "learning_rate": 1.0162126967782072e-07, "loss": 0.0009, "step": 230420 }, { "epoch": 1.884368483460768, "grad_norm": 0.009986815974116325, "learning_rate": 1.0147817379948521e-07, "loss": 0.0011, "step": 230430 }, { "epoch": 1.884450259639367, "grad_norm": 0.005737856030464172, "learning_rate": 1.013351777080207e-07, "loss": 0.0003, "step": 230440 }, { "epoch": 1.8845320358179662, "grad_norm": 0.04115540161728859, "learning_rate": 1.0119228140634096e-07, "loss": 0.0011, "step": 230450 }, { "epoch": 1.8846138119965654, "grad_norm": 0.04151229187846184, "learning_rate": 1.0104948489735589e-07, "loss": 0.0004, "step": 230460 }, { "epoch": 1.8846955881751646, "grad_norm": 0.015353845432400703, "learning_rate": 1.0090678818397482e-07, "loss": 0.0005, "step": 230470 }, { "epoch": 1.8847773643537638, "grad_norm": 0.05509508028626442, "learning_rate": 1.0076419126910542e-07, "loss": 0.0009, "step": 230480 }, { "epoch": 1.884859140532363, "grad_norm": 0.06046808511018753, "learning_rate": 1.0062169415565093e-07, "loss": 0.0005, "step": 230490 }, { "epoch": 1.884940916710962, "grad_norm": 0.0532553493976593, "learning_rate": 1.0047929684651514e-07, "loss": 0.0016, "step": 230500 }, { "epoch": 1.8850226928895613, "grad_norm": 0.0019757836125791073, "learning_rate": 1.0033699934459907e-07, "loss": 0.0007, "step": 230510 }, { "epoch": 1.8851044690681604, "grad_norm": 0.02380554936826229, "learning_rate": 1.0019480165280038e-07, "loss": 0.0004, "step": 230520 }, { "epoch": 1.8851862452467596, "grad_norm": 0.06797687709331512, "learning_rate": 1.0005270377401677e-07, "loss": 0.0004, "step": 230530 }, { "epoch": 1.8852680214253588, "grad_norm": 0.0012465002946555614, "learning_rate": 9.991070571114258e-08, "loss": 0.0004, "step": 230540 }, { "epoch": 1.885349797603958, "grad_norm": 0.0015503617469221354, "learning_rate": 9.976880746706997e-08, "loss": 0.0004, "step": 230550 }, { "epoch": 1.885431573782557, "grad_norm": 0.016641667112708092, "learning_rate": 9.962700904468991e-08, "loss": 0.0006, "step": 230560 }, { "epoch": 1.8855133499611563, "grad_norm": 0.0014726328663527966, "learning_rate": 9.948531044689125e-08, "loss": 0.0004, "step": 230570 }, { "epoch": 1.8855951261397554, "grad_norm": 0.09810475260019302, "learning_rate": 9.934371167655943e-08, "loss": 0.0008, "step": 230580 }, { "epoch": 1.8856769023183546, "grad_norm": 0.01481647975742817, "learning_rate": 9.920221273657992e-08, "loss": 0.0008, "step": 230590 }, { "epoch": 1.8857586784969538, "grad_norm": 0.02130047045648098, "learning_rate": 9.906081362983543e-08, "loss": 0.0002, "step": 230600 }, { "epoch": 1.8858404546755532, "grad_norm": 0.01763642393052578, "learning_rate": 9.891951435920532e-08, "loss": 0.0006, "step": 230610 }, { "epoch": 1.8859222308541521, "grad_norm": 0.018724553287029266, "learning_rate": 9.87783149275684e-08, "loss": 0.0007, "step": 230620 }, { "epoch": 1.8860040070327515, "grad_norm": 0.007482326123863459, "learning_rate": 9.863721533780124e-08, "loss": 0.0003, "step": 230630 }, { "epoch": 1.8860857832113505, "grad_norm": 0.010707095265388489, "learning_rate": 9.849621559277822e-08, "loss": 0.0005, "step": 230640 }, { "epoch": 1.8861675593899498, "grad_norm": 0.005894386675208807, "learning_rate": 9.835531569537149e-08, "loss": 0.0005, "step": 230650 }, { "epoch": 1.8862493355685488, "grad_norm": 0.05810331925749779, "learning_rate": 9.821451564845152e-08, "loss": 0.0014, "step": 230660 }, { "epoch": 1.8863311117471482, "grad_norm": 0.00983390025794506, "learning_rate": 9.807381545488603e-08, "loss": 0.0006, "step": 230670 }, { "epoch": 1.8864128879257471, "grad_norm": 0.021836528554558754, "learning_rate": 9.793321511754106e-08, "loss": 0.0012, "step": 230680 }, { "epoch": 1.8864946641043465, "grad_norm": 0.07873817533254623, "learning_rate": 9.779271463928152e-08, "loss": 0.0005, "step": 230690 }, { "epoch": 1.8865764402829455, "grad_norm": 0.07127697020769119, "learning_rate": 9.765231402296848e-08, "loss": 0.0006, "step": 230700 }, { "epoch": 1.8866582164615449, "grad_norm": 0.03700213506817818, "learning_rate": 9.751201327146298e-08, "loss": 0.0006, "step": 230710 }, { "epoch": 1.8867399926401438, "grad_norm": 0.00447428785264492, "learning_rate": 9.737181238762272e-08, "loss": 0.0004, "step": 230720 }, { "epoch": 1.8868217688187432, "grad_norm": 0.005406842567026615, "learning_rate": 9.723171137430376e-08, "loss": 0.0006, "step": 230730 }, { "epoch": 1.8869035449973421, "grad_norm": 0.0050279260613024235, "learning_rate": 9.709171023435937e-08, "loss": 0.0006, "step": 230740 }, { "epoch": 1.8869853211759415, "grad_norm": 0.009505676105618477, "learning_rate": 9.695180897064282e-08, "loss": 0.0007, "step": 230750 }, { "epoch": 1.8870670973545405, "grad_norm": 0.06297823041677475, "learning_rate": 9.681200758600351e-08, "loss": 0.0009, "step": 230760 }, { "epoch": 1.8871488735331399, "grad_norm": 0.018756819888949394, "learning_rate": 9.66723060832886e-08, "loss": 0.0007, "step": 230770 }, { "epoch": 1.8872306497117388, "grad_norm": 0.006784772500395775, "learning_rate": 9.653270446534469e-08, "loss": 0.0005, "step": 230780 }, { "epoch": 1.8873124258903382, "grad_norm": 0.02104330249130726, "learning_rate": 9.639320273501562e-08, "loss": 0.0005, "step": 230790 }, { "epoch": 1.8873942020689372, "grad_norm": 0.0069900574162602425, "learning_rate": 9.625380089514247e-08, "loss": 0.0004, "step": 230800 }, { "epoch": 1.8874759782475365, "grad_norm": 0.016831528395414352, "learning_rate": 9.611449894856573e-08, "loss": 0.0006, "step": 230810 }, { "epoch": 1.8875577544261357, "grad_norm": 0.004268638323992491, "learning_rate": 9.597529689812202e-08, "loss": 0.0009, "step": 230820 }, { "epoch": 1.8876395306047349, "grad_norm": 0.030613530427217484, "learning_rate": 9.583619474664796e-08, "loss": 0.0007, "step": 230830 }, { "epoch": 1.887721306783334, "grad_norm": 0.03970969468355179, "learning_rate": 9.56971924969774e-08, "loss": 0.0007, "step": 230840 }, { "epoch": 1.8878030829619332, "grad_norm": 0.03967057168483734, "learning_rate": 9.555829015194084e-08, "loss": 0.0005, "step": 230850 }, { "epoch": 1.8878848591405324, "grad_norm": 0.06781308352947235, "learning_rate": 9.541948771436826e-08, "loss": 0.0009, "step": 230860 }, { "epoch": 1.8879666353191316, "grad_norm": 0.04563362896442413, "learning_rate": 9.528078518708794e-08, "loss": 0.0003, "step": 230870 }, { "epoch": 1.8880484114977307, "grad_norm": 0.025953756645321846, "learning_rate": 9.514218257292484e-08, "loss": 0.001, "step": 230880 }, { "epoch": 1.88813018767633, "grad_norm": 0.16814608871936798, "learning_rate": 9.500367987470227e-08, "loss": 0.0016, "step": 230890 }, { "epoch": 1.888211963854929, "grad_norm": 0.01164232473820448, "learning_rate": 9.486527709524185e-08, "loss": 0.0005, "step": 230900 }, { "epoch": 1.8882937400335282, "grad_norm": 0.09667328000068665, "learning_rate": 9.472697423736244e-08, "loss": 0.0009, "step": 230910 }, { "epoch": 1.8883755162121274, "grad_norm": 0.025253601372241974, "learning_rate": 9.458877130388234e-08, "loss": 0.0005, "step": 230920 }, { "epoch": 1.8884572923907266, "grad_norm": 0.08882763236761093, "learning_rate": 9.445066829761596e-08, "loss": 0.0005, "step": 230930 }, { "epoch": 1.8885390685693257, "grad_norm": 0.00299833039753139, "learning_rate": 9.431266522137605e-08, "loss": 0.0011, "step": 230940 }, { "epoch": 1.888620844747925, "grad_norm": 0.0022337420377880335, "learning_rate": 9.417476207797594e-08, "loss": 0.0012, "step": 230950 }, { "epoch": 1.888702620926524, "grad_norm": 0.028882037848234177, "learning_rate": 9.40369588702228e-08, "loss": 0.0008, "step": 230960 }, { "epoch": 1.8887843971051232, "grad_norm": 0.038139551877975464, "learning_rate": 9.389925560092494e-08, "loss": 0.0005, "step": 230970 }, { "epoch": 1.8888661732837224, "grad_norm": 0.05898605287075043, "learning_rate": 9.37616522728868e-08, "loss": 0.0006, "step": 230980 }, { "epoch": 1.8889479494623216, "grad_norm": 0.034842729568481445, "learning_rate": 9.362414888891225e-08, "loss": 0.0011, "step": 230990 }, { "epoch": 1.8890297256409208, "grad_norm": 0.001496252603828907, "learning_rate": 9.348674545180181e-08, "loss": 0.0004, "step": 231000 }, { "epoch": 1.88911150181952, "grad_norm": 0.0047779204323887825, "learning_rate": 9.334944196435437e-08, "loss": 0.0008, "step": 231010 }, { "epoch": 1.8891932779981193, "grad_norm": 0.0601060576736927, "learning_rate": 9.321223842936766e-08, "loss": 0.0005, "step": 231020 }, { "epoch": 1.8892750541767183, "grad_norm": 0.01799776591360569, "learning_rate": 9.307513484963559e-08, "loss": 0.0005, "step": 231030 }, { "epoch": 1.8893568303553177, "grad_norm": 0.003603021614253521, "learning_rate": 9.293813122795148e-08, "loss": 0.0003, "step": 231040 }, { "epoch": 1.8894386065339166, "grad_norm": 0.059023886919021606, "learning_rate": 9.280122756710641e-08, "loss": 0.0009, "step": 231050 }, { "epoch": 1.889520382712516, "grad_norm": 0.03495021536946297, "learning_rate": 9.266442386988927e-08, "loss": 0.0003, "step": 231060 }, { "epoch": 1.889602158891115, "grad_norm": 0.004381755366921425, "learning_rate": 9.252772013908617e-08, "loss": 0.0002, "step": 231070 }, { "epoch": 1.8896839350697143, "grad_norm": 0.03111123852431774, "learning_rate": 9.239111637748322e-08, "loss": 0.0007, "step": 231080 }, { "epoch": 1.8897657112483133, "grad_norm": 0.06827584654092789, "learning_rate": 9.225461258786206e-08, "loss": 0.0008, "step": 231090 }, { "epoch": 1.8898474874269127, "grad_norm": 0.032473593950271606, "learning_rate": 9.211820877300327e-08, "loss": 0.0004, "step": 231100 }, { "epoch": 1.8899292636055116, "grad_norm": 0.03183748945593834, "learning_rate": 9.198190493568571e-08, "loss": 0.0007, "step": 231110 }, { "epoch": 1.890011039784111, "grad_norm": 0.020598245784640312, "learning_rate": 9.184570107868718e-08, "loss": 0.0006, "step": 231120 }, { "epoch": 1.89009281596271, "grad_norm": 0.07147154957056046, "learning_rate": 9.170959720478046e-08, "loss": 0.0006, "step": 231130 }, { "epoch": 1.8901745921413093, "grad_norm": 0.023640504106879234, "learning_rate": 9.157359331673942e-08, "loss": 0.0007, "step": 231140 }, { "epoch": 1.8902563683199083, "grad_norm": 0.0473470576107502, "learning_rate": 9.143768941733355e-08, "loss": 0.0007, "step": 231150 }, { "epoch": 1.8903381444985077, "grad_norm": 0.00785049982368946, "learning_rate": 9.130188550933228e-08, "loss": 0.0005, "step": 231160 }, { "epoch": 1.8904199206771066, "grad_norm": 0.06470267474651337, "learning_rate": 9.116618159550173e-08, "loss": 0.0007, "step": 231170 }, { "epoch": 1.890501696855706, "grad_norm": 0.16704553365707397, "learning_rate": 9.103057767860524e-08, "loss": 0.0009, "step": 231180 }, { "epoch": 1.890583473034305, "grad_norm": 0.13405096530914307, "learning_rate": 9.089507376140671e-08, "loss": 0.0027, "step": 231190 }, { "epoch": 1.8906652492129044, "grad_norm": 0.0436401404440403, "learning_rate": 9.075966984666617e-08, "loss": 0.0004, "step": 231200 }, { "epoch": 1.8907470253915033, "grad_norm": 0.0675494372844696, "learning_rate": 9.062436593714086e-08, "loss": 0.0006, "step": 231210 }, { "epoch": 1.8908288015701027, "grad_norm": 0.06731881946325302, "learning_rate": 9.0489162035588e-08, "loss": 0.0012, "step": 231220 }, { "epoch": 1.8909105777487016, "grad_norm": 0.08398690074682236, "learning_rate": 9.03540581447615e-08, "loss": 0.0009, "step": 231230 }, { "epoch": 1.890992353927301, "grad_norm": 0.03418118506669998, "learning_rate": 9.021905426741417e-08, "loss": 0.0003, "step": 231240 }, { "epoch": 1.8910741301059002, "grad_norm": 0.04975837469100952, "learning_rate": 9.008415040629548e-08, "loss": 0.0006, "step": 231250 }, { "epoch": 1.8911559062844994, "grad_norm": 0.11017050594091415, "learning_rate": 8.994934656415322e-08, "loss": 0.0008, "step": 231260 }, { "epoch": 1.8912376824630985, "grad_norm": 0.13037702441215515, "learning_rate": 8.981464274373408e-08, "loss": 0.0009, "step": 231270 }, { "epoch": 1.8913194586416977, "grad_norm": 0.02901276759803295, "learning_rate": 8.968003894778198e-08, "loss": 0.0007, "step": 231280 }, { "epoch": 1.8914012348202969, "grad_norm": 0.012831413187086582, "learning_rate": 8.954553517903863e-08, "loss": 0.0009, "step": 231290 }, { "epoch": 1.891483010998896, "grad_norm": 0.01247766986489296, "learning_rate": 8.941113144024349e-08, "loss": 0.0005, "step": 231300 }, { "epoch": 1.8915647871774952, "grad_norm": 0.0035572166088968515, "learning_rate": 8.927682773413604e-08, "loss": 0.0007, "step": 231310 }, { "epoch": 1.8916465633560944, "grad_norm": 0.017577702179551125, "learning_rate": 8.914262406345075e-08, "loss": 0.0005, "step": 231320 }, { "epoch": 1.8917283395346935, "grad_norm": 0.0015118910232558846, "learning_rate": 8.900852043092211e-08, "loss": 0.0006, "step": 231330 }, { "epoch": 1.8918101157132927, "grad_norm": 0.020551633089780807, "learning_rate": 8.887451683928183e-08, "loss": 0.0013, "step": 231340 }, { "epoch": 1.8918918918918919, "grad_norm": 0.08390060067176819, "learning_rate": 8.874061329125939e-08, "loss": 0.0009, "step": 231350 }, { "epoch": 1.891973668070491, "grad_norm": 0.0790233165025711, "learning_rate": 8.860680978958314e-08, "loss": 0.001, "step": 231360 }, { "epoch": 1.8920554442490902, "grad_norm": 0.08159773796796799, "learning_rate": 8.847310633697814e-08, "loss": 0.0004, "step": 231370 }, { "epoch": 1.8921372204276894, "grad_norm": 0.05924971401691437, "learning_rate": 8.833950293616833e-08, "loss": 0.0005, "step": 231380 }, { "epoch": 1.8922189966062886, "grad_norm": 0.04005085676908493, "learning_rate": 8.820599958987541e-08, "loss": 0.0018, "step": 231390 }, { "epoch": 1.8923007727848877, "grad_norm": 0.06367986649274826, "learning_rate": 8.807259630081833e-08, "loss": 0.0004, "step": 231400 }, { "epoch": 1.892382548963487, "grad_norm": 0.05522604286670685, "learning_rate": 8.793929307171544e-08, "loss": 0.0009, "step": 231410 }, { "epoch": 1.892464325142086, "grad_norm": 0.015075603500008583, "learning_rate": 8.780608990528128e-08, "loss": 0.0005, "step": 231420 }, { "epoch": 1.8925461013206852, "grad_norm": 0.051668811589479446, "learning_rate": 8.76729868042303e-08, "loss": 0.0006, "step": 231430 }, { "epoch": 1.8926278774992844, "grad_norm": 0.00970911793410778, "learning_rate": 8.75399837712737e-08, "loss": 0.001, "step": 231440 }, { "epoch": 1.8927096536778838, "grad_norm": 0.009876127354800701, "learning_rate": 8.740708080912097e-08, "loss": 0.0003, "step": 231450 }, { "epoch": 1.8927914298564827, "grad_norm": 0.016894793137907982, "learning_rate": 8.727427792047826e-08, "loss": 0.0007, "step": 231460 }, { "epoch": 1.8928732060350821, "grad_norm": 0.06700914353132248, "learning_rate": 8.714157510805232e-08, "loss": 0.0003, "step": 231470 }, { "epoch": 1.892954982213681, "grad_norm": 0.0018069082871079445, "learning_rate": 8.700897237454598e-08, "loss": 0.0006, "step": 231480 }, { "epoch": 1.8930367583922805, "grad_norm": 0.007663373369723558, "learning_rate": 8.687646972265984e-08, "loss": 0.0004, "step": 231490 }, { "epoch": 1.8931185345708794, "grad_norm": 0.02460336685180664, "learning_rate": 8.674406715509399e-08, "loss": 0.0002, "step": 231500 }, { "epoch": 1.8932003107494788, "grad_norm": 0.06576533615589142, "learning_rate": 8.661176467454513e-08, "loss": 0.002, "step": 231510 }, { "epoch": 1.8932820869280778, "grad_norm": 0.04732097312808037, "learning_rate": 8.647956228370891e-08, "loss": 0.0008, "step": 231520 }, { "epoch": 1.8933638631066771, "grad_norm": 0.07228158414363861, "learning_rate": 8.63474599852776e-08, "loss": 0.0008, "step": 231530 }, { "epoch": 1.893445639285276, "grad_norm": 0.018783167004585266, "learning_rate": 8.621545778194185e-08, "loss": 0.0005, "step": 231540 }, { "epoch": 1.8935274154638755, "grad_norm": 0.017005756497383118, "learning_rate": 8.608355567639171e-08, "loss": 0.0011, "step": 231550 }, { "epoch": 1.8936091916424744, "grad_norm": 0.010831003077328205, "learning_rate": 8.595175367131392e-08, "loss": 0.0008, "step": 231560 }, { "epoch": 1.8936909678210738, "grad_norm": 0.06825031340122223, "learning_rate": 8.582005176939356e-08, "loss": 0.0005, "step": 231570 }, { "epoch": 1.8937727439996728, "grad_norm": 0.0280026663094759, "learning_rate": 8.568844997331238e-08, "loss": 0.0009, "step": 231580 }, { "epoch": 1.8938545201782722, "grad_norm": 0.0866924524307251, "learning_rate": 8.555694828575267e-08, "loss": 0.0008, "step": 231590 }, { "epoch": 1.893936296356871, "grad_norm": 0.01334171462804079, "learning_rate": 8.542554670939285e-08, "loss": 0.0002, "step": 231600 }, { "epoch": 1.8940180725354705, "grad_norm": 0.021801188588142395, "learning_rate": 8.52942452469091e-08, "loss": 0.0004, "step": 231610 }, { "epoch": 1.8940998487140694, "grad_norm": 0.014297976158559322, "learning_rate": 8.516304390097652e-08, "loss": 0.0006, "step": 231620 }, { "epoch": 1.8941816248926688, "grad_norm": 0.007825929671525955, "learning_rate": 8.50319426742674e-08, "loss": 0.0005, "step": 231630 }, { "epoch": 1.8942634010712678, "grad_norm": 0.033426668494939804, "learning_rate": 8.490094156945294e-08, "loss": 0.0008, "step": 231640 }, { "epoch": 1.8943451772498672, "grad_norm": 0.01048573199659586, "learning_rate": 8.477004058920157e-08, "loss": 0.0005, "step": 231650 }, { "epoch": 1.8944269534284663, "grad_norm": 0.0026923823170363903, "learning_rate": 8.463923973617894e-08, "loss": 0.0006, "step": 231660 }, { "epoch": 1.8945087296070655, "grad_norm": 0.0007937912014313042, "learning_rate": 8.450853901305123e-08, "loss": 0.0003, "step": 231670 }, { "epoch": 1.8945905057856647, "grad_norm": 0.005290647502988577, "learning_rate": 8.437793842248021e-08, "loss": 0.0005, "step": 231680 }, { "epoch": 1.8946722819642638, "grad_norm": 0.0020060192327946424, "learning_rate": 8.424743796712598e-08, "loss": 0.0004, "step": 231690 }, { "epoch": 1.894754058142863, "grad_norm": 0.07111623883247375, "learning_rate": 8.411703764964751e-08, "loss": 0.001, "step": 231700 }, { "epoch": 1.8948358343214622, "grad_norm": 0.02924310974776745, "learning_rate": 8.398673747269991e-08, "loss": 0.0003, "step": 231710 }, { "epoch": 1.8949176105000614, "grad_norm": 0.04930892959237099, "learning_rate": 8.385653743893885e-08, "loss": 0.0008, "step": 231720 }, { "epoch": 1.8949993866786605, "grad_norm": 0.028213810175657272, "learning_rate": 8.372643755101661e-08, "loss": 0.0005, "step": 231730 }, { "epoch": 1.8950811628572597, "grad_norm": 0.011490849778056145, "learning_rate": 8.359643781158222e-08, "loss": 0.0006, "step": 231740 }, { "epoch": 1.8951629390358589, "grad_norm": 0.050041552633047104, "learning_rate": 8.346653822328521e-08, "loss": 0.0005, "step": 231750 }, { "epoch": 1.895244715214458, "grad_norm": 0.0029866371769458055, "learning_rate": 8.333673878877124e-08, "loss": 0.0005, "step": 231760 }, { "epoch": 1.8953264913930572, "grad_norm": 0.0039014441426843405, "learning_rate": 8.320703951068431e-08, "loss": 0.0006, "step": 231770 }, { "epoch": 1.8954082675716564, "grad_norm": 0.01606033556163311, "learning_rate": 8.307744039166621e-08, "loss": 0.0006, "step": 231780 }, { "epoch": 1.8954900437502555, "grad_norm": 0.0428207628428936, "learning_rate": 8.294794143435702e-08, "loss": 0.0004, "step": 231790 }, { "epoch": 1.8955718199288547, "grad_norm": 0.0015280090738087893, "learning_rate": 8.281854264139577e-08, "loss": 0.0012, "step": 231800 }, { "epoch": 1.8956535961074539, "grad_norm": 0.023079292848706245, "learning_rate": 8.268924401541755e-08, "loss": 0.0007, "step": 231810 }, { "epoch": 1.895735372286053, "grad_norm": 0.018169201910495758, "learning_rate": 8.25600455590564e-08, "loss": 0.0004, "step": 231820 }, { "epoch": 1.8958171484646522, "grad_norm": 0.06497886776924133, "learning_rate": 8.243094727494405e-08, "loss": 0.0007, "step": 231830 }, { "epoch": 1.8958989246432514, "grad_norm": 0.021869894117116928, "learning_rate": 8.230194916571066e-08, "loss": 0.0004, "step": 231840 }, { "epoch": 1.8959807008218506, "grad_norm": 0.0126867750659585, "learning_rate": 8.217305123398356e-08, "loss": 0.0007, "step": 231850 }, { "epoch": 1.8960624770004497, "grad_norm": 0.013300647959113121, "learning_rate": 8.204425348238898e-08, "loss": 0.0002, "step": 231860 }, { "epoch": 1.8961442531790489, "grad_norm": 0.033234864473342896, "learning_rate": 8.191555591355094e-08, "loss": 0.0004, "step": 231870 }, { "epoch": 1.8962260293576483, "grad_norm": 0.02354354038834572, "learning_rate": 8.178695853009012e-08, "loss": 0.0005, "step": 231880 }, { "epoch": 1.8963078055362472, "grad_norm": 0.044404465705156326, "learning_rate": 8.165846133462718e-08, "loss": 0.0015, "step": 231890 }, { "epoch": 1.8963895817148466, "grad_norm": 0.05193810537457466, "learning_rate": 8.153006432977895e-08, "loss": 0.001, "step": 231900 }, { "epoch": 1.8964713578934456, "grad_norm": 0.02306496910750866, "learning_rate": 8.140176751816054e-08, "loss": 0.0005, "step": 231910 }, { "epoch": 1.896553134072045, "grad_norm": 0.02344205603003502, "learning_rate": 8.127357090238707e-08, "loss": 0.0008, "step": 231920 }, { "epoch": 1.896634910250644, "grad_norm": 0.0013303635641932487, "learning_rate": 8.11454744850687e-08, "loss": 0.0006, "step": 231930 }, { "epoch": 1.8967166864292433, "grad_norm": 0.013179315254092216, "learning_rate": 8.101747826881556e-08, "loss": 0.0004, "step": 231940 }, { "epoch": 1.8967984626078422, "grad_norm": 0.034935738891363144, "learning_rate": 8.088958225623388e-08, "loss": 0.0004, "step": 231950 }, { "epoch": 1.8968802387864416, "grad_norm": 0.04832100123167038, "learning_rate": 8.076178644993105e-08, "loss": 0.0007, "step": 231960 }, { "epoch": 1.8969620149650406, "grad_norm": 0.057211168110370636, "learning_rate": 8.063409085250829e-08, "loss": 0.0011, "step": 231970 }, { "epoch": 1.89704379114364, "grad_norm": 0.025299375876784325, "learning_rate": 8.050649546656853e-08, "loss": 0.0009, "step": 231980 }, { "epoch": 1.897125567322239, "grad_norm": 0.0016686279559507966, "learning_rate": 8.037900029470969e-08, "loss": 0.0005, "step": 231990 }, { "epoch": 1.8972073435008383, "grad_norm": 0.029124081134796143, "learning_rate": 8.02516053395297e-08, "loss": 0.0012, "step": 232000 }, { "epoch": 1.8972891196794373, "grad_norm": 0.07258351147174835, "learning_rate": 8.012431060362369e-08, "loss": 0.0007, "step": 232010 }, { "epoch": 1.8973708958580366, "grad_norm": 0.033327143639326096, "learning_rate": 7.999711608958405e-08, "loss": 0.0006, "step": 232020 }, { "epoch": 1.8974526720366356, "grad_norm": 0.053264543414115906, "learning_rate": 7.987002180000258e-08, "loss": 0.0004, "step": 232030 }, { "epoch": 1.897534448215235, "grad_norm": 0.013201341032981873, "learning_rate": 7.974302773746779e-08, "loss": 0.0007, "step": 232040 }, { "epoch": 1.897616224393834, "grad_norm": 0.17130130529403687, "learning_rate": 7.961613390456758e-08, "loss": 0.0009, "step": 232050 }, { "epoch": 1.8976980005724333, "grad_norm": 0.025460796430706978, "learning_rate": 7.948934030388544e-08, "loss": 0.0003, "step": 232060 }, { "epoch": 1.8977797767510323, "grad_norm": 0.05615023896098137, "learning_rate": 7.936264693800544e-08, "loss": 0.0007, "step": 232070 }, { "epoch": 1.8978615529296317, "grad_norm": 0.03038889355957508, "learning_rate": 7.923605380950772e-08, "loss": 0.0012, "step": 232080 }, { "epoch": 1.8979433291082308, "grad_norm": 0.06081550940871239, "learning_rate": 7.910956092097189e-08, "loss": 0.0003, "step": 232090 }, { "epoch": 1.89802510528683, "grad_norm": 0.003484240500256419, "learning_rate": 7.898316827497366e-08, "loss": 0.0006, "step": 232100 }, { "epoch": 1.8981068814654292, "grad_norm": 0.028640547767281532, "learning_rate": 7.885687587408874e-08, "loss": 0.0003, "step": 232110 }, { "epoch": 1.8981886576440283, "grad_norm": 0.05068547651171684, "learning_rate": 7.873068372088954e-08, "loss": 0.0018, "step": 232120 }, { "epoch": 1.8982704338226275, "grad_norm": 0.009515746496617794, "learning_rate": 7.860459181794621e-08, "loss": 0.0009, "step": 232130 }, { "epoch": 1.8983522100012267, "grad_norm": 0.06476569175720215, "learning_rate": 7.847860016782727e-08, "loss": 0.0007, "step": 232140 }, { "epoch": 1.8984339861798258, "grad_norm": 0.03360094130039215, "learning_rate": 7.835270877310009e-08, "loss": 0.0005, "step": 232150 }, { "epoch": 1.898515762358425, "grad_norm": 0.05073588341474533, "learning_rate": 7.822691763632873e-08, "loss": 0.0013, "step": 232160 }, { "epoch": 1.8985975385370242, "grad_norm": 0.008196678943932056, "learning_rate": 7.810122676007558e-08, "loss": 0.0013, "step": 232170 }, { "epoch": 1.8986793147156233, "grad_norm": 0.00624864362180233, "learning_rate": 7.797563614690084e-08, "loss": 0.0012, "step": 232180 }, { "epoch": 1.8987610908942225, "grad_norm": 0.03389875218272209, "learning_rate": 7.785014579936357e-08, "loss": 0.0006, "step": 232190 }, { "epoch": 1.8988428670728217, "grad_norm": 0.009824104607105255, "learning_rate": 7.772475572001947e-08, "loss": 0.0012, "step": 232200 }, { "epoch": 1.8989246432514209, "grad_norm": 0.01321414951235056, "learning_rate": 7.759946591142375e-08, "loss": 0.0005, "step": 232210 }, { "epoch": 1.89900641943002, "grad_norm": 0.03922205790877342, "learning_rate": 7.74742763761277e-08, "loss": 0.0005, "step": 232220 }, { "epoch": 1.8990881956086192, "grad_norm": 0.05234195664525032, "learning_rate": 7.734918711668205e-08, "loss": 0.0008, "step": 232230 }, { "epoch": 1.8991699717872184, "grad_norm": 0.10722754150629044, "learning_rate": 7.722419813563476e-08, "loss": 0.0011, "step": 232240 }, { "epoch": 1.8992517479658175, "grad_norm": 0.010363021865487099, "learning_rate": 7.709930943553156e-08, "loss": 0.0005, "step": 232250 }, { "epoch": 1.8993335241444167, "grad_norm": 0.0172476414591074, "learning_rate": 7.697452101891767e-08, "loss": 0.0004, "step": 232260 }, { "epoch": 1.8994153003230159, "grad_norm": 0.0003879187861457467, "learning_rate": 7.684983288833326e-08, "loss": 0.0005, "step": 232270 }, { "epoch": 1.899497076501615, "grad_norm": 0.023572076112031937, "learning_rate": 7.672524504632073e-08, "loss": 0.0007, "step": 232280 }, { "epoch": 1.8995788526802142, "grad_norm": 0.03699266538023949, "learning_rate": 7.660075749541585e-08, "loss": 0.0013, "step": 232290 }, { "epoch": 1.8996606288588134, "grad_norm": 0.002722240751609206, "learning_rate": 7.647637023815601e-08, "loss": 0.0007, "step": 232300 }, { "epoch": 1.8997424050374128, "grad_norm": 0.037164654582738876, "learning_rate": 7.635208327707422e-08, "loss": 0.0005, "step": 232310 }, { "epoch": 1.8998241812160117, "grad_norm": 0.1026657298207283, "learning_rate": 7.622789661470287e-08, "loss": 0.0004, "step": 232320 }, { "epoch": 1.899905957394611, "grad_norm": 0.01604471728205681, "learning_rate": 7.610381025357161e-08, "loss": 0.0004, "step": 232330 }, { "epoch": 1.89998773357321, "grad_norm": 0.027097754180431366, "learning_rate": 7.597982419620841e-08, "loss": 0.001, "step": 232340 }, { "epoch": 1.9000695097518094, "grad_norm": 0.02324855700135231, "learning_rate": 7.585593844513794e-08, "loss": 0.0008, "step": 232350 }, { "epoch": 1.9001512859304084, "grad_norm": 0.006200226489454508, "learning_rate": 7.573215300288483e-08, "loss": 0.0003, "step": 232360 }, { "epoch": 1.9002330621090078, "grad_norm": 0.0218743234872818, "learning_rate": 7.56084678719704e-08, "loss": 0.0005, "step": 232370 }, { "epoch": 1.9003148382876067, "grad_norm": 0.02599695511162281, "learning_rate": 7.54848830549143e-08, "loss": 0.0005, "step": 232380 }, { "epoch": 1.9003966144662061, "grad_norm": 0.09412003308534622, "learning_rate": 7.536139855423397e-08, "loss": 0.0006, "step": 232390 }, { "epoch": 1.900478390644805, "grad_norm": 0.08596251904964447, "learning_rate": 7.52380143724446e-08, "loss": 0.0005, "step": 232400 }, { "epoch": 1.9005601668234045, "grad_norm": 0.010689293034374714, "learning_rate": 7.51147305120603e-08, "loss": 0.0012, "step": 232410 }, { "epoch": 1.9006419430020034, "grad_norm": 0.0022718897089362144, "learning_rate": 7.499154697559186e-08, "loss": 0.0006, "step": 232420 }, { "epoch": 1.9007237191806028, "grad_norm": 0.03906991705298424, "learning_rate": 7.48684637655489e-08, "loss": 0.0005, "step": 232430 }, { "epoch": 1.9008054953592017, "grad_norm": 0.0123751824721694, "learning_rate": 7.47454808844389e-08, "loss": 0.0006, "step": 232440 }, { "epoch": 1.9008872715378011, "grad_norm": 0.036237429827451706, "learning_rate": 7.462259833476648e-08, "loss": 0.0047, "step": 232450 }, { "epoch": 1.9009690477164, "grad_norm": 0.08796057850122452, "learning_rate": 7.449981611903578e-08, "loss": 0.0006, "step": 232460 }, { "epoch": 1.9010508238949995, "grad_norm": 0.0072944327257573605, "learning_rate": 7.437713423974757e-08, "loss": 0.0009, "step": 232470 }, { "epoch": 1.9011326000735984, "grad_norm": 0.042099349200725555, "learning_rate": 7.42545526994004e-08, "loss": 0.0004, "step": 232480 }, { "epoch": 1.9012143762521978, "grad_norm": 0.008705981075763702, "learning_rate": 7.413207150049173e-08, "loss": 0.0003, "step": 232490 }, { "epoch": 1.9012961524307967, "grad_norm": 0.03130263462662697, "learning_rate": 7.400969064551732e-08, "loss": 0.0009, "step": 232500 }, { "epoch": 1.9013779286093961, "grad_norm": 0.01996535062789917, "learning_rate": 7.388741013696909e-08, "loss": 0.0004, "step": 232510 }, { "epoch": 1.9014597047879953, "grad_norm": 0.3073176443576813, "learning_rate": 7.376522997733837e-08, "loss": 0.0011, "step": 232520 }, { "epoch": 1.9015414809665945, "grad_norm": 0.0026649339124560356, "learning_rate": 7.364315016911428e-08, "loss": 0.0005, "step": 232530 }, { "epoch": 1.9016232571451936, "grad_norm": 0.02533029578626156, "learning_rate": 7.35211707147837e-08, "loss": 0.0005, "step": 232540 }, { "epoch": 1.9017050333237928, "grad_norm": 0.026982218027114868, "learning_rate": 7.339929161683135e-08, "loss": 0.0006, "step": 232550 }, { "epoch": 1.901786809502392, "grad_norm": 0.02911257930099964, "learning_rate": 7.327751287773966e-08, "loss": 0.0004, "step": 232560 }, { "epoch": 1.9018685856809912, "grad_norm": 0.02388039417564869, "learning_rate": 7.315583449998997e-08, "loss": 0.0003, "step": 232570 }, { "epoch": 1.9019503618595903, "grad_norm": 0.037197183817625046, "learning_rate": 7.303425648606089e-08, "loss": 0.0003, "step": 232580 }, { "epoch": 1.9020321380381895, "grad_norm": 0.025544285774230957, "learning_rate": 7.291277883842873e-08, "loss": 0.0008, "step": 232590 }, { "epoch": 1.9021139142167887, "grad_norm": 0.04111373797059059, "learning_rate": 7.279140155956765e-08, "loss": 0.0004, "step": 232600 }, { "epoch": 1.9021956903953878, "grad_norm": 0.0395316556096077, "learning_rate": 7.267012465195123e-08, "loss": 0.0006, "step": 232610 }, { "epoch": 1.902277466573987, "grad_norm": 0.13219432532787323, "learning_rate": 7.254894811804969e-08, "loss": 0.0006, "step": 232620 }, { "epoch": 1.9023592427525862, "grad_norm": 0.030826538801193237, "learning_rate": 7.242787196033053e-08, "loss": 0.0012, "step": 232630 }, { "epoch": 1.9024410189311853, "grad_norm": 0.007067921571433544, "learning_rate": 7.230689618126175e-08, "loss": 0.0004, "step": 232640 }, { "epoch": 1.9025227951097845, "grad_norm": 0.00449715182185173, "learning_rate": 7.218602078330639e-08, "loss": 0.001, "step": 232650 }, { "epoch": 1.9026045712883837, "grad_norm": 0.07907326519489288, "learning_rate": 7.206524576892803e-08, "loss": 0.0011, "step": 232660 }, { "epoch": 1.9026863474669828, "grad_norm": 0.11113964766263962, "learning_rate": 7.194457114058528e-08, "loss": 0.0016, "step": 232670 }, { "epoch": 1.902768123645582, "grad_norm": 0.07584578543901443, "learning_rate": 7.182399690073782e-08, "loss": 0.0006, "step": 232680 }, { "epoch": 1.9028498998241812, "grad_norm": 0.05934561416506767, "learning_rate": 7.170352305184147e-08, "loss": 0.0015, "step": 232690 }, { "epoch": 1.9029316760027803, "grad_norm": 0.005222134292125702, "learning_rate": 7.158314959635038e-08, "loss": 0.0009, "step": 232700 }, { "epoch": 1.9030134521813795, "grad_norm": 0.04563087597489357, "learning_rate": 7.146287653671647e-08, "loss": 0.0006, "step": 232710 }, { "epoch": 1.9030952283599787, "grad_norm": 0.04841093719005585, "learning_rate": 7.134270387538944e-08, "loss": 0.0009, "step": 232720 }, { "epoch": 1.9031770045385779, "grad_norm": 0.02418495900928974, "learning_rate": 7.122263161481845e-08, "loss": 0.001, "step": 232730 }, { "epoch": 1.9032587807171772, "grad_norm": 0.017634622752666473, "learning_rate": 7.110265975744823e-08, "loss": 0.0004, "step": 232740 }, { "epoch": 1.9033405568957762, "grad_norm": 0.032271817326545715, "learning_rate": 7.098278830572347e-08, "loss": 0.0008, "step": 232750 }, { "epoch": 1.9034223330743756, "grad_norm": 0.03265960142016411, "learning_rate": 7.086301726208556e-08, "loss": 0.0003, "step": 232760 }, { "epoch": 1.9035041092529745, "grad_norm": 0.03432324901223183, "learning_rate": 7.074334662897476e-08, "loss": 0.0005, "step": 232770 }, { "epoch": 1.903585885431574, "grad_norm": 0.0481601320207119, "learning_rate": 7.062377640882911e-08, "loss": 0.0005, "step": 232780 }, { "epoch": 1.9036676616101729, "grad_norm": 0.05816134065389633, "learning_rate": 7.050430660408281e-08, "loss": 0.0009, "step": 232790 }, { "epoch": 1.9037494377887723, "grad_norm": 0.11736022680997849, "learning_rate": 7.038493721717165e-08, "loss": 0.0009, "step": 232800 }, { "epoch": 1.9038312139673712, "grad_norm": 0.02658090740442276, "learning_rate": 7.026566825052594e-08, "loss": 0.0002, "step": 232810 }, { "epoch": 1.9039129901459706, "grad_norm": 0.05105218663811684, "learning_rate": 7.014649970657594e-08, "loss": 0.0009, "step": 232820 }, { "epoch": 1.9039947663245695, "grad_norm": 0.08093040436506271, "learning_rate": 7.002743158774916e-08, "loss": 0.0009, "step": 232830 }, { "epoch": 1.904076542503169, "grad_norm": 0.03763869032263756, "learning_rate": 6.990846389647032e-08, "loss": 0.0006, "step": 232840 }, { "epoch": 1.9041583186817679, "grad_norm": 0.038902413100004196, "learning_rate": 6.97895966351636e-08, "loss": 0.0006, "step": 232850 }, { "epoch": 1.9042400948603673, "grad_norm": 0.004430669359862804, "learning_rate": 6.967082980625039e-08, "loss": 0.0004, "step": 232860 }, { "epoch": 1.9043218710389662, "grad_norm": 0.022194691002368927, "learning_rate": 6.955216341214987e-08, "loss": 0.0008, "step": 232870 }, { "epoch": 1.9044036472175656, "grad_norm": 0.007286737207323313, "learning_rate": 6.943359745527955e-08, "loss": 0.0003, "step": 232880 }, { "epoch": 1.9044854233961646, "grad_norm": 0.13366442918777466, "learning_rate": 6.931513193805418e-08, "loss": 0.0009, "step": 232890 }, { "epoch": 1.904567199574764, "grad_norm": 0.0016419824678450823, "learning_rate": 6.91967668628879e-08, "loss": 0.0008, "step": 232900 }, { "epoch": 1.904648975753363, "grad_norm": 0.025382179766893387, "learning_rate": 6.907850223219104e-08, "loss": 0.0005, "step": 232910 }, { "epoch": 1.9047307519319623, "grad_norm": 0.004343343898653984, "learning_rate": 6.896033804837332e-08, "loss": 0.001, "step": 232920 }, { "epoch": 1.9048125281105612, "grad_norm": 0.003654590342193842, "learning_rate": 6.884227431384227e-08, "loss": 0.0003, "step": 232930 }, { "epoch": 1.9048943042891606, "grad_norm": 0.018152283504605293, "learning_rate": 6.872431103100153e-08, "loss": 0.0005, "step": 232940 }, { "epoch": 1.9049760804677598, "grad_norm": 0.0031803760211914778, "learning_rate": 6.860644820225526e-08, "loss": 0.0002, "step": 232950 }, { "epoch": 1.905057856646359, "grad_norm": 0.037871621549129486, "learning_rate": 6.848868583000434e-08, "loss": 0.0003, "step": 232960 }, { "epoch": 1.9051396328249581, "grad_norm": 0.06114139407873154, "learning_rate": 6.837102391664685e-08, "loss": 0.0007, "step": 232970 }, { "epoch": 1.9052214090035573, "grad_norm": 0.0027150725945830345, "learning_rate": 6.825346246458086e-08, "loss": 0.0003, "step": 232980 }, { "epoch": 1.9053031851821565, "grad_norm": 0.06950609385967255, "learning_rate": 6.813600147620003e-08, "loss": 0.0007, "step": 232990 }, { "epoch": 1.9053849613607556, "grad_norm": 0.06715934723615646, "learning_rate": 6.801864095389799e-08, "loss": 0.0008, "step": 233000 }, { "epoch": 1.9054667375393548, "grad_norm": 0.0323554202914238, "learning_rate": 6.790138090006504e-08, "loss": 0.0004, "step": 233010 }, { "epoch": 1.905548513717954, "grad_norm": 0.005174726247787476, "learning_rate": 6.778422131708984e-08, "loss": 0.0007, "step": 233020 }, { "epoch": 1.9056302898965531, "grad_norm": 0.026616718620061874, "learning_rate": 6.76671622073588e-08, "loss": 0.0006, "step": 233030 }, { "epoch": 1.9057120660751523, "grad_norm": 0.0134158656001091, "learning_rate": 6.755020357325726e-08, "loss": 0.0004, "step": 233040 }, { "epoch": 1.9057938422537515, "grad_norm": 0.07807565480470657, "learning_rate": 6.743334541716772e-08, "loss": 0.0007, "step": 233050 }, { "epoch": 1.9058756184323506, "grad_norm": 0.016042398288846016, "learning_rate": 6.731658774146999e-08, "loss": 0.0012, "step": 233060 }, { "epoch": 1.9059573946109498, "grad_norm": 0.01226658746600151, "learning_rate": 6.719993054854324e-08, "loss": 0.0005, "step": 233070 }, { "epoch": 1.906039170789549, "grad_norm": 0.07855336368083954, "learning_rate": 6.708337384076335e-08, "loss": 0.0007, "step": 233080 }, { "epoch": 1.9061209469681482, "grad_norm": 0.1533537656068802, "learning_rate": 6.696691762050456e-08, "loss": 0.0007, "step": 233090 }, { "epoch": 1.9062027231467473, "grad_norm": 0.0056542279198765755, "learning_rate": 6.685056189013938e-08, "loss": 0.0012, "step": 233100 }, { "epoch": 1.9062844993253465, "grad_norm": 0.0224568210542202, "learning_rate": 6.673430665203817e-08, "loss": 0.0003, "step": 233110 }, { "epoch": 1.9063662755039457, "grad_norm": 0.022734329104423523, "learning_rate": 6.661815190856902e-08, "loss": 0.0004, "step": 233120 }, { "epoch": 1.9064480516825448, "grad_norm": 0.055508486926555634, "learning_rate": 6.650209766209837e-08, "loss": 0.0006, "step": 233130 }, { "epoch": 1.906529827861144, "grad_norm": 0.05464168265461922, "learning_rate": 6.638614391498987e-08, "loss": 0.0006, "step": 233140 }, { "epoch": 1.9066116040397432, "grad_norm": 0.046433813869953156, "learning_rate": 6.627029066960611e-08, "loss": 0.0003, "step": 233150 }, { "epoch": 1.9066933802183423, "grad_norm": 0.02389531210064888, "learning_rate": 6.615453792830684e-08, "loss": 0.0005, "step": 233160 }, { "epoch": 1.9067751563969417, "grad_norm": 0.03308026120066643, "learning_rate": 6.603888569345018e-08, "loss": 0.0005, "step": 233170 }, { "epoch": 1.9068569325755407, "grad_norm": 0.04321209713816643, "learning_rate": 6.592333396739148e-08, "loss": 0.0009, "step": 233180 }, { "epoch": 1.90693870875414, "grad_norm": 0.05100777745246887, "learning_rate": 6.580788275248551e-08, "loss": 0.0007, "step": 233190 }, { "epoch": 1.907020484932739, "grad_norm": 0.1264197677373886, "learning_rate": 6.569253205108372e-08, "loss": 0.0011, "step": 233200 }, { "epoch": 1.9071022611113384, "grad_norm": 0.002580857602879405, "learning_rate": 6.557728186553591e-08, "loss": 0.0012, "step": 233210 }, { "epoch": 1.9071840372899374, "grad_norm": 0.005991232115775347, "learning_rate": 6.546213219818964e-08, "loss": 0.0001, "step": 233220 }, { "epoch": 1.9072658134685367, "grad_norm": 0.026582958176732063, "learning_rate": 6.534708305139082e-08, "loss": 0.0006, "step": 233230 }, { "epoch": 1.9073475896471357, "grad_norm": 0.07728955894708633, "learning_rate": 6.523213442748255e-08, "loss": 0.0003, "step": 233240 }, { "epoch": 1.907429365825735, "grad_norm": 0.060143712908029556, "learning_rate": 6.511728632880742e-08, "loss": 0.0005, "step": 233250 }, { "epoch": 1.907511142004334, "grad_norm": 0.00985786598175764, "learning_rate": 6.500253875770412e-08, "loss": 0.0005, "step": 233260 }, { "epoch": 1.9075929181829334, "grad_norm": 0.029822582378983498, "learning_rate": 6.488789171651078e-08, "loss": 0.0005, "step": 233270 }, { "epoch": 1.9076746943615324, "grad_norm": 0.03507228568196297, "learning_rate": 6.477334520756163e-08, "loss": 0.0004, "step": 233280 }, { "epoch": 1.9077564705401318, "grad_norm": 0.005769785959273577, "learning_rate": 6.465889923319202e-08, "loss": 0.0003, "step": 233290 }, { "epoch": 1.9078382467187307, "grad_norm": 0.04555797949433327, "learning_rate": 6.454455379573177e-08, "loss": 0.0009, "step": 233300 }, { "epoch": 1.90792002289733, "grad_norm": 0.01716780848801136, "learning_rate": 6.443030889751123e-08, "loss": 0.0003, "step": 233310 }, { "epoch": 1.908001799075929, "grad_norm": 0.07397855073213577, "learning_rate": 6.431616454085688e-08, "loss": 0.0008, "step": 233320 }, { "epoch": 1.9080835752545284, "grad_norm": 0.045553289353847504, "learning_rate": 6.420212072809407e-08, "loss": 0.0006, "step": 233330 }, { "epoch": 1.9081653514331274, "grad_norm": 0.017851950600743294, "learning_rate": 6.408817746154594e-08, "loss": 0.0005, "step": 233340 }, { "epoch": 1.9082471276117268, "grad_norm": 0.08468140661716461, "learning_rate": 6.397433474353399e-08, "loss": 0.0008, "step": 233350 }, { "epoch": 1.9083289037903257, "grad_norm": 0.05315838381648064, "learning_rate": 6.386059257637689e-08, "loss": 0.0005, "step": 233360 }, { "epoch": 1.908410679968925, "grad_norm": 0.08424951881170273, "learning_rate": 6.374695096239226e-08, "loss": 0.0005, "step": 233370 }, { "epoch": 1.9084924561475243, "grad_norm": 0.03997404873371124, "learning_rate": 6.363340990389432e-08, "loss": 0.0015, "step": 233380 }, { "epoch": 1.9085742323261234, "grad_norm": 0.008627436123788357, "learning_rate": 6.351996940319627e-08, "loss": 0.0005, "step": 233390 }, { "epoch": 1.9086560085047226, "grad_norm": 0.020122665911912918, "learning_rate": 6.340662946260844e-08, "loss": 0.0007, "step": 233400 }, { "epoch": 1.9087377846833218, "grad_norm": 0.008052297867834568, "learning_rate": 6.329339008444068e-08, "loss": 0.0007, "step": 233410 }, { "epoch": 1.908819560861921, "grad_norm": 0.035986337810754776, "learning_rate": 6.318025127099947e-08, "loss": 0.0008, "step": 233420 }, { "epoch": 1.9089013370405201, "grad_norm": 0.08621054887771606, "learning_rate": 6.306721302458963e-08, "loss": 0.0007, "step": 233430 }, { "epoch": 1.9089831132191193, "grad_norm": 0.05105508118867874, "learning_rate": 6.29542753475132e-08, "loss": 0.0004, "step": 233440 }, { "epoch": 1.9090648893977185, "grad_norm": 0.009301398880779743, "learning_rate": 6.284143824207167e-08, "loss": 0.0002, "step": 233450 }, { "epoch": 1.9091466655763176, "grad_norm": 0.024485325440764427, "learning_rate": 6.272870171056267e-08, "loss": 0.0004, "step": 233460 }, { "epoch": 1.9092284417549168, "grad_norm": 0.04869974032044411, "learning_rate": 6.261606575528379e-08, "loss": 0.0003, "step": 233470 }, { "epoch": 1.909310217933516, "grad_norm": 0.02472987398505211, "learning_rate": 6.250353037852819e-08, "loss": 0.0003, "step": 233480 }, { "epoch": 1.9093919941121151, "grad_norm": 0.03849055618047714, "learning_rate": 6.23910955825896e-08, "loss": 0.0007, "step": 233490 }, { "epoch": 1.9094737702907143, "grad_norm": 0.02507900819182396, "learning_rate": 6.227876136975785e-08, "loss": 0.0005, "step": 233500 }, { "epoch": 1.9095555464693135, "grad_norm": 0.0013006903463974595, "learning_rate": 6.216652774232113e-08, "loss": 0.0002, "step": 233510 }, { "epoch": 1.9096373226479126, "grad_norm": 0.002026248024776578, "learning_rate": 6.205439470256536e-08, "loss": 0.0007, "step": 233520 }, { "epoch": 1.9097190988265118, "grad_norm": 0.019003799185156822, "learning_rate": 6.194236225277594e-08, "loss": 0.0007, "step": 233530 }, { "epoch": 1.909800875005111, "grad_norm": 0.024399667978286743, "learning_rate": 6.183043039523495e-08, "loss": 0.0004, "step": 233540 }, { "epoch": 1.9098826511837101, "grad_norm": 0.05351211503148079, "learning_rate": 6.171859913222167e-08, "loss": 0.0007, "step": 233550 }, { "epoch": 1.9099644273623093, "grad_norm": 0.025331037119030952, "learning_rate": 6.160686846601428e-08, "loss": 0.0005, "step": 233560 }, { "epoch": 1.9100462035409085, "grad_norm": 0.023470265790820122, "learning_rate": 6.149523839888927e-08, "loss": 0.0011, "step": 233570 }, { "epoch": 1.9101279797195079, "grad_norm": 0.01680474914610386, "learning_rate": 6.13837089331204e-08, "loss": 0.0004, "step": 233580 }, { "epoch": 1.9102097558981068, "grad_norm": 0.0016101327491924167, "learning_rate": 6.127228007098029e-08, "loss": 0.0004, "step": 233590 }, { "epoch": 1.9102915320767062, "grad_norm": 0.057434115558862686, "learning_rate": 6.116095181473714e-08, "loss": 0.0008, "step": 233600 }, { "epoch": 1.9103733082553052, "grad_norm": 0.043479882180690765, "learning_rate": 6.104972416666077e-08, "loss": 0.0008, "step": 233610 }, { "epoch": 1.9104550844339045, "grad_norm": 0.019459178671240807, "learning_rate": 6.093859712901551e-08, "loss": 0.0007, "step": 233620 }, { "epoch": 1.9105368606125035, "grad_norm": 0.09130356460809708, "learning_rate": 6.08275707040662e-08, "loss": 0.0009, "step": 233630 }, { "epoch": 1.9106186367911029, "grad_norm": 0.05429855361580849, "learning_rate": 6.07166448940738e-08, "loss": 0.0006, "step": 233640 }, { "epoch": 1.9107004129697018, "grad_norm": 0.012973957695066929, "learning_rate": 6.06058197012982e-08, "loss": 0.0008, "step": 233650 }, { "epoch": 1.9107821891483012, "grad_norm": 0.030447419732809067, "learning_rate": 6.0495095127997e-08, "loss": 0.0006, "step": 233660 }, { "epoch": 1.9108639653269002, "grad_norm": 0.0032811604905873537, "learning_rate": 6.038447117642565e-08, "loss": 0.0007, "step": 233670 }, { "epoch": 1.9109457415054996, "grad_norm": 0.028002385050058365, "learning_rate": 6.027394784883789e-08, "loss": 0.0004, "step": 233680 }, { "epoch": 1.9110275176840985, "grad_norm": 0.0029388999100774527, "learning_rate": 6.016352514748525e-08, "loss": 0.0004, "step": 233690 }, { "epoch": 1.911109293862698, "grad_norm": 0.04427897185087204, "learning_rate": 6.005320307461648e-08, "loss": 0.0008, "step": 233700 }, { "epoch": 1.9111910700412968, "grad_norm": 0.012829448096454144, "learning_rate": 5.99429816324798e-08, "loss": 0.0005, "step": 233710 }, { "epoch": 1.9112728462198962, "grad_norm": 0.031090842559933662, "learning_rate": 5.983286082331952e-08, "loss": 0.0006, "step": 233720 }, { "epoch": 1.9113546223984952, "grad_norm": 0.029128167778253555, "learning_rate": 5.972284064937994e-08, "loss": 0.0009, "step": 233730 }, { "epoch": 1.9114363985770946, "grad_norm": 0.03172716498374939, "learning_rate": 5.961292111290151e-08, "loss": 0.0013, "step": 233740 }, { "epoch": 1.9115181747556935, "grad_norm": 0.01987706497311592, "learning_rate": 5.950310221612354e-08, "loss": 0.0003, "step": 233750 }, { "epoch": 1.911599950934293, "grad_norm": 0.029934410005807877, "learning_rate": 5.9393383961283134e-08, "loss": 0.0004, "step": 233760 }, { "epoch": 1.9116817271128919, "grad_norm": 0.06474412232637405, "learning_rate": 5.9283766350615724e-08, "loss": 0.0007, "step": 233770 }, { "epoch": 1.9117635032914913, "grad_norm": 0.05452468991279602, "learning_rate": 5.917424938635396e-08, "loss": 0.0009, "step": 233780 }, { "epoch": 1.9118452794700902, "grad_norm": 0.060901425778865814, "learning_rate": 5.90648330707283e-08, "loss": 0.0009, "step": 233790 }, { "epoch": 1.9119270556486896, "grad_norm": 0.07091294974088669, "learning_rate": 5.8955517405969164e-08, "loss": 0.0005, "step": 233800 }, { "epoch": 1.9120088318272888, "grad_norm": 0.1332891434431076, "learning_rate": 5.8846302394302e-08, "loss": 0.0012, "step": 233810 }, { "epoch": 1.912090608005888, "grad_norm": 0.0037134899757802486, "learning_rate": 5.87371880379517e-08, "loss": 0.0004, "step": 233820 }, { "epoch": 1.912172384184487, "grad_norm": 0.005378232803195715, "learning_rate": 5.8628174339141473e-08, "loss": 0.0005, "step": 233830 }, { "epoch": 1.9122541603630863, "grad_norm": 0.0008087207097560167, "learning_rate": 5.851926130009178e-08, "loss": 0.0005, "step": 233840 }, { "epoch": 1.9123359365416854, "grad_norm": 0.04304250702261925, "learning_rate": 5.841044892302194e-08, "loss": 0.0006, "step": 233850 }, { "epoch": 1.9124177127202846, "grad_norm": 0.011051440611481667, "learning_rate": 5.8301737210147425e-08, "loss": 0.0006, "step": 233860 }, { "epoch": 1.9124994888988838, "grad_norm": 0.03463888540863991, "learning_rate": 5.819312616368367e-08, "loss": 0.001, "step": 233870 }, { "epoch": 1.912581265077483, "grad_norm": 0.03365517035126686, "learning_rate": 5.8084615785842235e-08, "loss": 0.0006, "step": 233880 }, { "epoch": 1.912663041256082, "grad_norm": 0.17603766918182373, "learning_rate": 5.7976206078834695e-08, "loss": 0.0006, "step": 233890 }, { "epoch": 1.9127448174346813, "grad_norm": 0.14196369051933289, "learning_rate": 5.7867897044869283e-08, "loss": 0.0011, "step": 233900 }, { "epoch": 1.9128265936132804, "grad_norm": 0.017109526321291924, "learning_rate": 5.775968868615145e-08, "loss": 0.0003, "step": 233910 }, { "epoch": 1.9129083697918796, "grad_norm": 0.03321639075875282, "learning_rate": 5.7651581004886104e-08, "loss": 0.0005, "step": 233920 }, { "epoch": 1.9129901459704788, "grad_norm": 0.036843348294496536, "learning_rate": 5.754357400327537e-08, "loss": 0.0006, "step": 233930 }, { "epoch": 1.913071922149078, "grad_norm": 0.008211870677769184, "learning_rate": 5.743566768351971e-08, "loss": 0.0005, "step": 233940 }, { "epoch": 1.9131536983276771, "grad_norm": 0.03813181072473526, "learning_rate": 5.732786204781682e-08, "loss": 0.0005, "step": 233950 }, { "epoch": 1.9132354745062763, "grad_norm": 0.08397744596004486, "learning_rate": 5.7220157098362706e-08, "loss": 0.0007, "step": 233960 }, { "epoch": 1.9133172506848755, "grad_norm": 0.03077094815671444, "learning_rate": 5.7112552837352287e-08, "loss": 0.0005, "step": 233970 }, { "epoch": 1.9133990268634746, "grad_norm": 0.008835976012051105, "learning_rate": 5.7005049266976585e-08, "loss": 0.0006, "step": 233980 }, { "epoch": 1.9134808030420738, "grad_norm": 0.08362963795661926, "learning_rate": 5.6897646389426075e-08, "loss": 0.0006, "step": 233990 }, { "epoch": 1.913562579220673, "grad_norm": 0.013564993627369404, "learning_rate": 5.6790344206887895e-08, "loss": 0.0009, "step": 234000 }, { "epoch": 1.9136443553992724, "grad_norm": 0.07338909804821014, "learning_rate": 5.668314272154862e-08, "loss": 0.001, "step": 234010 }, { "epoch": 1.9137261315778713, "grad_norm": 0.0025697406381368637, "learning_rate": 5.657604193559263e-08, "loss": 0.0004, "step": 234020 }, { "epoch": 1.9138079077564707, "grad_norm": 0.027012398466467857, "learning_rate": 5.646904185120039e-08, "loss": 0.0002, "step": 234030 }, { "epoch": 1.9138896839350696, "grad_norm": 0.038919419050216675, "learning_rate": 5.636214247055183e-08, "loss": 0.0005, "step": 234040 }, { "epoch": 1.913971460113669, "grad_norm": 0.0669059008359909, "learning_rate": 5.62553437958252e-08, "loss": 0.0007, "step": 234050 }, { "epoch": 1.914053236292268, "grad_norm": 0.03599744662642479, "learning_rate": 5.614864582919544e-08, "loss": 0.0008, "step": 234060 }, { "epoch": 1.9141350124708674, "grad_norm": 0.07852423191070557, "learning_rate": 5.60420485728369e-08, "loss": 0.0006, "step": 234070 }, { "epoch": 1.9142167886494663, "grad_norm": 0.015823280438780785, "learning_rate": 5.593555202891954e-08, "loss": 0.0006, "step": 234080 }, { "epoch": 1.9142985648280657, "grad_norm": 0.04645562171936035, "learning_rate": 5.582915619961438e-08, "loss": 0.0007, "step": 234090 }, { "epoch": 1.9143803410066647, "grad_norm": 0.024461163207888603, "learning_rate": 5.572286108708802e-08, "loss": 0.0004, "step": 234100 }, { "epoch": 1.914462117185264, "grad_norm": 0.01413240097463131, "learning_rate": 5.561666669350541e-08, "loss": 0.0004, "step": 234110 }, { "epoch": 1.914543893363863, "grad_norm": 0.0016547483392059803, "learning_rate": 5.5510573021030914e-08, "loss": 0.0006, "step": 234120 }, { "epoch": 1.9146256695424624, "grad_norm": 0.002322589512914419, "learning_rate": 5.5404580071824474e-08, "loss": 0.0004, "step": 234130 }, { "epoch": 1.9147074457210613, "grad_norm": 0.05848553776741028, "learning_rate": 5.5298687848046595e-08, "loss": 0.0004, "step": 234140 }, { "epoch": 1.9147892218996607, "grad_norm": 0.02505522035062313, "learning_rate": 5.519289635185332e-08, "loss": 0.0005, "step": 234150 }, { "epoch": 1.9148709980782597, "grad_norm": 0.03597470000386238, "learning_rate": 5.508720558540071e-08, "loss": 0.0008, "step": 234160 }, { "epoch": 1.914952774256859, "grad_norm": 0.003920620307326317, "learning_rate": 5.4981615550840364e-08, "loss": 0.0006, "step": 234170 }, { "epoch": 1.915034550435458, "grad_norm": 0.011832554824650288, "learning_rate": 5.4876126250324456e-08, "loss": 0.0009, "step": 234180 }, { "epoch": 1.9151163266140574, "grad_norm": 0.051648370921611786, "learning_rate": 5.477073768600128e-08, "loss": 0.0006, "step": 234190 }, { "epoch": 1.9151981027926563, "grad_norm": 0.010020875371992588, "learning_rate": 5.466544986001798e-08, "loss": 0.0005, "step": 234200 }, { "epoch": 1.9152798789712557, "grad_norm": 0.02017989195883274, "learning_rate": 5.456026277451898e-08, "loss": 0.0011, "step": 234210 }, { "epoch": 1.915361655149855, "grad_norm": 0.0011424347758293152, "learning_rate": 5.445517643164755e-08, "loss": 0.0011, "step": 234220 }, { "epoch": 1.915443431328454, "grad_norm": 0.01690501533448696, "learning_rate": 5.43501908335442e-08, "loss": 0.0009, "step": 234230 }, { "epoch": 1.9155252075070532, "grad_norm": 0.032283298671245575, "learning_rate": 5.4245305982346674e-08, "loss": 0.0007, "step": 234240 }, { "epoch": 1.9156069836856524, "grad_norm": 0.011367822997272015, "learning_rate": 5.414052188019325e-08, "loss": 0.0005, "step": 234250 }, { "epoch": 1.9156887598642516, "grad_norm": 0.042855773121118546, "learning_rate": 5.4035838529217235e-08, "loss": 0.0011, "step": 234260 }, { "epoch": 1.9157705360428507, "grad_norm": 0.11180505156517029, "learning_rate": 5.3931255931551907e-08, "loss": 0.001, "step": 234270 }, { "epoch": 1.91585231222145, "grad_norm": 0.06489107757806778, "learning_rate": 5.382677408932724e-08, "loss": 0.0009, "step": 234280 }, { "epoch": 1.915934088400049, "grad_norm": 0.005660556722432375, "learning_rate": 5.372239300467097e-08, "loss": 0.0007, "step": 234290 }, { "epoch": 1.9160158645786483, "grad_norm": 0.041355058550834656, "learning_rate": 5.3618112679710846e-08, "loss": 0.0003, "step": 234300 }, { "epoch": 1.9160976407572474, "grad_norm": 0.001599685288965702, "learning_rate": 5.3513933116570164e-08, "loss": 0.0004, "step": 234310 }, { "epoch": 1.9161794169358466, "grad_norm": 0.054492149502038956, "learning_rate": 5.340985431737056e-08, "loss": 0.0005, "step": 234320 }, { "epoch": 1.9162611931144458, "grad_norm": 0.06281862407922745, "learning_rate": 5.330587628423423e-08, "loss": 0.0007, "step": 234330 }, { "epoch": 1.916342969293045, "grad_norm": 0.05203407630324364, "learning_rate": 5.320199901927725e-08, "loss": 0.0004, "step": 234340 }, { "epoch": 1.916424745471644, "grad_norm": 0.03307280316948891, "learning_rate": 5.3098222524616825e-08, "loss": 0.0005, "step": 234350 }, { "epoch": 1.9165065216502433, "grad_norm": 0.0193550456315279, "learning_rate": 5.2994546802366266e-08, "loss": 0.0009, "step": 234360 }, { "epoch": 1.9165882978288424, "grad_norm": 0.024747265502810478, "learning_rate": 5.289097185463832e-08, "loss": 0.0006, "step": 234370 }, { "epoch": 1.9166700740074416, "grad_norm": 0.018479149788618088, "learning_rate": 5.278749768354241e-08, "loss": 0.0017, "step": 234380 }, { "epoch": 1.9167518501860408, "grad_norm": 0.030221935361623764, "learning_rate": 5.2684124291186856e-08, "loss": 0.0004, "step": 234390 }, { "epoch": 1.91683362636464, "grad_norm": 0.030253911390900612, "learning_rate": 5.2580851679676636e-08, "loss": 0.0005, "step": 234400 }, { "epoch": 1.916915402543239, "grad_norm": 0.03589646890759468, "learning_rate": 5.2477679851116184e-08, "loss": 0.0013, "step": 234410 }, { "epoch": 1.9169971787218383, "grad_norm": 0.00679733557626605, "learning_rate": 5.2374608807606586e-08, "loss": 0.0004, "step": 234420 }, { "epoch": 1.9170789549004374, "grad_norm": 0.05113411322236061, "learning_rate": 5.227163855124839e-08, "loss": 0.0005, "step": 234430 }, { "epoch": 1.9171607310790368, "grad_norm": 0.010430189780890942, "learning_rate": 5.2168769084137706e-08, "loss": 0.0008, "step": 234440 }, { "epoch": 1.9172425072576358, "grad_norm": 0.030862104147672653, "learning_rate": 5.206600040837173e-08, "loss": 0.0002, "step": 234450 }, { "epoch": 1.9173242834362352, "grad_norm": 0.02435574121773243, "learning_rate": 5.196333252604324e-08, "loss": 0.0006, "step": 234460 }, { "epoch": 1.9174060596148341, "grad_norm": 0.03388408571481705, "learning_rate": 5.1860765439243346e-08, "loss": 0.0006, "step": 234470 }, { "epoch": 1.9174878357934335, "grad_norm": 0.02104024961590767, "learning_rate": 5.1758299150061474e-08, "loss": 0.0004, "step": 234480 }, { "epoch": 1.9175696119720325, "grad_norm": 0.012058787047863007, "learning_rate": 5.165593366058541e-08, "loss": 0.0004, "step": 234490 }, { "epoch": 1.9176513881506319, "grad_norm": 0.0010118323843926191, "learning_rate": 5.1553668972900706e-08, "loss": 0.0008, "step": 234500 }, { "epoch": 1.9177331643292308, "grad_norm": 0.03616435453295708, "learning_rate": 5.145150508908958e-08, "loss": 0.0004, "step": 234510 }, { "epoch": 1.9178149405078302, "grad_norm": 0.13856972754001617, "learning_rate": 5.134944201123371e-08, "loss": 0.0009, "step": 234520 }, { "epoch": 1.9178967166864291, "grad_norm": 0.028385523706674576, "learning_rate": 5.124747974141253e-08, "loss": 0.0007, "step": 234530 }, { "epoch": 1.9179784928650285, "grad_norm": 0.12819743156433105, "learning_rate": 5.114561828170217e-08, "loss": 0.0004, "step": 234540 }, { "epoch": 1.9180602690436275, "grad_norm": 0.01633470319211483, "learning_rate": 5.104385763417874e-08, "loss": 0.0006, "step": 234550 }, { "epoch": 1.9181420452222269, "grad_norm": 0.005051678512245417, "learning_rate": 5.0942197800913916e-08, "loss": 0.0006, "step": 234560 }, { "epoch": 1.9182238214008258, "grad_norm": 0.056463822722435, "learning_rate": 5.0840638783979936e-08, "loss": 0.0004, "step": 234570 }, { "epoch": 1.9183055975794252, "grad_norm": 0.035466376692056656, "learning_rate": 5.073918058544458e-08, "loss": 0.0003, "step": 234580 }, { "epoch": 1.9183873737580242, "grad_norm": 0.02816629596054554, "learning_rate": 5.063782320737565e-08, "loss": 0.0005, "step": 234590 }, { "epoch": 1.9184691499366235, "grad_norm": 0.02337571606040001, "learning_rate": 5.0536566651837035e-08, "loss": 0.0004, "step": 234600 }, { "epoch": 1.9185509261152225, "grad_norm": 0.04699506238102913, "learning_rate": 5.043541092089099e-08, "loss": 0.0005, "step": 234610 }, { "epoch": 1.9186327022938219, "grad_norm": 0.01819617860019207, "learning_rate": 5.033435601659975e-08, "loss": 0.0007, "step": 234620 }, { "epoch": 1.9187144784724208, "grad_norm": 0.13665100932121277, "learning_rate": 5.023340194102055e-08, "loss": 0.0007, "step": 234630 }, { "epoch": 1.9187962546510202, "grad_norm": 0.019551528617739677, "learning_rate": 5.013254869621065e-08, "loss": 0.0004, "step": 234640 }, { "epoch": 1.9188780308296194, "grad_norm": 0.03758816421031952, "learning_rate": 5.0031796284223946e-08, "loss": 0.0004, "step": 234650 }, { "epoch": 1.9189598070082186, "grad_norm": 0.03332044929265976, "learning_rate": 4.993114470711324e-08, "loss": 0.0007, "step": 234660 }, { "epoch": 1.9190415831868177, "grad_norm": 0.007294265553355217, "learning_rate": 4.983059396692913e-08, "loss": 0.0003, "step": 234670 }, { "epoch": 1.919123359365417, "grad_norm": 0.009950266219675541, "learning_rate": 4.9730144065718853e-08, "loss": 0.0011, "step": 234680 }, { "epoch": 1.919205135544016, "grad_norm": 0.0032649775967001915, "learning_rate": 4.962979500552911e-08, "loss": 0.0005, "step": 234690 }, { "epoch": 1.9192869117226152, "grad_norm": 0.013928741216659546, "learning_rate": 4.9529546788404935e-08, "loss": 0.0008, "step": 234700 }, { "epoch": 1.9193686879012144, "grad_norm": 0.07424621284008026, "learning_rate": 4.942939941638747e-08, "loss": 0.0005, "step": 234710 }, { "epoch": 1.9194504640798136, "grad_norm": 0.034863609820604324, "learning_rate": 4.93293528915173e-08, "loss": 0.0004, "step": 234720 }, { "epoch": 1.9195322402584127, "grad_norm": 0.020312681794166565, "learning_rate": 4.9229407215832245e-08, "loss": 0.0007, "step": 234730 }, { "epoch": 1.919614016437012, "grad_norm": 0.016205396503210068, "learning_rate": 4.912956239136846e-08, "loss": 0.0009, "step": 234740 }, { "epoch": 1.919695792615611, "grad_norm": 0.061858389526605606, "learning_rate": 4.902981842015986e-08, "loss": 0.0008, "step": 234750 }, { "epoch": 1.9197775687942102, "grad_norm": 0.020007407292723656, "learning_rate": 4.8930175304238156e-08, "loss": 0.0005, "step": 234760 }, { "epoch": 1.9198593449728094, "grad_norm": 0.03600189834833145, "learning_rate": 4.8830633045633405e-08, "loss": 0.0005, "step": 234770 }, { "epoch": 1.9199411211514086, "grad_norm": 0.1027957871556282, "learning_rate": 4.873119164637285e-08, "loss": 0.0006, "step": 234780 }, { "epoch": 1.9200228973300077, "grad_norm": 0.030113348737359047, "learning_rate": 4.8631851108482676e-08, "loss": 0.0005, "step": 234790 }, { "epoch": 1.920104673508607, "grad_norm": 0.09915180504322052, "learning_rate": 4.8532611433986245e-08, "loss": 0.0004, "step": 234800 }, { "epoch": 1.920186449687206, "grad_norm": 0.012019433081150055, "learning_rate": 4.8433472624904724e-08, "loss": 0.0021, "step": 234810 }, { "epoch": 1.9202682258658053, "grad_norm": 0.034137532114982605, "learning_rate": 4.8334434683259266e-08, "loss": 0.0009, "step": 234820 }, { "epoch": 1.9203500020444044, "grad_norm": 0.01056730467826128, "learning_rate": 4.8235497611065494e-08, "loss": 0.0007, "step": 234830 }, { "epoch": 1.9204317782230036, "grad_norm": 0.09773479402065277, "learning_rate": 4.8136661410340126e-08, "loss": 0.0005, "step": 234840 }, { "epoch": 1.9205135544016028, "grad_norm": 0.0009143850766122341, "learning_rate": 4.8037926083095434e-08, "loss": 0.0005, "step": 234850 }, { "epoch": 1.920595330580202, "grad_norm": 0.04522545263171196, "learning_rate": 4.79392916313437e-08, "loss": 0.0009, "step": 234860 }, { "epoch": 1.9206771067588013, "grad_norm": 0.014296681620180607, "learning_rate": 4.784075805709443e-08, "loss": 0.0007, "step": 234870 }, { "epoch": 1.9207588829374003, "grad_norm": 0.025373438373208046, "learning_rate": 4.774232536235379e-08, "loss": 0.0004, "step": 234880 }, { "epoch": 1.9208406591159997, "grad_norm": 0.10384923219680786, "learning_rate": 4.7643993549126855e-08, "loss": 0.0007, "step": 234890 }, { "epoch": 1.9209224352945986, "grad_norm": 0.02828921005129814, "learning_rate": 4.7545762619418126e-08, "loss": 0.0007, "step": 234900 }, { "epoch": 1.921004211473198, "grad_norm": 0.0028266652952879667, "learning_rate": 4.7447632575227106e-08, "loss": 0.0007, "step": 234910 }, { "epoch": 1.921085987651797, "grad_norm": 0.00944191962480545, "learning_rate": 4.734960341855388e-08, "loss": 0.0008, "step": 234920 }, { "epoch": 1.9211677638303963, "grad_norm": 0.0065940930508077145, "learning_rate": 4.7251675151394617e-08, "loss": 0.0005, "step": 234930 }, { "epoch": 1.9212495400089953, "grad_norm": 0.10565601289272308, "learning_rate": 4.7153847775744385e-08, "loss": 0.0007, "step": 234940 }, { "epoch": 1.9213313161875947, "grad_norm": 0.0027457261458039284, "learning_rate": 4.7056121293596605e-08, "loss": 0.0006, "step": 234950 }, { "epoch": 1.9214130923661936, "grad_norm": 0.11536448448896408, "learning_rate": 4.695849570694133e-08, "loss": 0.001, "step": 234960 }, { "epoch": 1.921494868544793, "grad_norm": 0.020614920184016228, "learning_rate": 4.686097101776754e-08, "loss": 0.0003, "step": 234970 }, { "epoch": 1.921576644723392, "grad_norm": 0.04986368119716644, "learning_rate": 4.676354722806198e-08, "loss": 0.0004, "step": 234980 }, { "epoch": 1.9216584209019913, "grad_norm": 0.011271798983216286, "learning_rate": 4.6666224339809165e-08, "loss": 0.0007, "step": 234990 }, { "epoch": 1.9217401970805903, "grad_norm": 0.014847109094262123, "learning_rate": 4.656900235499196e-08, "loss": 0.0007, "step": 235000 }, { "epoch": 1.9218219732591897, "grad_norm": 0.009403204545378685, "learning_rate": 4.647188127558988e-08, "loss": 0.0017, "step": 235010 }, { "epoch": 1.9219037494377886, "grad_norm": 0.025776026770472527, "learning_rate": 4.637486110358247e-08, "loss": 0.0026, "step": 235020 }, { "epoch": 1.921985525616388, "grad_norm": 0.024163227528333664, "learning_rate": 4.6277941840945365e-08, "loss": 0.0004, "step": 235030 }, { "epoch": 1.922067301794987, "grad_norm": 0.018533769994974136, "learning_rate": 4.6181123489653644e-08, "loss": 0.0006, "step": 235040 }, { "epoch": 1.9221490779735864, "grad_norm": 0.019627492874860764, "learning_rate": 4.608440605167852e-08, "loss": 0.0004, "step": 235050 }, { "epoch": 1.9222308541521853, "grad_norm": 0.03598525747656822, "learning_rate": 4.5987789528990634e-08, "loss": 0.0008, "step": 235060 }, { "epoch": 1.9223126303307847, "grad_norm": 0.030228430405259132, "learning_rate": 4.589127392355897e-08, "loss": 0.0003, "step": 235070 }, { "epoch": 1.9223944065093839, "grad_norm": 0.008549094200134277, "learning_rate": 4.579485923734861e-08, "loss": 0.0007, "step": 235080 }, { "epoch": 1.922476182687983, "grad_norm": 0.0018468463094905019, "learning_rate": 4.569854547232411e-08, "loss": 0.0003, "step": 235090 }, { "epoch": 1.9225579588665822, "grad_norm": 0.056394949555397034, "learning_rate": 4.560233263044722e-08, "loss": 0.0007, "step": 235100 }, { "epoch": 1.9226397350451814, "grad_norm": 0.04326846823096275, "learning_rate": 4.55062207136775e-08, "loss": 0.0006, "step": 235110 }, { "epoch": 1.9227215112237805, "grad_norm": 0.01943255588412285, "learning_rate": 4.541020972397392e-08, "loss": 0.0004, "step": 235120 }, { "epoch": 1.9228032874023797, "grad_norm": 0.012706889770925045, "learning_rate": 4.531429966329159e-08, "loss": 0.0008, "step": 235130 }, { "epoch": 1.9228850635809789, "grad_norm": 0.00657218461856246, "learning_rate": 4.521849053358451e-08, "loss": 0.0004, "step": 235140 }, { "epoch": 1.922966839759578, "grad_norm": 0.02252722904086113, "learning_rate": 4.5122782336803894e-08, "loss": 0.0005, "step": 235150 }, { "epoch": 1.9230486159381772, "grad_norm": 0.052149318158626556, "learning_rate": 4.5027175074899844e-08, "loss": 0.0006, "step": 235160 }, { "epoch": 1.9231303921167764, "grad_norm": 0.028857531026005745, "learning_rate": 4.4931668749819136e-08, "loss": 0.0002, "step": 235170 }, { "epoch": 1.9232121682953756, "grad_norm": 0.00352464709430933, "learning_rate": 4.4836263363509106e-08, "loss": 0.0008, "step": 235180 }, { "epoch": 1.9232939444739747, "grad_norm": 0.030554702505469322, "learning_rate": 4.474095891791152e-08, "loss": 0.0019, "step": 235190 }, { "epoch": 1.923375720652574, "grad_norm": 0.06141649931669235, "learning_rate": 4.464575541496874e-08, "loss": 0.0026, "step": 235200 }, { "epoch": 1.923457496831173, "grad_norm": 0.018300039693713188, "learning_rate": 4.455065285661919e-08, "loss": 0.0004, "step": 235210 }, { "epoch": 1.9235392730097722, "grad_norm": 0.07253977656364441, "learning_rate": 4.445565124480133e-08, "loss": 0.0015, "step": 235220 }, { "epoch": 1.9236210491883714, "grad_norm": 0.041849132627248764, "learning_rate": 4.436075058145028e-08, "loss": 0.0004, "step": 235230 }, { "epoch": 1.9237028253669706, "grad_norm": 0.02304977923631668, "learning_rate": 4.426595086849839e-08, "loss": 0.0005, "step": 235240 }, { "epoch": 1.9237846015455697, "grad_norm": 0.009744031354784966, "learning_rate": 4.417125210787743e-08, "loss": 0.0003, "step": 235250 }, { "epoch": 1.923866377724169, "grad_norm": 0.037782520055770874, "learning_rate": 4.407665430151642e-08, "loss": 0.0005, "step": 235260 }, { "epoch": 1.923948153902768, "grad_norm": 0.0028430779930204153, "learning_rate": 4.3982157451342714e-08, "loss": 0.0005, "step": 235270 }, { "epoch": 1.9240299300813672, "grad_norm": 0.03404155746102333, "learning_rate": 4.388776155928032e-08, "loss": 0.0006, "step": 235280 }, { "epoch": 1.9241117062599664, "grad_norm": 0.06990764290094376, "learning_rate": 4.37934666272527e-08, "loss": 0.0004, "step": 235290 }, { "epoch": 1.9241934824385658, "grad_norm": 0.026404574513435364, "learning_rate": 4.36992726571811e-08, "loss": 0.0009, "step": 235300 }, { "epoch": 1.9242752586171648, "grad_norm": 0.0037849806249141693, "learning_rate": 4.360517965098399e-08, "loss": 0.0005, "step": 235310 }, { "epoch": 1.9243570347957641, "grad_norm": 0.012841098010540009, "learning_rate": 4.3511187610578156e-08, "loss": 0.0005, "step": 235320 }, { "epoch": 1.924438810974363, "grad_norm": 0.0420248880982399, "learning_rate": 4.341729653787818e-08, "loss": 0.0008, "step": 235330 }, { "epoch": 1.9245205871529625, "grad_norm": 0.061919018626213074, "learning_rate": 4.332350643479644e-08, "loss": 0.0005, "step": 235340 }, { "epoch": 1.9246023633315614, "grad_norm": 0.006728841457515955, "learning_rate": 4.322981730324471e-08, "loss": 0.0003, "step": 235350 }, { "epoch": 1.9246841395101608, "grad_norm": 0.018635548651218414, "learning_rate": 4.3136229145129825e-08, "loss": 0.0004, "step": 235360 }, { "epoch": 1.9247659156887598, "grad_norm": 0.07032995671033859, "learning_rate": 4.304274196235969e-08, "loss": 0.0007, "step": 235370 }, { "epoch": 1.9248476918673592, "grad_norm": 0.00041630782652646303, "learning_rate": 4.294935575683778e-08, "loss": 0.0061, "step": 235380 }, { "epoch": 1.924929468045958, "grad_norm": 0.03519967570900917, "learning_rate": 4.285607053046703e-08, "loss": 0.0007, "step": 235390 }, { "epoch": 1.9250112442245575, "grad_norm": 0.03607053682208061, "learning_rate": 4.2762886285147574e-08, "loss": 0.0026, "step": 235400 }, { "epoch": 1.9250930204031564, "grad_norm": 0.016941122710704803, "learning_rate": 4.266980302277679e-08, "loss": 0.0003, "step": 235410 }, { "epoch": 1.9251747965817558, "grad_norm": 0.009298243559896946, "learning_rate": 4.25768207452526e-08, "loss": 0.0005, "step": 235420 }, { "epoch": 1.9252565727603548, "grad_norm": 0.020289603620767593, "learning_rate": 4.2483939454467384e-08, "loss": 0.0011, "step": 235430 }, { "epoch": 1.9253383489389542, "grad_norm": 0.10917177051305771, "learning_rate": 4.239115915231462e-08, "loss": 0.0006, "step": 235440 }, { "epoch": 1.9254201251175531, "grad_norm": 0.045217499136924744, "learning_rate": 4.229847984068336e-08, "loss": 0.0005, "step": 235450 }, { "epoch": 1.9255019012961525, "grad_norm": 0.0149790458381176, "learning_rate": 4.220590152146153e-08, "loss": 0.0007, "step": 235460 }, { "epoch": 1.9255836774747515, "grad_norm": 0.05976365879178047, "learning_rate": 4.211342419653597e-08, "loss": 0.0007, "step": 235470 }, { "epoch": 1.9256654536533508, "grad_norm": 0.00316361035220325, "learning_rate": 4.202104786779015e-08, "loss": 0.0002, "step": 235480 }, { "epoch": 1.9257472298319498, "grad_norm": 0.017987122759222984, "learning_rate": 4.192877253710537e-08, "loss": 0.0004, "step": 235490 }, { "epoch": 1.9258290060105492, "grad_norm": 0.024775004014372826, "learning_rate": 4.183659820636121e-08, "loss": 0.0003, "step": 235500 }, { "epoch": 1.9259107821891484, "grad_norm": 0.0329294390976429, "learning_rate": 4.174452487743619e-08, "loss": 0.0004, "step": 235510 }, { "epoch": 1.9259925583677475, "grad_norm": 0.1464165300130844, "learning_rate": 4.165255255220546e-08, "loss": 0.0009, "step": 235520 }, { "epoch": 1.9260743345463467, "grad_norm": 0.003963177558034658, "learning_rate": 4.1560681232541975e-08, "loss": 0.0002, "step": 235530 }, { "epoch": 1.9261561107249459, "grad_norm": 0.0855589509010315, "learning_rate": 4.1468910920318685e-08, "loss": 0.0008, "step": 235540 }, { "epoch": 1.926237886903545, "grad_norm": 0.002365036401897669, "learning_rate": 4.137724161740353e-08, "loss": 0.0004, "step": 235550 }, { "epoch": 1.9263196630821442, "grad_norm": 0.05916636809706688, "learning_rate": 4.128567332566502e-08, "loss": 0.0006, "step": 235560 }, { "epoch": 1.9264014392607434, "grad_norm": 0.038229480385780334, "learning_rate": 4.1194206046967776e-08, "loss": 0.0007, "step": 235570 }, { "epoch": 1.9264832154393425, "grad_norm": 0.03670303896069527, "learning_rate": 4.1102839783175305e-08, "loss": 0.0004, "step": 235580 }, { "epoch": 1.9265649916179417, "grad_norm": 0.034488726407289505, "learning_rate": 4.1011574536148344e-08, "loss": 0.0018, "step": 235590 }, { "epoch": 1.9266467677965409, "grad_norm": 0.019355304539203644, "learning_rate": 4.092041030774707e-08, "loss": 0.001, "step": 235600 }, { "epoch": 1.92672854397514, "grad_norm": 0.030161241069436073, "learning_rate": 4.082934709982778e-08, "loss": 0.0006, "step": 235610 }, { "epoch": 1.9268103201537392, "grad_norm": 0.006148195825517178, "learning_rate": 4.073838491424564e-08, "loss": 0.0006, "step": 235620 }, { "epoch": 1.9268920963323384, "grad_norm": 0.011979660950601101, "learning_rate": 4.064752375285419e-08, "loss": 0.0005, "step": 235630 }, { "epoch": 1.9269738725109375, "grad_norm": 0.10526278614997864, "learning_rate": 4.05567636175036e-08, "loss": 0.0005, "step": 235640 }, { "epoch": 1.9270556486895367, "grad_norm": 0.007895510643720627, "learning_rate": 4.0466104510042404e-08, "loss": 0.0025, "step": 235650 }, { "epoch": 1.9271374248681359, "grad_norm": 0.039338745176792145, "learning_rate": 4.037554643231856e-08, "loss": 0.0003, "step": 235660 }, { "epoch": 1.927219201046735, "grad_norm": 0.01238261442631483, "learning_rate": 4.0285089386175594e-08, "loss": 0.0005, "step": 235670 }, { "epoch": 1.9273009772253342, "grad_norm": 0.00948395300656557, "learning_rate": 4.019473337345758e-08, "loss": 0.0005, "step": 235680 }, { "epoch": 1.9273827534039334, "grad_norm": 0.02054748311638832, "learning_rate": 4.010447839600362e-08, "loss": 0.001, "step": 235690 }, { "epoch": 1.9274645295825326, "grad_norm": 0.008960805833339691, "learning_rate": 4.00143244556539e-08, "loss": 0.0004, "step": 235700 }, { "epoch": 1.9275463057611317, "grad_norm": 0.03713776916265488, "learning_rate": 3.992427155424361e-08, "loss": 0.0006, "step": 235710 }, { "epoch": 1.927628081939731, "grad_norm": 0.06599225103855133, "learning_rate": 3.9834319693607405e-08, "loss": 0.0005, "step": 235720 }, { "epoch": 1.9277098581183303, "grad_norm": 0.046950917690992355, "learning_rate": 3.9744468875578254e-08, "loss": 0.0006, "step": 235730 }, { "epoch": 1.9277916342969292, "grad_norm": 0.005819648038595915, "learning_rate": 3.965471910198637e-08, "loss": 0.0011, "step": 235740 }, { "epoch": 1.9278734104755286, "grad_norm": 0.04400147870182991, "learning_rate": 3.956507037465973e-08, "loss": 0.0003, "step": 235750 }, { "epoch": 1.9279551866541276, "grad_norm": 0.08413968235254288, "learning_rate": 3.947552269542465e-08, "loss": 0.0009, "step": 235760 }, { "epoch": 1.928036962832727, "grad_norm": 0.005197996739298105, "learning_rate": 3.938607606610467e-08, "loss": 0.0006, "step": 235770 }, { "epoch": 1.928118739011326, "grad_norm": 0.0021892490331083536, "learning_rate": 3.929673048852334e-08, "loss": 0.0008, "step": 235780 }, { "epoch": 1.9282005151899253, "grad_norm": 0.05298361927270889, "learning_rate": 3.920748596449975e-08, "loss": 0.0008, "step": 235790 }, { "epoch": 1.9282822913685242, "grad_norm": 0.03729299083352089, "learning_rate": 3.911834249585134e-08, "loss": 0.0005, "step": 235800 }, { "epoch": 1.9283640675471236, "grad_norm": 0.015267828479409218, "learning_rate": 3.902930008439498e-08, "loss": 0.0005, "step": 235810 }, { "epoch": 1.9284458437257226, "grad_norm": 0.001055655418895185, "learning_rate": 3.894035873194479e-08, "loss": 0.0013, "step": 235820 }, { "epoch": 1.928527619904322, "grad_norm": 0.04945666715502739, "learning_rate": 3.8851518440311543e-08, "loss": 0.0008, "step": 235830 }, { "epoch": 1.928609396082921, "grad_norm": 0.03970648720860481, "learning_rate": 3.8762779211306e-08, "loss": 0.0019, "step": 235840 }, { "epoch": 1.9286911722615203, "grad_norm": 0.007069101557135582, "learning_rate": 3.867414104673506e-08, "loss": 0.0008, "step": 235850 }, { "epoch": 1.9287729484401193, "grad_norm": 0.0276357289403677, "learning_rate": 3.8585603948404495e-08, "loss": 0.0005, "step": 235860 }, { "epoch": 1.9288547246187187, "grad_norm": 0.023541804403066635, "learning_rate": 3.849716791811786e-08, "loss": 0.0004, "step": 235870 }, { "epoch": 1.9289365007973176, "grad_norm": 0.03544241935014725, "learning_rate": 3.840883295767706e-08, "loss": 0.0013, "step": 235880 }, { "epoch": 1.929018276975917, "grad_norm": 0.026368051767349243, "learning_rate": 3.832059906888119e-08, "loss": 0.0006, "step": 235890 }, { "epoch": 1.929100053154516, "grad_norm": 0.04101662337779999, "learning_rate": 3.8232466253527725e-08, "loss": 0.0004, "step": 235900 }, { "epoch": 1.9291818293331153, "grad_norm": 0.050962746143341064, "learning_rate": 3.814443451341243e-08, "loss": 0.0009, "step": 235910 }, { "epoch": 1.9292636055117143, "grad_norm": 0.0363299697637558, "learning_rate": 3.805650385032833e-08, "loss": 0.0013, "step": 235920 }, { "epoch": 1.9293453816903137, "grad_norm": 0.020065369084477425, "learning_rate": 3.796867426606565e-08, "loss": 0.0007, "step": 235930 }, { "epoch": 1.9294271578689128, "grad_norm": 0.0345991812646389, "learning_rate": 3.788094576241519e-08, "loss": 0.0008, "step": 235940 }, { "epoch": 1.929508934047512, "grad_norm": 0.0016272346256300807, "learning_rate": 3.779331834116273e-08, "loss": 0.0006, "step": 235950 }, { "epoch": 1.9295907102261112, "grad_norm": 0.021004918962717056, "learning_rate": 3.770579200409463e-08, "loss": 0.0005, "step": 235960 }, { "epoch": 1.9296724864047103, "grad_norm": 0.00690129678696394, "learning_rate": 3.761836675299224e-08, "loss": 0.0004, "step": 235970 }, { "epoch": 1.9297542625833095, "grad_norm": 0.007090560160577297, "learning_rate": 3.7531042589638025e-08, "loss": 0.0009, "step": 235980 }, { "epoch": 1.9298360387619087, "grad_norm": 0.06940530985593796, "learning_rate": 3.744381951581e-08, "loss": 0.0008, "step": 235990 }, { "epoch": 1.9299178149405078, "grad_norm": 0.104088693857193, "learning_rate": 3.735669753328508e-08, "loss": 0.0005, "step": 236000 }, { "epoch": 1.929999591119107, "grad_norm": 0.15948781371116638, "learning_rate": 3.726967664383796e-08, "loss": 0.0008, "step": 236010 }, { "epoch": 1.9300813672977062, "grad_norm": 0.05801039934158325, "learning_rate": 3.718275684924111e-08, "loss": 0.0004, "step": 236020 }, { "epoch": 1.9301631434763054, "grad_norm": 0.008089825510978699, "learning_rate": 3.709593815126589e-08, "loss": 0.0007, "step": 236030 }, { "epoch": 1.9302449196549045, "grad_norm": 0.03317566215991974, "learning_rate": 3.700922055168088e-08, "loss": 0.0008, "step": 236040 }, { "epoch": 1.9303266958335037, "grad_norm": 0.016540763899683952, "learning_rate": 3.6922604052251344e-08, "loss": 0.0003, "step": 236050 }, { "epoch": 1.9304084720121029, "grad_norm": 0.08199114352464676, "learning_rate": 3.683608865474308e-08, "loss": 0.0009, "step": 236060 }, { "epoch": 1.930490248190702, "grad_norm": 0.014239238575100899, "learning_rate": 3.674967436091747e-08, "loss": 0.0005, "step": 236070 }, { "epoch": 1.9305720243693012, "grad_norm": 0.06571746617555618, "learning_rate": 3.666336117253588e-08, "loss": 0.0007, "step": 236080 }, { "epoch": 1.9306538005479004, "grad_norm": 0.03011036105453968, "learning_rate": 3.657714909135579e-08, "loss": 0.001, "step": 236090 }, { "epoch": 1.9307355767264995, "grad_norm": 0.02651791460812092, "learning_rate": 3.649103811913412e-08, "loss": 0.0005, "step": 236100 }, { "epoch": 1.9308173529050987, "grad_norm": 0.023175103589892387, "learning_rate": 3.640502825762393e-08, "loss": 0.0011, "step": 236110 }, { "epoch": 1.9308991290836979, "grad_norm": 0.04482708498835564, "learning_rate": 3.6319119508578246e-08, "loss": 0.0009, "step": 236120 }, { "epoch": 1.930980905262297, "grad_norm": 0.07302907109260559, "learning_rate": 3.623331187374679e-08, "loss": 0.0007, "step": 236130 }, { "epoch": 1.9310626814408962, "grad_norm": 0.04347441717982292, "learning_rate": 3.614760535487705e-08, "loss": 0.0006, "step": 236140 }, { "epoch": 1.9311444576194954, "grad_norm": 0.004290338139981031, "learning_rate": 3.6061999953715396e-08, "loss": 0.0007, "step": 236150 }, { "epoch": 1.9312262337980948, "grad_norm": 0.0022068878170102835, "learning_rate": 3.597649567200656e-08, "loss": 0.0005, "step": 236160 }, { "epoch": 1.9313080099766937, "grad_norm": 0.07261345535516739, "learning_rate": 3.589109251149081e-08, "loss": 0.0006, "step": 236170 }, { "epoch": 1.931389786155293, "grad_norm": 0.034907855093479156, "learning_rate": 3.580579047390842e-08, "loss": 0.0005, "step": 236180 }, { "epoch": 1.931471562333892, "grad_norm": 0.040297385305166245, "learning_rate": 3.572058956099744e-08, "loss": 0.0005, "step": 236190 }, { "epoch": 1.9315533385124914, "grad_norm": 0.017686203122138977, "learning_rate": 3.563548977449316e-08, "loss": 0.0006, "step": 236200 }, { "epoch": 1.9316351146910904, "grad_norm": 0.04046610742807388, "learning_rate": 3.555049111612918e-08, "loss": 0.0006, "step": 236210 }, { "epoch": 1.9317168908696898, "grad_norm": 0.015673747286200523, "learning_rate": 3.546559358763746e-08, "loss": 0.0004, "step": 236220 }, { "epoch": 1.9317986670482887, "grad_norm": 0.013115660287439823, "learning_rate": 3.538079719074661e-08, "loss": 0.0004, "step": 236230 }, { "epoch": 1.9318804432268881, "grad_norm": 0.035630811005830765, "learning_rate": 3.529610192718413e-08, "loss": 0.0005, "step": 236240 }, { "epoch": 1.931962219405487, "grad_norm": 0.0044546956196427345, "learning_rate": 3.521150779867588e-08, "loss": 0.0004, "step": 236250 }, { "epoch": 1.9320439955840865, "grad_norm": 0.05817420408129692, "learning_rate": 3.512701480694436e-08, "loss": 0.0007, "step": 236260 }, { "epoch": 1.9321257717626854, "grad_norm": 0.05569295957684517, "learning_rate": 3.504262295371152e-08, "loss": 0.0003, "step": 236270 }, { "epoch": 1.9322075479412848, "grad_norm": 0.045868657529354095, "learning_rate": 3.4958332240696556e-08, "loss": 0.0005, "step": 236280 }, { "epoch": 1.9322893241198837, "grad_norm": 0.038651980459690094, "learning_rate": 3.4874142669615865e-08, "loss": 0.0008, "step": 236290 }, { "epoch": 1.9323711002984831, "grad_norm": 0.025519203394651413, "learning_rate": 3.4790054242184176e-08, "loss": 0.001, "step": 236300 }, { "epoch": 1.932452876477082, "grad_norm": 0.08051098138093948, "learning_rate": 3.470606696011569e-08, "loss": 0.0008, "step": 236310 }, { "epoch": 1.9325346526556815, "grad_norm": 0.008851548656821251, "learning_rate": 3.462218082512015e-08, "loss": 0.0004, "step": 236320 }, { "epoch": 1.9326164288342804, "grad_norm": 0.03179854527115822, "learning_rate": 3.453839583890728e-08, "loss": 0.0004, "step": 236330 }, { "epoch": 1.9326982050128798, "grad_norm": 0.009531686082482338, "learning_rate": 3.445471200318351e-08, "loss": 0.0002, "step": 236340 }, { "epoch": 1.9327799811914788, "grad_norm": 0.04081641137599945, "learning_rate": 3.437112931965303e-08, "loss": 0.0008, "step": 236350 }, { "epoch": 1.9328617573700781, "grad_norm": 0.006616579368710518, "learning_rate": 3.4287647790019476e-08, "loss": 0.0007, "step": 236360 }, { "epoch": 1.9329435335486773, "grad_norm": 0.007313968613743782, "learning_rate": 3.4204267415982594e-08, "loss": 0.0005, "step": 236370 }, { "epoch": 1.9330253097272765, "grad_norm": 0.0552373044192791, "learning_rate": 3.412098819924048e-08, "loss": 0.0003, "step": 236380 }, { "epoch": 1.9331070859058757, "grad_norm": 0.0026119232643395662, "learning_rate": 3.403781014149121e-08, "loss": 0.0004, "step": 236390 }, { "epoch": 1.9331888620844748, "grad_norm": 0.02277655340731144, "learning_rate": 3.395473324442789e-08, "loss": 0.0009, "step": 236400 }, { "epoch": 1.933270638263074, "grad_norm": 0.010791818611323833, "learning_rate": 3.3871757509743586e-08, "loss": 0.0004, "step": 236410 }, { "epoch": 1.9333524144416732, "grad_norm": 0.013777396641671658, "learning_rate": 3.378888293912752e-08, "loss": 0.0006, "step": 236420 }, { "epoch": 1.9334341906202723, "grad_norm": 0.015616833232343197, "learning_rate": 3.370610953426945e-08, "loss": 0.0004, "step": 236430 }, { "epoch": 1.9335159667988715, "grad_norm": 0.03658819571137428, "learning_rate": 3.362343729685413e-08, "loss": 0.0006, "step": 236440 }, { "epoch": 1.9335977429774707, "grad_norm": 0.016666019335389137, "learning_rate": 3.354086622856689e-08, "loss": 0.0011, "step": 236450 }, { "epoch": 1.9336795191560698, "grad_norm": 0.015889231115579605, "learning_rate": 3.3458396331088604e-08, "loss": 0.0019, "step": 236460 }, { "epoch": 1.933761295334669, "grad_norm": 0.0836460292339325, "learning_rate": 3.337602760610015e-08, "loss": 0.0008, "step": 236470 }, { "epoch": 1.9338430715132682, "grad_norm": 0.06156093254685402, "learning_rate": 3.329376005527907e-08, "loss": 0.0011, "step": 236480 }, { "epoch": 1.9339248476918673, "grad_norm": 0.009026343934237957, "learning_rate": 3.321159368030125e-08, "loss": 0.0004, "step": 236490 }, { "epoch": 1.9340066238704665, "grad_norm": 0.016094477847218513, "learning_rate": 3.312952848283979e-08, "loss": 0.0004, "step": 236500 }, { "epoch": 1.9340884000490657, "grad_norm": 0.0012738415971398354, "learning_rate": 3.3047564464567805e-08, "loss": 0.0008, "step": 236510 }, { "epoch": 1.9341701762276649, "grad_norm": 0.009836362674832344, "learning_rate": 3.296570162715451e-08, "loss": 0.0008, "step": 236520 }, { "epoch": 1.934251952406264, "grad_norm": 0.049989596009254456, "learning_rate": 3.288393997226691e-08, "loss": 0.0006, "step": 236530 }, { "epoch": 1.9343337285848632, "grad_norm": 0.01775163598358631, "learning_rate": 3.280227950157033e-08, "loss": 0.0004, "step": 236540 }, { "epoch": 1.9344155047634624, "grad_norm": 0.032032471150159836, "learning_rate": 3.272072021672956e-08, "loss": 0.001, "step": 236550 }, { "epoch": 1.9344972809420615, "grad_norm": 0.07402941584587097, "learning_rate": 3.263926211940549e-08, "loss": 0.0006, "step": 236560 }, { "epoch": 1.934579057120661, "grad_norm": 0.001398941851221025, "learning_rate": 3.255790521125679e-08, "loss": 0.0005, "step": 236570 }, { "epoch": 1.9346608332992599, "grad_norm": 0.023657869547605515, "learning_rate": 3.247664949394158e-08, "loss": 0.0007, "step": 236580 }, { "epoch": 1.9347426094778593, "grad_norm": 0.04681563377380371, "learning_rate": 3.23954949691152e-08, "loss": 0.0005, "step": 236590 }, { "epoch": 1.9348243856564582, "grad_norm": 0.012406961061060429, "learning_rate": 3.2314441638430225e-08, "loss": 0.0003, "step": 236600 }, { "epoch": 1.9349061618350576, "grad_norm": 0.03218991681933403, "learning_rate": 3.223348950353755e-08, "loss": 0.0022, "step": 236610 }, { "epoch": 1.9349879380136565, "grad_norm": 0.002550736302509904, "learning_rate": 3.215263856608697e-08, "loss": 0.0006, "step": 236620 }, { "epoch": 1.935069714192256, "grad_norm": 0.007321800570935011, "learning_rate": 3.2071888827725495e-08, "loss": 0.0008, "step": 236630 }, { "epoch": 1.9351514903708549, "grad_norm": 0.004687618929892778, "learning_rate": 3.199124029009737e-08, "loss": 0.0002, "step": 236640 }, { "epoch": 1.9352332665494543, "grad_norm": 0.027833642438054085, "learning_rate": 3.191069295484628e-08, "loss": 0.0004, "step": 236650 }, { "epoch": 1.9353150427280532, "grad_norm": 0.06303715705871582, "learning_rate": 3.183024682361202e-08, "loss": 0.0005, "step": 236660 }, { "epoch": 1.9353968189066526, "grad_norm": 0.017943212762475014, "learning_rate": 3.17499018980344e-08, "loss": 0.0007, "step": 236670 }, { "epoch": 1.9354785950852516, "grad_norm": 0.023439180105924606, "learning_rate": 3.166965817974932e-08, "loss": 0.0008, "step": 236680 }, { "epoch": 1.935560371263851, "grad_norm": 0.056043654680252075, "learning_rate": 3.158951567039215e-08, "loss": 0.0011, "step": 236690 }, { "epoch": 1.93564214744245, "grad_norm": 0.029725169762969017, "learning_rate": 3.150947437159546e-08, "loss": 0.0005, "step": 236700 }, { "epoch": 1.9357239236210493, "grad_norm": 0.11765771359205246, "learning_rate": 3.142953428498852e-08, "loss": 0.0008, "step": 236710 }, { "epoch": 1.9358056997996482, "grad_norm": 0.0014989878982305527, "learning_rate": 3.134969541220112e-08, "loss": 0.0003, "step": 236720 }, { "epoch": 1.9358874759782476, "grad_norm": 0.016511693596839905, "learning_rate": 3.126995775485919e-08, "loss": 0.0006, "step": 236730 }, { "epoch": 1.9359692521568466, "grad_norm": 0.008534573949873447, "learning_rate": 3.1190321314586994e-08, "loss": 0.0011, "step": 236740 }, { "epoch": 1.936051028335446, "grad_norm": 0.028429687023162842, "learning_rate": 3.111078609300655e-08, "loss": 0.0003, "step": 236750 }, { "epoch": 1.936132804514045, "grad_norm": 0.057704053819179535, "learning_rate": 3.103135209173824e-08, "loss": 0.0005, "step": 236760 }, { "epoch": 1.9362145806926443, "grad_norm": 0.010031329467892647, "learning_rate": 3.0952019312400773e-08, "loss": 0.0004, "step": 236770 }, { "epoch": 1.9362963568712432, "grad_norm": 0.07149174064397812, "learning_rate": 3.087278775660896e-08, "loss": 0.0006, "step": 236780 }, { "epoch": 1.9363781330498426, "grad_norm": 0.0481644943356514, "learning_rate": 3.0793657425978174e-08, "loss": 0.0004, "step": 236790 }, { "epoch": 1.9364599092284418, "grad_norm": 0.03210125491023064, "learning_rate": 3.071462832211936e-08, "loss": 0.0005, "step": 236800 }, { "epoch": 1.936541685407041, "grad_norm": 0.039002206176519394, "learning_rate": 3.063570044664343e-08, "loss": 0.0003, "step": 236810 }, { "epoch": 1.9366234615856401, "grad_norm": 0.03269532322883606, "learning_rate": 3.0556873801156904e-08, "loss": 0.0006, "step": 236820 }, { "epoch": 1.9367052377642393, "grad_norm": 0.05053051561117172, "learning_rate": 3.0478148387266816e-08, "loss": 0.0006, "step": 236830 }, { "epoch": 1.9367870139428385, "grad_norm": 0.023232705891132355, "learning_rate": 3.039952420657577e-08, "loss": 0.0005, "step": 236840 }, { "epoch": 1.9368687901214376, "grad_norm": 0.011293390765786171, "learning_rate": 3.0321001260686375e-08, "loss": 0.0003, "step": 236850 }, { "epoch": 1.9369505663000368, "grad_norm": 0.0030859478283673525, "learning_rate": 3.024257955119736e-08, "loss": 0.0003, "step": 236860 }, { "epoch": 1.937032342478636, "grad_norm": 0.020757749676704407, "learning_rate": 3.0164259079706883e-08, "loss": 0.0004, "step": 236870 }, { "epoch": 1.9371141186572352, "grad_norm": 0.012116547673940659, "learning_rate": 3.008603984780978e-08, "loss": 0.0004, "step": 236880 }, { "epoch": 1.9371958948358343, "grad_norm": 0.0034599711652845144, "learning_rate": 3.000792185710033e-08, "loss": 0.0003, "step": 236890 }, { "epoch": 1.9372776710144335, "grad_norm": 0.04148998111486435, "learning_rate": 2.992990510916893e-08, "loss": 0.0006, "step": 236900 }, { "epoch": 1.9373594471930327, "grad_norm": 0.014582949690520763, "learning_rate": 2.985198960560487e-08, "loss": 0.0003, "step": 236910 }, { "epoch": 1.9374412233716318, "grad_norm": 0.09911035746335983, "learning_rate": 2.9774175347996315e-08, "loss": 0.0009, "step": 236920 }, { "epoch": 1.937522999550231, "grad_norm": 0.016176721081137657, "learning_rate": 2.969646233792811e-08, "loss": 0.0006, "step": 236930 }, { "epoch": 1.9376047757288302, "grad_norm": 0.031664855778217316, "learning_rate": 2.9618850576982327e-08, "loss": 0.0005, "step": 236940 }, { "epoch": 1.9376865519074293, "grad_norm": 0.03511473909020424, "learning_rate": 2.954134006674103e-08, "loss": 0.0005, "step": 236950 }, { "epoch": 1.9377683280860285, "grad_norm": 0.03844297304749489, "learning_rate": 2.946393080878296e-08, "loss": 0.0003, "step": 236960 }, { "epoch": 1.9378501042646277, "grad_norm": 0.09119462966918945, "learning_rate": 2.9386622804684627e-08, "loss": 0.0011, "step": 236970 }, { "epoch": 1.9379318804432268, "grad_norm": 0.011060791090130806, "learning_rate": 2.930941605602089e-08, "loss": 0.0011, "step": 236980 }, { "epoch": 1.938013656621826, "grad_norm": 0.028339765965938568, "learning_rate": 2.923231056436493e-08, "loss": 0.0004, "step": 236990 }, { "epoch": 1.9380954328004254, "grad_norm": 0.04673677682876587, "learning_rate": 2.9155306331287715e-08, "loss": 0.0005, "step": 237000 }, { "epoch": 1.9381772089790243, "grad_norm": 0.06075824052095413, "learning_rate": 2.9078403358356876e-08, "loss": 0.0007, "step": 237010 }, { "epoch": 1.9382589851576237, "grad_norm": 0.0018315589986741543, "learning_rate": 2.90016016471395e-08, "loss": 0.0002, "step": 237020 }, { "epoch": 1.9383407613362227, "grad_norm": 0.0005634010885842144, "learning_rate": 2.8924901199200438e-08, "loss": 0.0004, "step": 237030 }, { "epoch": 1.938422537514822, "grad_norm": 0.0409998893737793, "learning_rate": 2.884830201610178e-08, "loss": 0.0006, "step": 237040 }, { "epoch": 1.938504313693421, "grad_norm": 0.010589341633021832, "learning_rate": 2.8771804099403388e-08, "loss": 0.0003, "step": 237050 }, { "epoch": 1.9385860898720204, "grad_norm": 0.017003009095788002, "learning_rate": 2.8695407450665124e-08, "loss": 0.0005, "step": 237060 }, { "epoch": 1.9386678660506194, "grad_norm": 0.14505308866500854, "learning_rate": 2.8619112071441858e-08, "loss": 0.0005, "step": 237070 }, { "epoch": 1.9387496422292188, "grad_norm": 0.021431252360343933, "learning_rate": 2.8542917963287898e-08, "loss": 0.0007, "step": 237080 }, { "epoch": 1.9388314184078177, "grad_norm": 0.010869788005948067, "learning_rate": 2.846682512775589e-08, "loss": 0.0009, "step": 237090 }, { "epoch": 1.938913194586417, "grad_norm": 0.0417049415409565, "learning_rate": 2.8390833566395714e-08, "loss": 0.0007, "step": 237100 }, { "epoch": 1.938994970765016, "grad_norm": 0.12085860967636108, "learning_rate": 2.831494328075557e-08, "loss": 0.0012, "step": 237110 }, { "epoch": 1.9390767469436154, "grad_norm": 0.002779413014650345, "learning_rate": 2.8239154272381442e-08, "loss": 0.0005, "step": 237120 }, { "epoch": 1.9391585231222144, "grad_norm": 0.018269725143909454, "learning_rate": 2.8163466542816543e-08, "loss": 0.0004, "step": 237130 }, { "epoch": 1.9392402993008138, "grad_norm": 0.09721864014863968, "learning_rate": 2.8087880093602972e-08, "loss": 0.0012, "step": 237140 }, { "epoch": 1.9393220754794127, "grad_norm": 0.05691888555884361, "learning_rate": 2.801239492628116e-08, "loss": 0.0004, "step": 237150 }, { "epoch": 1.939403851658012, "grad_norm": 0.11831281334161758, "learning_rate": 2.793701104238822e-08, "loss": 0.0006, "step": 237160 }, { "epoch": 1.939485627836611, "grad_norm": 0.01490282453596592, "learning_rate": 2.786172844345958e-08, "loss": 0.0003, "step": 237170 }, { "epoch": 1.9395674040152104, "grad_norm": 0.005056506022810936, "learning_rate": 2.7786547131029572e-08, "loss": 0.0011, "step": 237180 }, { "epoch": 1.9396491801938094, "grad_norm": 0.044967081397771835, "learning_rate": 2.7711467106628642e-08, "loss": 0.0007, "step": 237190 }, { "epoch": 1.9397309563724088, "grad_norm": 0.006738392636179924, "learning_rate": 2.763648837178723e-08, "loss": 0.0005, "step": 237200 }, { "epoch": 1.939812732551008, "grad_norm": 0.04783628508448601, "learning_rate": 2.7561610928032445e-08, "loss": 0.0004, "step": 237210 }, { "epoch": 1.9398945087296071, "grad_norm": 0.03375253081321716, "learning_rate": 2.7486834776888627e-08, "loss": 0.0005, "step": 237220 }, { "epoch": 1.9399762849082063, "grad_norm": 0.010097443126142025, "learning_rate": 2.7412159919880665e-08, "loss": 0.0003, "step": 237230 }, { "epoch": 1.9400580610868055, "grad_norm": 0.009772725403308868, "learning_rate": 2.7337586358528456e-08, "loss": 0.0008, "step": 237240 }, { "epoch": 1.9401398372654046, "grad_norm": 0.015223074704408646, "learning_rate": 2.7263114094351894e-08, "loss": 0.0006, "step": 237250 }, { "epoch": 1.9402216134440038, "grad_norm": 0.00838236603885889, "learning_rate": 2.7188743128867547e-08, "loss": 0.0008, "step": 237260 }, { "epoch": 1.940303389622603, "grad_norm": 0.04200766980648041, "learning_rate": 2.7114473463590306e-08, "loss": 0.0012, "step": 237270 }, { "epoch": 1.9403851658012021, "grad_norm": 0.025623055174946785, "learning_rate": 2.704030510003397e-08, "loss": 0.0011, "step": 237280 }, { "epoch": 1.9404669419798013, "grad_norm": 0.014084897935390472, "learning_rate": 2.6966238039708992e-08, "loss": 0.0011, "step": 237290 }, { "epoch": 1.9405487181584005, "grad_norm": 0.03380858525633812, "learning_rate": 2.689227228412361e-08, "loss": 0.0006, "step": 237300 }, { "epoch": 1.9406304943369996, "grad_norm": 0.10536523908376694, "learning_rate": 2.6818407834784953e-08, "loss": 0.0012, "step": 237310 }, { "epoch": 1.9407122705155988, "grad_norm": 0.008913317695260048, "learning_rate": 2.6744644693197928e-08, "loss": 0.0013, "step": 237320 }, { "epoch": 1.940794046694198, "grad_norm": 0.09374343603849411, "learning_rate": 2.667098286086467e-08, "loss": 0.0007, "step": 237330 }, { "epoch": 1.9408758228727971, "grad_norm": 0.03233664110302925, "learning_rate": 2.6597422339286193e-08, "loss": 0.0006, "step": 237340 }, { "epoch": 1.9409575990513963, "grad_norm": 0.012813607230782509, "learning_rate": 2.652396312996075e-08, "loss": 0.0018, "step": 237350 }, { "epoch": 1.9410393752299955, "grad_norm": 0.04499374330043793, "learning_rate": 2.6450605234384918e-08, "loss": 0.0005, "step": 237360 }, { "epoch": 1.9411211514085946, "grad_norm": 0.013617591932415962, "learning_rate": 2.6377348654053057e-08, "loss": 0.0002, "step": 237370 }, { "epoch": 1.9412029275871938, "grad_norm": 0.020872173830866814, "learning_rate": 2.630419339045731e-08, "loss": 0.0005, "step": 237380 }, { "epoch": 1.941284703765793, "grad_norm": 0.0026982775889337063, "learning_rate": 2.6231139445087593e-08, "loss": 0.0004, "step": 237390 }, { "epoch": 1.9413664799443922, "grad_norm": 0.0034900715108960867, "learning_rate": 2.6158186819433273e-08, "loss": 0.0003, "step": 237400 }, { "epoch": 1.9414482561229913, "grad_norm": 0.005460643675178289, "learning_rate": 2.6085335514978717e-08, "loss": 0.0005, "step": 237410 }, { "epoch": 1.9415300323015905, "grad_norm": 0.008633865974843502, "learning_rate": 2.60125855332094e-08, "loss": 0.0006, "step": 237420 }, { "epoch": 1.9416118084801899, "grad_norm": 0.02501145377755165, "learning_rate": 2.5939936875606918e-08, "loss": 0.0005, "step": 237430 }, { "epoch": 1.9416935846587888, "grad_norm": 0.016481764614582062, "learning_rate": 2.5867389543650645e-08, "loss": 0.0009, "step": 237440 }, { "epoch": 1.9417753608373882, "grad_norm": 0.004575670231133699, "learning_rate": 2.5794943538819395e-08, "loss": 0.0001, "step": 237450 }, { "epoch": 1.9418571370159872, "grad_norm": 0.008550954982638359, "learning_rate": 2.5722598862587545e-08, "loss": 0.0006, "step": 237460 }, { "epoch": 1.9419389131945866, "grad_norm": 0.02502419427037239, "learning_rate": 2.5650355516430024e-08, "loss": 0.0005, "step": 237470 }, { "epoch": 1.9420206893731855, "grad_norm": 0.015952402725815773, "learning_rate": 2.557821350181844e-08, "loss": 0.0003, "step": 237480 }, { "epoch": 1.942102465551785, "grad_norm": 0.00866776891052723, "learning_rate": 2.5506172820221607e-08, "loss": 0.0005, "step": 237490 }, { "epoch": 1.9421842417303838, "grad_norm": 0.007748407777398825, "learning_rate": 2.5434233473107807e-08, "loss": 0.0003, "step": 237500 }, { "epoch": 1.9422660179089832, "grad_norm": 0.01991913840174675, "learning_rate": 2.536239546194197e-08, "loss": 0.0018, "step": 237510 }, { "epoch": 1.9423477940875822, "grad_norm": 0.013279557228088379, "learning_rate": 2.529065878818793e-08, "loss": 0.0004, "step": 237520 }, { "epoch": 1.9424295702661816, "grad_norm": 0.019495731219649315, "learning_rate": 2.5219023453306735e-08, "loss": 0.0004, "step": 237530 }, { "epoch": 1.9425113464447805, "grad_norm": 0.04042946919798851, "learning_rate": 2.514748945875778e-08, "loss": 0.0008, "step": 237540 }, { "epoch": 1.94259312262338, "grad_norm": 0.10362565517425537, "learning_rate": 2.5076056805998228e-08, "loss": 0.0005, "step": 237550 }, { "epoch": 1.9426748988019789, "grad_norm": 0.08130700141191483, "learning_rate": 2.5004725496483028e-08, "loss": 0.0008, "step": 237560 }, { "epoch": 1.9427566749805782, "grad_norm": 0.06746621429920197, "learning_rate": 2.4933495531665463e-08, "loss": 0.0005, "step": 237570 }, { "epoch": 1.9428384511591772, "grad_norm": 0.04456634819507599, "learning_rate": 2.4862366912996593e-08, "loss": 0.0005, "step": 237580 }, { "epoch": 1.9429202273377766, "grad_norm": 0.08215514570474625, "learning_rate": 2.479133964192526e-08, "loss": 0.0003, "step": 237590 }, { "epoch": 1.9430020035163755, "grad_norm": 0.0008229620289057493, "learning_rate": 2.472041371989864e-08, "loss": 0.0005, "step": 237600 }, { "epoch": 1.943083779694975, "grad_norm": 0.056632183492183685, "learning_rate": 2.464958914836113e-08, "loss": 0.0005, "step": 237610 }, { "epoch": 1.9431655558735739, "grad_norm": 0.056890446692705154, "learning_rate": 2.4578865928756024e-08, "loss": 0.0008, "step": 237620 }, { "epoch": 1.9432473320521733, "grad_norm": 0.009214069694280624, "learning_rate": 2.4508244062522723e-08, "loss": 0.0005, "step": 237630 }, { "epoch": 1.9433291082307724, "grad_norm": 0.003735598176717758, "learning_rate": 2.4437723551101746e-08, "loss": 0.0004, "step": 237640 }, { "epoch": 1.9434108844093716, "grad_norm": 0.0023826665710657835, "learning_rate": 2.4367304395928048e-08, "loss": 0.0004, "step": 237650 }, { "epoch": 1.9434926605879708, "grad_norm": 0.06802642345428467, "learning_rate": 2.4296986598437712e-08, "loss": 0.0005, "step": 237660 }, { "epoch": 1.94357443676657, "grad_norm": 0.0589752234518528, "learning_rate": 2.4226770160061254e-08, "loss": 0.0006, "step": 237670 }, { "epoch": 1.943656212945169, "grad_norm": 0.022611873224377632, "learning_rate": 2.4156655082230308e-08, "loss": 0.0005, "step": 237680 }, { "epoch": 1.9437379891237683, "grad_norm": 0.02457376755774021, "learning_rate": 2.408664136637262e-08, "loss": 0.0004, "step": 237690 }, { "epoch": 1.9438197653023674, "grad_norm": 0.036800120025873184, "learning_rate": 2.401672901391483e-08, "loss": 0.0004, "step": 237700 }, { "epoch": 1.9439015414809666, "grad_norm": 0.028210531920194626, "learning_rate": 2.3946918026280795e-08, "loss": 0.0006, "step": 237710 }, { "epoch": 1.9439833176595658, "grad_norm": 0.007181912660598755, "learning_rate": 2.387720840489327e-08, "loss": 0.0007, "step": 237720 }, { "epoch": 1.944065093838165, "grad_norm": 0.04439970478415489, "learning_rate": 2.3807600151171673e-08, "loss": 0.0006, "step": 237730 }, { "epoch": 1.9441468700167641, "grad_norm": 0.025331273674964905, "learning_rate": 2.3738093266533757e-08, "loss": 0.0003, "step": 237740 }, { "epoch": 1.9442286461953633, "grad_norm": 0.05990329384803772, "learning_rate": 2.3668687752395615e-08, "loss": 0.0014, "step": 237750 }, { "epoch": 1.9443104223739625, "grad_norm": 0.024938806891441345, "learning_rate": 2.3599383610171666e-08, "loss": 0.0005, "step": 237760 }, { "epoch": 1.9443921985525616, "grad_norm": 0.011837847530841827, "learning_rate": 2.3530180841273564e-08, "loss": 0.0005, "step": 237770 }, { "epoch": 1.9444739747311608, "grad_norm": 0.19864293932914734, "learning_rate": 2.3461079447110735e-08, "loss": 0.0008, "step": 237780 }, { "epoch": 1.94455575090976, "grad_norm": 0.025379376485943794, "learning_rate": 2.3392079429090388e-08, "loss": 0.0003, "step": 237790 }, { "epoch": 1.9446375270883591, "grad_norm": 0.028756728395819664, "learning_rate": 2.3323180788618615e-08, "loss": 0.0007, "step": 237800 }, { "epoch": 1.9447193032669583, "grad_norm": 0.08506505191326141, "learning_rate": 2.32543835270993e-08, "loss": 0.0005, "step": 237810 }, { "epoch": 1.9448010794455575, "grad_norm": 0.007806555368006229, "learning_rate": 2.3185687645932985e-08, "loss": 0.0003, "step": 237820 }, { "epoch": 1.9448828556241566, "grad_norm": 0.03312423825263977, "learning_rate": 2.3117093146519664e-08, "loss": 0.0007, "step": 237830 }, { "epoch": 1.9449646318027558, "grad_norm": 0.01033779140561819, "learning_rate": 2.3048600030257107e-08, "loss": 0.0008, "step": 237840 }, { "epoch": 1.945046407981355, "grad_norm": 0.07679200917482376, "learning_rate": 2.298020829853975e-08, "loss": 0.0007, "step": 237850 }, { "epoch": 1.9451281841599544, "grad_norm": 0.05456085130572319, "learning_rate": 2.291191795276093e-08, "loss": 0.0008, "step": 237860 }, { "epoch": 1.9452099603385533, "grad_norm": 0.009622386656701565, "learning_rate": 2.2843728994311752e-08, "loss": 0.0007, "step": 237870 }, { "epoch": 1.9452917365171527, "grad_norm": 0.025959540158510208, "learning_rate": 2.2775641424581664e-08, "loss": 0.0005, "step": 237880 }, { "epoch": 1.9453735126957516, "grad_norm": 0.010626695118844509, "learning_rate": 2.2707655244957326e-08, "loss": 0.0006, "step": 237890 }, { "epoch": 1.945455288874351, "grad_norm": 0.03984726592898369, "learning_rate": 2.2639770456824306e-08, "loss": 0.0008, "step": 237900 }, { "epoch": 1.94553706505295, "grad_norm": 0.05937055125832558, "learning_rate": 2.2571987061564827e-08, "loss": 0.0007, "step": 237910 }, { "epoch": 1.9456188412315494, "grad_norm": 0.09563493728637695, "learning_rate": 2.250430506055945e-08, "loss": 0.0009, "step": 237920 }, { "epoch": 1.9457006174101483, "grad_norm": 0.04200632497668266, "learning_rate": 2.243672445518763e-08, "loss": 0.0006, "step": 237930 }, { "epoch": 1.9457823935887477, "grad_norm": 0.001319759525358677, "learning_rate": 2.236924524682549e-08, "loss": 0.0004, "step": 237940 }, { "epoch": 1.9458641697673467, "grad_norm": 0.06578081846237183, "learning_rate": 2.230186743684748e-08, "loss": 0.0006, "step": 237950 }, { "epoch": 1.945945945945946, "grad_norm": 0.02380307950079441, "learning_rate": 2.223459102662695e-08, "loss": 0.001, "step": 237960 }, { "epoch": 1.946027722124545, "grad_norm": 0.0006511573446914554, "learning_rate": 2.2167416017533916e-08, "loss": 0.0003, "step": 237970 }, { "epoch": 1.9461094983031444, "grad_norm": 0.004268774762749672, "learning_rate": 2.2100342410936727e-08, "loss": 0.0005, "step": 237980 }, { "epoch": 1.9461912744817433, "grad_norm": 0.018498161807656288, "learning_rate": 2.2033370208202067e-08, "loss": 0.001, "step": 237990 }, { "epoch": 1.9462730506603427, "grad_norm": 0.01767113246023655, "learning_rate": 2.1966499410693288e-08, "loss": 0.0004, "step": 238000 }, { "epoch": 1.9463548268389417, "grad_norm": 0.00735020637512207, "learning_rate": 2.1899730019773747e-08, "loss": 0.0003, "step": 238010 }, { "epoch": 1.946436603017541, "grad_norm": 0.08863522857427597, "learning_rate": 2.1833062036803467e-08, "loss": 0.0006, "step": 238020 }, { "epoch": 1.94651837919614, "grad_norm": 0.0011866025160998106, "learning_rate": 2.1766495463139692e-08, "loss": 0.0002, "step": 238030 }, { "epoch": 1.9466001553747394, "grad_norm": 0.052283670753240585, "learning_rate": 2.170003030013912e-08, "loss": 0.0006, "step": 238040 }, { "epoch": 1.9466819315533384, "grad_norm": 0.030688976868987083, "learning_rate": 2.1633666549155107e-08, "loss": 0.0003, "step": 238050 }, { "epoch": 1.9467637077319377, "grad_norm": 0.03878282755613327, "learning_rate": 2.1567404211540467e-08, "loss": 0.0002, "step": 238060 }, { "epoch": 1.946845483910537, "grad_norm": 0.08274491876363754, "learning_rate": 2.1501243288644112e-08, "loss": 0.0005, "step": 238070 }, { "epoch": 1.946927260089136, "grad_norm": 0.06366951763629913, "learning_rate": 2.1435183781814416e-08, "loss": 0.0005, "step": 238080 }, { "epoch": 1.9470090362677352, "grad_norm": 0.022005770355463028, "learning_rate": 2.1369225692396412e-08, "loss": 0.0005, "step": 238090 }, { "epoch": 1.9470908124463344, "grad_norm": 0.038255464285612106, "learning_rate": 2.1303369021734578e-08, "loss": 0.0006, "step": 238100 }, { "epoch": 1.9471725886249336, "grad_norm": 0.1292559802532196, "learning_rate": 2.1237613771169507e-08, "loss": 0.0012, "step": 238110 }, { "epoch": 1.9472543648035328, "grad_norm": 0.09168156236410141, "learning_rate": 2.1171959942041797e-08, "loss": 0.0004, "step": 238120 }, { "epoch": 1.947336140982132, "grad_norm": 0.07295303791761398, "learning_rate": 2.1106407535687602e-08, "loss": 0.0007, "step": 238130 }, { "epoch": 1.947417917160731, "grad_norm": 0.057660214602947235, "learning_rate": 2.1040956553443627e-08, "loss": 0.0005, "step": 238140 }, { "epoch": 1.9474996933393303, "grad_norm": 0.06318239122629166, "learning_rate": 2.097560699664214e-08, "loss": 0.0008, "step": 238150 }, { "epoch": 1.9475814695179294, "grad_norm": 0.022653665393590927, "learning_rate": 2.09103588666143e-08, "loss": 0.0008, "step": 238160 }, { "epoch": 1.9476632456965286, "grad_norm": 0.01951650157570839, "learning_rate": 2.0845212164690155e-08, "loss": 0.0004, "step": 238170 }, { "epoch": 1.9477450218751278, "grad_norm": 0.02971588261425495, "learning_rate": 2.0780166892196418e-08, "loss": 0.0006, "step": 238180 }, { "epoch": 1.947826798053727, "grad_norm": 0.11625180393457413, "learning_rate": 2.071522305045759e-08, "loss": 0.0008, "step": 238190 }, { "epoch": 1.947908574232326, "grad_norm": 0.02360370382666588, "learning_rate": 2.0650380640797053e-08, "loss": 0.0002, "step": 238200 }, { "epoch": 1.9479903504109253, "grad_norm": 0.010352573357522488, "learning_rate": 2.0585639664535974e-08, "loss": 0.0004, "step": 238210 }, { "epoch": 1.9480721265895244, "grad_norm": 0.026650086045265198, "learning_rate": 2.052100012299274e-08, "loss": 0.001, "step": 238220 }, { "epoch": 1.9481539027681236, "grad_norm": 0.17457973957061768, "learning_rate": 2.045646201748408e-08, "loss": 0.0015, "step": 238230 }, { "epoch": 1.9482356789467228, "grad_norm": 0.0031441396567970514, "learning_rate": 2.039202534932505e-08, "loss": 0.0003, "step": 238240 }, { "epoch": 1.948317455125322, "grad_norm": 0.040524110198020935, "learning_rate": 2.0327690119827937e-08, "loss": 0.0004, "step": 238250 }, { "epoch": 1.9483992313039211, "grad_norm": 0.019400175660848618, "learning_rate": 2.026345633030391e-08, "loss": 0.0013, "step": 238260 }, { "epoch": 1.9484810074825203, "grad_norm": 0.03452102839946747, "learning_rate": 2.019932398206026e-08, "loss": 0.0005, "step": 238270 }, { "epoch": 1.9485627836611195, "grad_norm": 0.005597447045147419, "learning_rate": 2.0135293076404826e-08, "loss": 0.0006, "step": 238280 }, { "epoch": 1.9486445598397188, "grad_norm": 0.11827950179576874, "learning_rate": 2.0071363614641016e-08, "loss": 0.0012, "step": 238290 }, { "epoch": 1.9487263360183178, "grad_norm": 0.005682679358869791, "learning_rate": 2.000753559807167e-08, "loss": 0.0007, "step": 238300 }, { "epoch": 1.9488081121969172, "grad_norm": 0.0238787904381752, "learning_rate": 1.9943809027996307e-08, "loss": 0.0011, "step": 238310 }, { "epoch": 1.9488898883755161, "grad_norm": 0.09791209548711777, "learning_rate": 1.988018390571389e-08, "loss": 0.0011, "step": 238320 }, { "epoch": 1.9489716645541155, "grad_norm": 0.03759462386369705, "learning_rate": 1.9816660232520046e-08, "loss": 0.0004, "step": 238330 }, { "epoch": 1.9490534407327145, "grad_norm": 0.07077933102846146, "learning_rate": 1.97532380097093e-08, "loss": 0.0006, "step": 238340 }, { "epoch": 1.9491352169113139, "grad_norm": 0.00696425000205636, "learning_rate": 1.9689917238572275e-08, "loss": 0.0014, "step": 238350 }, { "epoch": 1.9492169930899128, "grad_norm": 0.06425365060567856, "learning_rate": 1.9626697920400728e-08, "loss": 0.0004, "step": 238360 }, { "epoch": 1.9492987692685122, "grad_norm": 0.10136047005653381, "learning_rate": 1.9563580056481402e-08, "loss": 0.0006, "step": 238370 }, { "epoch": 1.9493805454471111, "grad_norm": 0.0031565818935632706, "learning_rate": 1.950056364810049e-08, "loss": 0.0003, "step": 238380 }, { "epoch": 1.9494623216257105, "grad_norm": 0.013128695078194141, "learning_rate": 1.9437648696540857e-08, "loss": 0.0008, "step": 238390 }, { "epoch": 1.9495440978043095, "grad_norm": 0.04425494372844696, "learning_rate": 1.9374835203085364e-08, "loss": 0.0009, "step": 238400 }, { "epoch": 1.9496258739829089, "grad_norm": 0.0405682735145092, "learning_rate": 1.9312123169012432e-08, "loss": 0.0008, "step": 238410 }, { "epoch": 1.9497076501615078, "grad_norm": 0.018097305670380592, "learning_rate": 1.9249512595600485e-08, "loss": 0.0004, "step": 238420 }, { "epoch": 1.9497894263401072, "grad_norm": 0.029718760401010513, "learning_rate": 1.9187003484124056e-08, "loss": 0.0003, "step": 238430 }, { "epoch": 1.9498712025187062, "grad_norm": 0.06326194107532501, "learning_rate": 1.912459583585713e-08, "loss": 0.0004, "step": 238440 }, { "epoch": 1.9499529786973056, "grad_norm": 0.01035958994179964, "learning_rate": 1.906228965207091e-08, "loss": 0.0004, "step": 238450 }, { "epoch": 1.9500347548759045, "grad_norm": 0.014367160387337208, "learning_rate": 1.900008493403438e-08, "loss": 0.0003, "step": 238460 }, { "epoch": 1.9501165310545039, "grad_norm": 0.03522782027721405, "learning_rate": 1.893798168301486e-08, "loss": 0.0006, "step": 238470 }, { "epoch": 1.9501983072331028, "grad_norm": 0.18741938471794128, "learning_rate": 1.8875979900278007e-08, "loss": 0.0008, "step": 238480 }, { "epoch": 1.9502800834117022, "grad_norm": 0.03345710411667824, "learning_rate": 1.8814079587086143e-08, "loss": 0.0007, "step": 238490 }, { "epoch": 1.9503618595903014, "grad_norm": 0.015021239407360554, "learning_rate": 1.875228074470048e-08, "loss": 0.0006, "step": 238500 }, { "epoch": 1.9504436357689006, "grad_norm": 0.03734874725341797, "learning_rate": 1.8690583374378902e-08, "loss": 0.0008, "step": 238510 }, { "epoch": 1.9505254119474997, "grad_norm": 0.04811993986368179, "learning_rate": 1.8628987477379845e-08, "loss": 0.0005, "step": 238520 }, { "epoch": 1.950607188126099, "grad_norm": 0.042879439890384674, "learning_rate": 1.8567493054957865e-08, "loss": 0.0005, "step": 238530 }, { "epoch": 1.950688964304698, "grad_norm": 0.10894007235765457, "learning_rate": 1.8506100108364177e-08, "loss": 0.0011, "step": 238540 }, { "epoch": 1.9507707404832972, "grad_norm": 0.007428226061165333, "learning_rate": 1.844480863885112e-08, "loss": 0.0002, "step": 238550 }, { "epoch": 1.9508525166618964, "grad_norm": 0.0006564153591170907, "learning_rate": 1.8383618647666023e-08, "loss": 0.0013, "step": 238560 }, { "epoch": 1.9509342928404956, "grad_norm": 0.05625390633940697, "learning_rate": 1.8322530136056226e-08, "loss": 0.0014, "step": 238570 }, { "epoch": 1.9510160690190947, "grad_norm": 0.05724870041012764, "learning_rate": 1.8261543105265732e-08, "loss": 0.0006, "step": 238580 }, { "epoch": 1.951097845197694, "grad_norm": 0.08436457812786102, "learning_rate": 1.8200657556536882e-08, "loss": 0.0003, "step": 238590 }, { "epoch": 1.951179621376293, "grad_norm": 0.006226020865142345, "learning_rate": 1.8139873491109794e-08, "loss": 0.0003, "step": 238600 }, { "epoch": 1.9512613975548923, "grad_norm": 0.12281210720539093, "learning_rate": 1.8079190910223475e-08, "loss": 0.0004, "step": 238610 }, { "epoch": 1.9513431737334914, "grad_norm": 0.05554431676864624, "learning_rate": 1.8018609815113608e-08, "loss": 0.0005, "step": 238620 }, { "epoch": 1.9514249499120906, "grad_norm": 0.027781516313552856, "learning_rate": 1.7958130207013647e-08, "loss": 0.0004, "step": 238630 }, { "epoch": 1.9515067260906898, "grad_norm": 0.0067013949155807495, "learning_rate": 1.7897752087156494e-08, "loss": 0.0004, "step": 238640 }, { "epoch": 1.951588502269289, "grad_norm": 0.13301493227481842, "learning_rate": 1.7837475456771724e-08, "loss": 0.0013, "step": 238650 }, { "epoch": 1.951670278447888, "grad_norm": 0.14914828538894653, "learning_rate": 1.777730031708724e-08, "loss": 0.0009, "step": 238660 }, { "epoch": 1.9517520546264873, "grad_norm": 0.030234966427087784, "learning_rate": 1.7717226669328735e-08, "loss": 0.0009, "step": 238670 }, { "epoch": 1.9518338308050864, "grad_norm": 0.019164936617016792, "learning_rate": 1.7657254514720778e-08, "loss": 0.0005, "step": 238680 }, { "epoch": 1.9519156069836856, "grad_norm": 0.0012202915968373418, "learning_rate": 1.7597383854483507e-08, "loss": 0.0007, "step": 238690 }, { "epoch": 1.9519973831622848, "grad_norm": 0.05470481142401695, "learning_rate": 1.7537614689838168e-08, "loss": 0.0004, "step": 238700 }, { "epoch": 1.952079159340884, "grad_norm": 0.0037640915252268314, "learning_rate": 1.7477947022001008e-08, "loss": 0.0006, "step": 238710 }, { "epoch": 1.9521609355194833, "grad_norm": 0.03614664450287819, "learning_rate": 1.7418380852188833e-08, "loss": 0.0008, "step": 238720 }, { "epoch": 1.9522427116980823, "grad_norm": 0.038676705211400986, "learning_rate": 1.735891618161345e-08, "loss": 0.0005, "step": 238730 }, { "epoch": 1.9523244878766817, "grad_norm": 0.002482085255905986, "learning_rate": 1.729955301148778e-08, "loss": 0.0007, "step": 238740 }, { "epoch": 1.9524062640552806, "grad_norm": 0.06701252609491348, "learning_rate": 1.7240291343019744e-08, "loss": 0.0006, "step": 238750 }, { "epoch": 1.95248804023388, "grad_norm": 0.05662929639220238, "learning_rate": 1.7181131177417263e-08, "loss": 0.0005, "step": 238760 }, { "epoch": 1.952569816412479, "grad_norm": 0.07787421345710754, "learning_rate": 1.7122072515885492e-08, "loss": 0.0006, "step": 238770 }, { "epoch": 1.9526515925910783, "grad_norm": 0.0008420656085945666, "learning_rate": 1.70631153596279e-08, "loss": 0.001, "step": 238780 }, { "epoch": 1.9527333687696773, "grad_norm": 0.054389163851737976, "learning_rate": 1.7004259709844094e-08, "loss": 0.0005, "step": 238790 }, { "epoch": 1.9528151449482767, "grad_norm": 0.024863379076123238, "learning_rate": 1.6945505567734222e-08, "loss": 0.0006, "step": 238800 }, { "epoch": 1.9528969211268756, "grad_norm": 0.027294183149933815, "learning_rate": 1.68868529344951e-08, "loss": 0.0005, "step": 238810 }, { "epoch": 1.952978697305475, "grad_norm": 0.07205351442098618, "learning_rate": 1.6828301811321334e-08, "loss": 0.0005, "step": 238820 }, { "epoch": 1.953060473484074, "grad_norm": 0.06546095758676529, "learning_rate": 1.6769852199404747e-08, "loss": 0.0005, "step": 238830 }, { "epoch": 1.9531422496626734, "grad_norm": 0.011787883937358856, "learning_rate": 1.6711504099937713e-08, "loss": 0.0003, "step": 238840 }, { "epoch": 1.9532240258412723, "grad_norm": 0.007389485836029053, "learning_rate": 1.6653257514107623e-08, "loss": 0.0003, "step": 238850 }, { "epoch": 1.9533058020198717, "grad_norm": 0.03004545159637928, "learning_rate": 1.6595112443101857e-08, "loss": 0.0006, "step": 238860 }, { "epoch": 1.9533875781984706, "grad_norm": 0.010801272466778755, "learning_rate": 1.6537068888103914e-08, "loss": 0.0008, "step": 238870 }, { "epoch": 1.95346935437707, "grad_norm": 0.0332958959043026, "learning_rate": 1.647912685029618e-08, "loss": 0.0003, "step": 238880 }, { "epoch": 1.953551130555669, "grad_norm": 0.07966963946819305, "learning_rate": 1.642128633085993e-08, "loss": 0.0003, "step": 238890 }, { "epoch": 1.9536329067342684, "grad_norm": 0.006146567407995462, "learning_rate": 1.6363547330973116e-08, "loss": 0.0003, "step": 238900 }, { "epoch": 1.9537146829128673, "grad_norm": 0.0031299814581871033, "learning_rate": 1.6305909851811462e-08, "loss": 0.0004, "step": 238910 }, { "epoch": 1.9537964590914667, "grad_norm": 0.016143597662448883, "learning_rate": 1.624837389454903e-08, "loss": 0.0002, "step": 238920 }, { "epoch": 1.9538782352700659, "grad_norm": 0.014110667631030083, "learning_rate": 1.6190939460358767e-08, "loss": 0.0002, "step": 238930 }, { "epoch": 1.953960011448665, "grad_norm": 0.0050422935746610165, "learning_rate": 1.61336065504103e-08, "loss": 0.0004, "step": 238940 }, { "epoch": 1.9540417876272642, "grad_norm": 0.10574254393577576, "learning_rate": 1.6076375165871016e-08, "loss": 0.0006, "step": 238950 }, { "epoch": 1.9541235638058634, "grad_norm": 0.0018003913573920727, "learning_rate": 1.6019245307906662e-08, "loss": 0.0006, "step": 238960 }, { "epoch": 1.9542053399844626, "grad_norm": 0.05276902765035629, "learning_rate": 1.596221697768241e-08, "loss": 0.0007, "step": 238970 }, { "epoch": 1.9542871161630617, "grad_norm": 0.016252445057034492, "learning_rate": 1.5905290176358446e-08, "loss": 0.0002, "step": 238980 }, { "epoch": 1.954368892341661, "grad_norm": 0.03380545973777771, "learning_rate": 1.584846490509495e-08, "loss": 0.0005, "step": 238990 }, { "epoch": 1.95445066852026, "grad_norm": 0.0015093408292159438, "learning_rate": 1.579174116504989e-08, "loss": 0.0007, "step": 239000 }, { "epoch": 1.9545324446988592, "grad_norm": 0.026440022513270378, "learning_rate": 1.5735118957378447e-08, "loss": 0.001, "step": 239010 }, { "epoch": 1.9546142208774584, "grad_norm": 0.0010977487545460463, "learning_rate": 1.5678598283233592e-08, "loss": 0.0005, "step": 239020 }, { "epoch": 1.9546959970560576, "grad_norm": 0.004332234617322683, "learning_rate": 1.562217914376718e-08, "loss": 0.0004, "step": 239030 }, { "epoch": 1.9547777732346567, "grad_norm": 0.060385119169950485, "learning_rate": 1.5565861540128847e-08, "loss": 0.0005, "step": 239040 }, { "epoch": 1.954859549413256, "grad_norm": 0.012726712971925735, "learning_rate": 1.550964547346545e-08, "loss": 0.0005, "step": 239050 }, { "epoch": 1.954941325591855, "grad_norm": 0.005031473468989134, "learning_rate": 1.545353094492219e-08, "loss": 0.0006, "step": 239060 }, { "epoch": 1.9550231017704542, "grad_norm": 0.01420392096042633, "learning_rate": 1.5397517955642038e-08, "loss": 0.0006, "step": 239070 }, { "epoch": 1.9551048779490534, "grad_norm": 0.08948088437318802, "learning_rate": 1.534160650676575e-08, "loss": 0.0006, "step": 239080 }, { "epoch": 1.9551866541276526, "grad_norm": 0.021956028416752815, "learning_rate": 1.5285796599432968e-08, "loss": 0.0004, "step": 239090 }, { "epoch": 1.9552684303062517, "grad_norm": 0.020775068551301956, "learning_rate": 1.523008823478056e-08, "loss": 0.0005, "step": 239100 }, { "epoch": 1.955350206484851, "grad_norm": 0.07091695070266724, "learning_rate": 1.5174481413943177e-08, "loss": 0.0011, "step": 239110 }, { "epoch": 1.95543198266345, "grad_norm": 0.191719651222229, "learning_rate": 1.511897613805269e-08, "loss": 0.001, "step": 239120 }, { "epoch": 1.9555137588420495, "grad_norm": 0.05445864424109459, "learning_rate": 1.5063572408240967e-08, "loss": 0.0005, "step": 239130 }, { "epoch": 1.9555955350206484, "grad_norm": 0.02568873204290867, "learning_rate": 1.5008270225636555e-08, "loss": 0.0012, "step": 239140 }, { "epoch": 1.9556773111992478, "grad_norm": 0.03084251470863819, "learning_rate": 1.495306959136522e-08, "loss": 0.0005, "step": 239150 }, { "epoch": 1.9557590873778468, "grad_norm": 0.038570649921894073, "learning_rate": 1.4897970506552173e-08, "loss": 0.0006, "step": 239160 }, { "epoch": 1.9558408635564462, "grad_norm": 0.034268904477357864, "learning_rate": 1.4842972972319847e-08, "loss": 0.0024, "step": 239170 }, { "epoch": 1.955922639735045, "grad_norm": 0.038800232112407684, "learning_rate": 1.4788076989787902e-08, "loss": 0.0005, "step": 239180 }, { "epoch": 1.9560044159136445, "grad_norm": 0.043994151055812836, "learning_rate": 1.4733282560074891e-08, "loss": 0.0005, "step": 239190 }, { "epoch": 1.9560861920922434, "grad_norm": 0.03013439103960991, "learning_rate": 1.467858968429714e-08, "loss": 0.0008, "step": 239200 }, { "epoch": 1.9561679682708428, "grad_norm": 0.009151589125394821, "learning_rate": 1.4623998363569313e-08, "loss": 0.0023, "step": 239210 }, { "epoch": 1.9562497444494418, "grad_norm": 0.08543874323368073, "learning_rate": 1.4569508599002746e-08, "loss": 0.0011, "step": 239220 }, { "epoch": 1.9563315206280412, "grad_norm": 0.004637677222490311, "learning_rate": 1.451512039170766e-08, "loss": 0.0007, "step": 239230 }, { "epoch": 1.9564132968066401, "grad_norm": 0.14460815489292145, "learning_rate": 1.4460833742791502e-08, "loss": 0.0011, "step": 239240 }, { "epoch": 1.9564950729852395, "grad_norm": 0.029501307755708694, "learning_rate": 1.4406648653361167e-08, "loss": 0.0006, "step": 239250 }, { "epoch": 1.9565768491638384, "grad_norm": 0.08117437362670898, "learning_rate": 1.4352565124519657e-08, "loss": 0.0002, "step": 239260 }, { "epoch": 1.9566586253424378, "grad_norm": 0.01083467248827219, "learning_rate": 1.4298583157368872e-08, "loss": 0.0003, "step": 239270 }, { "epoch": 1.9567404015210368, "grad_norm": 0.0817481279373169, "learning_rate": 1.4244702753008489e-08, "loss": 0.0008, "step": 239280 }, { "epoch": 1.9568221776996362, "grad_norm": 0.008163114078342915, "learning_rate": 1.4190923912536514e-08, "loss": 0.0008, "step": 239290 }, { "epoch": 1.9569039538782351, "grad_norm": 0.003594868816435337, "learning_rate": 1.413724663704763e-08, "loss": 0.0008, "step": 239300 }, { "epoch": 1.9569857300568345, "grad_norm": 0.016091419383883476, "learning_rate": 1.4083670927635962e-08, "loss": 0.0008, "step": 239310 }, { "epoch": 1.9570675062354335, "grad_norm": 0.026351450011134148, "learning_rate": 1.4030196785392858e-08, "loss": 0.0007, "step": 239320 }, { "epoch": 1.9571492824140329, "grad_norm": 0.015980714932084084, "learning_rate": 1.3976824211406892e-08, "loss": 0.0005, "step": 239330 }, { "epoch": 1.9572310585926318, "grad_norm": 0.02625967189669609, "learning_rate": 1.3923553206766638e-08, "loss": 0.0004, "step": 239340 }, { "epoch": 1.9573128347712312, "grad_norm": 0.02027352713048458, "learning_rate": 1.3870383772556228e-08, "loss": 0.0007, "step": 239350 }, { "epoch": 1.9573946109498304, "grad_norm": 0.011761115863919258, "learning_rate": 1.3817315909858686e-08, "loss": 0.0004, "step": 239360 }, { "epoch": 1.9574763871284295, "grad_norm": 0.0012740750098600984, "learning_rate": 1.3764349619755368e-08, "loss": 0.0003, "step": 239370 }, { "epoch": 1.9575581633070287, "grad_norm": 0.15397357940673828, "learning_rate": 1.3711484903325966e-08, "loss": 0.0005, "step": 239380 }, { "epoch": 1.9576399394856279, "grad_norm": 0.0031861269380897284, "learning_rate": 1.3658721761646288e-08, "loss": 0.0005, "step": 239390 }, { "epoch": 1.957721715664227, "grad_norm": 0.015229876153171062, "learning_rate": 1.3606060195791026e-08, "loss": 0.0007, "step": 239400 }, { "epoch": 1.9578034918428262, "grad_norm": 0.043485552072525024, "learning_rate": 1.3553500206833769e-08, "loss": 0.0005, "step": 239410 }, { "epoch": 1.9578852680214254, "grad_norm": 0.021288661286234856, "learning_rate": 1.3501041795845326e-08, "loss": 0.0003, "step": 239420 }, { "epoch": 1.9579670442000245, "grad_norm": 0.04556594416499138, "learning_rate": 1.3448684963893732e-08, "loss": 0.0007, "step": 239430 }, { "epoch": 1.9580488203786237, "grad_norm": 0.11337859183549881, "learning_rate": 1.3396429712045355e-08, "loss": 0.0006, "step": 239440 }, { "epoch": 1.9581305965572229, "grad_norm": 0.07456456124782562, "learning_rate": 1.3344276041365457e-08, "loss": 0.0005, "step": 239450 }, { "epoch": 1.958212372735822, "grad_norm": 0.016373446211218834, "learning_rate": 1.329222395291596e-08, "loss": 0.0007, "step": 239460 }, { "epoch": 1.9582941489144212, "grad_norm": 0.04981879144906998, "learning_rate": 1.3240273447757135e-08, "loss": 0.0012, "step": 239470 }, { "epoch": 1.9583759250930204, "grad_norm": 0.02430538646876812, "learning_rate": 1.318842452694702e-08, "loss": 0.0003, "step": 239480 }, { "epoch": 1.9584577012716196, "grad_norm": 0.036056432873010635, "learning_rate": 1.3136677191542547e-08, "loss": 0.0005, "step": 239490 }, { "epoch": 1.9585394774502187, "grad_norm": 0.027164340019226074, "learning_rate": 1.3085031442597318e-08, "loss": 0.0007, "step": 239500 }, { "epoch": 1.958621253628818, "grad_norm": 0.005516568198800087, "learning_rate": 1.3033487281163825e-08, "loss": 0.0003, "step": 239510 }, { "epoch": 1.958703029807417, "grad_norm": 0.04102380946278572, "learning_rate": 1.2982044708291785e-08, "loss": 0.0004, "step": 239520 }, { "epoch": 1.9587848059860162, "grad_norm": 0.01509096473455429, "learning_rate": 1.2930703725028692e-08, "loss": 0.0006, "step": 239530 }, { "epoch": 1.9588665821646154, "grad_norm": 0.002560586202889681, "learning_rate": 1.2879464332421488e-08, "loss": 0.0003, "step": 239540 }, { "epoch": 1.9589483583432146, "grad_norm": 0.036198753863573074, "learning_rate": 1.2828326531513225e-08, "loss": 0.001, "step": 239550 }, { "epoch": 1.959030134521814, "grad_norm": 0.10271470248699188, "learning_rate": 1.2777290323345293e-08, "loss": 0.0006, "step": 239560 }, { "epoch": 1.959111910700413, "grad_norm": 0.0020637353882193565, "learning_rate": 1.272635570895797e-08, "loss": 0.0002, "step": 239570 }, { "epoch": 1.9591936868790123, "grad_norm": 0.001115621766075492, "learning_rate": 1.2675522689388764e-08, "loss": 0.0005, "step": 239580 }, { "epoch": 1.9592754630576112, "grad_norm": 0.006372873205691576, "learning_rate": 1.2624791265672953e-08, "loss": 0.0007, "step": 239590 }, { "epoch": 1.9593572392362106, "grad_norm": 0.021005896851420403, "learning_rate": 1.2574161438844158e-08, "loss": 0.0009, "step": 239600 }, { "epoch": 1.9594390154148096, "grad_norm": 0.04335373267531395, "learning_rate": 1.2523633209933772e-08, "loss": 0.0008, "step": 239610 }, { "epoch": 1.959520791593409, "grad_norm": 0.006032298319041729, "learning_rate": 1.2473206579970976e-08, "loss": 0.0005, "step": 239620 }, { "epoch": 1.959602567772008, "grad_norm": 0.1034896969795227, "learning_rate": 1.2422881549982724e-08, "loss": 0.0009, "step": 239630 }, { "epoch": 1.9596843439506073, "grad_norm": 0.01600463129580021, "learning_rate": 1.2372658120994863e-08, "loss": 0.0008, "step": 239640 }, { "epoch": 1.9597661201292063, "grad_norm": 0.2099376916885376, "learning_rate": 1.2322536294029908e-08, "loss": 0.0013, "step": 239650 }, { "epoch": 1.9598478963078056, "grad_norm": 0.0015590587863698602, "learning_rate": 1.2272516070109264e-08, "loss": 0.0008, "step": 239660 }, { "epoch": 1.9599296724864046, "grad_norm": 0.042159512639045715, "learning_rate": 1.2222597450251562e-08, "loss": 0.0004, "step": 239670 }, { "epoch": 1.960011448665004, "grad_norm": 0.07495568692684174, "learning_rate": 1.2172780435473763e-08, "loss": 0.0012, "step": 239680 }, { "epoch": 1.960093224843603, "grad_norm": 0.017243461683392525, "learning_rate": 1.2123065026790614e-08, "loss": 0.0005, "step": 239690 }, { "epoch": 1.9601750010222023, "grad_norm": 0.008055069483816624, "learning_rate": 1.2073451225215193e-08, "loss": 0.0002, "step": 239700 }, { "epoch": 1.9602567772008013, "grad_norm": 0.02676883526146412, "learning_rate": 1.2023939031757802e-08, "loss": 0.0003, "step": 239710 }, { "epoch": 1.9603385533794007, "grad_norm": 0.10545974969863892, "learning_rate": 1.1974528447427636e-08, "loss": 0.0005, "step": 239720 }, { "epoch": 1.9604203295579996, "grad_norm": 0.0049644713290035725, "learning_rate": 1.1925219473229998e-08, "loss": 0.0007, "step": 239730 }, { "epoch": 1.960502105736599, "grad_norm": 0.014697861857712269, "learning_rate": 1.1876012110170754e-08, "loss": 0.0007, "step": 239740 }, { "epoch": 1.960583881915198, "grad_norm": 0.018387803807854652, "learning_rate": 1.1826906359251878e-08, "loss": 0.0007, "step": 239750 }, { "epoch": 1.9606656580937973, "grad_norm": 0.042349714785814285, "learning_rate": 1.1777902221473126e-08, "loss": 0.0005, "step": 239760 }, { "epoch": 1.9607474342723965, "grad_norm": 0.04507764056324959, "learning_rate": 1.1728999697833143e-08, "loss": 0.0006, "step": 239770 }, { "epoch": 1.9608292104509957, "grad_norm": 0.07154101133346558, "learning_rate": 1.1680198789328356e-08, "loss": 0.0003, "step": 239780 }, { "epoch": 1.9609109866295948, "grad_norm": 0.03610813245177269, "learning_rate": 1.1631499496952414e-08, "loss": 0.0006, "step": 239790 }, { "epoch": 1.960992762808194, "grad_norm": 0.03155532479286194, "learning_rate": 1.1582901821697302e-08, "loss": 0.0023, "step": 239800 }, { "epoch": 1.9610745389867932, "grad_norm": 0.007753607351332903, "learning_rate": 1.1534405764553891e-08, "loss": 0.0003, "step": 239810 }, { "epoch": 1.9611563151653923, "grad_norm": 0.06545935571193695, "learning_rate": 1.1486011326509173e-08, "loss": 0.0009, "step": 239820 }, { "epoch": 1.9612380913439915, "grad_norm": 0.005564146209508181, "learning_rate": 1.1437718508549023e-08, "loss": 0.0005, "step": 239830 }, { "epoch": 1.9613198675225907, "grad_norm": 0.040884580463171005, "learning_rate": 1.1389527311657656e-08, "loss": 0.0007, "step": 239840 }, { "epoch": 1.9614016437011899, "grad_norm": 0.0008029209566302598, "learning_rate": 1.1341437736816508e-08, "loss": 0.0007, "step": 239850 }, { "epoch": 1.961483419879789, "grad_norm": 0.04235008358955383, "learning_rate": 1.129344978500535e-08, "loss": 0.0008, "step": 239860 }, { "epoch": 1.9615651960583882, "grad_norm": 0.022168584167957306, "learning_rate": 1.1245563457201736e-08, "loss": 0.0004, "step": 239870 }, { "epoch": 1.9616469722369874, "grad_norm": 0.0036844771821051836, "learning_rate": 1.1197778754380995e-08, "loss": 0.0004, "step": 239880 }, { "epoch": 1.9617287484155865, "grad_norm": 0.07553475350141525, "learning_rate": 1.1150095677516792e-08, "loss": 0.0014, "step": 239890 }, { "epoch": 1.9618105245941857, "grad_norm": 0.047154612839221954, "learning_rate": 1.1102514227580019e-08, "loss": 0.0008, "step": 239900 }, { "epoch": 1.9618923007727849, "grad_norm": 0.01328661385923624, "learning_rate": 1.1055034405540454e-08, "loss": 0.0003, "step": 239910 }, { "epoch": 1.961974076951384, "grad_norm": 0.020491883158683777, "learning_rate": 1.1007656212364548e-08, "loss": 0.0006, "step": 239920 }, { "epoch": 1.9620558531299832, "grad_norm": 0.0029955627396702766, "learning_rate": 1.0960379649018194e-08, "loss": 0.0004, "step": 239930 }, { "epoch": 1.9621376293085824, "grad_norm": 0.016240786761045456, "learning_rate": 1.091320471646451e-08, "loss": 0.0003, "step": 239940 }, { "epoch": 1.9622194054871815, "grad_norm": 0.007622730452567339, "learning_rate": 1.0866131415663838e-08, "loss": 0.0008, "step": 239950 }, { "epoch": 1.9623011816657807, "grad_norm": 0.015269186347723007, "learning_rate": 1.0819159747575414e-08, "loss": 0.0004, "step": 239960 }, { "epoch": 1.9623829578443799, "grad_norm": 0.001302492106333375, "learning_rate": 1.0772289713156248e-08, "loss": 0.0005, "step": 239970 }, { "epoch": 1.962464734022979, "grad_norm": 0.056463610380887985, "learning_rate": 1.0725521313361132e-08, "loss": 0.0006, "step": 239980 }, { "epoch": 1.9625465102015784, "grad_norm": 0.034983355551958084, "learning_rate": 1.0678854549142637e-08, "loss": 0.0005, "step": 239990 }, { "epoch": 1.9626282863801774, "grad_norm": 0.010327312164008617, "learning_rate": 1.0632289421451114e-08, "loss": 0.0004, "step": 240000 }, { "epoch": 1.9627100625587768, "grad_norm": 0.05403928458690643, "learning_rate": 1.058582593123525e-08, "loss": 0.0005, "step": 240010 }, { "epoch": 1.9627918387373757, "grad_norm": 0.06538177281618118, "learning_rate": 1.0539464079442064e-08, "loss": 0.001, "step": 240020 }, { "epoch": 1.9628736149159751, "grad_norm": 0.06580471992492676, "learning_rate": 1.04932038670158e-08, "loss": 0.001, "step": 240030 }, { "epoch": 1.962955391094574, "grad_norm": 0.06423071771860123, "learning_rate": 1.0447045294898483e-08, "loss": 0.0014, "step": 240040 }, { "epoch": 1.9630371672731735, "grad_norm": 0.034219369292259216, "learning_rate": 1.0400988364030472e-08, "loss": 0.0005, "step": 240050 }, { "epoch": 1.9631189434517724, "grad_norm": 0.0235893651843071, "learning_rate": 1.0355033075349907e-08, "loss": 0.0003, "step": 240060 }, { "epoch": 1.9632007196303718, "grad_norm": 0.04243030399084091, "learning_rate": 1.0309179429793814e-08, "loss": 0.0004, "step": 240070 }, { "epoch": 1.9632824958089707, "grad_norm": 0.0009585013031028211, "learning_rate": 1.0263427428294781e-08, "loss": 0.0004, "step": 240080 }, { "epoch": 1.9633642719875701, "grad_norm": 0.013436681590974331, "learning_rate": 1.021777707178595e-08, "loss": 0.0017, "step": 240090 }, { "epoch": 1.963446048166169, "grad_norm": 0.059747856110334396, "learning_rate": 1.0172228361196579e-08, "loss": 0.0009, "step": 240100 }, { "epoch": 1.9635278243447685, "grad_norm": 0.016728214919567108, "learning_rate": 1.0126781297455367e-08, "loss": 0.0005, "step": 240110 }, { "epoch": 1.9636096005233674, "grad_norm": 0.06252273917198181, "learning_rate": 1.0081435881487133e-08, "loss": 0.0005, "step": 240120 }, { "epoch": 1.9636913767019668, "grad_norm": 0.11201881617307663, "learning_rate": 1.0036192114216137e-08, "loss": 0.0008, "step": 240130 }, { "epoch": 1.9637731528805658, "grad_norm": 0.001467607798986137, "learning_rate": 9.991049996564417e-09, "loss": 0.0005, "step": 240140 }, { "epoch": 1.9638549290591651, "grad_norm": 0.0016172622563317418, "learning_rate": 9.946009529450684e-09, "loss": 0.0004, "step": 240150 }, { "epoch": 1.963936705237764, "grad_norm": 0.14251671731472015, "learning_rate": 9.901070713792538e-09, "loss": 0.0004, "step": 240160 }, { "epoch": 1.9640184814163635, "grad_norm": 0.02853759378194809, "learning_rate": 9.856233550506466e-09, "loss": 0.0006, "step": 240170 }, { "epoch": 1.9641002575949624, "grad_norm": 0.015415819361805916, "learning_rate": 9.811498040505073e-09, "loss": 0.0008, "step": 240180 }, { "epoch": 1.9641820337735618, "grad_norm": 0.020218301564455032, "learning_rate": 9.766864184699298e-09, "loss": 0.0004, "step": 240190 }, { "epoch": 1.964263809952161, "grad_norm": 0.014005202800035477, "learning_rate": 9.722331983998412e-09, "loss": 0.0009, "step": 240200 }, { "epoch": 1.9643455861307602, "grad_norm": 0.012094114907085896, "learning_rate": 9.677901439310578e-09, "loss": 0.0002, "step": 240210 }, { "epoch": 1.9644273623093593, "grad_norm": 0.003013062058016658, "learning_rate": 9.633572551540072e-09, "loss": 0.001, "step": 240220 }, { "epoch": 1.9645091384879585, "grad_norm": 0.11522839963436127, "learning_rate": 9.58934532159006e-09, "loss": 0.0005, "step": 240230 }, { "epoch": 1.9645909146665577, "grad_norm": 0.09314898401498795, "learning_rate": 9.54521975036149e-09, "loss": 0.0009, "step": 240240 }, { "epoch": 1.9646726908451568, "grad_norm": 0.008583898656070232, "learning_rate": 9.501195838753086e-09, "loss": 0.0006, "step": 240250 }, { "epoch": 1.964754467023756, "grad_norm": 0.02873467653989792, "learning_rate": 9.457273587661907e-09, "loss": 0.0005, "step": 240260 }, { "epoch": 1.9648362432023552, "grad_norm": 0.020059024915099144, "learning_rate": 9.413452997982798e-09, "loss": 0.0006, "step": 240270 }, { "epoch": 1.9649180193809543, "grad_norm": 0.02810043841600418, "learning_rate": 9.369734070607816e-09, "loss": 0.0004, "step": 240280 }, { "epoch": 1.9649997955595535, "grad_norm": 0.06491725891828537, "learning_rate": 9.32611680642792e-09, "loss": 0.0004, "step": 240290 }, { "epoch": 1.9650815717381527, "grad_norm": 0.07915057986974716, "learning_rate": 9.282601206332398e-09, "loss": 0.0007, "step": 240300 }, { "epoch": 1.9651633479167518, "grad_norm": 0.03704533353447914, "learning_rate": 9.239187271206095e-09, "loss": 0.0006, "step": 240310 }, { "epoch": 1.965245124095351, "grad_norm": 0.10058996081352234, "learning_rate": 9.195875001934972e-09, "loss": 0.0006, "step": 240320 }, { "epoch": 1.9653269002739502, "grad_norm": 0.11383067071437836, "learning_rate": 9.152664399399991e-09, "loss": 0.0006, "step": 240330 }, { "epoch": 1.9654086764525494, "grad_norm": 0.03813592344522476, "learning_rate": 9.109555464482111e-09, "loss": 0.0008, "step": 240340 }, { "epoch": 1.9654904526311485, "grad_norm": 0.04477168619632721, "learning_rate": 9.066548198059521e-09, "loss": 0.0006, "step": 240350 }, { "epoch": 1.9655722288097477, "grad_norm": 0.059985578060150146, "learning_rate": 9.023642601008186e-09, "loss": 0.0009, "step": 240360 }, { "epoch": 1.9656540049883469, "grad_norm": 0.01460745558142662, "learning_rate": 8.980838674201852e-09, "loss": 0.0004, "step": 240370 }, { "epoch": 1.965735781166946, "grad_norm": 0.06515633314847946, "learning_rate": 8.938136418513155e-09, "loss": 0.0005, "step": 240380 }, { "epoch": 1.9658175573455452, "grad_norm": 0.03801710531115532, "learning_rate": 8.895535834811398e-09, "loss": 0.0007, "step": 240390 }, { "epoch": 1.9658993335241444, "grad_norm": 0.03404651954770088, "learning_rate": 8.85303692396422e-09, "loss": 0.0013, "step": 240400 }, { "epoch": 1.9659811097027435, "grad_norm": 0.053745824843645096, "learning_rate": 8.810639686838151e-09, "loss": 0.0005, "step": 240410 }, { "epoch": 1.966062885881343, "grad_norm": 0.08601313829421997, "learning_rate": 8.76834412429639e-09, "loss": 0.0006, "step": 240420 }, { "epoch": 1.9661446620599419, "grad_norm": 0.030031152069568634, "learning_rate": 8.726150237200471e-09, "loss": 0.0011, "step": 240430 }, { "epoch": 1.9662264382385413, "grad_norm": 0.10216152667999268, "learning_rate": 8.684058026409704e-09, "loss": 0.0024, "step": 240440 }, { "epoch": 1.9663082144171402, "grad_norm": 0.002154982415959239, "learning_rate": 8.642067492781736e-09, "loss": 0.0007, "step": 240450 }, { "epoch": 1.9663899905957396, "grad_norm": 0.01953718811273575, "learning_rate": 8.600178637172552e-09, "loss": 0.0014, "step": 240460 }, { "epoch": 1.9664717667743385, "grad_norm": 0.017573783174157143, "learning_rate": 8.5583914604348e-09, "loss": 0.0006, "step": 240470 }, { "epoch": 1.966553542952938, "grad_norm": 0.02257007732987404, "learning_rate": 8.51670596341947e-09, "loss": 0.0004, "step": 240480 }, { "epoch": 1.9666353191315369, "grad_norm": 0.024718206375837326, "learning_rate": 8.475122146976433e-09, "loss": 0.0007, "step": 240490 }, { "epoch": 1.9667170953101363, "grad_norm": 0.04456334933638573, "learning_rate": 8.433640011952238e-09, "loss": 0.0009, "step": 240500 }, { "epoch": 1.9667988714887352, "grad_norm": 0.011128578335046768, "learning_rate": 8.392259559192317e-09, "loss": 0.0007, "step": 240510 }, { "epoch": 1.9668806476673346, "grad_norm": 0.012935756705701351, "learning_rate": 8.350980789539886e-09, "loss": 0.0007, "step": 240520 }, { "epoch": 1.9669624238459336, "grad_norm": 0.09111718088388443, "learning_rate": 8.309803703834829e-09, "loss": 0.0006, "step": 240530 }, { "epoch": 1.967044200024533, "grad_norm": 0.0004453026340343058, "learning_rate": 8.268728302917028e-09, "loss": 0.001, "step": 240540 }, { "epoch": 1.967125976203132, "grad_norm": 0.0460994653403759, "learning_rate": 8.227754587621928e-09, "loss": 0.0006, "step": 240550 }, { "epoch": 1.9672077523817313, "grad_norm": 0.0482340008020401, "learning_rate": 8.186882558785526e-09, "loss": 0.0005, "step": 240560 }, { "epoch": 1.9672895285603302, "grad_norm": 0.10941614955663681, "learning_rate": 8.146112217239933e-09, "loss": 0.0005, "step": 240570 }, { "epoch": 1.9673713047389296, "grad_norm": 0.005077095702290535, "learning_rate": 8.105443563815596e-09, "loss": 0.0003, "step": 240580 }, { "epoch": 1.9674530809175286, "grad_norm": 0.019257530570030212, "learning_rate": 8.064876599340742e-09, "loss": 0.0004, "step": 240590 }, { "epoch": 1.967534857096128, "grad_norm": 0.043034978210926056, "learning_rate": 8.024411324641934e-09, "loss": 0.0005, "step": 240600 }, { "epoch": 1.967616633274727, "grad_norm": 0.03399571031332016, "learning_rate": 7.984047740544065e-09, "loss": 0.0005, "step": 240610 }, { "epoch": 1.9676984094533263, "grad_norm": 0.04958905652165413, "learning_rate": 7.943785847868146e-09, "loss": 0.0003, "step": 240620 }, { "epoch": 1.9677801856319255, "grad_norm": 0.0013936804607510567, "learning_rate": 7.903625647435742e-09, "loss": 0.0004, "step": 240630 }, { "epoch": 1.9678619618105246, "grad_norm": 0.06087272986769676, "learning_rate": 7.863567140063976e-09, "loss": 0.0007, "step": 240640 }, { "epoch": 1.9679437379891238, "grad_norm": 0.03697393834590912, "learning_rate": 7.823610326568865e-09, "loss": 0.0006, "step": 240650 }, { "epoch": 1.968025514167723, "grad_norm": 0.0019425739301368594, "learning_rate": 7.783755207765309e-09, "loss": 0.0005, "step": 240660 }, { "epoch": 1.9681072903463221, "grad_norm": 0.005390380043536425, "learning_rate": 7.744001784463773e-09, "loss": 0.0004, "step": 240670 }, { "epoch": 1.9681890665249213, "grad_norm": 0.02413727529346943, "learning_rate": 7.704350057475273e-09, "loss": 0.0014, "step": 240680 }, { "epoch": 1.9682708427035205, "grad_norm": 0.04087621718645096, "learning_rate": 7.664800027606945e-09, "loss": 0.0011, "step": 240690 }, { "epoch": 1.9683526188821197, "grad_norm": 0.004723111633211374, "learning_rate": 7.625351695664806e-09, "loss": 0.0005, "step": 240700 }, { "epoch": 1.9684343950607188, "grad_norm": 0.025067336857318878, "learning_rate": 7.586005062452106e-09, "loss": 0.0004, "step": 240710 }, { "epoch": 1.968516171239318, "grad_norm": 0.0046750339679419994, "learning_rate": 7.54676012877098e-09, "loss": 0.0005, "step": 240720 }, { "epoch": 1.9685979474179172, "grad_norm": 0.032489120960235596, "learning_rate": 7.507616895419678e-09, "loss": 0.0007, "step": 240730 }, { "epoch": 1.9686797235965163, "grad_norm": 0.020355721935629845, "learning_rate": 7.468575363197007e-09, "loss": 0.0006, "step": 240740 }, { "epoch": 1.9687614997751155, "grad_norm": 0.0073118931613862514, "learning_rate": 7.429635532897328e-09, "loss": 0.0003, "step": 240750 }, { "epoch": 1.9688432759537147, "grad_norm": 0.030869361013174057, "learning_rate": 7.390797405313899e-09, "loss": 0.0005, "step": 240760 }, { "epoch": 1.9689250521323138, "grad_norm": 0.05701932683587074, "learning_rate": 7.352060981238307e-09, "loss": 0.001, "step": 240770 }, { "epoch": 1.969006828310913, "grad_norm": 0.005467955023050308, "learning_rate": 7.3134262614593666e-09, "loss": 0.0007, "step": 240780 }, { "epoch": 1.9690886044895122, "grad_norm": 0.10576683282852173, "learning_rate": 7.2748932467642255e-09, "loss": 0.0004, "step": 240790 }, { "epoch": 1.9691703806681113, "grad_norm": 0.057211052626371384, "learning_rate": 7.236461937938366e-09, "loss": 0.0006, "step": 240800 }, { "epoch": 1.9692521568467105, "grad_norm": 0.06146574765443802, "learning_rate": 7.1981323357633855e-09, "loss": 0.0006, "step": 240810 }, { "epoch": 1.9693339330253097, "grad_norm": 0.02515679970383644, "learning_rate": 7.159904441020882e-09, "loss": 0.0008, "step": 240820 }, { "epoch": 1.9694157092039088, "grad_norm": 0.004430609289556742, "learning_rate": 7.1217782544896754e-09, "loss": 0.0005, "step": 240830 }, { "epoch": 1.969497485382508, "grad_norm": 0.06326758861541748, "learning_rate": 7.083753776946367e-09, "loss": 0.0004, "step": 240840 }, { "epoch": 1.9695792615611074, "grad_norm": 0.03574049100279808, "learning_rate": 7.045831009165338e-09, "loss": 0.0007, "step": 240850 }, { "epoch": 1.9696610377397064, "grad_norm": 0.038448479026556015, "learning_rate": 7.0080099519187485e-09, "loss": 0.0004, "step": 240860 }, { "epoch": 1.9697428139183057, "grad_norm": 0.00811791606247425, "learning_rate": 6.970290605978203e-09, "loss": 0.0004, "step": 240870 }, { "epoch": 1.9698245900969047, "grad_norm": 0.003815850941464305, "learning_rate": 6.932672972110865e-09, "loss": 0.0016, "step": 240880 }, { "epoch": 1.969906366275504, "grad_norm": 0.044383928179740906, "learning_rate": 6.8951570510839006e-09, "loss": 0.0005, "step": 240890 }, { "epoch": 1.969988142454103, "grad_norm": 0.04898260533809662, "learning_rate": 6.857742843661141e-09, "loss": 0.0006, "step": 240900 }, { "epoch": 1.9700699186327024, "grad_norm": 0.04598771780729294, "learning_rate": 6.820430350604757e-09, "loss": 0.0005, "step": 240910 }, { "epoch": 1.9701516948113014, "grad_norm": 0.032798998057842255, "learning_rate": 6.783219572674693e-09, "loss": 0.0006, "step": 240920 }, { "epoch": 1.9702334709899008, "grad_norm": 0.02982020564377308, "learning_rate": 6.746110510629234e-09, "loss": 0.0005, "step": 240930 }, { "epoch": 1.9703152471684997, "grad_norm": 0.010241258889436722, "learning_rate": 6.70910316522444e-09, "loss": 0.0002, "step": 240940 }, { "epoch": 1.970397023347099, "grad_norm": 0.009274272248148918, "learning_rate": 6.672197537213599e-09, "loss": 0.0005, "step": 240950 }, { "epoch": 1.970478799525698, "grad_norm": 0.0424266941845417, "learning_rate": 6.63539362734944e-09, "loss": 0.0005, "step": 240960 }, { "epoch": 1.9705605757042974, "grad_norm": 0.004772974643856287, "learning_rate": 6.5986914363802555e-09, "loss": 0.0004, "step": 240970 }, { "epoch": 1.9706423518828964, "grad_norm": 0.033562082797288895, "learning_rate": 6.562090965055445e-09, "loss": 0.0006, "step": 240980 }, { "epoch": 1.9707241280614958, "grad_norm": 0.019937969744205475, "learning_rate": 6.525592214119414e-09, "loss": 0.0006, "step": 240990 }, { "epoch": 1.9708059042400947, "grad_norm": 0.006185318809002638, "learning_rate": 6.489195184316011e-09, "loss": 0.0005, "step": 241000 }, { "epoch": 1.970887680418694, "grad_norm": 0.04000472277402878, "learning_rate": 6.452899876386309e-09, "loss": 0.0007, "step": 241010 }, { "epoch": 1.970969456597293, "grad_norm": 0.015952764078974724, "learning_rate": 6.416706291070274e-09, "loss": 0.0009, "step": 241020 }, { "epoch": 1.9710512327758924, "grad_norm": 0.006037895567715168, "learning_rate": 6.3806144291045365e-09, "loss": 0.0017, "step": 241030 }, { "epoch": 1.9711330089544914, "grad_norm": 0.014701037667691708, "learning_rate": 6.344624291225176e-09, "loss": 0.0002, "step": 241040 }, { "epoch": 1.9712147851330908, "grad_norm": 0.01358600053936243, "learning_rate": 6.30873587816494e-09, "loss": 0.0004, "step": 241050 }, { "epoch": 1.97129656131169, "grad_norm": 0.003301543416455388, "learning_rate": 6.2729491906543535e-09, "loss": 0.0003, "step": 241060 }, { "epoch": 1.9713783374902891, "grad_norm": 0.06364406645298004, "learning_rate": 6.23726422942339e-09, "loss": 0.0006, "step": 241070 }, { "epoch": 1.9714601136688883, "grad_norm": 0.13085290789604187, "learning_rate": 6.201680995198134e-09, "loss": 0.0012, "step": 241080 }, { "epoch": 1.9715418898474875, "grad_norm": 0.0044484734535217285, "learning_rate": 6.166199488703561e-09, "loss": 0.0003, "step": 241090 }, { "epoch": 1.9716236660260866, "grad_norm": 0.13391919434070587, "learning_rate": 6.130819710662983e-09, "loss": 0.0005, "step": 241100 }, { "epoch": 1.9717054422046858, "grad_norm": 0.1289430856704712, "learning_rate": 6.095541661796933e-09, "loss": 0.0012, "step": 241110 }, { "epoch": 1.971787218383285, "grad_norm": 0.0023907579015940428, "learning_rate": 6.060365342824282e-09, "loss": 0.0004, "step": 241120 }, { "epoch": 1.9718689945618841, "grad_norm": 0.01812039688229561, "learning_rate": 6.025290754460567e-09, "loss": 0.0009, "step": 241130 }, { "epoch": 1.9719507707404833, "grad_norm": 0.0262007974088192, "learning_rate": 5.990317897421327e-09, "loss": 0.0006, "step": 241140 }, { "epoch": 1.9720325469190825, "grad_norm": 0.00908130593597889, "learning_rate": 5.955446772418771e-09, "loss": 0.0006, "step": 241150 }, { "epoch": 1.9721143230976816, "grad_norm": 0.009402163326740265, "learning_rate": 5.920677380162887e-09, "loss": 0.0003, "step": 241160 }, { "epoch": 1.9721960992762808, "grad_norm": 0.05002060905098915, "learning_rate": 5.886009721362551e-09, "loss": 0.0006, "step": 241170 }, { "epoch": 1.97227787545488, "grad_norm": 0.017797857522964478, "learning_rate": 5.851443796723311e-09, "loss": 0.0006, "step": 241180 }, { "epoch": 1.9723596516334791, "grad_norm": 0.06299176812171936, "learning_rate": 5.816979606949602e-09, "loss": 0.0008, "step": 241190 }, { "epoch": 1.9724414278120783, "grad_norm": 0.0037520471960306168, "learning_rate": 5.782617152743641e-09, "loss": 0.0004, "step": 241200 }, { "epoch": 1.9725232039906775, "grad_norm": 0.0037390124052762985, "learning_rate": 5.748356434804869e-09, "loss": 0.0008, "step": 241210 }, { "epoch": 1.9726049801692767, "grad_norm": 0.0013213999336585402, "learning_rate": 5.714197453832171e-09, "loss": 0.0005, "step": 241220 }, { "epoch": 1.9726867563478758, "grad_norm": 0.02182280458509922, "learning_rate": 5.680140210520546e-09, "loss": 0.0005, "step": 241230 }, { "epoch": 1.972768532526475, "grad_norm": 0.02570185251533985, "learning_rate": 5.646184705563884e-09, "loss": 0.0005, "step": 241240 }, { "epoch": 1.9728503087050742, "grad_norm": 0.002982005476951599, "learning_rate": 5.612330939654409e-09, "loss": 0.0003, "step": 241250 }, { "epoch": 1.9729320848836733, "grad_norm": 0.010846239514648914, "learning_rate": 5.5785789134815696e-09, "loss": 0.0012, "step": 241260 }, { "epoch": 1.9730138610622725, "grad_norm": 0.04616263136267662, "learning_rate": 5.544928627732038e-09, "loss": 0.001, "step": 241270 }, { "epoch": 1.973095637240872, "grad_norm": 0.01187106128782034, "learning_rate": 5.511380083092488e-09, "loss": 0.0006, "step": 241280 }, { "epoch": 1.9731774134194708, "grad_norm": 0.019073035567998886, "learning_rate": 5.477933280245707e-09, "loss": 0.0003, "step": 241290 }, { "epoch": 1.9732591895980702, "grad_norm": 0.0264121126383543, "learning_rate": 5.44458821987337e-09, "loss": 0.0005, "step": 241300 }, { "epoch": 1.9733409657766692, "grad_norm": 0.08062582463026047, "learning_rate": 5.411344902654381e-09, "loss": 0.0004, "step": 241310 }, { "epoch": 1.9734227419552686, "grad_norm": 0.006117796525359154, "learning_rate": 5.378203329265974e-09, "loss": 0.0005, "step": 241320 }, { "epoch": 1.9735045181338675, "grad_norm": 0.042786017060279846, "learning_rate": 5.345163500383721e-09, "loss": 0.0005, "step": 241330 }, { "epoch": 1.973586294312467, "grad_norm": 0.0697493851184845, "learning_rate": 5.312225416680417e-09, "loss": 0.0006, "step": 241340 }, { "epoch": 1.9736680704910659, "grad_norm": 0.11216804385185242, "learning_rate": 5.279389078826635e-09, "loss": 0.0017, "step": 241350 }, { "epoch": 1.9737498466696652, "grad_norm": 0.0017841545632109046, "learning_rate": 5.246654487491843e-09, "loss": 0.0009, "step": 241360 }, { "epoch": 1.9738316228482642, "grad_norm": 0.036699146032333374, "learning_rate": 5.214021643342171e-09, "loss": 0.0007, "step": 241370 }, { "epoch": 1.9739133990268636, "grad_norm": 0.048677898943424225, "learning_rate": 5.1814905470432e-09, "loss": 0.0005, "step": 241380 }, { "epoch": 1.9739951752054625, "grad_norm": 0.0009051343076862395, "learning_rate": 5.149061199257177e-09, "loss": 0.0005, "step": 241390 }, { "epoch": 1.974076951384062, "grad_norm": 0.0052076298743486404, "learning_rate": 5.1167336006452405e-09, "loss": 0.0024, "step": 241400 }, { "epoch": 1.9741587275626609, "grad_norm": 0.038893625140190125, "learning_rate": 5.084507751864642e-09, "loss": 0.0005, "step": 241410 }, { "epoch": 1.9742405037412603, "grad_norm": 0.03347164765000343, "learning_rate": 5.05238365357319e-09, "loss": 0.0008, "step": 241420 }, { "epoch": 1.9743222799198592, "grad_norm": 0.00402004225179553, "learning_rate": 5.0203613064248035e-09, "loss": 0.001, "step": 241430 }, { "epoch": 1.9744040560984586, "grad_norm": 0.0011595640098676085, "learning_rate": 4.98844071107174e-09, "loss": 0.0004, "step": 241440 }, { "epoch": 1.9744858322770575, "grad_norm": 0.04709523916244507, "learning_rate": 4.9566218681645905e-09, "loss": 0.0004, "step": 241450 }, { "epoch": 1.974567608455657, "grad_norm": 0.01428243052214384, "learning_rate": 4.924904778350614e-09, "loss": 0.0003, "step": 241460 }, { "epoch": 1.9746493846342559, "grad_norm": 0.007314295973628759, "learning_rate": 4.893289442277071e-09, "loss": 0.0005, "step": 241470 }, { "epoch": 1.9747311608128553, "grad_norm": 0.05621631443500519, "learning_rate": 4.861775860587337e-09, "loss": 0.002, "step": 241480 }, { "epoch": 1.9748129369914544, "grad_norm": 0.10029825568199158, "learning_rate": 4.830364033923118e-09, "loss": 0.0011, "step": 241490 }, { "epoch": 1.9748947131700536, "grad_norm": 0.033434540033340454, "learning_rate": 4.799053962925016e-09, "loss": 0.0006, "step": 241500 }, { "epoch": 1.9749764893486528, "grad_norm": 0.005545210093259811, "learning_rate": 4.7678456482302956e-09, "loss": 0.0004, "step": 241510 }, { "epoch": 1.975058265527252, "grad_norm": 0.09923939406871796, "learning_rate": 4.736739090475118e-09, "loss": 0.0009, "step": 241520 }, { "epoch": 1.9751400417058511, "grad_norm": 0.028972722589969635, "learning_rate": 4.705734290292862e-09, "loss": 0.0005, "step": 241530 }, { "epoch": 1.9752218178844503, "grad_norm": 0.05353650078177452, "learning_rate": 4.674831248314693e-09, "loss": 0.001, "step": 241540 }, { "epoch": 1.9753035940630495, "grad_norm": 0.011617948301136494, "learning_rate": 4.644029965171215e-09, "loss": 0.0004, "step": 241550 }, { "epoch": 1.9753853702416486, "grad_norm": 0.01929154433310032, "learning_rate": 4.61333044148915e-09, "loss": 0.0004, "step": 241560 }, { "epoch": 1.9754671464202478, "grad_norm": 0.06845477223396301, "learning_rate": 4.582732677893553e-09, "loss": 0.0006, "step": 241570 }, { "epoch": 1.975548922598847, "grad_norm": 0.022806446999311447, "learning_rate": 4.552236675008925e-09, "loss": 0.0003, "step": 241580 }, { "epoch": 1.9756306987774461, "grad_norm": 0.005350159481167793, "learning_rate": 4.521842433455326e-09, "loss": 0.0003, "step": 241590 }, { "epoch": 1.9757124749560453, "grad_norm": 0.02332056686282158, "learning_rate": 4.491549953852259e-09, "loss": 0.0003, "step": 241600 }, { "epoch": 1.9757942511346445, "grad_norm": 0.02349778823554516, "learning_rate": 4.461359236817009e-09, "loss": 0.0005, "step": 241610 }, { "epoch": 1.9758760273132436, "grad_norm": 0.009193314239382744, "learning_rate": 4.431270282964084e-09, "loss": 0.0004, "step": 241620 }, { "epoch": 1.9759578034918428, "grad_norm": 0.04008142277598381, "learning_rate": 4.4012830929074375e-09, "loss": 0.0005, "step": 241630 }, { "epoch": 1.976039579670442, "grad_norm": 0.0015981694450601935, "learning_rate": 4.3713976672571375e-09, "loss": 0.0005, "step": 241640 }, { "epoch": 1.9761213558490411, "grad_norm": 0.048106320202350616, "learning_rate": 4.341614006621586e-09, "loss": 0.0007, "step": 241650 }, { "epoch": 1.9762031320276403, "grad_norm": 0.011358382180333138, "learning_rate": 4.31193211160863e-09, "loss": 0.0007, "step": 241660 }, { "epoch": 1.9762849082062395, "grad_norm": 0.047565165907144547, "learning_rate": 4.282351982821675e-09, "loss": 0.0005, "step": 241670 }, { "epoch": 1.9763666843848386, "grad_norm": 0.03898319602012634, "learning_rate": 4.252873620864683e-09, "loss": 0.0005, "step": 241680 }, { "epoch": 1.976448460563438, "grad_norm": 0.02796461619436741, "learning_rate": 4.2234970263366205e-09, "loss": 0.0005, "step": 241690 }, { "epoch": 1.976530236742037, "grad_norm": 0.06606664508581161, "learning_rate": 4.194222199837561e-09, "loss": 0.0005, "step": 241700 }, { "epoch": 1.9766120129206364, "grad_norm": 0.00037103466456755996, "learning_rate": 4.165049141962585e-09, "loss": 0.0009, "step": 241710 }, { "epoch": 1.9766937890992353, "grad_norm": 0.042293794453144073, "learning_rate": 4.135977853306217e-09, "loss": 0.0006, "step": 241720 }, { "epoch": 1.9767755652778347, "grad_norm": 0.05601499229669571, "learning_rate": 4.1070083344607605e-09, "loss": 0.0004, "step": 241730 }, { "epoch": 1.9768573414564337, "grad_norm": 0.021409180015325546, "learning_rate": 4.0781405860168545e-09, "loss": 0.0005, "step": 241740 }, { "epoch": 1.976939117635033, "grad_norm": 0.037012193351984024, "learning_rate": 4.049374608561807e-09, "loss": 0.0004, "step": 241750 }, { "epoch": 1.977020893813632, "grad_norm": 0.008575026877224445, "learning_rate": 4.020710402681816e-09, "loss": 0.0005, "step": 241760 }, { "epoch": 1.9771026699922314, "grad_norm": 0.11026008427143097, "learning_rate": 3.992147968961413e-09, "loss": 0.0009, "step": 241770 }, { "epoch": 1.9771844461708303, "grad_norm": 0.010968420654535294, "learning_rate": 3.963687307981245e-09, "loss": 0.0007, "step": 241780 }, { "epoch": 1.9772662223494297, "grad_norm": 0.05607381463050842, "learning_rate": 3.93532842032196e-09, "loss": 0.0009, "step": 241790 }, { "epoch": 1.9773479985280287, "grad_norm": 0.212687686085701, "learning_rate": 3.907071306561428e-09, "loss": 0.0006, "step": 241800 }, { "epoch": 1.977429774706628, "grad_norm": 0.02276325784623623, "learning_rate": 3.878915967274743e-09, "loss": 0.0003, "step": 241810 }, { "epoch": 1.977511550885227, "grad_norm": 0.026725925505161285, "learning_rate": 3.8508624030353384e-09, "loss": 0.0006, "step": 241820 }, { "epoch": 1.9775933270638264, "grad_norm": 0.02614242024719715, "learning_rate": 3.8229106144149764e-09, "loss": 0.0004, "step": 241830 }, { "epoch": 1.9776751032424253, "grad_norm": 0.003523143706843257, "learning_rate": 3.7950606019832024e-09, "loss": 0.0004, "step": 241840 }, { "epoch": 1.9777568794210247, "grad_norm": 0.016170550137758255, "learning_rate": 3.767312366306786e-09, "loss": 0.0007, "step": 241850 }, { "epoch": 1.9778386555996237, "grad_norm": 0.001389918033964932, "learning_rate": 3.73966590795194e-09, "loss": 0.0006, "step": 241860 }, { "epoch": 1.977920431778223, "grad_norm": 0.013995314948260784, "learning_rate": 3.7121212274804365e-09, "loss": 0.0001, "step": 241870 }, { "epoch": 1.978002207956822, "grad_norm": 0.0157009307295084, "learning_rate": 3.68467832545516e-09, "loss": 0.0009, "step": 241880 }, { "epoch": 1.9780839841354214, "grad_norm": 0.05680796876549721, "learning_rate": 3.6573372024334417e-09, "loss": 0.0003, "step": 241890 }, { "epoch": 1.9781657603140204, "grad_norm": 0.033381905406713486, "learning_rate": 3.6300978589731695e-09, "loss": 0.0005, "step": 241900 }, { "epoch": 1.9782475364926198, "grad_norm": 0.016352137550711632, "learning_rate": 3.602960295629454e-09, "loss": 0.0007, "step": 241910 }, { "epoch": 1.978329312671219, "grad_norm": 0.0358956903219223, "learning_rate": 3.5759245129546315e-09, "loss": 0.0004, "step": 241920 }, { "epoch": 1.978411088849818, "grad_norm": 0.007845875807106495, "learning_rate": 3.548990511499373e-09, "loss": 0.0003, "step": 241930 }, { "epoch": 1.9784928650284173, "grad_norm": 0.06882649660110474, "learning_rate": 3.522158291812128e-09, "loss": 0.0008, "step": 241940 }, { "epoch": 1.9785746412070164, "grad_norm": 0.002018315950408578, "learning_rate": 3.4954278544402364e-09, "loss": 0.0005, "step": 241950 }, { "epoch": 1.9786564173856156, "grad_norm": 0.061756931245326996, "learning_rate": 3.468799199927708e-09, "loss": 0.0009, "step": 241960 }, { "epoch": 1.9787381935642148, "grad_norm": 0.023102276027202606, "learning_rate": 3.442272328817442e-09, "loss": 0.0009, "step": 241970 }, { "epoch": 1.978819969742814, "grad_norm": 0.040346067398786545, "learning_rate": 3.4158472416495616e-09, "loss": 0.0006, "step": 241980 }, { "epoch": 1.978901745921413, "grad_norm": 0.05740728601813316, "learning_rate": 3.3895239389619693e-09, "loss": 0.0008, "step": 241990 }, { "epoch": 1.9789835221000123, "grad_norm": 0.05267826095223427, "learning_rate": 3.3633024212909036e-09, "loss": 0.0015, "step": 242000 }, { "epoch": 1.9790652982786114, "grad_norm": 0.0328393317759037, "learning_rate": 3.3371826891714922e-09, "loss": 0.0003, "step": 242010 }, { "epoch": 1.9791470744572106, "grad_norm": 0.03796203434467316, "learning_rate": 3.3111647431349756e-09, "loss": 0.0012, "step": 242020 }, { "epoch": 1.9792288506358098, "grad_norm": 0.00634622760117054, "learning_rate": 3.2852485837114865e-09, "loss": 0.0008, "step": 242030 }, { "epoch": 1.979310626814409, "grad_norm": 0.03088500164449215, "learning_rate": 3.2594342114289356e-09, "loss": 0.0025, "step": 242040 }, { "epoch": 1.9793924029930081, "grad_norm": 0.012569415383040905, "learning_rate": 3.233721626813013e-09, "loss": 0.0006, "step": 242050 }, { "epoch": 1.9794741791716073, "grad_norm": 0.04237579554319382, "learning_rate": 3.2081108303883003e-09, "loss": 0.001, "step": 242060 }, { "epoch": 1.9795559553502065, "grad_norm": 0.015612710267305374, "learning_rate": 3.182601822675491e-09, "loss": 0.0007, "step": 242070 }, { "epoch": 1.9796377315288056, "grad_norm": 0.0172442514449358, "learning_rate": 3.1571946041952795e-09, "loss": 0.0008, "step": 242080 }, { "epoch": 1.9797195077074048, "grad_norm": 0.019258227199316025, "learning_rate": 3.1318891754639203e-09, "loss": 0.0005, "step": 242090 }, { "epoch": 1.979801283886004, "grad_norm": 0.05103430151939392, "learning_rate": 3.1066855369982217e-09, "loss": 0.0006, "step": 242100 }, { "epoch": 1.9798830600646031, "grad_norm": 0.09772294759750366, "learning_rate": 3.081583689311107e-09, "loss": 0.0006, "step": 242110 }, { "epoch": 1.9799648362432025, "grad_norm": 0.00986001268029213, "learning_rate": 3.0565836329132793e-09, "loss": 0.015, "step": 242120 }, { "epoch": 1.9800466124218015, "grad_norm": 0.027505286037921906, "learning_rate": 3.031685368314885e-09, "loss": 0.0005, "step": 242130 }, { "epoch": 1.9801283886004009, "grad_norm": 0.05544930696487427, "learning_rate": 3.0068888960227417e-09, "loss": 0.0006, "step": 242140 }, { "epoch": 1.9802101647789998, "grad_norm": 0.00381863652728498, "learning_rate": 2.982194216542e-09, "loss": 0.0005, "step": 242150 }, { "epoch": 1.9802919409575992, "grad_norm": 0.02804141491651535, "learning_rate": 2.9576013303761477e-09, "loss": 0.0008, "step": 242160 }, { "epoch": 1.9803737171361981, "grad_norm": 0.029790468513965607, "learning_rate": 2.9331102380253386e-09, "loss": 0.0004, "step": 242170 }, { "epoch": 1.9804554933147975, "grad_norm": 0.015405683778226376, "learning_rate": 2.9087209399886183e-09, "loss": 0.0005, "step": 242180 }, { "epoch": 1.9805372694933965, "grad_norm": 0.007714676205068827, "learning_rate": 2.8844334367633673e-09, "loss": 0.0015, "step": 242190 }, { "epoch": 1.9806190456719959, "grad_norm": 0.03098832629621029, "learning_rate": 2.86024772884419e-09, "loss": 0.0007, "step": 242200 }, { "epoch": 1.9807008218505948, "grad_norm": 0.051364727318286896, "learning_rate": 2.8361638167234697e-09, "loss": 0.0004, "step": 242210 }, { "epoch": 1.9807825980291942, "grad_norm": 0.0095879677683115, "learning_rate": 2.81218170089248e-09, "loss": 0.0004, "step": 242220 }, { "epoch": 1.9808643742077932, "grad_norm": 0.036237526684999466, "learning_rate": 2.7883013818386094e-09, "loss": 0.0004, "step": 242230 }, { "epoch": 1.9809461503863925, "grad_norm": 0.04136117920279503, "learning_rate": 2.7645228600492456e-09, "loss": 0.0006, "step": 242240 }, { "epoch": 1.9810279265649915, "grad_norm": 0.01858515851199627, "learning_rate": 2.740846136009001e-09, "loss": 0.0002, "step": 242250 }, { "epoch": 1.9811097027435909, "grad_norm": 0.05775732919573784, "learning_rate": 2.7172712101991573e-09, "loss": 0.0005, "step": 242260 }, { "epoch": 1.9811914789221898, "grad_norm": 0.014053824357688427, "learning_rate": 2.6937980831004405e-09, "loss": 0.0004, "step": 242270 }, { "epoch": 1.9812732551007892, "grad_norm": 0.08364515006542206, "learning_rate": 2.6704267551913575e-09, "loss": 0.0003, "step": 242280 }, { "epoch": 1.9813550312793882, "grad_norm": 0.02703542448580265, "learning_rate": 2.6471572269470836e-09, "loss": 0.0003, "step": 242290 }, { "epoch": 1.9814368074579876, "grad_norm": 0.051156312227249146, "learning_rate": 2.6239894988427938e-09, "loss": 0.001, "step": 242300 }, { "epoch": 1.9815185836365865, "grad_norm": 0.029574209824204445, "learning_rate": 2.600923571349778e-09, "loss": 0.0003, "step": 242310 }, { "epoch": 1.981600359815186, "grad_norm": 0.0020739648025482893, "learning_rate": 2.577959444938216e-09, "loss": 0.0004, "step": 242320 }, { "epoch": 1.981682135993785, "grad_norm": 0.029199261218309402, "learning_rate": 2.5550971200755117e-09, "loss": 0.0004, "step": 242330 }, { "epoch": 1.9817639121723842, "grad_norm": 0.02064715325832367, "learning_rate": 2.532336597227958e-09, "loss": 0.0004, "step": 242340 }, { "epoch": 1.9818456883509834, "grad_norm": 0.05951239913702011, "learning_rate": 2.509677876858518e-09, "loss": 0.0004, "step": 242350 }, { "epoch": 1.9819274645295826, "grad_norm": 0.005541054531931877, "learning_rate": 2.4871209594296008e-09, "loss": 0.0004, "step": 242360 }, { "epoch": 1.9820092407081817, "grad_norm": 0.00892571359872818, "learning_rate": 2.4646658453997275e-09, "loss": 0.0012, "step": 242370 }, { "epoch": 1.982091016886781, "grad_norm": 0.017268633469939232, "learning_rate": 2.442312535226865e-09, "loss": 0.0004, "step": 242380 }, { "epoch": 1.98217279306538, "grad_norm": 0.07043632864952087, "learning_rate": 2.4200610293662054e-09, "loss": 0.0004, "step": 242390 }, { "epoch": 1.9822545692439792, "grad_norm": 0.05330367386341095, "learning_rate": 2.397911328271274e-09, "loss": 0.0005, "step": 242400 }, { "epoch": 1.9823363454225784, "grad_norm": 0.059993572533130646, "learning_rate": 2.375863432393377e-09, "loss": 0.0012, "step": 242410 }, { "epoch": 1.9824181216011776, "grad_norm": 0.009167298674583435, "learning_rate": 2.3539173421810447e-09, "loss": 0.0006, "step": 242420 }, { "epoch": 1.9824998977797768, "grad_norm": 0.002495533786714077, "learning_rate": 2.3320730580816964e-09, "loss": 0.0018, "step": 242430 }, { "epoch": 1.982581673958376, "grad_norm": 0.05922267585992813, "learning_rate": 2.310330580540532e-09, "loss": 0.0009, "step": 242440 }, { "epoch": 1.982663450136975, "grad_norm": 0.013036827556788921, "learning_rate": 2.28868991000053e-09, "loss": 0.0024, "step": 242450 }, { "epoch": 1.9827452263155743, "grad_norm": 0.061149388551712036, "learning_rate": 2.267151046901894e-09, "loss": 0.0005, "step": 242460 }, { "epoch": 1.9828270024941734, "grad_norm": 0.03200455382466316, "learning_rate": 2.2457139916837177e-09, "loss": 0.0008, "step": 242470 }, { "epoch": 1.9829087786727726, "grad_norm": 0.053347397595644, "learning_rate": 2.2243787447828736e-09, "loss": 0.0018, "step": 242480 }, { "epoch": 1.9829905548513718, "grad_norm": 0.04626654461026192, "learning_rate": 2.2031453066340137e-09, "loss": 0.0005, "step": 242490 }, { "epoch": 1.983072331029971, "grad_norm": 0.0084682060405612, "learning_rate": 2.182013677669015e-09, "loss": 0.0005, "step": 242500 }, { "epoch": 1.98315410720857, "grad_norm": 0.01655169017612934, "learning_rate": 2.1609838583191987e-09, "loss": 0.0019, "step": 242510 }, { "epoch": 1.9832358833871693, "grad_norm": 0.07208152115345001, "learning_rate": 2.1400558490125565e-09, "loss": 0.0022, "step": 242520 }, { "epoch": 1.9833176595657684, "grad_norm": 0.021671434864401817, "learning_rate": 2.119229650175414e-09, "loss": 0.0007, "step": 242530 }, { "epoch": 1.9833994357443676, "grad_norm": 0.1549607515335083, "learning_rate": 2.098505262231876e-09, "loss": 0.0013, "step": 242540 }, { "epoch": 1.983481211922967, "grad_norm": 0.061715275049209595, "learning_rate": 2.077882685604382e-09, "loss": 0.0006, "step": 242550 }, { "epoch": 1.983562988101566, "grad_norm": 0.041468583047389984, "learning_rate": 2.0573619207125974e-09, "loss": 0.0008, "step": 242560 }, { "epoch": 1.9836447642801653, "grad_norm": 0.031454090029001236, "learning_rate": 2.03694296797563e-09, "loss": 0.0003, "step": 242570 }, { "epoch": 1.9837265404587643, "grad_norm": 0.012856340035796165, "learning_rate": 2.0166258278081498e-09, "loss": 0.0001, "step": 242580 }, { "epoch": 1.9838083166373637, "grad_norm": 0.06417667120695114, "learning_rate": 1.9964105006248236e-09, "loss": 0.0005, "step": 242590 }, { "epoch": 1.9838900928159626, "grad_norm": 0.05148879066109657, "learning_rate": 1.9762969868369895e-09, "loss": 0.0007, "step": 242600 }, { "epoch": 1.983971868994562, "grad_norm": 0.01044284924864769, "learning_rate": 1.9562852868548752e-09, "loss": 0.0002, "step": 242610 }, { "epoch": 1.984053645173161, "grad_norm": 0.0017677185824140906, "learning_rate": 1.9363754010853777e-09, "loss": 0.0007, "step": 242620 }, { "epoch": 1.9841354213517604, "grad_norm": 0.028897859156131744, "learning_rate": 1.9165673299348376e-09, "loss": 0.0008, "step": 242630 }, { "epoch": 1.9842171975303593, "grad_norm": 0.028698192909359932, "learning_rate": 1.8968610738062666e-09, "loss": 0.0006, "step": 242640 }, { "epoch": 1.9842989737089587, "grad_norm": 0.0387558750808239, "learning_rate": 1.8772566331015653e-09, "loss": 0.0005, "step": 242650 }, { "epoch": 1.9843807498875576, "grad_norm": 0.0370880663394928, "learning_rate": 1.8577540082198587e-09, "loss": 0.0003, "step": 242660 }, { "epoch": 1.984462526066157, "grad_norm": 0.01423011813312769, "learning_rate": 1.8383531995580516e-09, "loss": 0.0005, "step": 242670 }, { "epoch": 1.984544302244756, "grad_norm": 0.01578783057630062, "learning_rate": 1.819054207511939e-09, "loss": 0.0004, "step": 242680 }, { "epoch": 1.9846260784233554, "grad_norm": 0.03593045100569725, "learning_rate": 1.799857032474539e-09, "loss": 0.0006, "step": 242690 }, { "epoch": 1.9847078546019543, "grad_norm": 0.030654286965727806, "learning_rate": 1.7807616748366507e-09, "loss": 0.0004, "step": 242700 }, { "epoch": 1.9847896307805537, "grad_norm": 0.10449978709220886, "learning_rate": 1.761768134987407e-09, "loss": 0.001, "step": 242710 }, { "epoch": 1.9848714069591527, "grad_norm": 0.01600361242890358, "learning_rate": 1.7428764133137211e-09, "loss": 0.0005, "step": 242720 }, { "epoch": 1.984953183137752, "grad_norm": 0.051650140434503555, "learning_rate": 1.7240865102002846e-09, "loss": 0.0006, "step": 242730 }, { "epoch": 1.985034959316351, "grad_norm": 0.004527616314589977, "learning_rate": 1.7053984260301249e-09, "loss": 0.001, "step": 242740 }, { "epoch": 1.9851167354949504, "grad_norm": 0.02015996351838112, "learning_rate": 1.6868121611840482e-09, "loss": 0.0005, "step": 242750 }, { "epoch": 1.9851985116735495, "grad_norm": 0.009762981906533241, "learning_rate": 1.6683277160400858e-09, "loss": 0.0006, "step": 242760 }, { "epoch": 1.9852802878521487, "grad_norm": 0.041216764599084854, "learning_rate": 1.649945090975713e-09, "loss": 0.0006, "step": 242770 }, { "epoch": 1.9853620640307479, "grad_norm": 0.027970533818006516, "learning_rate": 1.6316642863639653e-09, "loss": 0.0005, "step": 242780 }, { "epoch": 1.985443840209347, "grad_norm": 0.04153378680348396, "learning_rate": 1.6134853025784325e-09, "loss": 0.0008, "step": 242790 }, { "epoch": 1.9855256163879462, "grad_norm": 0.03898989409208298, "learning_rate": 1.595408139989374e-09, "loss": 0.0011, "step": 242800 }, { "epoch": 1.9856073925665454, "grad_norm": 0.005557865835726261, "learning_rate": 1.5774327989648286e-09, "loss": 0.0005, "step": 242810 }, { "epoch": 1.9856891687451446, "grad_norm": 0.01333558838814497, "learning_rate": 1.559559279870615e-09, "loss": 0.0003, "step": 242820 }, { "epoch": 1.9857709449237437, "grad_norm": 0.04999687895178795, "learning_rate": 1.5417875830708862e-09, "loss": 0.0006, "step": 242830 }, { "epoch": 1.985852721102343, "grad_norm": 0.04976752772927284, "learning_rate": 1.52411770892813e-09, "loss": 0.0005, "step": 242840 }, { "epoch": 1.985934497280942, "grad_norm": 0.16817842423915863, "learning_rate": 1.506549657802614e-09, "loss": 0.0009, "step": 242850 }, { "epoch": 1.9860162734595412, "grad_norm": 0.0031711917836219072, "learning_rate": 1.4890834300507196e-09, "loss": 0.0005, "step": 242860 }, { "epoch": 1.9860980496381404, "grad_norm": 0.06085673347115517, "learning_rate": 1.4717190260299386e-09, "loss": 0.0009, "step": 242870 }, { "epoch": 1.9861798258167396, "grad_norm": 0.011583678424358368, "learning_rate": 1.4544564460927667e-09, "loss": 0.0006, "step": 242880 }, { "epoch": 1.9862616019953387, "grad_norm": 0.021990571171045303, "learning_rate": 1.4372956905917e-09, "loss": 0.0004, "step": 242890 }, { "epoch": 1.986343378173938, "grad_norm": 0.004719884134829044, "learning_rate": 1.4202367598753485e-09, "loss": 0.0008, "step": 242900 }, { "epoch": 1.986425154352537, "grad_norm": 0.0348055474460125, "learning_rate": 1.4032796542923221e-09, "loss": 0.0007, "step": 242910 }, { "epoch": 1.9865069305311363, "grad_norm": 0.11047333478927612, "learning_rate": 1.386424374187345e-09, "loss": 0.0009, "step": 242920 }, { "epoch": 1.9865887067097354, "grad_norm": 0.21884146332740784, "learning_rate": 1.3696709199045866e-09, "loss": 0.0015, "step": 242930 }, { "epoch": 1.9866704828883346, "grad_norm": 0.06295128911733627, "learning_rate": 1.3530192917837748e-09, "loss": 0.0007, "step": 242940 }, { "epoch": 1.9867522590669338, "grad_norm": 0.056698597967624664, "learning_rate": 1.3364694901657482e-09, "loss": 0.0005, "step": 242950 }, { "epoch": 1.986834035245533, "grad_norm": 0.06841948628425598, "learning_rate": 1.3200215153869044e-09, "loss": 0.0011, "step": 242960 }, { "epoch": 1.986915811424132, "grad_norm": 0.04968063533306122, "learning_rate": 1.3036753677819758e-09, "loss": 0.0006, "step": 242970 }, { "epoch": 1.9869975876027315, "grad_norm": 0.11358007788658142, "learning_rate": 1.2874310476845842e-09, "loss": 0.001, "step": 242980 }, { "epoch": 1.9870793637813304, "grad_norm": 0.04311491549015045, "learning_rate": 1.271288555425576e-09, "loss": 0.0003, "step": 242990 }, { "epoch": 1.9871611399599298, "grad_norm": 0.017451776191592216, "learning_rate": 1.2552478913330223e-09, "loss": 0.0005, "step": 243000 }, { "epoch": 1.9872429161385288, "grad_norm": 0.027856240049004555, "learning_rate": 1.239309055734439e-09, "loss": 0.0006, "step": 243010 }, { "epoch": 1.9873246923171282, "grad_norm": 0.0031799080315977335, "learning_rate": 1.2234720489545659e-09, "loss": 0.0003, "step": 243020 }, { "epoch": 1.987406468495727, "grad_norm": 0.025735342875123024, "learning_rate": 1.2077368713159233e-09, "loss": 0.0006, "step": 243030 }, { "epoch": 1.9874882446743265, "grad_norm": 0.017706649377942085, "learning_rate": 1.19210352313881e-09, "loss": 0.0015, "step": 243040 }, { "epoch": 1.9875700208529254, "grad_norm": 0.02277296781539917, "learning_rate": 1.1765720047413054e-09, "loss": 0.0004, "step": 243050 }, { "epoch": 1.9876517970315248, "grad_norm": 0.013356395065784454, "learning_rate": 1.161142316440933e-09, "loss": 0.0005, "step": 243060 }, { "epoch": 1.9877335732101238, "grad_norm": 0.07796961069107056, "learning_rate": 1.1458144585507759e-09, "loss": 0.0006, "step": 243070 }, { "epoch": 1.9878153493887232, "grad_norm": 0.03563355281949043, "learning_rate": 1.1305884313839165e-09, "loss": 0.0005, "step": 243080 }, { "epoch": 1.9878971255673221, "grad_norm": 0.0035805441439151764, "learning_rate": 1.1154642352501077e-09, "loss": 0.0007, "step": 243090 }, { "epoch": 1.9879789017459215, "grad_norm": 0.0982377752661705, "learning_rate": 1.1004418704579911e-09, "loss": 0.0005, "step": 243100 }, { "epoch": 1.9880606779245205, "grad_norm": 0.039969924837350845, "learning_rate": 1.085521337312878e-09, "loss": 0.0007, "step": 243110 }, { "epoch": 1.9881424541031198, "grad_norm": 0.013728262856602669, "learning_rate": 1.07070263611897e-09, "loss": 0.0005, "step": 243120 }, { "epoch": 1.9882242302817188, "grad_norm": 0.05014362558722496, "learning_rate": 1.055985767178247e-09, "loss": 0.0008, "step": 243130 }, { "epoch": 1.9883060064603182, "grad_norm": 0.031403448432683945, "learning_rate": 1.0413707307904698e-09, "loss": 0.0007, "step": 243140 }, { "epoch": 1.9883877826389171, "grad_norm": 0.09819818288087845, "learning_rate": 1.0268575272531777e-09, "loss": 0.0006, "step": 243150 }, { "epoch": 1.9884695588175165, "grad_norm": 0.04715341702103615, "learning_rate": 1.0124461568622457e-09, "loss": 0.0006, "step": 243160 }, { "epoch": 1.9885513349961155, "grad_norm": 0.025960048660635948, "learning_rate": 9.981366199113273e-10, "loss": 0.0005, "step": 243170 }, { "epoch": 1.9886331111747149, "grad_norm": 0.023748718202114105, "learning_rate": 9.839289166918565e-10, "loss": 0.0011, "step": 243180 }, { "epoch": 1.988714887353314, "grad_norm": 0.023884590715169907, "learning_rate": 9.69823047493046e-10, "loss": 0.0003, "step": 243190 }, { "epoch": 1.9887966635319132, "grad_norm": 0.22337070107460022, "learning_rate": 9.558190126024436e-10, "loss": 0.0008, "step": 243200 }, { "epoch": 1.9888784397105124, "grad_norm": 0.004158542025834322, "learning_rate": 9.419168123048218e-10, "loss": 0.0005, "step": 243210 }, { "epoch": 1.9889602158891115, "grad_norm": 0.031803011894226074, "learning_rate": 9.281164468838422e-10, "loss": 0.0006, "step": 243220 }, { "epoch": 1.9890419920677107, "grad_norm": 0.03658867999911308, "learning_rate": 9.144179166209466e-10, "loss": 0.0004, "step": 243230 }, { "epoch": 1.9891237682463099, "grad_norm": 0.0011914949864149094, "learning_rate": 9.008212217948009e-10, "loss": 0.0005, "step": 243240 }, { "epoch": 1.989205544424909, "grad_norm": 0.00817101076245308, "learning_rate": 8.873263626824058e-10, "loss": 0.0007, "step": 243250 }, { "epoch": 1.9892873206035082, "grad_norm": 0.08919080346822739, "learning_rate": 8.739333395585415e-10, "loss": 0.0005, "step": 243260 }, { "epoch": 1.9893690967821074, "grad_norm": 0.02406563237309456, "learning_rate": 8.606421526957676e-10, "loss": 0.0006, "step": 243270 }, { "epoch": 1.9894508729607066, "grad_norm": 0.03705612197518349, "learning_rate": 8.474528023655338e-10, "loss": 0.0007, "step": 243280 }, { "epoch": 1.9895326491393057, "grad_norm": 0.0035344595089554787, "learning_rate": 8.343652888365139e-10, "loss": 0.0007, "step": 243290 }, { "epoch": 1.9896144253179049, "grad_norm": 0.007035909686237574, "learning_rate": 8.213796123746065e-10, "loss": 0.0004, "step": 243300 }, { "epoch": 1.989696201496504, "grad_norm": 0.0031052962876856327, "learning_rate": 8.084957732445997e-10, "loss": 0.0004, "step": 243310 }, { "epoch": 1.9897779776751032, "grad_norm": 0.016611140221357346, "learning_rate": 7.957137717096164e-10, "loss": 0.0005, "step": 243320 }, { "epoch": 1.9898597538537024, "grad_norm": 0.03267759084701538, "learning_rate": 7.830336080294488e-10, "loss": 0.0009, "step": 243330 }, { "epoch": 1.9899415300323016, "grad_norm": 0.12248150259256363, "learning_rate": 7.704552824622236e-10, "loss": 0.0007, "step": 243340 }, { "epoch": 1.9900233062109007, "grad_norm": 0.0010312807280570269, "learning_rate": 7.579787952644024e-10, "loss": 0.0007, "step": 243350 }, { "epoch": 1.9901050823895, "grad_norm": 0.055827487260103226, "learning_rate": 7.456041466896713e-10, "loss": 0.0008, "step": 243360 }, { "epoch": 1.990186858568099, "grad_norm": 0.0059418766759335995, "learning_rate": 7.33331336991161e-10, "loss": 0.0003, "step": 243370 }, { "epoch": 1.9902686347466982, "grad_norm": 0.014492794871330261, "learning_rate": 7.211603664181166e-10, "loss": 0.0006, "step": 243380 }, { "epoch": 1.9903504109252974, "grad_norm": 0.017568137496709824, "learning_rate": 7.09091235218673e-10, "loss": 0.0003, "step": 243390 }, { "epoch": 1.9904321871038966, "grad_norm": 0.030116204172372818, "learning_rate": 6.971239436387445e-10, "loss": 0.0004, "step": 243400 }, { "epoch": 1.990513963282496, "grad_norm": 0.0008592420490458608, "learning_rate": 6.85258491922025e-10, "loss": 0.0004, "step": 243410 }, { "epoch": 1.990595739461095, "grad_norm": 0.054695483297109604, "learning_rate": 6.734948803105434e-10, "loss": 0.0008, "step": 243420 }, { "epoch": 1.9906775156396943, "grad_norm": 0.06571916490793228, "learning_rate": 6.618331090429975e-10, "loss": 0.0004, "step": 243430 }, { "epoch": 1.9907592918182933, "grad_norm": 0.04663654789328575, "learning_rate": 6.502731783580851e-10, "loss": 0.0005, "step": 243440 }, { "epoch": 1.9908410679968926, "grad_norm": 0.0027421717531979084, "learning_rate": 6.388150884906185e-10, "loss": 0.0003, "step": 243450 }, { "epoch": 1.9909228441754916, "grad_norm": 0.07200174033641815, "learning_rate": 6.274588396742997e-10, "loss": 0.0007, "step": 243460 }, { "epoch": 1.991004620354091, "grad_norm": 0.0012981770560145378, "learning_rate": 6.162044321400551e-10, "loss": 0.0019, "step": 243470 }, { "epoch": 1.99108639653269, "grad_norm": 0.012426537461578846, "learning_rate": 6.050518661177007e-10, "loss": 0.0008, "step": 243480 }, { "epoch": 1.9911681727112893, "grad_norm": 0.06304698437452316, "learning_rate": 5.940011418337221e-10, "loss": 0.0009, "step": 243490 }, { "epoch": 1.9912499488898883, "grad_norm": 0.03826666623353958, "learning_rate": 5.830522595140498e-10, "loss": 0.001, "step": 243500 }, { "epoch": 1.9913317250684877, "grad_norm": 0.023413872346282005, "learning_rate": 5.722052193812832e-10, "loss": 0.0013, "step": 243510 }, { "epoch": 1.9914135012470866, "grad_norm": 0.038544636219739914, "learning_rate": 5.61460021656357e-10, "loss": 0.0009, "step": 243520 }, { "epoch": 1.991495277425686, "grad_norm": 0.06038980185985565, "learning_rate": 5.508166665585402e-10, "loss": 0.0004, "step": 243530 }, { "epoch": 1.991577053604285, "grad_norm": 0.028533192351460457, "learning_rate": 5.40275154303771e-10, "loss": 0.0008, "step": 243540 }, { "epoch": 1.9916588297828843, "grad_norm": 0.004153640475124121, "learning_rate": 5.298354851079879e-10, "loss": 0.0005, "step": 243550 }, { "epoch": 1.9917406059614833, "grad_norm": 0.007320370525121689, "learning_rate": 5.194976591826883e-10, "loss": 0.0006, "step": 243560 }, { "epoch": 1.9918223821400827, "grad_norm": 0.0586671382188797, "learning_rate": 5.092616767393699e-10, "loss": 0.0004, "step": 243570 }, { "epoch": 1.9919041583186816, "grad_norm": 0.05910743772983551, "learning_rate": 4.991275379861992e-10, "loss": 0.0007, "step": 243580 }, { "epoch": 1.991985934497281, "grad_norm": 0.06531307846307755, "learning_rate": 4.890952431291229e-10, "loss": 0.0005, "step": 243590 }, { "epoch": 1.99206771067588, "grad_norm": 0.027202334254980087, "learning_rate": 4.791647923735321e-10, "loss": 0.0013, "step": 243600 }, { "epoch": 1.9921494868544793, "grad_norm": 0.021506289020180702, "learning_rate": 4.693361859209322e-10, "loss": 0.0006, "step": 243610 }, { "epoch": 1.9922312630330785, "grad_norm": 0.020727775990962982, "learning_rate": 4.596094239717186e-10, "loss": 0.0007, "step": 243620 }, { "epoch": 1.9923130392116777, "grad_norm": 0.04822252318263054, "learning_rate": 4.49984506724066e-10, "loss": 0.0007, "step": 243630 }, { "epoch": 1.9923948153902769, "grad_norm": 0.010149694979190826, "learning_rate": 4.40461434374484e-10, "loss": 0.0006, "step": 243640 }, { "epoch": 1.992476591568876, "grad_norm": 0.06314530223608017, "learning_rate": 4.3104020711615127e-10, "loss": 0.0006, "step": 243650 }, { "epoch": 1.9925583677474752, "grad_norm": 0.010144185274839401, "learning_rate": 4.2172082514169156e-10, "loss": 0.0008, "step": 243660 }, { "epoch": 1.9926401439260744, "grad_norm": 0.021372929215431213, "learning_rate": 4.125032886403979e-10, "loss": 0.0009, "step": 243670 }, { "epoch": 1.9927219201046735, "grad_norm": 0.11169394850730896, "learning_rate": 4.03387597799898e-10, "loss": 0.0015, "step": 243680 }, { "epoch": 1.9928036962832727, "grad_norm": 0.04219764843583107, "learning_rate": 3.943737528067093e-10, "loss": 0.0006, "step": 243690 }, { "epoch": 1.9928854724618719, "grad_norm": 0.0021682525984942913, "learning_rate": 3.8546175384401865e-10, "loss": 0.0005, "step": 243700 }, { "epoch": 1.992967248640471, "grad_norm": 0.028812063857913017, "learning_rate": 3.766516010933474e-10, "loss": 0.0007, "step": 243710 }, { "epoch": 1.9930490248190702, "grad_norm": 0.01154189184308052, "learning_rate": 3.679432947339967e-10, "loss": 0.0002, "step": 243720 }, { "epoch": 1.9931308009976694, "grad_norm": 0.017821617424488068, "learning_rate": 3.593368349441573e-10, "loss": 0.0012, "step": 243730 }, { "epoch": 1.9932125771762685, "grad_norm": 0.0045397644862532616, "learning_rate": 3.508322218981342e-10, "loss": 0.0004, "step": 243740 }, { "epoch": 1.9932943533548677, "grad_norm": 0.00038278891588561237, "learning_rate": 3.4242945576912213e-10, "loss": 0.0002, "step": 243750 }, { "epoch": 1.9933761295334669, "grad_norm": 0.06083367392420769, "learning_rate": 3.3412853672920577e-10, "loss": 0.0012, "step": 243760 }, { "epoch": 1.993457905712066, "grad_norm": 0.046859681606292725, "learning_rate": 3.2592946494713895e-10, "loss": 0.0009, "step": 243770 }, { "epoch": 1.9935396818906652, "grad_norm": 0.06526057422161102, "learning_rate": 3.178322405894552e-10, "loss": 0.0005, "step": 243780 }, { "epoch": 1.9936214580692644, "grad_norm": 0.2505834996700287, "learning_rate": 3.0983686382213273e-10, "loss": 0.0004, "step": 243790 }, { "epoch": 1.9937032342478636, "grad_norm": 0.005824002902954817, "learning_rate": 3.019433348067091e-10, "loss": 0.0005, "step": 243800 }, { "epoch": 1.9937850104264627, "grad_norm": 0.062012385576963425, "learning_rate": 2.9415165370472175e-10, "loss": 0.0003, "step": 243810 }, { "epoch": 1.993866786605062, "grad_norm": 0.035107605159282684, "learning_rate": 2.8646182067493254e-10, "loss": 0.002, "step": 243820 }, { "epoch": 1.993948562783661, "grad_norm": 0.01660139113664627, "learning_rate": 2.7887383587443807e-10, "loss": 0.0005, "step": 243830 }, { "epoch": 1.9940303389622605, "grad_norm": 0.015313266776502132, "learning_rate": 2.7138769945644904e-10, "loss": 0.0004, "step": 243840 }, { "epoch": 1.9941121151408594, "grad_norm": 0.09416909515857697, "learning_rate": 2.640034115747314e-10, "loss": 0.0009, "step": 243850 }, { "epoch": 1.9941938913194588, "grad_norm": 0.014561254531145096, "learning_rate": 2.5672097237916526e-10, "loss": 0.0006, "step": 243860 }, { "epoch": 1.9942756674980577, "grad_norm": 0.006333979312330484, "learning_rate": 2.4954038201796535e-10, "loss": 0.0003, "step": 243870 }, { "epoch": 1.9943574436766571, "grad_norm": 0.06730221211910248, "learning_rate": 2.4246164063768116e-10, "loss": 0.0008, "step": 243880 }, { "epoch": 1.994439219855256, "grad_norm": 0.05210169777274132, "learning_rate": 2.354847483826417e-10, "loss": 0.0004, "step": 243890 }, { "epoch": 1.9945209960338555, "grad_norm": 0.0014734066789969802, "learning_rate": 2.286097053949554e-10, "loss": 0.0003, "step": 243900 }, { "epoch": 1.9946027722124544, "grad_norm": 0.07441649585962296, "learning_rate": 2.218365118145105e-10, "loss": 0.0007, "step": 243910 }, { "epoch": 1.9946845483910538, "grad_norm": 0.1003521978855133, "learning_rate": 2.1516516777897457e-10, "loss": 0.0005, "step": 243920 }, { "epoch": 1.9947663245696527, "grad_norm": 0.02432224527001381, "learning_rate": 2.0859567342434995e-10, "loss": 0.0005, "step": 243930 }, { "epoch": 1.9948481007482521, "grad_norm": 0.04885013401508331, "learning_rate": 2.0212802888497363e-10, "loss": 0.0014, "step": 243940 }, { "epoch": 1.994929876926851, "grad_norm": 0.02410965971648693, "learning_rate": 1.9576223429240704e-10, "loss": 0.0008, "step": 243950 }, { "epoch": 1.9950116531054505, "grad_norm": 0.010995727963745594, "learning_rate": 1.8949828977599117e-10, "loss": 0.0003, "step": 243960 }, { "epoch": 1.9950934292840494, "grad_norm": 0.02119249664247036, "learning_rate": 1.8333619546395676e-10, "loss": 0.0005, "step": 243970 }, { "epoch": 1.9951752054626488, "grad_norm": 0.03941359743475914, "learning_rate": 1.7727595148120392e-10, "loss": 0.0008, "step": 243980 }, { "epoch": 1.9952569816412478, "grad_norm": 0.01631036214530468, "learning_rate": 1.7131755795096738e-10, "loss": 0.0004, "step": 243990 }, { "epoch": 1.9953387578198472, "grad_norm": 0.007034262176603079, "learning_rate": 1.654610149953717e-10, "loss": 0.0006, "step": 244000 }, { "epoch": 1.9953387578198472, "eval_loss": 0.0010057429317384958, "eval_runtime": 5.3638, "eval_samples_per_second": 37.287, "eval_steps_per_second": 9.322, "step": 244000 }, { "epoch": 1.995420533998446, "grad_norm": 0.006109113804996014, "learning_rate": 1.5970632273376586e-10, "loss": 0.0004, "step": 244010 }, { "epoch": 1.9955023101770455, "grad_norm": 0.032708968967199326, "learning_rate": 1.5405348128272324e-10, "loss": 0.0004, "step": 244020 }, { "epoch": 1.9955840863556444, "grad_norm": 0.014827064238488674, "learning_rate": 1.4850249075770706e-10, "loss": 0.0015, "step": 244030 }, { "epoch": 1.9956658625342438, "grad_norm": 0.06450054049491882, "learning_rate": 1.4305335127140495e-10, "loss": 0.0008, "step": 244040 }, { "epoch": 1.995747638712843, "grad_norm": 0.032168082892894745, "learning_rate": 1.3770606293539434e-10, "loss": 0.0005, "step": 244050 }, { "epoch": 1.9958294148914422, "grad_norm": 0.008877459913492203, "learning_rate": 1.324606258584771e-10, "loss": 0.0004, "step": 244060 }, { "epoch": 1.9959111910700413, "grad_norm": 0.10466492176055908, "learning_rate": 1.2731704014778968e-10, "loss": 0.0005, "step": 244070 }, { "epoch": 1.9959929672486405, "grad_norm": 0.034350909292697906, "learning_rate": 1.22275305907138e-10, "loss": 0.0008, "step": 244080 }, { "epoch": 1.9960747434272397, "grad_norm": 0.02010415308177471, "learning_rate": 1.1733542324032787e-10, "loss": 0.0004, "step": 244090 }, { "epoch": 1.9961565196058388, "grad_norm": 0.007391198072582483, "learning_rate": 1.1249739224727941e-10, "loss": 0.0006, "step": 244100 }, { "epoch": 1.996238295784438, "grad_norm": 0.10426922887563705, "learning_rate": 1.0776121302680243e-10, "loss": 0.0008, "step": 244110 }, { "epoch": 1.9963200719630372, "grad_norm": 0.0241229347884655, "learning_rate": 1.0312688567548634e-10, "loss": 0.0005, "step": 244120 }, { "epoch": 1.9964018481416363, "grad_norm": 0.021904272958636284, "learning_rate": 9.859441028770012e-11, "loss": 0.0005, "step": 244130 }, { "epoch": 1.9964836243202355, "grad_norm": 0.03443886712193489, "learning_rate": 9.416378695503713e-11, "loss": 0.0005, "step": 244140 }, { "epoch": 1.9965654004988347, "grad_norm": 0.01572505570948124, "learning_rate": 8.983501576909081e-11, "loss": 0.0002, "step": 244150 }, { "epoch": 1.9966471766774339, "grad_norm": 0.02616344578564167, "learning_rate": 8.560809681701365e-11, "loss": 0.0004, "step": 244160 }, { "epoch": 1.996728952856033, "grad_norm": 0.01655670255422592, "learning_rate": 8.148303018540305e-11, "loss": 0.0002, "step": 244170 }, { "epoch": 1.9968107290346322, "grad_norm": 0.12532943487167358, "learning_rate": 7.745981595752572e-11, "loss": 0.0008, "step": 244180 }, { "epoch": 1.9968925052132314, "grad_norm": 0.03274392709136009, "learning_rate": 7.35384542166484e-11, "loss": 0.0007, "step": 244190 }, { "epoch": 1.9969742813918305, "grad_norm": 0.01467348076403141, "learning_rate": 6.971894504159693e-11, "loss": 0.0006, "step": 244200 }, { "epoch": 1.9970560575704297, "grad_norm": 0.01499939989298582, "learning_rate": 6.600128851064202e-11, "loss": 0.0004, "step": 244210 }, { "epoch": 1.9971378337490289, "grad_norm": 0.048474930226802826, "learning_rate": 6.238548469927885e-11, "loss": 0.0009, "step": 244220 }, { "epoch": 1.997219609927628, "grad_norm": 0.02971203625202179, "learning_rate": 5.887153368133724e-11, "loss": 0.0007, "step": 244230 }, { "epoch": 1.9973013861062272, "grad_norm": 0.041117820888757706, "learning_rate": 5.545943552842659e-11, "loss": 0.0006, "step": 244240 }, { "epoch": 1.9973831622848266, "grad_norm": 0.034854594618082047, "learning_rate": 5.214919030993581e-11, "loss": 0.0009, "step": 244250 }, { "epoch": 1.9974649384634255, "grad_norm": 0.06408319622278214, "learning_rate": 4.8940798093033426e-11, "loss": 0.0007, "step": 244260 }, { "epoch": 1.997546714642025, "grad_norm": 0.025305647403001785, "learning_rate": 4.5834258943777685e-11, "loss": 0.0003, "step": 244270 }, { "epoch": 1.9976284908206239, "grad_norm": 0.019120370969176292, "learning_rate": 4.28295729248962e-11, "loss": 0.0009, "step": 244280 }, { "epoch": 1.9977102669992233, "grad_norm": 0.013292333111166954, "learning_rate": 3.9926740098006346e-11, "loss": 0.0002, "step": 244290 }, { "epoch": 1.9977920431778222, "grad_norm": 0.013857093639671803, "learning_rate": 3.712576052139483e-11, "loss": 0.0004, "step": 244300 }, { "epoch": 1.9978738193564216, "grad_norm": 0.11521243304014206, "learning_rate": 3.4426634253348354e-11, "loss": 0.0008, "step": 244310 }, { "epoch": 1.9979555955350206, "grad_norm": 0.07520883530378342, "learning_rate": 3.182936134771275e-11, "loss": 0.0009, "step": 244320 }, { "epoch": 1.99803737171362, "grad_norm": 0.05592300370335579, "learning_rate": 2.933394185833383e-11, "loss": 0.0006, "step": 244330 }, { "epoch": 1.998119147892219, "grad_norm": 0.00841664057224989, "learning_rate": 2.6940375835171616e-11, "loss": 0.0005, "step": 244340 }, { "epoch": 1.9982009240708183, "grad_norm": 0.020405374467372894, "learning_rate": 2.464866332763105e-11, "loss": 0.0006, "step": 244350 }, { "epoch": 1.9982827002494172, "grad_norm": 0.04843279719352722, "learning_rate": 2.2458804381786382e-11, "loss": 0.0004, "step": 244360 }, { "epoch": 1.9983644764280166, "grad_norm": 0.0026495466008782387, "learning_rate": 2.037079904315675e-11, "loss": 0.0013, "step": 244370 }, { "epoch": 1.9984462526066156, "grad_norm": 0.005690338555723429, "learning_rate": 1.838464735393064e-11, "loss": 0.0007, "step": 244380 }, { "epoch": 1.998528028785215, "grad_norm": 0.17249616980552673, "learning_rate": 1.6500349354076072e-11, "loss": 0.0005, "step": 244390 }, { "epoch": 1.998609804963814, "grad_norm": 0.013904266990721226, "learning_rate": 1.4717905081895745e-11, "loss": 0.0008, "step": 244400 }, { "epoch": 1.9986915811424133, "grad_norm": 0.04509434849023819, "learning_rate": 1.3037314574582128e-11, "loss": 0.0005, "step": 244410 }, { "epoch": 1.9987733573210122, "grad_norm": 0.04148709028959274, "learning_rate": 1.1458577865997023e-11, "loss": 0.0014, "step": 244420 }, { "epoch": 1.9988551334996116, "grad_norm": 0.04488525912165642, "learning_rate": 9.981694987781786e-12, "loss": 0.0002, "step": 244430 }, { "epoch": 1.9989369096782106, "grad_norm": 0.017127731814980507, "learning_rate": 8.606665971022665e-12, "loss": 0.0007, "step": 244440 }, { "epoch": 1.99901868585681, "grad_norm": 0.014885212294757366, "learning_rate": 7.33349084236501e-12, "loss": 0.0002, "step": 244450 }, { "epoch": 1.999100462035409, "grad_norm": 0.02325461432337761, "learning_rate": 6.162169629009285e-12, "loss": 0.0004, "step": 244460 }, { "epoch": 1.9991822382140083, "grad_norm": 0.017696388065814972, "learning_rate": 5.092702354270173e-12, "loss": 0.0008, "step": 244470 }, { "epoch": 1.9992640143926075, "grad_norm": 0.0433100201189518, "learning_rate": 4.125089040352137e-12, "loss": 0.0003, "step": 244480 }, { "epoch": 1.9993457905712066, "grad_norm": 0.007889938540756702, "learning_rate": 3.2593297061289663e-12, "loss": 0.0006, "step": 244490 }, { "epoch": 1.9994275667498058, "grad_norm": 0.017977185547351837, "learning_rate": 2.4954243699193416e-12, "loss": 0.0008, "step": 244500 }, { "epoch": 1.999509342928405, "grad_norm": 0.08037788420915604, "learning_rate": 1.8333730467112733e-12, "loss": 0.0022, "step": 244510 }, { "epoch": 1.9995911191070042, "grad_norm": 0.15058213472366333, "learning_rate": 1.2731757509376607e-12, "loss": 0.001, "step": 244520 }, { "epoch": 1.9996728952856033, "grad_norm": 0.07900869846343994, "learning_rate": 8.148324931456231e-13, "loss": 0.0007, "step": 244530 }, { "epoch": 1.9997546714642025, "grad_norm": 0.0006607503164559603, "learning_rate": 4.583432827720558e-13, "loss": 0.0003, "step": 244540 }, { "epoch": 1.9998364476428017, "grad_norm": 0.10040383785963058, "learning_rate": 2.0370812758851998e-13, "loss": 0.0006, "step": 244550 }, { "epoch": 1.9999182238214008, "grad_norm": 0.039227742701768875, "learning_rate": 5.092703203590788e-14, "loss": 0.0005, "step": 244560 }, { "epoch": 2.0, "grad_norm": 0.03504420444369316, "learning_rate": 0.0, "loss": 0.0008, "step": 244570 }, { "epoch": 2.0, "step": 244570, "total_flos": 2.8500563716426695e+19, "train_loss": 0.002917896304052038, "train_runtime": 243643.3567, "train_samples_per_second": 8.03, "train_steps_per_second": 1.004 } ], "logging_steps": 10, "max_steps": 244570, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 62000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8500563716426695e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }