{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.000566358316028,
  "eval_steps": 500,
  "global_step": 1325,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007551444213705871,
      "grad_norm": 0.059659671038389206,
      "learning_rate": 2e-05,
      "loss": 0.4492,
      "step": 1
    },
    {
      "epoch": 0.0015102888427411742,
      "grad_norm": 0.0642104372382164,
      "learning_rate": 4e-05,
      "loss": 0.4602,
      "step": 2
    },
    {
      "epoch": 0.0022654332641117614,
      "grad_norm": 0.06154001131653786,
      "learning_rate": 6e-05,
      "loss": 0.488,
      "step": 3
    },
    {
      "epoch": 0.0030205776854823484,
      "grad_norm": 0.07486463338136673,
      "learning_rate": 8e-05,
      "loss": 0.5672,
      "step": 4
    },
    {
      "epoch": 0.003775722106852936,
      "grad_norm": 0.08084491640329361,
      "learning_rate": 0.0001,
      "loss": 0.525,
      "step": 5
    },
    {
      "epoch": 0.004530866528223523,
      "grad_norm": 0.09173104912042618,
      "learning_rate": 9.999985839072915e-05,
      "loss": 0.523,
      "step": 6
    },
    {
      "epoch": 0.00528601094959411,
      "grad_norm": 0.10932840406894684,
      "learning_rate": 9.999943356371866e-05,
      "loss": 0.4872,
      "step": 7
    },
    {
      "epoch": 0.006041155370964697,
      "grad_norm": 0.14193305373191833,
      "learning_rate": 9.999872552137497e-05,
      "loss": 0.5677,
      "step": 8
    },
    {
      "epoch": 0.006796299792335284,
      "grad_norm": 0.15595699846744537,
      "learning_rate": 9.999773426770865e-05,
      "loss": 0.6329,
      "step": 9
    },
    {
      "epoch": 0.007551444213705872,
      "grad_norm": 0.157088503241539,
      "learning_rate": 9.999645980833454e-05,
      "loss": 0.5819,
      "step": 10
    },
    {
      "epoch": 0.008306588635076459,
      "grad_norm": 0.12618108093738556,
      "learning_rate": 9.999490215047167e-05,
      "loss": 0.5207,
      "step": 11
    },
    {
      "epoch": 0.009061733056447046,
      "grad_norm": 0.15058279037475586,
      "learning_rate": 9.999306130294317e-05,
      "loss": 0.5821,
      "step": 12
    },
    {
      "epoch": 0.009816877477817633,
      "grad_norm": 0.12929701805114746,
      "learning_rate": 9.99909372761763e-05,
      "loss": 0.5065,
      "step": 13
    },
    {
      "epoch": 0.01057202189918822,
      "grad_norm": 0.12497358024120331,
      "learning_rate": 9.99885300822023e-05,
      "loss": 0.524,
      "step": 14
    },
    {
      "epoch": 0.011327166320558807,
      "grad_norm": 0.12322711199522018,
      "learning_rate": 9.998583973465646e-05,
      "loss": 0.605,
      "step": 15
    },
    {
      "epoch": 0.012082310741929394,
      "grad_norm": 0.11271341890096664,
      "learning_rate": 9.998286624877786e-05,
      "loss": 0.5465,
      "step": 16
    },
    {
      "epoch": 0.01283745516329998,
      "grad_norm": 0.11549581587314606,
      "learning_rate": 9.997960964140947e-05,
      "loss": 0.5595,
      "step": 17
    },
    {
      "epoch": 0.013592599584670568,
      "grad_norm": 0.12339697778224945,
      "learning_rate": 9.997606993099789e-05,
      "loss": 0.4647,
      "step": 18
    },
    {
      "epoch": 0.014347744006041155,
      "grad_norm": 0.13940560817718506,
      "learning_rate": 9.997224713759335e-05,
      "loss": 0.5441,
      "step": 19
    },
    {
      "epoch": 0.015102888427411743,
      "grad_norm": 0.12651537358760834,
      "learning_rate": 9.99681412828496e-05,
      "loss": 0.5482,
      "step": 20
    },
    {
      "epoch": 0.01585803284878233,
      "grad_norm": 0.13802285492420197,
      "learning_rate": 9.996375239002369e-05,
      "loss": 0.4715,
      "step": 21
    },
    {
      "epoch": 0.016613177270152917,
      "grad_norm": 0.13440218567848206,
      "learning_rate": 9.995908048397595e-05,
      "loss": 0.5641,
      "step": 22
    },
    {
      "epoch": 0.017368321691523504,
      "grad_norm": 0.139227494597435,
      "learning_rate": 9.995412559116979e-05,
      "loss": 0.4398,
      "step": 23
    },
    {
      "epoch": 0.01812346611289409,
      "grad_norm": 0.1373261958360672,
      "learning_rate": 9.994888773967157e-05,
      "loss": 0.5384,
      "step": 24
    },
    {
      "epoch": 0.018878610534264678,
      "grad_norm": 0.14462506771087646,
      "learning_rate": 9.99433669591504e-05,
      "loss": 0.5708,
      "step": 25
    },
    {
      "epoch": 0.019633754955635265,
      "grad_norm": 0.14911501109600067,
      "learning_rate": 9.993756328087805e-05,
      "loss": 0.5851,
      "step": 26
    },
    {
      "epoch": 0.020388899377005852,
      "grad_norm": 0.13722750544548035,
      "learning_rate": 9.99314767377287e-05,
      "loss": 0.488,
      "step": 27
    },
    {
      "epoch": 0.02114404379837644,
      "grad_norm": 0.1530655175447464,
      "learning_rate": 9.992510736417878e-05,
      "loss": 0.5364,
      "step": 28
    },
    {
      "epoch": 0.021899188219747026,
      "grad_norm": 0.14158278703689575,
      "learning_rate": 9.991845519630678e-05,
      "loss": 0.4066,
      "step": 29
    },
    {
      "epoch": 0.022654332641117613,
      "grad_norm": 0.14002585411071777,
      "learning_rate": 9.991152027179307e-05,
      "loss": 0.4307,
      "step": 30
    },
    {
      "epoch": 0.0234094770624882,
      "grad_norm": 0.15578630566596985,
      "learning_rate": 9.990430262991962e-05,
      "loss": 0.4661,
      "step": 31
    },
    {
      "epoch": 0.024164621483858787,
      "grad_norm": 0.16023118793964386,
      "learning_rate": 9.989680231156981e-05,
      "loss": 0.4454,
      "step": 32
    },
    {
      "epoch": 0.024919765905229374,
      "grad_norm": 0.17581893503665924,
      "learning_rate": 9.988901935922826e-05,
      "loss": 0.5209,
      "step": 33
    },
    {
      "epoch": 0.02567491032659996,
      "grad_norm": 0.1649864912033081,
      "learning_rate": 9.988095381698048e-05,
      "loss": 0.5037,
      "step": 34
    },
    {
      "epoch": 0.026430054747970548,
      "grad_norm": 0.18106521666049957,
      "learning_rate": 9.987260573051269e-05,
      "loss": 0.4649,
      "step": 35
    },
    {
      "epoch": 0.027185199169341135,
      "grad_norm": 0.1807573437690735,
      "learning_rate": 9.986397514711154e-05,
      "loss": 0.4987,
      "step": 36
    },
    {
      "epoch": 0.027940343590711722,
      "grad_norm": 0.392092764377594,
      "learning_rate": 9.985506211566388e-05,
      "loss": 0.526,
      "step": 37
    },
    {
      "epoch": 0.02869548801208231,
      "grad_norm": 0.17764483392238617,
      "learning_rate": 9.98458666866564e-05,
      "loss": 0.4262,
      "step": 38
    },
    {
      "epoch": 0.0294506324334529,
      "grad_norm": 0.20041850209236145,
      "learning_rate": 9.983638891217544e-05,
      "loss": 0.5239,
      "step": 39
    },
    {
      "epoch": 0.030205776854823486,
      "grad_norm": 0.19753199815750122,
      "learning_rate": 9.982662884590662e-05,
      "loss": 0.4918,
      "step": 40
    },
    {
      "epoch": 0.030960921276194073,
      "grad_norm": 0.19215907156467438,
      "learning_rate": 9.981658654313457e-05,
      "loss": 0.4474,
      "step": 41
    },
    {
      "epoch": 0.03171606569756466,
      "grad_norm": 0.19931669533252716,
      "learning_rate": 9.980626206074263e-05,
      "loss": 0.4531,
      "step": 42
    },
    {
      "epoch": 0.032471210118935244,
      "grad_norm": 0.20595461130142212,
      "learning_rate": 9.979565545721248e-05,
      "loss": 0.4495,
      "step": 43
    },
    {
      "epoch": 0.033226354540305834,
      "grad_norm": 0.20857509970664978,
      "learning_rate": 9.978476679262387e-05,
      "loss": 0.4331,
      "step": 44
    },
    {
      "epoch": 0.03398149896167642,
      "grad_norm": 0.21194373071193695,
      "learning_rate": 9.977359612865423e-05,
      "loss": 0.3721,
      "step": 45
    },
    {
      "epoch": 0.03473664338304701,
      "grad_norm": 0.21357247233390808,
      "learning_rate": 9.976214352857834e-05,
      "loss": 0.4586,
      "step": 46
    },
    {
      "epoch": 0.03549178780441759,
      "grad_norm": 0.23830629885196686,
      "learning_rate": 9.975040905726798e-05,
      "loss": 0.4416,
      "step": 47
    },
    {
      "epoch": 0.03624693222578818,
      "grad_norm": 0.2576565444469452,
      "learning_rate": 9.973839278119155e-05,
      "loss": 0.4685,
      "step": 48
    },
    {
      "epoch": 0.037002076647158766,
      "grad_norm": 0.5738092660903931,
      "learning_rate": 9.972609476841367e-05,
      "loss": 0.3931,
      "step": 49
    },
    {
      "epoch": 0.037757221068529356,
      "grad_norm": 0.31976327300071716,
      "learning_rate": 9.971351508859488e-05,
      "loss": 0.419,
      "step": 50
    },
    {
      "epoch": 0.03851236548989995,
      "grad_norm": 0.11089778691530228,
      "learning_rate": 9.970065381299112e-05,
      "loss": 0.3417,
      "step": 51
    },
    {
      "epoch": 0.03926750991127053,
      "grad_norm": 0.13030797243118286,
      "learning_rate": 9.968751101445343e-05,
      "loss": 0.3921,
      "step": 52
    },
    {
      "epoch": 0.04002265433264112,
      "grad_norm": 0.12426062673330307,
      "learning_rate": 9.967408676742751e-05,
      "loss": 0.3519,
      "step": 53
    },
    {
      "epoch": 0.040777798754011704,
      "grad_norm": 0.13413658738136292,
      "learning_rate": 9.966038114795328e-05,
      "loss": 0.4093,
      "step": 54
    },
    {
      "epoch": 0.041532943175382295,
      "grad_norm": 0.1284988820552826,
      "learning_rate": 9.964639423366442e-05,
      "loss": 0.4002,
      "step": 55
    },
    {
      "epoch": 0.04228808759675288,
      "grad_norm": 0.11679685115814209,
      "learning_rate": 9.963212610378803e-05,
      "loss": 0.4191,
      "step": 56
    },
    {
      "epoch": 0.04304323201812347,
      "grad_norm": 0.12084402143955231,
      "learning_rate": 9.961757683914406e-05,
      "loss": 0.3768,
      "step": 57
    },
    {
      "epoch": 0.04379837643949405,
      "grad_norm": 0.11981435120105743,
      "learning_rate": 9.960274652214496e-05,
      "loss": 0.4396,
      "step": 58
    },
    {
      "epoch": 0.04455352086086464,
      "grad_norm": 0.1252336949110031,
      "learning_rate": 9.958763523679514e-05,
      "loss": 0.4526,
      "step": 59
    },
    {
      "epoch": 0.045308665282235226,
      "grad_norm": 0.12791119515895844,
      "learning_rate": 9.957224306869053e-05,
      "loss": 0.4877,
      "step": 60
    },
    {
      "epoch": 0.04606380970360582,
      "grad_norm": 0.12734884023666382,
      "learning_rate": 9.955657010501806e-05,
      "loss": 0.4608,
      "step": 61
    },
    {
      "epoch": 0.0468189541249764,
      "grad_norm": 0.12033625692129135,
      "learning_rate": 9.954061643455523e-05,
      "loss": 0.4842,
      "step": 62
    },
    {
      "epoch": 0.04757409854634699,
      "grad_norm": 0.11826111376285553,
      "learning_rate": 9.952438214766955e-05,
      "loss": 0.4132,
      "step": 63
    },
    {
      "epoch": 0.048329242967717574,
      "grad_norm": 0.12830643355846405,
      "learning_rate": 9.950786733631801e-05,
      "loss": 0.4733,
      "step": 64
    },
    {
      "epoch": 0.049084387389088165,
      "grad_norm": 0.13267682492733002,
      "learning_rate": 9.949107209404665e-05,
      "loss": 0.4303,
      "step": 65
    },
    {
      "epoch": 0.04983953181045875,
      "grad_norm": 0.12670353055000305,
      "learning_rate": 9.947399651598993e-05,
      "loss": 0.5202,
      "step": 66
    },
    {
      "epoch": 0.05059467623182934,
      "grad_norm": 0.15055082738399506,
      "learning_rate": 9.945664069887028e-05,
      "loss": 0.4475,
      "step": 67
    },
    {
      "epoch": 0.05134982065319992,
      "grad_norm": 0.13549265265464783,
      "learning_rate": 9.943900474099748e-05,
      "loss": 0.5083,
      "step": 68
    },
    {
      "epoch": 0.05210496507457051,
      "grad_norm": 0.12346430122852325,
      "learning_rate": 9.942108874226811e-05,
      "loss": 0.3601,
      "step": 69
    },
    {
      "epoch": 0.052860109495941096,
      "grad_norm": 0.14251157641410828,
      "learning_rate": 9.940289280416508e-05,
      "loss": 0.4827,
      "step": 70
    },
    {
      "epoch": 0.05361525391731169,
      "grad_norm": 0.13678328692913055,
      "learning_rate": 9.938441702975689e-05,
      "loss": 0.4081,
      "step": 71
    },
    {
      "epoch": 0.05437039833868227,
      "grad_norm": 0.13870520889759064,
      "learning_rate": 9.93656615236972e-05,
      "loss": 0.442,
      "step": 72
    },
    {
      "epoch": 0.05512554276005286,
      "grad_norm": 0.13190938532352448,
      "learning_rate": 9.934662639222412e-05,
      "loss": 0.409,
      "step": 73
    },
    {
      "epoch": 0.055880687181423444,
      "grad_norm": 0.1491832584142685,
      "learning_rate": 9.932731174315972e-05,
      "loss": 0.5432,
      "step": 74
    },
    {
      "epoch": 0.056635831602794035,
      "grad_norm": 0.14117342233657837,
      "learning_rate": 9.930771768590933e-05,
      "loss": 0.436,
      "step": 75
    },
    {
      "epoch": 0.05739097602416462,
      "grad_norm": 0.15490145981311798,
      "learning_rate": 9.928784433146096e-05,
      "loss": 0.4399,
      "step": 76
    },
    {
      "epoch": 0.05814612044553521,
      "grad_norm": 0.15420539677143097,
      "learning_rate": 9.926769179238466e-05,
      "loss": 0.4223,
      "step": 77
    },
    {
      "epoch": 0.0589012648669058,
      "grad_norm": 0.15358476340770721,
      "learning_rate": 9.924726018283187e-05,
      "loss": 0.4702,
      "step": 78
    },
    {
      "epoch": 0.05965640928827638,
      "grad_norm": 0.15698279440402985,
      "learning_rate": 9.922654961853481e-05,
      "loss": 0.4728,
      "step": 79
    },
    {
      "epoch": 0.06041155370964697,
      "grad_norm": 0.16668903827667236,
      "learning_rate": 9.92055602168058e-05,
      "loss": 0.4654,
      "step": 80
    },
    {
      "epoch": 0.061166698131017556,
      "grad_norm": 0.16828201711177826,
      "learning_rate": 9.918429209653662e-05,
      "loss": 0.4588,
      "step": 81
    },
    {
      "epoch": 0.06192184255238815,
      "grad_norm": 0.1675289273262024,
      "learning_rate": 9.916274537819775e-05,
      "loss": 0.489,
      "step": 82
    },
    {
      "epoch": 0.06267698697375873,
      "grad_norm": 0.19768594205379486,
      "learning_rate": 9.914092018383778e-05,
      "loss": 0.4707,
      "step": 83
    },
    {
      "epoch": 0.06343213139512932,
      "grad_norm": 0.17943724989891052,
      "learning_rate": 9.911881663708275e-05,
      "loss": 0.458,
      "step": 84
    },
    {
      "epoch": 0.06418727581649991,
      "grad_norm": 0.18578174710273743,
      "learning_rate": 9.909643486313533e-05,
      "loss": 0.403,
      "step": 85
    },
    {
      "epoch": 0.06494242023787049,
      "grad_norm": 0.1998097449541092,
      "learning_rate": 9.90737749887742e-05,
      "loss": 0.5154,
      "step": 86
    },
    {
      "epoch": 0.06569756465924108,
      "grad_norm": 0.18927428126335144,
      "learning_rate": 9.905083714235326e-05,
      "loss": 0.4682,
      "step": 87
    },
    {
      "epoch": 0.06645270908061167,
      "grad_norm": 0.18993408977985382,
      "learning_rate": 9.9027621453801e-05,
      "loss": 0.4373,
      "step": 88
    },
    {
      "epoch": 0.06720785350198226,
      "grad_norm": 0.18295501172542572,
      "learning_rate": 9.900412805461967e-05,
      "loss": 0.4163,
      "step": 89
    },
    {
      "epoch": 0.06796299792335284,
      "grad_norm": 0.20295414328575134,
      "learning_rate": 9.898035707788463e-05,
      "loss": 0.4816,
      "step": 90
    },
    {
      "epoch": 0.06871814234472343,
      "grad_norm": 0.21273529529571533,
      "learning_rate": 9.895630865824347e-05,
      "loss": 0.4363,
      "step": 91
    },
    {
      "epoch": 0.06947328676609402,
      "grad_norm": 0.21219299733638763,
      "learning_rate": 9.893198293191538e-05,
      "loss": 0.5055,
      "step": 92
    },
    {
      "epoch": 0.07022843118746461,
      "grad_norm": 0.2346925586462021,
      "learning_rate": 9.890738003669029e-05,
      "loss": 0.5144,
      "step": 93
    },
    {
      "epoch": 0.07098357560883518,
      "grad_norm": 0.2355310320854187,
      "learning_rate": 9.888250011192811e-05,
      "loss": 0.4826,
      "step": 94
    },
    {
      "epoch": 0.07173872003020577,
      "grad_norm": 0.23005840182304382,
      "learning_rate": 9.885734329855798e-05,
      "loss": 0.4764,
      "step": 95
    },
    {
      "epoch": 0.07249386445157636,
      "grad_norm": 0.2246306836605072,
      "learning_rate": 9.883190973907741e-05,
      "loss": 0.3891,
      "step": 96
    },
    {
      "epoch": 0.07324900887294696,
      "grad_norm": 0.28144529461860657,
      "learning_rate": 9.880619957755151e-05,
      "loss": 0.5174,
      "step": 97
    },
    {
      "epoch": 0.07400415329431753,
      "grad_norm": 0.27972379326820374,
      "learning_rate": 9.878021295961217e-05,
      "loss": 0.4433,
      "step": 98
    },
    {
      "epoch": 0.07475929771568812,
      "grad_norm": 0.3294559717178345,
      "learning_rate": 9.875395003245724e-05,
      "loss": 0.442,
      "step": 99
    },
    {
      "epoch": 0.07551444213705871,
      "grad_norm": 0.3774302303791046,
      "learning_rate": 9.872741094484965e-05,
      "loss": 0.5358,
      "step": 100
    },
    {
      "epoch": 0.0762695865584293,
      "grad_norm": 0.12596380710601807,
      "learning_rate": 9.870059584711668e-05,
      "loss": 0.3584,
      "step": 101
    },
    {
      "epoch": 0.0770247309797999,
      "grad_norm": 0.1355014592409134,
      "learning_rate": 9.867350489114894e-05,
      "loss": 0.4008,
      "step": 102
    },
    {
      "epoch": 0.07777987540117047,
      "grad_norm": 0.14024491608142853,
      "learning_rate": 9.864613823039969e-05,
      "loss": 0.3752,
      "step": 103
    },
    {
      "epoch": 0.07853501982254106,
      "grad_norm": 0.13403770327568054,
      "learning_rate": 9.861849601988383e-05,
      "loss": 0.38,
      "step": 104
    },
    {
      "epoch": 0.07929016424391165,
      "grad_norm": 0.1362624615430832,
      "learning_rate": 9.859057841617709e-05,
      "loss": 0.3889,
      "step": 105
    },
    {
      "epoch": 0.08004530866528224,
      "grad_norm": 0.137498140335083,
      "learning_rate": 9.856238557741513e-05,
      "loss": 0.4215,
      "step": 106
    },
    {
      "epoch": 0.08080045308665282,
      "grad_norm": 0.1259029656648636,
      "learning_rate": 9.853391766329263e-05,
      "loss": 0.3831,
      "step": 107
    },
    {
      "epoch": 0.08155559750802341,
      "grad_norm": 0.12306073307991028,
      "learning_rate": 9.850517483506244e-05,
      "loss": 0.3843,
      "step": 108
    },
    {
      "epoch": 0.082310741929394,
      "grad_norm": 0.12481655925512314,
      "learning_rate": 9.847615725553456e-05,
      "loss": 0.4206,
      "step": 109
    },
    {
      "epoch": 0.08306588635076459,
      "grad_norm": 0.11769527196884155,
      "learning_rate": 9.844686508907537e-05,
      "loss": 0.3633,
      "step": 110
    },
    {
      "epoch": 0.08382103077213517,
      "grad_norm": 0.12572641670703888,
      "learning_rate": 9.841729850160652e-05,
      "loss": 0.4113,
      "step": 111
    },
    {
      "epoch": 0.08457617519350576,
      "grad_norm": 0.12385623157024384,
      "learning_rate": 9.838745766060416e-05,
      "loss": 0.3894,
      "step": 112
    },
    {
      "epoch": 0.08533131961487635,
      "grad_norm": 0.13819383084774017,
      "learning_rate": 9.835734273509786e-05,
      "loss": 0.4989,
      "step": 113
    },
    {
      "epoch": 0.08608646403624694,
      "grad_norm": 0.13039085268974304,
      "learning_rate": 9.832695389566972e-05,
      "loss": 0.4502,
      "step": 114
    },
    {
      "epoch": 0.08684160845761751,
      "grad_norm": 0.14915941655635834,
      "learning_rate": 9.829629131445342e-05,
      "loss": 0.4674,
      "step": 115
    },
    {
      "epoch": 0.0875967528789881,
      "grad_norm": 0.13414114713668823,
      "learning_rate": 9.826535516513317e-05,
      "loss": 0.4914,
      "step": 116
    },
    {
      "epoch": 0.0883518973003587,
      "grad_norm": 0.13531801104545593,
      "learning_rate": 9.82341456229428e-05,
      "loss": 0.4764,
      "step": 117
    },
    {
      "epoch": 0.08910704172172929,
      "grad_norm": 0.1374198943376541,
      "learning_rate": 9.820266286466471e-05,
      "loss": 0.4299,
      "step": 118
    },
    {
      "epoch": 0.08986218614309986,
      "grad_norm": 0.14264227449893951,
      "learning_rate": 9.817090706862895e-05,
      "loss": 0.4164,
      "step": 119
    },
    {
      "epoch": 0.09061733056447045,
      "grad_norm": 0.14601197838783264,
      "learning_rate": 9.81388784147121e-05,
      "loss": 0.4273,
      "step": 120
    },
    {
      "epoch": 0.09137247498584104,
      "grad_norm": 0.1535925269126892,
      "learning_rate": 9.810657708433637e-05,
      "loss": 0.4947,
      "step": 121
    },
    {
      "epoch": 0.09212761940721163,
      "grad_norm": 0.1509617269039154,
      "learning_rate": 9.807400326046843e-05,
      "loss": 0.3626,
      "step": 122
    },
    {
      "epoch": 0.09288276382858221,
      "grad_norm": 0.15619726479053497,
      "learning_rate": 9.804115712761851e-05,
      "loss": 0.4597,
      "step": 123
    },
    {
      "epoch": 0.0936379082499528,
      "grad_norm": 0.1454990804195404,
      "learning_rate": 9.80080388718393e-05,
      "loss": 0.3858,
      "step": 124
    },
    {
      "epoch": 0.09439305267132339,
      "grad_norm": 0.1631968766450882,
      "learning_rate": 9.797464868072488e-05,
      "loss": 0.435,
      "step": 125
    },
    {
      "epoch": 0.09514819709269398,
      "grad_norm": 0.15620705485343933,
      "learning_rate": 9.794098674340965e-05,
      "loss": 0.4259,
      "step": 126
    },
    {
      "epoch": 0.09590334151406457,
      "grad_norm": 0.16626444458961487,
      "learning_rate": 9.790705325056735e-05,
      "loss": 0.4253,
      "step": 127
    },
    {
      "epoch": 0.09665848593543515,
      "grad_norm": 0.16531290113925934,
      "learning_rate": 9.787284839440982e-05,
      "loss": 0.4284,
      "step": 128
    },
    {
      "epoch": 0.09741363035680574,
      "grad_norm": 0.15550102293491364,
      "learning_rate": 9.783837236868609e-05,
      "loss": 0.3745,
      "step": 129
    },
    {
      "epoch": 0.09816877477817633,
      "grad_norm": 0.17284280061721802,
      "learning_rate": 9.780362536868113e-05,
      "loss": 0.4458,
      "step": 130
    },
    {
      "epoch": 0.09892391919954692,
      "grad_norm": 0.16307754814624786,
      "learning_rate": 9.776860759121484e-05,
      "loss": 0.4001,
      "step": 131
    },
    {
      "epoch": 0.0996790636209175,
      "grad_norm": 0.1765722781419754,
      "learning_rate": 9.77333192346409e-05,
      "loss": 0.4215,
      "step": 132
    },
    {
      "epoch": 0.10043420804228809,
      "grad_norm": 0.1748800426721573,
      "learning_rate": 9.769776049884563e-05,
      "loss": 0.4215,
      "step": 133
    },
    {
      "epoch": 0.10118935246365868,
      "grad_norm": 0.18451336026191711,
      "learning_rate": 9.766193158524692e-05,
      "loss": 0.4214,
      "step": 134
    },
    {
      "epoch": 0.10194449688502927,
      "grad_norm": 0.19954369962215424,
      "learning_rate": 9.762583269679303e-05,
      "loss": 0.4085,
      "step": 135
    },
    {
      "epoch": 0.10269964130639984,
      "grad_norm": 0.2019212394952774,
      "learning_rate": 9.758946403796143e-05,
      "loss": 0.3836,
      "step": 136
    },
    {
      "epoch": 0.10345478572777043,
      "grad_norm": 0.20586973428726196,
      "learning_rate": 9.755282581475769e-05,
      "loss": 0.4231,
      "step": 137
    },
    {
      "epoch": 0.10420993014914103,
      "grad_norm": 0.2090785652399063,
      "learning_rate": 9.751591823471429e-05,
      "loss": 0.4385,
      "step": 138
    },
    {
      "epoch": 0.10496507457051162,
      "grad_norm": 0.21643619239330292,
      "learning_rate": 9.747874150688948e-05,
      "loss": 0.4758,
      "step": 139
    },
    {
      "epoch": 0.10572021899188219,
      "grad_norm": 0.19914501905441284,
      "learning_rate": 9.744129584186598e-05,
      "loss": 0.3888,
      "step": 140
    },
    {
      "epoch": 0.10647536341325278,
      "grad_norm": 0.23445309698581696,
      "learning_rate": 9.740358145174998e-05,
      "loss": 0.4988,
      "step": 141
    },
    {
      "epoch": 0.10723050783462337,
      "grad_norm": 0.21564048528671265,
      "learning_rate": 9.736559855016973e-05,
      "loss": 0.4387,
      "step": 142
    },
    {
      "epoch": 0.10798565225599396,
      "grad_norm": 0.21970051527023315,
      "learning_rate": 9.73273473522745e-05,
      "loss": 0.3984,
      "step": 143
    },
    {
      "epoch": 0.10874079667736454,
      "grad_norm": 0.24472594261169434,
      "learning_rate": 9.728882807473324e-05,
      "loss": 0.378,
      "step": 144
    },
    {
      "epoch": 0.10949594109873513,
      "grad_norm": 0.24230483174324036,
      "learning_rate": 9.725004093573342e-05,
      "loss": 0.4024,
      "step": 145
    },
    {
      "epoch": 0.11025108552010572,
      "grad_norm": 0.24002547562122345,
      "learning_rate": 9.72109861549798e-05,
      "loss": 0.3738,
      "step": 146
    },
    {
      "epoch": 0.11100622994147631,
      "grad_norm": 0.2504161298274994,
      "learning_rate": 9.717166395369313e-05,
      "loss": 0.3868,
      "step": 147
    },
    {
      "epoch": 0.11176137436284689,
      "grad_norm": 0.2596012055873871,
      "learning_rate": 9.713207455460894e-05,
      "loss": 0.4068,
      "step": 148
    },
    {
      "epoch": 0.11251651878421748,
      "grad_norm": 0.3224775493144989,
      "learning_rate": 9.709221818197624e-05,
      "loss": 0.4875,
      "step": 149
    },
    {
      "epoch": 0.11327166320558807,
      "grad_norm": 0.40476924180984497,
      "learning_rate": 9.705209506155634e-05,
      "loss": 0.4832,
      "step": 150
    },
    {
      "epoch": 0.11402680762695866,
      "grad_norm": 0.11512809246778488,
      "learning_rate": 9.701170542062148e-05,
      "loss": 0.3255,
      "step": 151
    },
    {
      "epoch": 0.11478195204832924,
      "grad_norm": 0.13325871527194977,
      "learning_rate": 9.697104948795352e-05,
      "loss": 0.3707,
      "step": 152
    },
    {
      "epoch": 0.11553709646969983,
      "grad_norm": 0.1390884816646576,
      "learning_rate": 9.693012749384279e-05,
      "loss": 0.3659,
      "step": 153
    },
    {
      "epoch": 0.11629224089107042,
      "grad_norm": 0.13481466472148895,
      "learning_rate": 9.688893967008661e-05,
      "loss": 0.3847,
      "step": 154
    },
    {
      "epoch": 0.11704738531244101,
      "grad_norm": 0.14832735061645508,
      "learning_rate": 9.68474862499881e-05,
      "loss": 0.457,
      "step": 155
    },
    {
      "epoch": 0.1178025297338116,
      "grad_norm": 0.13634005188941956,
      "learning_rate": 9.68057674683548e-05,
      "loss": 0.4172,
      "step": 156
    },
    {
      "epoch": 0.11855767415518217,
      "grad_norm": 0.1387951821088791,
      "learning_rate": 9.676378356149734e-05,
      "loss": 0.4252,
      "step": 157
    },
    {
      "epoch": 0.11931281857655276,
      "grad_norm": 0.1387217789888382,
      "learning_rate": 9.672153476722816e-05,
      "loss": 0.3928,
      "step": 158
    },
    {
      "epoch": 0.12006796299792336,
      "grad_norm": 0.14391182363033295,
      "learning_rate": 9.667902132486009e-05,
      "loss": 0.4545,
      "step": 159
    },
    {
      "epoch": 0.12082310741929395,
      "grad_norm": 0.1416245996952057,
      "learning_rate": 9.663624347520505e-05,
      "loss": 0.4543,
      "step": 160
    },
    {
      "epoch": 0.12157825184066452,
      "grad_norm": 0.13763579726219177,
      "learning_rate": 9.659320146057262e-05,
      "loss": 0.3957,
      "step": 161
    },
    {
      "epoch": 0.12233339626203511,
      "grad_norm": 0.14178583025932312,
      "learning_rate": 9.654989552476875e-05,
      "loss": 0.448,
      "step": 162
    },
    {
      "epoch": 0.1230885406834057,
      "grad_norm": 0.13738113641738892,
      "learning_rate": 9.650632591309431e-05,
      "loss": 0.3954,
      "step": 163
    },
    {
      "epoch": 0.1238436851047763,
      "grad_norm": 0.14869531989097595,
      "learning_rate": 9.646249287234374e-05,
      "loss": 0.4405,
      "step": 164
    },
    {
      "epoch": 0.12459882952614687,
      "grad_norm": 0.14332245290279388,
      "learning_rate": 9.641839665080363e-05,
      "loss": 0.4543,
      "step": 165
    },
    {
      "epoch": 0.12535397394751746,
      "grad_norm": 0.14617004990577698,
      "learning_rate": 9.637403749825135e-05,
      "loss": 0.397,
      "step": 166
    },
    {
      "epoch": 0.12610911836888805,
      "grad_norm": 0.16142374277114868,
      "learning_rate": 9.632941566595357e-05,
      "loss": 0.5283,
      "step": 167
    },
    {
      "epoch": 0.12686426279025864,
      "grad_norm": 0.15721464157104492,
      "learning_rate": 9.628453140666492e-05,
      "loss": 0.433,
      "step": 168
    },
    {
      "epoch": 0.12761940721162923,
      "grad_norm": 0.1646830439567566,
      "learning_rate": 9.623938497462646e-05,
      "loss": 0.5024,
      "step": 169
    },
    {
      "epoch": 0.12837455163299982,
      "grad_norm": 0.14574110507965088,
      "learning_rate": 9.619397662556435e-05,
      "loss": 0.4194,
      "step": 170
    },
    {
      "epoch": 0.12912969605437039,
      "grad_norm": 0.15734295547008514,
      "learning_rate": 9.614830661668829e-05,
      "loss": 0.4757,
      "step": 171
    },
    {
      "epoch": 0.12988484047574098,
      "grad_norm": 0.15386731922626495,
      "learning_rate": 9.610237520669016e-05,
      "loss": 0.3874,
      "step": 172
    },
    {
      "epoch": 0.13063998489711157,
      "grad_norm": 0.16264206171035767,
      "learning_rate": 9.60561826557425e-05,
      "loss": 0.409,
      "step": 173
    },
    {
      "epoch": 0.13139512931848216,
      "grad_norm": 0.1713552325963974,
      "learning_rate": 9.600972922549707e-05,
      "loss": 0.4799,
      "step": 174
    },
    {
      "epoch": 0.13215027373985275,
      "grad_norm": 0.17139887809753418,
      "learning_rate": 9.596301517908328e-05,
      "loss": 0.3992,
      "step": 175
    },
    {
      "epoch": 0.13290541816122334,
      "grad_norm": 0.16348589956760406,
      "learning_rate": 9.591604078110685e-05,
      "loss": 0.3942,
      "step": 176
    },
    {
      "epoch": 0.13366056258259393,
      "grad_norm": 0.1675892323255539,
      "learning_rate": 9.586880629764817e-05,
      "loss": 0.4305,
      "step": 177
    },
    {
      "epoch": 0.13441570700396452,
      "grad_norm": 0.1761837750673294,
      "learning_rate": 9.582131199626087e-05,
      "loss": 0.4386,
      "step": 178
    },
    {
      "epoch": 0.1351708514253351,
      "grad_norm": 0.1876501590013504,
      "learning_rate": 9.577355814597031e-05,
      "loss": 0.4765,
      "step": 179
    },
    {
      "epoch": 0.13592599584670567,
      "grad_norm": 0.18781447410583496,
      "learning_rate": 9.572554501727198e-05,
      "loss": 0.4502,
      "step": 180
    },
    {
      "epoch": 0.13668114026807626,
      "grad_norm": 0.20662984251976013,
      "learning_rate": 9.567727288213005e-05,
      "loss": 0.4612,
      "step": 181
    },
    {
      "epoch": 0.13743628468944685,
      "grad_norm": 0.19330036640167236,
      "learning_rate": 9.56287420139758e-05,
      "loss": 0.4344,
      "step": 182
    },
    {
      "epoch": 0.13819142911081744,
      "grad_norm": 0.19399768114089966,
      "learning_rate": 9.557995268770608e-05,
      "loss": 0.4314,
      "step": 183
    },
    {
      "epoch": 0.13894657353218803,
      "grad_norm": 0.200825035572052,
      "learning_rate": 9.553090517968169e-05,
      "loss": 0.4095,
      "step": 184
    },
    {
      "epoch": 0.13970171795355862,
      "grad_norm": 0.21249161660671234,
      "learning_rate": 9.548159976772592e-05,
      "loss": 0.5122,
      "step": 185
    },
    {
      "epoch": 0.14045686237492921,
      "grad_norm": 0.2057344764471054,
      "learning_rate": 9.543203673112293e-05,
      "loss": 0.4131,
      "step": 186
    },
    {
      "epoch": 0.1412120067962998,
      "grad_norm": 0.2109992653131485,
      "learning_rate": 9.538221635061611e-05,
      "loss": 0.4596,
      "step": 187
    },
    {
      "epoch": 0.14196715121767037,
      "grad_norm": 0.2060767263174057,
      "learning_rate": 9.533213890840657e-05,
      "loss": 0.4008,
      "step": 188
    },
    {
      "epoch": 0.14272229563904096,
      "grad_norm": 0.22520488500595093,
      "learning_rate": 9.528180468815155e-05,
      "loss": 0.451,
      "step": 189
    },
    {
      "epoch": 0.14347744006041155,
      "grad_norm": 0.23366759717464447,
      "learning_rate": 9.523121397496269e-05,
      "loss": 0.435,
      "step": 190
    },
    {
      "epoch": 0.14423258448178214,
      "grad_norm": 0.23018625378608704,
      "learning_rate": 9.518036705540458e-05,
      "loss": 0.4397,
      "step": 191
    },
    {
      "epoch": 0.14498772890315273,
      "grad_norm": 0.22498714923858643,
      "learning_rate": 9.512926421749304e-05,
      "loss": 0.3717,
      "step": 192
    },
    {
      "epoch": 0.14574287332452332,
      "grad_norm": 0.24823418259620667,
      "learning_rate": 9.507790575069347e-05,
      "loss": 0.3805,
      "step": 193
    },
    {
      "epoch": 0.1464980177458939,
      "grad_norm": 0.27361419796943665,
      "learning_rate": 9.502629194591926e-05,
      "loss": 0.5063,
      "step": 194
    },
    {
      "epoch": 0.1472531621672645,
      "grad_norm": 0.27400317788124084,
      "learning_rate": 9.497442309553016e-05,
      "loss": 0.455,
      "step": 195
    },
    {
      "epoch": 0.14800830658863506,
      "grad_norm": 0.30166488885879517,
      "learning_rate": 9.492229949333058e-05,
      "loss": 0.5024,
      "step": 196
    },
    {
      "epoch": 0.14876345101000565,
      "grad_norm": 0.29768070578575134,
      "learning_rate": 9.486992143456792e-05,
      "loss": 0.382,
      "step": 197
    },
    {
      "epoch": 0.14951859543137624,
      "grad_norm": 0.3229611814022064,
      "learning_rate": 9.481728921593093e-05,
      "loss": 0.4178,
      "step": 198
    },
    {
      "epoch": 0.15027373985274683,
      "grad_norm": 0.31606340408325195,
      "learning_rate": 9.476440313554803e-05,
      "loss": 0.403,
      "step": 199
    },
    {
      "epoch": 0.15102888427411743,
      "grad_norm": 0.3485797047615051,
      "learning_rate": 9.471126349298556e-05,
      "loss": 0.3898,
      "step": 200
    },
    {
      "epoch": 0.15178402869548802,
      "grad_norm": 0.12325902283191681,
      "learning_rate": 9.46578705892462e-05,
      "loss": 0.3083,
      "step": 201
    },
    {
      "epoch": 0.1525391731168586,
      "grad_norm": 0.1569177210330963,
      "learning_rate": 9.460422472676712e-05,
      "loss": 0.3685,
      "step": 202
    },
    {
      "epoch": 0.1532943175382292,
      "grad_norm": 0.1423245519399643,
      "learning_rate": 9.45503262094184e-05,
      "loss": 0.341,
      "step": 203
    },
    {
      "epoch": 0.1540494619595998,
      "grad_norm": 0.13771581649780273,
      "learning_rate": 9.449617534250122e-05,
      "loss": 0.3271,
      "step": 204
    },
    {
      "epoch": 0.15480460638097035,
      "grad_norm": 0.15911588072776794,
      "learning_rate": 9.444177243274618e-05,
      "loss": 0.4001,
      "step": 205
    },
    {
      "epoch": 0.15555975080234094,
      "grad_norm": 0.14060606062412262,
      "learning_rate": 9.438711778831152e-05,
      "loss": 0.3537,
      "step": 206
    },
    {
      "epoch": 0.15631489522371153,
      "grad_norm": 0.1690395623445511,
      "learning_rate": 9.433221171878144e-05,
      "loss": 0.5235,
      "step": 207
    },
    {
      "epoch": 0.15707003964508212,
      "grad_norm": 0.133980393409729,
      "learning_rate": 9.427705453516427e-05,
      "loss": 0.351,
      "step": 208
    },
    {
      "epoch": 0.1578251840664527,
      "grad_norm": 0.1446908861398697,
      "learning_rate": 9.422164654989072e-05,
      "loss": 0.3714,
      "step": 209
    },
    {
      "epoch": 0.1585803284878233,
      "grad_norm": 0.15370596945285797,
      "learning_rate": 9.41659880768122e-05,
      "loss": 0.4452,
      "step": 210
    },
    {
      "epoch": 0.1593354729091939,
      "grad_norm": 0.15254907310009003,
      "learning_rate": 9.411007943119894e-05,
      "loss": 0.4388,
      "step": 211
    },
    {
      "epoch": 0.16009061733056448,
      "grad_norm": 0.14596116542816162,
      "learning_rate": 9.405392092973823e-05,
      "loss": 0.4037,
      "step": 212
    },
    {
      "epoch": 0.16084576175193505,
      "grad_norm": 0.14959284663200378,
      "learning_rate": 9.399751289053267e-05,
      "loss": 0.3917,
      "step": 213
    },
    {
      "epoch": 0.16160090617330564,
      "grad_norm": 0.16624942421913147,
      "learning_rate": 9.394085563309827e-05,
      "loss": 0.4609,
      "step": 214
    },
    {
      "epoch": 0.16235605059467623,
      "grad_norm": 0.16217663884162903,
      "learning_rate": 9.388394947836279e-05,
      "loss": 0.4446,
      "step": 215
    },
    {
      "epoch": 0.16311119501604682,
      "grad_norm": 0.1554042100906372,
      "learning_rate": 9.382679474866376e-05,
      "loss": 0.4281,
      "step": 216
    },
    {
      "epoch": 0.1638663394374174,
      "grad_norm": 0.16471102833747864,
      "learning_rate": 9.376939176774679e-05,
      "loss": 0.4674,
      "step": 217
    },
    {
      "epoch": 0.164621483858788,
      "grad_norm": 0.15578734874725342,
      "learning_rate": 9.371174086076363e-05,
      "loss": 0.3938,
      "step": 218
    },
    {
      "epoch": 0.1653766282801586,
      "grad_norm": 0.16161073744297028,
      "learning_rate": 9.365384235427042e-05,
      "loss": 0.4238,
      "step": 219
    },
    {
      "epoch": 0.16613177270152918,
      "grad_norm": 0.16967645287513733,
      "learning_rate": 9.359569657622574e-05,
      "loss": 0.4663,
      "step": 220
    },
    {
      "epoch": 0.16688691712289974,
      "grad_norm": 0.1597963124513626,
      "learning_rate": 9.353730385598887e-05,
      "loss": 0.4046,
      "step": 221
    },
    {
      "epoch": 0.16764206154427033,
      "grad_norm": 0.17794618010520935,
      "learning_rate": 9.34786645243178e-05,
      "loss": 0.4111,
      "step": 222
    },
    {
      "epoch": 0.16839720596564092,
      "grad_norm": 0.16721504926681519,
      "learning_rate": 9.341977891336749e-05,
      "loss": 0.4147,
      "step": 223
    },
    {
      "epoch": 0.1691523503870115,
      "grad_norm": 0.1782609224319458,
      "learning_rate": 9.336064735668784e-05,
      "loss": 0.4159,
      "step": 224
    },
    {
      "epoch": 0.1699074948083821,
      "grad_norm": 0.17892874777317047,
      "learning_rate": 9.330127018922194e-05,
      "loss": 0.4498,
      "step": 225
    },
    {
      "epoch": 0.1706626392297527,
      "grad_norm": 0.18540053069591522,
      "learning_rate": 9.324164774730406e-05,
      "loss": 0.4727,
      "step": 226
    },
    {
      "epoch": 0.17141778365112328,
      "grad_norm": 0.19462263584136963,
      "learning_rate": 9.318178036865785e-05,
      "loss": 0.4427,
      "step": 227
    },
    {
      "epoch": 0.17217292807249387,
      "grad_norm": 0.19947673380374908,
      "learning_rate": 9.312166839239433e-05,
      "loss": 0.4262,
      "step": 228
    },
    {
      "epoch": 0.17292807249386447,
      "grad_norm": 0.19704130291938782,
      "learning_rate": 9.306131215901003e-05,
      "loss": 0.4226,
      "step": 229
    },
    {
      "epoch": 0.17368321691523503,
      "grad_norm": 0.18819350004196167,
      "learning_rate": 9.300071201038503e-05,
      "loss": 0.4082,
      "step": 230
    },
    {
      "epoch": 0.17443836133660562,
      "grad_norm": 0.20187118649482727,
      "learning_rate": 9.293986828978106e-05,
      "loss": 0.449,
      "step": 231
    },
    {
      "epoch": 0.1751935057579762,
      "grad_norm": 0.2177709937095642,
      "learning_rate": 9.287878134183948e-05,
      "loss": 0.4607,
      "step": 232
    },
    {
      "epoch": 0.1759486501793468,
      "grad_norm": 0.2070026397705078,
      "learning_rate": 9.281745151257946e-05,
      "loss": 0.3923,
      "step": 233
    },
    {
      "epoch": 0.1767037946007174,
      "grad_norm": 0.2048753798007965,
      "learning_rate": 9.275587914939586e-05,
      "loss": 0.4518,
      "step": 234
    },
    {
      "epoch": 0.17745893902208798,
      "grad_norm": 0.20374053716659546,
      "learning_rate": 9.26940646010574e-05,
      "loss": 0.4258,
      "step": 235
    },
    {
      "epoch": 0.17821408344345857,
      "grad_norm": 0.22188866138458252,
      "learning_rate": 9.263200821770461e-05,
      "loss": 0.3896,
      "step": 236
    },
    {
      "epoch": 0.17896922786482916,
      "grad_norm": 0.22264693677425385,
      "learning_rate": 9.256971035084785e-05,
      "loss": 0.4189,
      "step": 237
    },
    {
      "epoch": 0.17972437228619972,
      "grad_norm": 0.2051049768924713,
      "learning_rate": 9.250717135336534e-05,
      "loss": 0.3751,
      "step": 238
    },
    {
      "epoch": 0.18047951670757031,
      "grad_norm": 0.24615737795829773,
      "learning_rate": 9.244439157950114e-05,
      "loss": 0.4728,
      "step": 239
    },
    {
      "epoch": 0.1812346611289409,
      "grad_norm": 0.2331840842962265,
      "learning_rate": 9.238137138486318e-05,
      "loss": 0.4516,
      "step": 240
    },
    {
      "epoch": 0.1819898055503115,
      "grad_norm": 0.23615127801895142,
      "learning_rate": 9.231811112642121e-05,
      "loss": 0.3788,
      "step": 241
    },
    {
      "epoch": 0.18274494997168209,
      "grad_norm": 0.2417721003293991,
      "learning_rate": 9.225461116250483e-05,
      "loss": 0.4161,
      "step": 242
    },
    {
      "epoch": 0.18350009439305268,
      "grad_norm": 0.26503533124923706,
      "learning_rate": 9.219087185280132e-05,
      "loss": 0.4356,
      "step": 243
    },
    {
      "epoch": 0.18425523881442327,
      "grad_norm": 0.24484668672084808,
      "learning_rate": 9.212689355835379e-05,
      "loss": 0.3629,
      "step": 244
    },
    {
      "epoch": 0.18501038323579386,
      "grad_norm": 0.2426530420780182,
      "learning_rate": 9.206267664155907e-05,
      "loss": 0.3427,
      "step": 245
    },
    {
      "epoch": 0.18576552765716442,
      "grad_norm": 0.26813021302223206,
      "learning_rate": 9.199822146616552e-05,
      "loss": 0.4148,
      "step": 246
    },
    {
      "epoch": 0.186520672078535,
      "grad_norm": 0.3065304458141327,
      "learning_rate": 9.193352839727121e-05,
      "loss": 0.4015,
      "step": 247
    },
    {
      "epoch": 0.1872758164999056,
      "grad_norm": 0.32816389203071594,
      "learning_rate": 9.186859780132164e-05,
      "loss": 0.4233,
      "step": 248
    },
    {
      "epoch": 0.1880309609212762,
      "grad_norm": 0.3237447738647461,
      "learning_rate": 9.18034300461078e-05,
      "loss": 0.351,
      "step": 249
    },
    {
      "epoch": 0.18878610534264678,
      "grad_norm": 0.3921673595905304,
      "learning_rate": 9.173802550076401e-05,
      "loss": 0.4465,
      "step": 250
    },
    {
      "epoch": 0.18954124976401737,
      "grad_norm": 0.12883761525154114,
      "learning_rate": 9.167238453576589e-05,
      "loss": 0.3514,
      "step": 251
    },
    {
      "epoch": 0.19029639418538796,
      "grad_norm": 0.14347058534622192,
      "learning_rate": 9.160650752292819e-05,
      "loss": 0.3831,
      "step": 252
    },
    {
      "epoch": 0.19105153860675855,
      "grad_norm": 0.1370943933725357,
      "learning_rate": 9.154039483540273e-05,
      "loss": 0.3797,
      "step": 253
    },
    {
      "epoch": 0.19180668302812914,
      "grad_norm": 0.1585707813501358,
      "learning_rate": 9.147404684767632e-05,
      "loss": 0.4072,
      "step": 254
    },
    {
      "epoch": 0.1925618274494997,
      "grad_norm": 0.147821843624115,
      "learning_rate": 9.140746393556854e-05,
      "loss": 0.3441,
      "step": 255
    },
    {
      "epoch": 0.1933169718708703,
      "grad_norm": 0.1582058221101761,
      "learning_rate": 9.134064647622972e-05,
      "loss": 0.4164,
      "step": 256
    },
    {
      "epoch": 0.1940721162922409,
      "grad_norm": 0.1520204097032547,
      "learning_rate": 9.12735948481387e-05,
      "loss": 0.3704,
      "step": 257
    },
    {
      "epoch": 0.19482726071361148,
      "grad_norm": 0.14902523159980774,
      "learning_rate": 9.120630943110077e-05,
      "loss": 0.4204,
      "step": 258
    },
    {
      "epoch": 0.19558240513498207,
      "grad_norm": 0.14658547937870026,
      "learning_rate": 9.113879060624553e-05,
      "loss": 0.3435,
      "step": 259
    },
    {
      "epoch": 0.19633754955635266,
      "grad_norm": 0.14122474193572998,
      "learning_rate": 9.107103875602459e-05,
      "loss": 0.3681,
      "step": 260
    },
    {
      "epoch": 0.19709269397772325,
      "grad_norm": 0.14616741240024567,
      "learning_rate": 9.100305426420956e-05,
      "loss": 0.3931,
      "step": 261
    },
    {
      "epoch": 0.19784783839909384,
      "grad_norm": 0.1556493043899536,
      "learning_rate": 9.093483751588983e-05,
      "loss": 0.3966,
      "step": 262
    },
    {
      "epoch": 0.1986029828204644,
      "grad_norm": 0.1637788712978363,
      "learning_rate": 9.086638889747035e-05,
      "loss": 0.5167,
      "step": 263
    },
    {
      "epoch": 0.199358127241835,
      "grad_norm": 0.1598958671092987,
      "learning_rate": 9.079770879666949e-05,
      "loss": 0.4257,
      "step": 264
    },
    {
      "epoch": 0.20011327166320558,
      "grad_norm": 0.15921838581562042,
      "learning_rate": 9.072879760251679e-05,
      "loss": 0.4247,
      "step": 265
    },
    {
      "epoch": 0.20086841608457617,
      "grad_norm": 0.16750189661979675,
      "learning_rate": 9.065965570535082e-05,
      "loss": 0.4249,
      "step": 266
    },
    {
      "epoch": 0.20162356050594676,
      "grad_norm": 0.18144546449184418,
      "learning_rate": 9.059028349681694e-05,
      "loss": 0.5394,
      "step": 267
    },
    {
      "epoch": 0.20237870492731735,
      "grad_norm": 0.16736812889575958,
      "learning_rate": 9.052068136986502e-05,
      "loss": 0.5119,
      "step": 268
    },
    {
      "epoch": 0.20313384934868794,
      "grad_norm": 0.1700868159532547,
      "learning_rate": 9.045084971874738e-05,
      "loss": 0.4552,
      "step": 269
    },
    {
      "epoch": 0.20388899377005854,
      "grad_norm": 0.17564022541046143,
      "learning_rate": 9.038078893901634e-05,
      "loss": 0.4344,
      "step": 270
    },
    {
      "epoch": 0.2046441381914291,
      "grad_norm": 0.18228355050086975,
      "learning_rate": 9.031049942752215e-05,
      "loss": 0.4206,
      "step": 271
    },
    {
      "epoch": 0.2053992826127997,
      "grad_norm": 0.17782603204250336,
      "learning_rate": 9.023998158241068e-05,
      "loss": 0.4786,
      "step": 272
    },
    {
      "epoch": 0.20615442703417028,
      "grad_norm": 0.18378295004367828,
      "learning_rate": 9.016923580312113e-05,
      "loss": 0.443,
      "step": 273
    },
    {
      "epoch": 0.20690957145554087,
      "grad_norm": 0.17299628257751465,
      "learning_rate": 9.009826249038387e-05,
      "loss": 0.3804,
      "step": 274
    },
    {
      "epoch": 0.20766471587691146,
      "grad_norm": 0.19087707996368408,
      "learning_rate": 9.002706204621803e-05,
      "loss": 0.4698,
      "step": 275
    },
    {
      "epoch": 0.20841986029828205,
      "grad_norm": 0.17627054452896118,
      "learning_rate": 8.995563487392932e-05,
      "loss": 0.3927,
      "step": 276
    },
    {
      "epoch": 0.20917500471965264,
      "grad_norm": 0.19746743142604828,
      "learning_rate": 8.988398137810777e-05,
      "loss": 0.4765,
      "step": 277
    },
    {
      "epoch": 0.20993014914102323,
      "grad_norm": 0.19492515921592712,
      "learning_rate": 8.981210196462533e-05,
      "loss": 0.4254,
      "step": 278
    },
    {
      "epoch": 0.21068529356239382,
      "grad_norm": 0.19054411351680756,
      "learning_rate": 8.973999704063365e-05,
      "loss": 0.398,
      "step": 279
    },
    {
      "epoch": 0.21144043798376438,
      "grad_norm": 0.202229306101799,
      "learning_rate": 8.966766701456177e-05,
      "loss": 0.5233,
      "step": 280
    },
    {
      "epoch": 0.21219558240513497,
      "grad_norm": 0.20624679327011108,
      "learning_rate": 8.959511229611376e-05,
      "loss": 0.4411,
      "step": 281
    },
    {
      "epoch": 0.21295072682650557,
      "grad_norm": 0.2044438272714615,
      "learning_rate": 8.952233329626647e-05,
      "loss": 0.4102,
      "step": 282
    },
    {
      "epoch": 0.21370587124787616,
      "grad_norm": 0.20926257967948914,
      "learning_rate": 8.944933042726714e-05,
      "loss": 0.3872,
      "step": 283
    },
    {
      "epoch": 0.21446101566924675,
      "grad_norm": 0.2206258326768875,
      "learning_rate": 8.937610410263109e-05,
      "loss": 0.4126,
      "step": 284
    },
    {
      "epoch": 0.21521616009061734,
      "grad_norm": 0.23429769277572632,
      "learning_rate": 8.930265473713938e-05,
      "loss": 0.4453,
      "step": 285
    },
    {
      "epoch": 0.21597130451198793,
      "grad_norm": 0.2304621934890747,
      "learning_rate": 8.922898274683644e-05,
      "loss": 0.4429,
      "step": 286
    },
    {
      "epoch": 0.21672644893335852,
      "grad_norm": 0.23192839324474335,
      "learning_rate": 8.915508854902778e-05,
      "loss": 0.4737,
      "step": 287
    },
    {
      "epoch": 0.21748159335472908,
      "grad_norm": 0.23144613206386566,
      "learning_rate": 8.908097256227749e-05,
      "loss": 0.382,
      "step": 288
    },
    {
      "epoch": 0.21823673777609967,
      "grad_norm": 0.24735011160373688,
      "learning_rate": 8.900663520640604e-05,
      "loss": 0.3929,
      "step": 289
    },
    {
      "epoch": 0.21899188219747026,
      "grad_norm": 0.267395555973053,
      "learning_rate": 8.893207690248776e-05,
      "loss": 0.4586,
      "step": 290
    },
    {
      "epoch": 0.21974702661884085,
      "grad_norm": 0.2836948335170746,
      "learning_rate": 8.885729807284856e-05,
      "loss": 0.5218,
      "step": 291
    },
    {
      "epoch": 0.22050217104021144,
      "grad_norm": 0.2509090304374695,
      "learning_rate": 8.878229914106342e-05,
      "loss": 0.4114,
      "step": 292
    },
    {
      "epoch": 0.22125731546158203,
      "grad_norm": 0.27369990944862366,
      "learning_rate": 8.870708053195413e-05,
      "loss": 0.3736,
      "step": 293
    },
    {
      "epoch": 0.22201245988295262,
      "grad_norm": 0.279341459274292,
      "learning_rate": 8.863164267158678e-05,
      "loss": 0.3845,
      "step": 294
    },
    {
      "epoch": 0.2227676043043232,
      "grad_norm": 0.27738478779792786,
      "learning_rate": 8.855598598726939e-05,
      "loss": 0.3331,
      "step": 295
    },
    {
      "epoch": 0.22352274872569378,
      "grad_norm": 0.32042670249938965,
      "learning_rate": 8.848011090754947e-05,
      "loss": 0.3943,
      "step": 296
    },
    {
      "epoch": 0.22427789314706437,
      "grad_norm": 0.2977651059627533,
      "learning_rate": 8.840401786221159e-05,
      "loss": 0.3701,
      "step": 297
    },
    {
      "epoch": 0.22503303756843496,
      "grad_norm": 0.35676780343055725,
      "learning_rate": 8.832770728227502e-05,
      "loss": 0.4728,
      "step": 298
    },
    {
      "epoch": 0.22578818198980555,
      "grad_norm": 0.3656991720199585,
      "learning_rate": 8.825117959999116e-05,
      "loss": 0.3662,
      "step": 299
    },
    {
      "epoch": 0.22654332641117614,
      "grad_norm": 0.36816850304603577,
      "learning_rate": 8.817443524884119e-05,
      "loss": 0.3644,
      "step": 300
    },
    {
      "epoch": 0.22729847083254673,
      "grad_norm": 0.12952867150306702,
      "learning_rate": 8.809747466353356e-05,
      "loss": 0.2991,
      "step": 301
    },
    {
      "epoch": 0.22805361525391732,
      "grad_norm": 0.1346244215965271,
      "learning_rate": 8.802029828000156e-05,
      "loss": 0.3154,
      "step": 302
    },
    {
      "epoch": 0.2288087596752879,
      "grad_norm": 0.15575996041297913,
      "learning_rate": 8.794290653540084e-05,
      "loss": 0.4171,
      "step": 303
    },
    {
      "epoch": 0.22956390409665847,
      "grad_norm": 0.1542641818523407,
      "learning_rate": 8.7865299868107e-05,
      "loss": 0.3851,
      "step": 304
    },
    {
      "epoch": 0.23031904851802906,
      "grad_norm": 0.16775798797607422,
      "learning_rate": 8.778747871771292e-05,
      "loss": 0.3872,
      "step": 305
    },
    {
      "epoch": 0.23107419293939965,
      "grad_norm": 0.15936799347400665,
      "learning_rate": 8.770944352502648e-05,
      "loss": 0.3998,
      "step": 306
    },
    {
      "epoch": 0.23182933736077024,
      "grad_norm": 0.16513392329216003,
      "learning_rate": 8.763119473206794e-05,
      "loss": 0.4005,
      "step": 307
    },
    {
      "epoch": 0.23258448178214083,
      "grad_norm": 0.14965298771858215,
      "learning_rate": 8.755273278206749e-05,
      "loss": 0.3792,
      "step": 308
    },
    {
      "epoch": 0.23333962620351142,
      "grad_norm": 0.1585433930158615,
      "learning_rate": 8.74740581194627e-05,
      "loss": 0.4291,
      "step": 309
    },
    {
      "epoch": 0.23409477062488201,
      "grad_norm": 0.16825465857982635,
      "learning_rate": 8.739517118989605e-05,
      "loss": 0.4434,
      "step": 310
    },
    {
      "epoch": 0.2348499150462526,
      "grad_norm": 0.16293483972549438,
      "learning_rate": 8.731607244021236e-05,
      "loss": 0.4481,
      "step": 311
    },
    {
      "epoch": 0.2356050594676232,
      "grad_norm": 0.15974049270153046,
      "learning_rate": 8.723676231845626e-05,
      "loss": 0.4557,
      "step": 312
    },
    {
      "epoch": 0.23636020388899376,
      "grad_norm": 0.1649303436279297,
      "learning_rate": 8.715724127386972e-05,
      "loss": 0.4451,
      "step": 313
    },
    {
      "epoch": 0.23711534831036435,
      "grad_norm": 0.16639627516269684,
      "learning_rate": 8.70775097568894e-05,
      "loss": 0.4518,
      "step": 314
    },
    {
      "epoch": 0.23787049273173494,
      "grad_norm": 0.17083673179149628,
      "learning_rate": 8.69975682191442e-05,
      "loss": 0.3745,
      "step": 315
    },
    {
      "epoch": 0.23862563715310553,
      "grad_norm": 0.16381201148033142,
      "learning_rate": 8.691741711345263e-05,
      "loss": 0.3922,
      "step": 316
    },
    {
      "epoch": 0.23938078157447612,
      "grad_norm": 0.17072418332099915,
      "learning_rate": 8.683705689382024e-05,
      "loss": 0.3563,
      "step": 317
    },
    {
      "epoch": 0.2401359259958467,
      "grad_norm": 0.1824284940958023,
      "learning_rate": 8.675648801543718e-05,
      "loss": 0.432,
      "step": 318
    },
    {
      "epoch": 0.2408910704172173,
      "grad_norm": 0.17872655391693115,
      "learning_rate": 8.667571093467541e-05,
      "loss": 0.4294,
      "step": 319
    },
    {
      "epoch": 0.2416462148385879,
      "grad_norm": 0.17728659510612488,
      "learning_rate": 8.659472610908627e-05,
      "loss": 0.3963,
      "step": 320
    },
    {
      "epoch": 0.24240135925995845,
      "grad_norm": 0.18014661967754364,
      "learning_rate": 8.651353399739787e-05,
      "loss": 0.4614,
      "step": 321
    },
    {
      "epoch": 0.24315650368132904,
      "grad_norm": 0.18057285249233246,
      "learning_rate": 8.643213505951242e-05,
      "loss": 0.4134,
      "step": 322
    },
    {
      "epoch": 0.24391164810269964,
      "grad_norm": 0.19068454205989838,
      "learning_rate": 8.635052975650369e-05,
      "loss": 0.539,
      "step": 323
    },
    {
      "epoch": 0.24466679252407023,
      "grad_norm": 0.18311984837055206,
      "learning_rate": 8.626871855061438e-05,
      "loss": 0.3847,
      "step": 324
    },
    {
      "epoch": 0.24542193694544082,
      "grad_norm": 0.172931507229805,
      "learning_rate": 8.618670190525352e-05,
      "loss": 0.3835,
      "step": 325
    },
    {
      "epoch": 0.2461770813668114,
      "grad_norm": 0.18892034888267517,
      "learning_rate": 8.610448028499376e-05,
      "loss": 0.4231,
      "step": 326
    },
    {
      "epoch": 0.246932225788182,
      "grad_norm": 0.19887331128120422,
      "learning_rate": 8.602205415556889e-05,
      "loss": 0.4835,
      "step": 327
    },
    {
      "epoch": 0.2476873702095526,
      "grad_norm": 0.18918727338314056,
      "learning_rate": 8.593942398387105e-05,
      "loss": 0.4285,
      "step": 328
    },
    {
      "epoch": 0.24844251463092315,
      "grad_norm": 0.19010977447032928,
      "learning_rate": 8.585659023794818e-05,
      "loss": 0.4059,
      "step": 329
    },
    {
      "epoch": 0.24919765905229374,
      "grad_norm": 0.1906062811613083,
      "learning_rate": 8.577355338700132e-05,
      "loss": 0.423,
      "step": 330
    },
    {
      "epoch": 0.24995280347366433,
      "grad_norm": 0.1999729573726654,
      "learning_rate": 8.569031390138202e-05,
      "loss": 0.4482,
      "step": 331
    },
    {
      "epoch": 0.2507079478950349,
      "grad_norm": 0.20304431021213531,
      "learning_rate": 8.560687225258958e-05,
      "loss": 0.3917,
      "step": 332
    },
    {
      "epoch": 0.2514630923164055,
      "grad_norm": 0.21377113461494446,
      "learning_rate": 8.552322891326846e-05,
      "loss": 0.4558,
      "step": 333
    },
    {
      "epoch": 0.2522182367377761,
      "grad_norm": 0.21330960094928741,
      "learning_rate": 8.543938435720549e-05,
      "loss": 0.4073,
      "step": 334
    },
    {
      "epoch": 0.2529733811591467,
      "grad_norm": 0.2062496691942215,
      "learning_rate": 8.535533905932738e-05,
      "loss": 0.3382,
      "step": 335
    },
    {
      "epoch": 0.2537285255805173,
      "grad_norm": 0.24538478255271912,
      "learning_rate": 8.527109349569787e-05,
      "loss": 0.4455,
      "step": 336
    },
    {
      "epoch": 0.2544836700018879,
      "grad_norm": 0.2312334179878235,
      "learning_rate": 8.518664814351502e-05,
      "loss": 0.4248,
      "step": 337
    },
    {
      "epoch": 0.25523881442325846,
      "grad_norm": 0.2378857582807541,
      "learning_rate": 8.510200348110868e-05,
      "loss": 0.4568,
      "step": 338
    },
    {
      "epoch": 0.25599395884462905,
      "grad_norm": 0.2495100200176239,
      "learning_rate": 8.501715998793757e-05,
      "loss": 0.4504,
      "step": 339
    },
    {
      "epoch": 0.25674910326599965,
      "grad_norm": 0.2401399314403534,
      "learning_rate": 8.493211814458673e-05,
      "loss": 0.3736,
      "step": 340
    },
    {
      "epoch": 0.25750424768737024,
      "grad_norm": 0.2643365263938904,
      "learning_rate": 8.484687843276469e-05,
      "loss": 0.4577,
      "step": 341
    },
    {
      "epoch": 0.25825939210874077,
      "grad_norm": 0.26086193323135376,
      "learning_rate": 8.476144133530075e-05,
      "loss": 0.4151,
      "step": 342
    },
    {
      "epoch": 0.25901453653011136,
      "grad_norm": 0.25575414299964905,
      "learning_rate": 8.467580733614233e-05,
      "loss": 0.4097,
      "step": 343
    },
    {
      "epoch": 0.25976968095148195,
      "grad_norm": 0.24969545006752014,
      "learning_rate": 8.45899769203522e-05,
      "loss": 0.3696,
      "step": 344
    },
    {
      "epoch": 0.26052482537285254,
      "grad_norm": 0.2540886402130127,
      "learning_rate": 8.450395057410561e-05,
      "loss": 0.3615,
      "step": 345
    },
    {
      "epoch": 0.26127996979422313,
      "grad_norm": 0.30520960688591003,
      "learning_rate": 8.44177287846877e-05,
      "loss": 0.393,
      "step": 346
    },
    {
      "epoch": 0.2620351142155937,
      "grad_norm": 0.300483763217926,
      "learning_rate": 8.433131204049067e-05,
      "loss": 0.3889,
      "step": 347
    },
    {
      "epoch": 0.2627902586369643,
      "grad_norm": 0.326023131608963,
      "learning_rate": 8.424470083101101e-05,
      "loss": 0.4587,
      "step": 348
    },
    {
      "epoch": 0.2635454030583349,
      "grad_norm": 0.3497852683067322,
      "learning_rate": 8.415789564684673e-05,
      "loss": 0.4071,
      "step": 349
    },
    {
      "epoch": 0.2643005474797055,
      "grad_norm": 0.39250481128692627,
      "learning_rate": 8.407089697969457e-05,
      "loss": 0.3864,
      "step": 350
    },
    {
      "epoch": 0.2650556919010761,
      "grad_norm": 0.13501006364822388,
      "learning_rate": 8.398370532234722e-05,
      "loss": 0.2857,
      "step": 351
    },
    {
      "epoch": 0.2658108363224467,
      "grad_norm": 0.13370132446289062,
      "learning_rate": 8.389632116869061e-05,
      "loss": 0.3307,
      "step": 352
    },
    {
      "epoch": 0.26656598074381727,
      "grad_norm": 0.15557359158992767,
      "learning_rate": 8.380874501370097e-05,
      "loss": 0.3663,
      "step": 353
    },
    {
      "epoch": 0.26732112516518786,
      "grad_norm": 0.15083420276641846,
      "learning_rate": 8.372097735344212e-05,
      "loss": 0.3517,
      "step": 354
    },
    {
      "epoch": 0.26807626958655845,
      "grad_norm": 0.1640172004699707,
      "learning_rate": 8.363301868506264e-05,
      "loss": 0.3621,
      "step": 355
    },
    {
      "epoch": 0.26883141400792904,
      "grad_norm": 0.17162185907363892,
      "learning_rate": 8.354486950679301e-05,
      "loss": 0.3933,
      "step": 356
    },
    {
      "epoch": 0.2695865584292996,
      "grad_norm": 0.154635950922966,
      "learning_rate": 8.345653031794292e-05,
      "loss": 0.3834,
      "step": 357
    },
    {
      "epoch": 0.2703417028506702,
      "grad_norm": 0.15567855536937714,
      "learning_rate": 8.336800161889826e-05,
      "loss": 0.4,
      "step": 358
    },
    {
      "epoch": 0.27109684727204075,
      "grad_norm": 0.1589149832725525,
      "learning_rate": 8.327928391111841e-05,
      "loss": 0.3923,
      "step": 359
    },
    {
      "epoch": 0.27185199169341134,
      "grad_norm": 0.1654612272977829,
      "learning_rate": 8.319037769713338e-05,
      "loss": 0.3808,
      "step": 360
    },
    {
      "epoch": 0.27260713611478193,
      "grad_norm": 0.1704426407814026,
      "learning_rate": 8.310128348054094e-05,
      "loss": 0.4662,
      "step": 361
    },
    {
      "epoch": 0.2733622805361525,
      "grad_norm": 0.16645170748233795,
      "learning_rate": 8.301200176600375e-05,
      "loss": 0.4369,
      "step": 362
    },
    {
      "epoch": 0.2741174249575231,
      "grad_norm": 0.16606634855270386,
      "learning_rate": 8.292253305924655e-05,
      "loss": 0.4147,
      "step": 363
    },
    {
      "epoch": 0.2748725693788937,
      "grad_norm": 0.17440947890281677,
      "learning_rate": 8.283287786705331e-05,
      "loss": 0.4787,
      "step": 364
    },
    {
      "epoch": 0.2756277138002643,
      "grad_norm": 0.1618100106716156,
      "learning_rate": 8.274303669726426e-05,
      "loss": 0.3666,
      "step": 365
    },
    {
      "epoch": 0.2763828582216349,
      "grad_norm": 0.17216123640537262,
      "learning_rate": 8.265301005877309e-05,
      "loss": 0.4511,
      "step": 366
    },
    {
      "epoch": 0.2771380026430055,
      "grad_norm": 0.17408417165279388,
      "learning_rate": 8.25627984615241e-05,
      "loss": 0.4472,
      "step": 367
    },
    {
      "epoch": 0.27789314706437607,
      "grad_norm": 0.17124348878860474,
|
"learning_rate": 8.247240241650918e-05, |
|
"loss": 0.4406, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.27864829148574666, |
|
"grad_norm": 0.1730695217847824, |
|
"learning_rate": 8.238182243576512e-05, |
|
"loss": 0.3658, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.27940343590711725, |
|
"grad_norm": 0.1981906294822693, |
|
"learning_rate": 8.229105903237044e-05, |
|
"loss": 0.4417, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.28015858032848784, |
|
"grad_norm": 0.18475179374217987, |
|
"learning_rate": 8.220011272044277e-05, |
|
"loss": 0.4125, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.28091372474985843, |
|
"grad_norm": 0.1894509643316269, |
|
"learning_rate": 8.210898401513574e-05, |
|
"loss": 0.463, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.281668869171229, |
|
"grad_norm": 0.18303871154785156, |
|
"learning_rate": 8.201767343263612e-05, |
|
"loss": 0.4299, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.2824240135925996, |
|
"grad_norm": 0.1773396134376526, |
|
"learning_rate": 8.192618149016091e-05, |
|
"loss": 0.384, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.28317915801397014, |
|
"grad_norm": 0.1883654147386551, |
|
"learning_rate": 8.183450870595441e-05, |
|
"loss": 0.3913, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.28393430243534074, |
|
"grad_norm": 0.18863160908222198, |
|
"learning_rate": 8.174265559928527e-05, |
|
"loss": 0.4005, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.2846894468567113, |
|
"grad_norm": 0.18807992339134216, |
|
"learning_rate": 8.165062269044353e-05, |
|
"loss": 0.3547, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.2854445912780819, |
|
"grad_norm": 0.1992112398147583, |
|
"learning_rate": 8.155841050073771e-05, |
|
"loss": 0.3788, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.2861997356994525, |
|
"grad_norm": 0.2015550285577774, |
|
"learning_rate": 8.146601955249188e-05, |
|
"loss": 0.457, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.2869548801208231, |
|
"grad_norm": 0.2002776712179184, |
|
"learning_rate": 8.13734503690426e-05, |
|
"loss": 0.3584, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2877100245421937, |
|
"grad_norm": 0.21863123774528503, |
|
"learning_rate": 8.128070347473609e-05, |
|
"loss": 0.4101, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.2884651689635643, |
|
"grad_norm": 0.21147026121616364, |
|
"learning_rate": 8.11877793949251e-05, |
|
"loss": 0.4124, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.28922031338493487, |
|
"grad_norm": 0.21045903861522675, |
|
"learning_rate": 8.109467865596612e-05, |
|
"loss": 0.3634, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.28997545780630546, |
|
"grad_norm": 0.23374846577644348, |
|
"learning_rate": 8.100140178521624e-05, |
|
"loss": 0.4587, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.29073060222767605, |
|
"grad_norm": 0.21156945824623108, |
|
"learning_rate": 8.090794931103026e-05, |
|
"loss": 0.3417, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.29148574664904664, |
|
"grad_norm": 0.23073367774486542, |
|
"learning_rate": 8.081432176275765e-05, |
|
"loss": 0.4538, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.29224089107041723, |
|
"grad_norm": 0.25584685802459717, |
|
"learning_rate": 8.072051967073955e-05, |
|
"loss": 0.4006, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.2929960354917878, |
|
"grad_norm": 0.25985392928123474, |
|
"learning_rate": 8.06265435663058e-05, |
|
"loss": 0.4146, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.2937511799131584, |
|
"grad_norm": 0.23220856487751007, |
|
"learning_rate": 8.053239398177191e-05, |
|
"loss": 0.3574, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.294506324334529, |
|
"grad_norm": 0.26954004168510437, |
|
"learning_rate": 8.043807145043604e-05, |
|
"loss": 0.4694, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2952614687558996, |
|
"grad_norm": 0.25626078248023987, |
|
"learning_rate": 8.034357650657598e-05, |
|
"loss": 0.3952, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.2960166131772701, |
|
"grad_norm": 0.24921610951423645, |
|
"learning_rate": 8.024890968544613e-05, |
|
"loss": 0.3574, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.2967717575986407, |
|
"grad_norm": 0.25851958990097046, |
|
"learning_rate": 8.015407152327448e-05, |
|
"loss": 0.3562, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.2975269020200113, |
|
"grad_norm": 0.29698118567466736, |
|
"learning_rate": 8.005906255725956e-05, |
|
"loss": 0.4558, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.2982820464413819, |
|
"grad_norm": 0.2812412977218628, |
|
"learning_rate": 7.996388332556735e-05, |
|
"loss": 0.3705, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.2990371908627525, |
|
"grad_norm": 0.27774620056152344, |
|
"learning_rate": 7.986853436732836e-05, |
|
"loss": 0.3893, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.2997923352841231, |
|
"grad_norm": 0.3051348626613617, |
|
"learning_rate": 7.97730162226344e-05, |
|
"loss": 0.3988, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.30054747970549367, |
|
"grad_norm": 0.34216588735580444, |
|
"learning_rate": 7.967732943253571e-05, |
|
"loss": 0.3755, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.30130262412686426, |
|
"grad_norm": 0.3706994652748108, |
|
"learning_rate": 7.958147453903773e-05, |
|
"loss": 0.4116, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.30205776854823485, |
|
"grad_norm": 0.44550538063049316, |
|
"learning_rate": 7.94854520850981e-05, |
|
"loss": 0.3799, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.30281291296960544, |
|
"grad_norm": 0.14217671751976013, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 0.3466, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.30356805739097603, |
|
"grad_norm": 0.15967446565628052, |
|
"learning_rate": 7.92929066724672e-05, |
|
"loss": 0.3642, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.3043232018123466, |
|
"grad_norm": 0.15074422955513, |
|
"learning_rate": 7.919638480442452e-05, |
|
"loss": 0.3297, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.3050783462337172, |
|
"grad_norm": 0.15751783549785614, |
|
"learning_rate": 7.90996975572313e-05, |
|
"loss": 0.3825, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.3058334906550878, |
|
"grad_norm": 0.15959064662456512, |
|
"learning_rate": 7.900284547855991e-05, |
|
"loss": 0.4007, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3065886350764584, |
|
"grad_norm": 0.16654494404792786, |
|
"learning_rate": 7.890582911701649e-05, |
|
"loss": 0.3951, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.307343779497829, |
|
"grad_norm": 0.16326242685317993, |
|
"learning_rate": 7.880864902213765e-05, |
|
"loss": 0.3828, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.3080989239191996, |
|
"grad_norm": 0.1605200171470642, |
|
"learning_rate": 7.871130574438752e-05, |
|
"loss": 0.4009, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.3088540683405701, |
|
"grad_norm": 0.1753162443637848, |
|
"learning_rate": 7.861379983515449e-05, |
|
"loss": 0.4088, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.3096092127619407, |
|
"grad_norm": 0.1612052321434021, |
|
"learning_rate": 7.85161318467482e-05, |
|
"loss": 0.3821, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3103643571833113, |
|
"grad_norm": 0.17267635464668274, |
|
"learning_rate": 7.841830233239638e-05, |
|
"loss": 0.4376, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.3111195016046819, |
|
"grad_norm": 0.17416229844093323, |
|
"learning_rate": 7.832031184624164e-05, |
|
"loss": 0.4073, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.31187464602605247, |
|
"grad_norm": 0.17623671889305115, |
|
"learning_rate": 7.822216094333847e-05, |
|
"loss": 0.4482, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.31262979044742306, |
|
"grad_norm": 0.1862659901380539, |
|
"learning_rate": 7.812385017964994e-05, |
|
"loss": 0.4902, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.31338493486879365, |
|
"grad_norm": 0.19047270715236664, |
|
"learning_rate": 7.80253801120447e-05, |
|
"loss": 0.4848, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.31414007929016424, |
|
"grad_norm": 0.18323764204978943, |
|
"learning_rate": 7.792675129829373e-05, |
|
"loss": 0.3942, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.31489522371153483, |
|
"grad_norm": 0.17627227306365967, |
|
"learning_rate": 7.78279642970672e-05, |
|
"loss": 0.3859, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.3156503681329054, |
|
"grad_norm": 0.18996286392211914, |
|
"learning_rate": 7.772901966793132e-05, |
|
"loss": 0.3722, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.316405512554276, |
|
"grad_norm": 0.17553554475307465, |
|
"learning_rate": 7.762991797134514e-05, |
|
"loss": 0.4016, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.3171606569756466, |
|
"grad_norm": 0.18145516514778137, |
|
"learning_rate": 7.753065976865744e-05, |
|
"loss": 0.3781, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3179158013970172, |
|
"grad_norm": 0.18947438895702362, |
|
"learning_rate": 7.74312456221035e-05, |
|
"loss": 0.4143, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.3186709458183878, |
|
"grad_norm": 0.18466342985630035, |
|
"learning_rate": 7.73316760948019e-05, |
|
"loss": 0.446, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.3194260902397584, |
|
"grad_norm": 0.19619819521903992, |
|
"learning_rate": 7.723195175075136e-05, |
|
"loss": 0.4955, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.32018123466112897, |
|
"grad_norm": 0.1804795265197754, |
|
"learning_rate": 7.713207315482755e-05, |
|
"loss": 0.3693, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.3209363790824995, |
|
"grad_norm": 0.1992131769657135, |
|
"learning_rate": 7.703204087277988e-05, |
|
"loss": 0.4447, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3216915235038701, |
|
"grad_norm": 0.19404684007167816, |
|
"learning_rate": 7.693185547122829e-05, |
|
"loss": 0.3876, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.3224466679252407, |
|
"grad_norm": 0.19772908091545105, |
|
"learning_rate": 7.683151751766004e-05, |
|
"loss": 0.4085, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.3232018123466113, |
|
"grad_norm": 0.21671298146247864, |
|
"learning_rate": 7.673102758042653e-05, |
|
"loss": 0.4911, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.32395695676798186, |
|
"grad_norm": 0.20475253462791443, |
|
"learning_rate": 7.663038622873999e-05, |
|
"loss": 0.395, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.32471210118935245, |
|
"grad_norm": 0.2204219251871109, |
|
"learning_rate": 7.652959403267041e-05, |
|
"loss": 0.4481, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.32546724561072304, |
|
"grad_norm": 0.20216889679431915, |
|
"learning_rate": 7.64286515631421e-05, |
|
"loss": 0.3628, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.32622239003209363, |
|
"grad_norm": 0.22937530279159546, |
|
"learning_rate": 7.63275593919307e-05, |
|
"loss": 0.5035, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3269775344534642, |
|
"grad_norm": 0.2090550810098648, |
|
"learning_rate": 7.622631809165973e-05, |
|
"loss": 0.3664, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3277326788748348, |
|
"grad_norm": 0.22946757078170776, |
|
"learning_rate": 7.612492823579745e-05, |
|
"loss": 0.4146, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.3284878232962054, |
|
"grad_norm": 0.24275080859661102, |
|
"learning_rate": 7.602339039865362e-05, |
|
"loss": 0.3957, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.329242967717576, |
|
"grad_norm": 0.24225813150405884, |
|
"learning_rate": 7.59217051553762e-05, |
|
"loss": 0.4345, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.3299981121389466, |
|
"grad_norm": 0.24835467338562012, |
|
"learning_rate": 7.58198730819481e-05, |
|
"loss": 0.4618, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3307532565603172, |
|
"grad_norm": 0.22757121920585632, |
|
"learning_rate": 7.571789475518399e-05, |
|
"loss": 0.3643, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.33150840098168777, |
|
"grad_norm": 0.2474457025527954, |
|
"learning_rate": 7.561577075272686e-05, |
|
"loss": 0.3929, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.33226354540305836, |
|
"grad_norm": 0.2527807056903839, |
|
"learning_rate": 7.5513501653045e-05, |
|
"loss": 0.3763, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.33301868982442895, |
|
"grad_norm": 0.25088363885879517, |
|
"learning_rate": 7.541108803542846e-05, |
|
"loss": 0.4079, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.3337738342457995, |
|
"grad_norm": 0.26713383197784424, |
|
"learning_rate": 7.530853047998599e-05, |
|
"loss": 0.4074, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.3345289786671701, |
|
"grad_norm": 0.2865240275859833, |
|
"learning_rate": 7.52058295676416e-05, |
|
"loss": 0.4333, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.33528412308854066, |
|
"grad_norm": 0.27827370166778564, |
|
"learning_rate": 7.510298588013134e-05, |
|
"loss": 0.4248, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.33603926750991125, |
|
"grad_norm": 0.27344340085983276, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.3485, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.33679441193128185, |
|
"grad_norm": 0.2948186993598938, |
|
"learning_rate": 7.48968725105978e-05, |
|
"loss": 0.3763, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.33754955635265244, |
|
"grad_norm": 0.3137521743774414, |
|
"learning_rate": 7.479360399607707e-05, |
|
"loss": 0.4188, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.338304700774023, |
|
"grad_norm": 0.30302637815475464, |
|
"learning_rate": 7.469019504138898e-05, |
|
"loss": 0.3524, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.3390598451953936, |
|
"grad_norm": 0.4019578695297241, |
|
"learning_rate": 7.45866462322802e-05, |
|
"loss": 0.4313, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.3398149896167642, |
|
"grad_norm": 0.40166327357292175, |
|
"learning_rate": 7.448295815528956e-05, |
|
"loss": 0.4065, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3405701340381348, |
|
"grad_norm": 0.15198914706707, |
|
"learning_rate": 7.437913139774482e-05, |
|
"loss": 0.3844, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.3413252784595054, |
|
"grad_norm": 0.1483955681324005, |
|
"learning_rate": 7.427516654775922e-05, |
|
"loss": 0.3974, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.342080422880876, |
|
"grad_norm": 0.15665127336978912, |
|
"learning_rate": 7.417106419422819e-05, |
|
"loss": 0.3879, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.34283556730224657, |
|
"grad_norm": 0.15843887627124786, |
|
"learning_rate": 7.406682492682611e-05, |
|
"loss": 0.4013, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.34359071172361716, |
|
"grad_norm": 0.1631614714860916, |
|
"learning_rate": 7.396244933600285e-05, |
|
"loss": 0.4135, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.34434585614498775, |
|
"grad_norm": 0.1582673341035843, |
|
"learning_rate": 7.385793801298042e-05, |
|
"loss": 0.3674, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.34510100056635834, |
|
"grad_norm": 0.15745416283607483, |
|
"learning_rate": 7.375329154974975e-05, |
|
"loss": 0.3907, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.34585614498772893, |
|
"grad_norm": 0.15578390657901764, |
|
"learning_rate": 7.364851053906718e-05, |
|
"loss": 0.3829, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.34661128940909947, |
|
"grad_norm": 0.16378001868724823, |
|
"learning_rate": 7.354359557445126e-05, |
|
"loss": 0.4303, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.34736643383047006, |
|
"grad_norm": 0.16894683241844177, |
|
"learning_rate": 7.343854725017918e-05, |
|
"loss": 0.4237, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.34812157825184065, |
|
"grad_norm": 0.16270391643047333, |
|
"learning_rate": 7.333336616128369e-05, |
|
"loss": 0.4058, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.34887672267321124, |
|
"grad_norm": 0.1628381311893463, |
|
"learning_rate": 7.322805290354943e-05, |
|
"loss": 0.3908, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.3496318670945818, |
|
"grad_norm": 0.17563199996948242, |
|
"learning_rate": 7.312260807350975e-05, |
|
"loss": 0.4293, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.3503870115159524, |
|
"grad_norm": 0.16583438217639923, |
|
"learning_rate": 7.301703226844327e-05, |
|
"loss": 0.3951, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.351142155937323, |
|
"grad_norm": 0.18068847060203552, |
|
"learning_rate": 7.291132608637052e-05, |
|
"loss": 0.4112, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3518973003586936, |
|
"grad_norm": 0.18490007519721985, |
|
"learning_rate": 7.28054901260505e-05, |
|
"loss": 0.4502, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.3526524447800642, |
|
"grad_norm": 0.1794682741165161, |
|
"learning_rate": 7.269952498697734e-05, |
|
"loss": 0.414, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.3534075892014348, |
|
"grad_norm": 0.18929120898246765, |
|
"learning_rate": 7.259343126937689e-05, |
|
"loss": 0.4282, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.35416273362280537, |
|
"grad_norm": 0.18550460040569305, |
|
"learning_rate": 7.24872095742033e-05, |
|
"loss": 0.4052, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.35491787804417596, |
|
"grad_norm": 0.20153513550758362, |
|
"learning_rate": 7.238086050313563e-05, |
|
"loss": 0.5138, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.35567302246554655, |
|
"grad_norm": 0.1876605898141861, |
|
"learning_rate": 7.227438465857448e-05, |
|
"loss": 0.3959, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.35642816688691714, |
|
"grad_norm": 0.19823205471038818, |
|
"learning_rate": 7.216778264363853e-05, |
|
"loss": 0.4628, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.35718331130828773, |
|
"grad_norm": 0.1963682770729065, |
|
"learning_rate": 7.206105506216106e-05, |
|
"loss": 0.3864, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.3579384557296583, |
|
"grad_norm": 0.197306826710701, |
|
"learning_rate": 7.195420251868675e-05, |
|
"loss": 0.3942, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.35869360015102886, |
|
"grad_norm": 0.20867206156253815, |
|
"learning_rate": 7.184722561846798e-05, |
|
"loss": 0.4348, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.35944874457239945, |
|
"grad_norm": 0.20809237658977509, |
|
"learning_rate": 7.17401249674616e-05, |
|
"loss": 0.4088, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.36020388899377004, |
|
"grad_norm": 0.2266395539045334, |
|
"learning_rate": 7.163290117232542e-05, |
|
"loss": 0.4356, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.36095903341514063, |
|
"grad_norm": 0.21087811887264252, |
|
"learning_rate": 7.152555484041476e-05, |
|
"loss": 0.3613, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.3617141778365112, |
|
"grad_norm": 0.21059347689151764, |
|
"learning_rate": 7.141808657977907e-05, |
|
"loss": 0.3699, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.3624693222578818, |
|
"grad_norm": 0.2281947284936905, |
|
"learning_rate": 7.131049699915841e-05, |
|
"loss": 0.4497, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3632244666792524, |
|
"grad_norm": 0.21307678520679474, |
|
"learning_rate": 7.120278670798009e-05, |
|
"loss": 0.3323, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.363979611100623, |
|
"grad_norm": 0.22060398757457733, |
|
"learning_rate": 7.109495631635512e-05, |
|
"loss": 0.3923, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.3647347555219936, |
|
"grad_norm": 0.2321634739637375, |
|
"learning_rate": 7.098700643507485e-05, |
|
"loss": 0.3901, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.36548989994336417, |
|
"grad_norm": 0.25326424837112427, |
|
"learning_rate": 7.08789376756074e-05, |
|
"loss": 0.3722, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.36624504436473476, |
|
"grad_norm": 0.23451083898544312, |
|
"learning_rate": 7.077075065009433e-05, |
|
"loss": 0.3816, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.36700018878610535, |
|
"grad_norm": 0.2323211133480072, |
|
"learning_rate": 7.066244597134706e-05, |
|
"loss": 0.3628, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.36775533320747594, |
|
"grad_norm": 0.22949382662773132, |
|
"learning_rate": 7.055402425284346e-05, |
|
"loss": 0.382, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.36851047762884653, |
|
"grad_norm": 0.2613668739795685, |
|
"learning_rate": 7.044548610872434e-05, |
|
"loss": 0.4107, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.3692656220502171, |
|
"grad_norm": 0.264017254114151, |
|
"learning_rate": 7.033683215379002e-05, |
|
"loss": 0.4147, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.3700207664715877, |
|
"grad_norm": 0.2688436806201935, |
|
"learning_rate": 7.022806300349675e-05, |
|
"loss": 0.4326, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3707759108929583, |
|
"grad_norm": 0.2659049332141876, |
|
"learning_rate": 7.01191792739534e-05, |
|
"loss": 0.3967, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.37153105531432884, |
|
"grad_norm": 0.25551602244377136, |
|
"learning_rate": 7.001018158191772e-05, |
|
"loss": 0.3369, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.37228619973569943, |
|
"grad_norm": 0.31377312541007996, |
|
"learning_rate": 6.990107054479312e-05, |
|
"loss": 0.4221, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.37304134415707, |
|
"grad_norm": 0.2841019332408905, |
|
"learning_rate": 6.979184678062493e-05, |
|
"loss": 0.439, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.3737964885784406, |
|
"grad_norm": 0.3099525570869446, |
|
"learning_rate": 6.968251090809708e-05, |
|
"loss": 0.398, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3745516329998112, |
|
"grad_norm": 0.3198589086532593, |
|
"learning_rate": 6.957306354652848e-05, |
|
"loss": 0.4434, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.3753067774211818, |
|
"grad_norm": 0.308400422334671, |
|
"learning_rate": 6.946350531586959e-05, |
|
"loss": 0.3945, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.3760619218425524, |
|
"grad_norm": 0.3412231206893921, |
|
"learning_rate": 6.935383683669884e-05, |
|
"loss": 0.4598, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.376817066263923, |
|
"grad_norm": 0.3658400774002075, |
|
"learning_rate": 6.924405873021918e-05, |
|
"loss": 0.3627, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.37757221068529356, |
|
"grad_norm": 0.44540345668792725, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 0.4255, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.37832735510666415, |
|
"grad_norm": 0.14448663592338562, |
|
"learning_rate": 6.902417612324615e-05, |
|
"loss": 0.346, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.37908249952803474, |
|
"grad_norm": 0.15504960715770721, |
|
"learning_rate": 6.891407286824944e-05, |
|
"loss": 0.346, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.37983764394940533, |
|
"grad_norm": 0.1535167396068573, |
|
"learning_rate": 6.880386247692999e-05, |
|
"loss": 0.3239, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.3805927883707759, |
|
"grad_norm": 0.1622663289308548, |
|
"learning_rate": 6.869354557356036e-05, |
|
"loss": 0.3378, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.3813479327921465, |
|
"grad_norm": 0.17973972856998444, |
|
"learning_rate": 6.858312278301637e-05, |
|
"loss": 0.4137, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.3821030772135171, |
|
"grad_norm": 0.17408986389636993, |
|
"learning_rate": 6.84725947307737e-05, |
|
"loss": 0.4164, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.3828582216348877, |
|
"grad_norm": 0.1629510372877121, |
|
"learning_rate": 6.836196204290417e-05, |
|
"loss": 0.3753, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.3836133660562583, |
|
"grad_norm": 0.17549729347229004, |
|
"learning_rate": 6.825122534607239e-05, |
|
"loss": 0.3796, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.3843685104776288, |
|
"grad_norm": 0.1774250566959381, |
|
"learning_rate": 6.814038526753205e-05, |
|
"loss": 0.4563, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.3851236548989994, |
|
"grad_norm": 0.17569324374198914, |
|
"learning_rate": 6.80294424351225e-05, |
|
"loss": 0.3899, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.38587879932037, |
|
"grad_norm": 0.18050192296504974, |
|
"learning_rate": 6.7918397477265e-05, |
|
"loss": 0.4665, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.3866339437417406, |
|
"grad_norm": 0.18089371919631958, |
|
"learning_rate": 6.780725102295948e-05, |
|
"loss": 0.4632, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.3873890881631112, |
|
"grad_norm": 0.16716820001602173, |
|
"learning_rate": 6.769600370178059e-05, |
|
"loss": 0.3797, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.3881442325844818, |
|
"grad_norm": 0.18242698907852173, |
|
"learning_rate": 6.758465614387446e-05, |
|
"loss": 0.4545, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.38889937700585236, |
|
"grad_norm": 0.179984450340271, |
|
"learning_rate": 6.747320897995493e-05, |
|
"loss": 0.42, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.38965452142722296, |
|
"grad_norm": 0.17841747403144836, |
|
"learning_rate": 6.736166284130006e-05, |
|
"loss": 0.4256, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.39040966584859355, |
|
"grad_norm": 0.1961648315191269, |
|
"learning_rate": 6.725001835974853e-05, |
|
"loss": 0.424, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.39116481026996414, |
|
"grad_norm": 0.18314050137996674, |
|
"learning_rate": 6.713827616769614e-05, |
|
"loss": 0.4107, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.3919199546913347, |
|
"grad_norm": 0.18332213163375854, |
|
"learning_rate": 6.702643689809205e-05, |
|
"loss": 0.4012, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.3926750991127053, |
|
"grad_norm": 0.18527810275554657, |
|
"learning_rate": 6.691450118443537e-05, |
|
"loss": 0.4209, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3934302435340759, |
|
"grad_norm": 0.18408484756946564, |
|
"learning_rate": 6.680246966077151e-05, |
|
"loss": 0.3916, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.3941853879554465, |
|
"grad_norm": 0.1861732453107834, |
|
"learning_rate": 6.669034296168855e-05, |
|
"loss": 0.4096, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.3949405323768171, |
|
"grad_norm": 0.20170702040195465, |
|
"learning_rate": 6.65781217223137e-05, |
|
"loss": 0.4398, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.3956956767981877, |
|
"grad_norm": 0.20506803691387177, |
|
"learning_rate": 6.646580657830966e-05, |
|
"loss": 0.4482, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.3964508212195582, |
|
"grad_norm": 0.199477881193161, |
|
"learning_rate": 6.635339816587109e-05, |
|
"loss": 0.4267, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3972059656409288, |
|
"grad_norm": 0.19555439054965973, |
|
"learning_rate": 6.624089712172088e-05, |
|
"loss": 0.4385, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.3979611100622994, |
|
"grad_norm": 0.20855289697647095, |
|
"learning_rate": 6.61283040831067e-05, |
|
"loss": 0.4423, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.39871625448367, |
|
"grad_norm": 0.20509350299835205, |
|
"learning_rate": 6.601561968779725e-05, |
|
"loss": 0.3744, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.3994713989050406, |
|
"grad_norm": 0.19435042142868042, |
|
"learning_rate": 6.590284457407876e-05, |
|
"loss": 0.3924, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.40022654332641117, |
|
"grad_norm": 0.22069820761680603, |
|
"learning_rate": 6.578997938075125e-05, |
|
"loss": 0.388, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.40098168774778176, |
|
"grad_norm": 0.21975107491016388, |
|
"learning_rate": 6.567702474712507e-05, |
|
"loss": 0.4088, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.40173683216915235, |
|
"grad_norm": 0.21666480600833893, |
|
"learning_rate": 6.556398131301713e-05, |
|
"loss": 0.397, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.40249197659052294, |
|
"grad_norm": 0.22098886966705322, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 0.4027, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.40324712101189353, |
|
"grad_norm": 0.23352603614330292, |
|
"learning_rate": 6.53376306051351e-05, |
|
"loss": 0.4029, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.4040022654332641, |
|
"grad_norm": 0.23887783288955688, |
|
"learning_rate": 6.522432461349536e-05, |
|
"loss": 0.3958, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.4047574098546347, |
|
"grad_norm": 0.2337152659893036, |
|
"learning_rate": 6.51109323856353e-05, |
|
"loss": 0.3805, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.4055125542760053, |
|
"grad_norm": 0.24378469586372375, |
|
"learning_rate": 6.499745456385054e-05, |
|
"loss": 0.4048, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.4062676986973759, |
|
"grad_norm": 0.2503972351551056, |
|
"learning_rate": 6.488389179092155e-05, |
|
"loss": 0.3469, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.4070228431187465, |
|
"grad_norm": 0.26343488693237305, |
|
"learning_rate": 6.477024471011001e-05, |
|
"loss": 0.3914, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.40777798754011707, |
|
"grad_norm": 0.2782142758369446, |
|
"learning_rate": 6.46565139651551e-05, |
|
"loss": 0.4735, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.40853313196148766, |
|
"grad_norm": 0.27638283371925354, |
|
"learning_rate": 6.454270020026995e-05, |
|
"loss": 0.3961, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.4092882763828582, |
|
"grad_norm": 0.28320929408073425, |
|
"learning_rate": 6.442880406013794e-05, |
|
"loss": 0.3899, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.4100434208042288, |
|
"grad_norm": 0.26396995782852173, |
|
"learning_rate": 6.431482618990902e-05, |
|
"loss": 0.3126, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.4107985652255994, |
|
"grad_norm": 0.29057344794273376, |
|
"learning_rate": 6.420076723519614e-05, |
|
"loss": 0.3829, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.41155370964696997, |
|
"grad_norm": 0.30193406343460083, |
|
"learning_rate": 6.408662784207149e-05, |
|
"loss": 0.3592, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.41230885406834056, |
|
"grad_norm": 0.3181254267692566, |
|
"learning_rate": 6.397240865706295e-05, |
|
"loss": 0.382, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.41306399848971115, |
|
"grad_norm": 0.32498013973236084, |
|
"learning_rate": 6.38581103271503e-05, |
|
"loss": 0.3579, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.41381914291108174, |
|
"grad_norm": 0.36153993010520935, |
|
"learning_rate": 6.374373349976169e-05, |
|
"loss": 0.3281, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.41457428733245233, |
|
"grad_norm": 0.3981688618659973, |
|
"learning_rate": 6.36292788227699e-05, |
|
"loss": 0.3811, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.4153294317538229, |
|
"grad_norm": 0.4573695957660675, |
|
"learning_rate": 6.351474694448864e-05, |
|
"loss": 0.4341, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4160845761751935, |
|
"grad_norm": 0.1360294222831726, |
|
"learning_rate": 6.340013851366896e-05, |
|
"loss": 0.3219, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.4168397205965641, |
|
"grad_norm": 0.1482553780078888, |
|
"learning_rate": 6.328545417949549e-05, |
|
"loss": 0.3273, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.4175948650179347, |
|
"grad_norm": 0.16173024475574493, |
|
"learning_rate": 6.317069459158284e-05, |
|
"loss": 0.387, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.4183500094393053, |
|
"grad_norm": 0.15186697244644165, |
|
"learning_rate": 6.305586039997188e-05, |
|
"loss": 0.3458, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.41910515386067587, |
|
"grad_norm": 0.16657663881778717, |
|
"learning_rate": 6.294095225512603e-05, |
|
"loss": 0.3808, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.41986029828204646, |
|
"grad_norm": 0.1734580099582672, |
|
"learning_rate": 6.282597080792768e-05, |
|
"loss": 0.3749, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.42061544270341705, |
|
"grad_norm": 0.16928981244564056, |
|
"learning_rate": 6.271091670967436e-05, |
|
"loss": 0.4225, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.42137058712478764, |
|
"grad_norm": 0.1675954908132553, |
|
"learning_rate": 6.259579061207512e-05, |
|
"loss": 0.3699, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.4221257315461582, |
|
"grad_norm": 0.16955533623695374, |
|
"learning_rate": 6.248059316724693e-05, |
|
"loss": 0.3821, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.42288087596752877, |
|
"grad_norm": 0.17539368569850922, |
|
"learning_rate": 6.236532502771078e-05, |
|
"loss": 0.4061, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.42363602038889936, |
|
"grad_norm": 0.16773320734500885, |
|
"learning_rate": 6.22499868463882e-05, |
|
"loss": 0.3684, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.42439116481026995, |
|
"grad_norm": 0.17698176205158234, |
|
"learning_rate": 6.213457927659736e-05, |
|
"loss": 0.446, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.42514630923164054, |
|
"grad_norm": 0.175985187292099, |
|
"learning_rate": 6.201910297204962e-05, |
|
"loss": 0.4014, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.42590145365301113, |
|
"grad_norm": 0.18848945200443268, |
|
"learning_rate": 6.190355858684554e-05, |
|
"loss": 0.3939, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.4266565980743817, |
|
"grad_norm": 0.20211070775985718, |
|
"learning_rate": 6.178794677547137e-05, |
|
"loss": 0.5346, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.4274117424957523, |
|
"grad_norm": 0.20022115111351013, |
|
"learning_rate": 6.167226819279528e-05, |
|
"loss": 0.485, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.4281668869171229, |
|
"grad_norm": 0.1909688413143158, |
|
"learning_rate": 6.155652349406365e-05, |
|
"loss": 0.3948, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.4289220313384935, |
|
"grad_norm": 0.19687382876873016, |
|
"learning_rate": 6.144071333489741e-05, |
|
"loss": 0.5016, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.4296771757598641, |
|
"grad_norm": 0.19113020598888397, |
|
"learning_rate": 6.132483837128823e-05, |
|
"loss": 0.409, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.4304323201812347, |
|
"grad_norm": 0.19415104389190674, |
|
"learning_rate": 6.120889925959485e-05, |
|
"loss": 0.4469, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.43118746460260526, |
|
"grad_norm": 0.20082654058933258, |
|
"learning_rate": 6.109289665653944e-05, |
|
"loss": 0.3998, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.43194260902397585, |
|
"grad_norm": 0.2006131410598755, |
|
"learning_rate": 6.0976831219203724e-05, |
|
"loss": 0.3904, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.43269775344534644, |
|
"grad_norm": 0.21381130814552307, |
|
"learning_rate": 6.0860703605025395e-05, |
|
"loss": 0.4501, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.43345289786671704, |
|
"grad_norm": 0.2047959566116333, |
|
"learning_rate": 6.074451447179432e-05, |
|
"loss": 0.4456, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.43420804228808757, |
|
"grad_norm": 0.19398140907287598, |
|
"learning_rate": 6.062826447764883e-05, |
|
"loss": 0.3951, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.43496318670945816, |
|
"grad_norm": 0.20938366651535034, |
|
"learning_rate": 6.0511954281072034e-05, |
|
"loss": 0.4182, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.43571833113082875, |
|
"grad_norm": 0.22829927504062653, |
|
"learning_rate": 6.0395584540887963e-05, |
|
"loss": 0.4267, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.43647347555219934, |
|
"grad_norm": 0.20386339724063873, |
|
"learning_rate": 6.027915591625804e-05, |
|
"loss": 0.3305, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.43722861997356993, |
|
"grad_norm": 0.2285105437040329, |
|
"learning_rate": 6.016266906667711e-05, |
|
"loss": 0.4457, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.4379837643949405, |
|
"grad_norm": 0.21181875467300415, |
|
"learning_rate": 6.004612465196994e-05, |
|
"loss": 0.3773, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4387389088163111, |
|
"grad_norm": 0.21869616210460663, |
|
"learning_rate": 5.992952333228728e-05, |
|
"loss": 0.4118, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.4394940532376817, |
|
"grad_norm": 0.22501884400844574, |
|
"learning_rate": 5.981286576810225e-05, |
|
"loss": 0.3624, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.4402491976590523, |
|
"grad_norm": 0.22811758518218994, |
|
"learning_rate": 5.969615262020657e-05, |
|
"loss": 0.3633, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.4410043420804229, |
|
"grad_norm": 0.23688824474811554, |
|
"learning_rate": 5.9579384549706775e-05, |
|
"loss": 0.4004, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.4417594865017935, |
|
"grad_norm": 0.2543923258781433, |
|
"learning_rate": 5.946256221802051e-05, |
|
"loss": 0.4547, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.44251463092316407, |
|
"grad_norm": 0.2558960020542145, |
|
"learning_rate": 5.9345686286872826e-05, |
|
"loss": 0.3585, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.44326977534453466, |
|
"grad_norm": 0.25656744837760925, |
|
"learning_rate": 5.9228757418292266e-05, |
|
"loss": 0.3907, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.44402491976590525, |
|
"grad_norm": 0.2832098603248596, |
|
"learning_rate": 5.911177627460739e-05, |
|
"loss": 0.4802, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.44478006418727584, |
|
"grad_norm": 0.2527141571044922, |
|
"learning_rate": 5.8994743518442694e-05, |
|
"loss": 0.3935, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.4455352086086464, |
|
"grad_norm": 0.27374371886253357, |
|
"learning_rate": 5.887765981271518e-05, |
|
"loss": 0.3938, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.446290353030017, |
|
"grad_norm": 0.26626354455947876, |
|
"learning_rate": 5.876052582063031e-05, |
|
"loss": 0.3361, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.44704549745138755, |
|
"grad_norm": 0.2699456512928009, |
|
"learning_rate": 5.864334220567851e-05, |
|
"loss": 0.3243, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.44780064187275814, |
|
"grad_norm": 0.3059113323688507, |
|
"learning_rate": 5.85261096316312e-05, |
|
"loss": 0.4765, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.44855578629412873, |
|
"grad_norm": 0.26939067244529724, |
|
"learning_rate": 5.840882876253715e-05, |
|
"loss": 0.355, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.4493109307154993, |
|
"grad_norm": 0.32320332527160645, |
|
"learning_rate": 5.829150026271871e-05, |
|
"loss": 0.4086, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4500660751368699, |
|
"grad_norm": 0.33342641592025757, |
|
"learning_rate": 5.8174124796768e-05, |
|
"loss": 0.4379, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.4508212195582405, |
|
"grad_norm": 0.33563926815986633, |
|
"learning_rate": 5.805670302954321e-05, |
|
"loss": 0.3539, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.4515763639796111, |
|
"grad_norm": 0.35168662667274475, |
|
"learning_rate": 5.793923562616475e-05, |
|
"loss": 0.4063, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.4523315084009817, |
|
"grad_norm": 0.3756003677845001, |
|
"learning_rate": 5.782172325201155e-05, |
|
"loss": 0.3802, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.4530866528223523, |
|
"grad_norm": 0.43410277366638184, |
|
"learning_rate": 5.770416657271729e-05, |
|
"loss": 0.4295, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45384179724372287, |
|
"grad_norm": 0.16801206767559052, |
|
"learning_rate": 5.7586566254166583e-05, |
|
"loss": 0.3764, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.45459694166509346, |
|
"grad_norm": 0.13662360608577728, |
|
"learning_rate": 5.746892296249126e-05, |
|
"loss": 0.3318, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.45535208608646405, |
|
"grad_norm": 0.157858207821846, |
|
"learning_rate": 5.7351237364066547e-05, |
|
"loss": 0.3435, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.45610723050783464, |
|
"grad_norm": 0.16429783403873444, |
|
"learning_rate": 5.723351012550729e-05, |
|
"loss": 0.3754, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.45686237492920523, |
|
"grad_norm": 0.1615344136953354, |
|
"learning_rate": 5.7115741913664264e-05, |
|
"loss": 0.3407, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.4576175193505758, |
|
"grad_norm": 0.17267441749572754, |
|
"learning_rate": 5.699793339562026e-05, |
|
"loss": 0.3956, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.4583726637719464, |
|
"grad_norm": 0.17077352106571198, |
|
"learning_rate": 5.6880085238686454e-05, |
|
"loss": 0.3547, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.45912780819331694, |
|
"grad_norm": 0.1691327840089798, |
|
"learning_rate": 5.6762198110398444e-05, |
|
"loss": 0.3248, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.45988295261468753, |
|
"grad_norm": 0.17237545549869537, |
|
"learning_rate": 5.664427267851271e-05, |
|
"loss": 0.3824, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.4606380970360581, |
|
"grad_norm": 0.1798117756843567, |
|
"learning_rate": 5.6526309611002594e-05, |
|
"loss": 0.3806, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4613932414574287, |
|
"grad_norm": 0.1780758649110794, |
|
"learning_rate": 5.640830957605465e-05, |
|
"loss": 0.4111, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.4621483858787993, |
|
"grad_norm": 0.17500564455986023, |
|
"learning_rate": 5.629027324206484e-05, |
|
"loss": 0.4043, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.4629035303001699, |
|
"grad_norm": 0.1749061942100525, |
|
"learning_rate": 5.617220127763474e-05, |
|
"loss": 0.3671, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.4636586747215405, |
|
"grad_norm": 0.1828213781118393, |
|
"learning_rate": 5.6054094351567746e-05, |
|
"loss": 0.3784, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.4644138191429111, |
|
"grad_norm": 0.18562084436416626, |
|
"learning_rate": 5.593595313286526e-05, |
|
"loss": 0.4139, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.46516896356428167, |
|
"grad_norm": 0.18980775773525238, |
|
"learning_rate": 5.581777829072299e-05, |
|
"loss": 0.4277, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.46592410798565226, |
|
"grad_norm": 0.19855181872844696, |
|
"learning_rate": 5.569957049452703e-05, |
|
"loss": 0.4124, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.46667925240702285, |
|
"grad_norm": 0.18563315272331238, |
|
"learning_rate": 5.5581330413850206e-05, |
|
"loss": 0.4239, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.46743439682839344, |
|
"grad_norm": 0.20975831151008606, |
|
"learning_rate": 5.5463058718448155e-05, |
|
"loss": 0.4546, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.46818954124976403, |
|
"grad_norm": 0.21700595319271088, |
|
"learning_rate": 5.534475607825566e-05, |
|
"loss": 0.4703, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4689446856711346, |
|
"grad_norm": 0.19500944018363953, |
|
"learning_rate": 5.522642316338268e-05, |
|
"loss": 0.4008, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.4696998300925052, |
|
"grad_norm": 0.18913483619689941, |
|
"learning_rate": 5.510806064411078e-05, |
|
"loss": 0.3416, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.4704549745138758, |
|
"grad_norm": 0.22911082208156586, |
|
"learning_rate": 5.4989669190889136e-05, |
|
"loss": 0.4004, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.4712101189352464, |
|
"grad_norm": 0.21053546667099, |
|
"learning_rate": 5.4871249474330866e-05, |
|
"loss": 0.4137, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.4719652633566169, |
|
"grad_norm": 0.20017597079277039, |
|
"learning_rate": 5.475280216520913e-05, |
|
"loss": 0.3467, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4727204077779875, |
|
"grad_norm": 0.23001310229301453, |
|
"learning_rate": 5.463432793445344e-05, |
|
"loss": 0.4107, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.4734755521993581, |
|
"grad_norm": 0.22943522036075592, |
|
"learning_rate": 5.4515827453145765e-05, |
|
"loss": 0.4129, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.4742306966207287, |
|
"grad_norm": 0.22657130658626556, |
|
"learning_rate": 5.439730139251675e-05, |
|
"loss": 0.3364, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.4749858410420993, |
|
"grad_norm": 0.22038240730762482, |
|
"learning_rate": 5.427875042394199e-05, |
|
"loss": 0.3809, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.4757409854634699, |
|
"grad_norm": 0.24683795869350433, |
|
"learning_rate": 5.4160175218938124e-05, |
|
"loss": 0.4047, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.47649612988484047, |
|
"grad_norm": 0.22947093844413757, |
|
"learning_rate": 5.404157644915907e-05, |
|
"loss": 0.3557, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.47725127430621106, |
|
"grad_norm": 0.23205658793449402, |
|
"learning_rate": 5.392295478639225e-05, |
|
"loss": 0.3911, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.47800641872758165, |
|
"grad_norm": 0.2397453486919403, |
|
"learning_rate": 5.3804310902554754e-05, |
|
"loss": 0.341, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.47876156314895224, |
|
"grad_norm": 0.25788554549217224, |
|
"learning_rate": 5.368564546968954e-05, |
|
"loss": 0.4089, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.47951670757032283, |
|
"grad_norm": 0.2771666347980499, |
|
"learning_rate": 5.3566959159961615e-05, |
|
"loss": 0.4121, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.4802718519916934, |
|
"grad_norm": 0.3412117063999176, |
|
"learning_rate": 5.344825264565426e-05, |
|
"loss": 0.4031, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.481026996413064, |
|
"grad_norm": 0.2685895562171936, |
|
"learning_rate": 5.3329526599165204e-05, |
|
"loss": 0.4285, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.4817821408344346, |
|
"grad_norm": 0.2930139899253845, |
|
"learning_rate": 5.3210781693002754e-05, |
|
"loss": 0.4991, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.4825372852558052, |
|
"grad_norm": 0.3006288707256317, |
|
"learning_rate": 5.3092018599782155e-05, |
|
"loss": 0.454, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.4832924296771758, |
|
"grad_norm": 0.2801501452922821, |
|
"learning_rate": 5.297323799222156e-05, |
|
"loss": 0.3826, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.4840475740985464, |
|
"grad_norm": 0.26145270466804504, |
|
"learning_rate": 5.2854440543138406e-05, |
|
"loss": 0.3434, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.4848027185199169, |
|
"grad_norm": 0.29839491844177246, |
|
"learning_rate": 5.273562692544548e-05, |
|
"loss": 0.3767, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.4855578629412875, |
|
"grad_norm": 0.3101263642311096, |
|
"learning_rate": 5.26167978121472e-05, |
|
"loss": 0.383, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.4863130073626581, |
|
"grad_norm": 0.35069891810417175, |
|
"learning_rate": 5.24979538763357e-05, |
|
"loss": 0.4163, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.4870681517840287, |
|
"grad_norm": 0.32553043961524963, |
|
"learning_rate": 5.2379095791187124e-05, |
|
"loss": 0.3942, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.48782329620539927, |
|
"grad_norm": 0.31216907501220703, |
|
"learning_rate": 5.226022422995773e-05, |
|
"loss": 0.3572, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.48857844062676986, |
|
"grad_norm": 0.3375815749168396, |
|
"learning_rate": 5.2141339865980134e-05, |
|
"loss": 0.3524, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.48933358504814045, |
|
"grad_norm": 0.33117055892944336, |
|
"learning_rate": 5.2022443372659446e-05, |
|
"loss": 0.3501, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.49008872946951104, |
|
"grad_norm": 0.41448086500167847, |
|
"learning_rate": 5.1903535423469505e-05, |
|
"loss": 0.3976, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.49084387389088163, |
|
"grad_norm": 0.5221715569496155, |
|
"learning_rate": 5.178461669194903e-05, |
|
"loss": 0.4395, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4915990183122522, |
|
"grad_norm": 0.15094821155071259, |
|
"learning_rate": 5.166568785169781e-05, |
|
"loss": 0.3399, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.4923541627336228, |
|
"grad_norm": 0.14581607282161713, |
|
"learning_rate": 5.154674957637291e-05, |
|
"loss": 0.3412, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.4931093071549934, |
|
"grad_norm": 0.16245442628860474, |
|
"learning_rate": 5.142780253968481e-05, |
|
"loss": 0.3448, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.493864451576364, |
|
"grad_norm": 0.15717031061649323, |
|
"learning_rate": 5.1308847415393666e-05, |
|
"loss": 0.332, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.4946195959977346, |
|
"grad_norm": 0.17713572084903717, |
|
"learning_rate": 5.1189884877305375e-05, |
|
"loss": 0.3773, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.4953747404191052, |
|
"grad_norm": 0.18064700067043304, |
|
"learning_rate": 5.107091559926791e-05, |
|
"loss": 0.3872, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.49612988484047577, |
|
"grad_norm": 0.18684056401252747, |
|
"learning_rate": 5.095194025516733e-05, |
|
"loss": 0.4255, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.4968850292618463, |
|
"grad_norm": 0.18245136737823486, |
|
"learning_rate": 5.0832959518924165e-05, |
|
"loss": 0.3741, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.4976401736832169, |
|
"grad_norm": 0.1796809732913971, |
|
"learning_rate": 5.0713974064489367e-05, |
|
"loss": 0.3846, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.4983953181045875, |
|
"grad_norm": 0.193410262465477, |
|
"learning_rate": 5.059498456584072e-05, |
|
"loss": 0.3921, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.49915046252595807, |
|
"grad_norm": 0.17409726977348328, |
|
"learning_rate": 5.047599169697884e-05, |
|
"loss": 0.3479, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.49990560694732866, |
|
"grad_norm": 0.19628435373306274, |
|
"learning_rate": 5.035699613192347e-05, |
|
"loss": 0.4364, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.5006607513686993, |
|
"grad_norm": 0.1964567005634308, |
|
"learning_rate": 5.023799854470963e-05, |
|
"loss": 0.4891, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.5014158957900698, |
|
"grad_norm": 0.1932559609413147, |
|
"learning_rate": 5.0118999609383776e-05, |
|
"loss": 0.4156, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.5021710402114404, |
|
"grad_norm": 0.18889658153057098, |
|
"learning_rate": 5e-05, |
|
"loss": 0.393, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.502926184632811, |
|
"grad_norm": 0.20598454773426056, |
|
"learning_rate": 4.9881000390616236e-05, |
|
"loss": 0.3809, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.5036813290541816, |
|
"grad_norm": 0.18910270929336548, |
|
"learning_rate": 4.9762001455290385e-05, |
|
"loss": 0.3594, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.5044364734755522, |
|
"grad_norm": 0.19010648131370544, |
|
"learning_rate": 4.964300386807653e-05, |
|
"loss": 0.4025, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.5051916178969228, |
|
"grad_norm": 0.19111211597919464, |
|
"learning_rate": 4.952400830302117e-05, |
|
"loss": 0.4046, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.5059467623182934, |
|
"grad_norm": 0.20030421018600464, |
|
"learning_rate": 4.940501543415929e-05, |
|
"loss": 0.3772, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.506701906739664, |
|
"grad_norm": 0.20106443762779236, |
|
"learning_rate": 4.928602593551065e-05, |
|
"loss": 0.3878, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.5074570511610346, |
|
"grad_norm": 0.21929006278514862, |
|
"learning_rate": 4.916704048107586e-05, |
|
"loss": 0.374, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.5082121955824052, |
|
"grad_norm": 0.2119339108467102, |
|
"learning_rate": 4.9048059744832666e-05, |
|
"loss": 0.3846, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.5089673400037757, |
|
"grad_norm": 0.2138437032699585, |
|
"learning_rate": 4.89290844007321e-05, |
|
"loss": 0.4283, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.5097224844251463, |
|
"grad_norm": 0.21618351340293884, |
|
"learning_rate": 4.881011512269463e-05, |
|
"loss": 0.3673, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5104776288465169, |
|
"grad_norm": 0.22225847840309143, |
|
"learning_rate": 4.869115258460635e-05, |
|
"loss": 0.4018, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.5112327732678875, |
|
"grad_norm": 0.20154333114624023, |
|
"learning_rate": 4.85721974603152e-05, |
|
"loss": 0.3655, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.5119879176892581, |
|
"grad_norm": 0.2222219705581665, |
|
"learning_rate": 4.845325042362709e-05, |
|
"loss": 0.3887, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.5127430621106287, |
|
"grad_norm": 0.23664206266403198, |
|
"learning_rate": 4.83343121483022e-05, |
|
"loss": 0.4378, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.5134982065319993, |
|
"grad_norm": 0.22598662972450256, |
|
"learning_rate": 4.821538330805098e-05, |
|
"loss": 0.361, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5142533509533699, |
|
"grad_norm": 0.23625950515270233, |
|
"learning_rate": 4.8096464576530507e-05, |
|
"loss": 0.3911, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.5150084953747405, |
|
"grad_norm": 0.24311421811580658, |
|
"learning_rate": 4.797755662734056e-05, |
|
"loss": 0.383, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.515763639796111, |
|
"grad_norm": 0.2555188834667206, |
|
"learning_rate": 4.7858660134019884e-05, |
|
"loss": 0.4043, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.5165187842174815, |
|
"grad_norm": 0.26212912797927856, |
|
"learning_rate": 4.7739775770042285e-05, |
|
"loss": 0.3899, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.5172739286388521, |
|
"grad_norm": 0.25993528962135315, |
|
"learning_rate": 4.762090420881289e-05, |
|
"loss": 0.4474, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.5180290730602227, |
|
"grad_norm": 0.2636634409427643, |
|
"learning_rate": 4.7502046123664316e-05, |
|
"loss": 0.4205, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.5187842174815933, |
|
"grad_norm": 0.27789896726608276, |
|
"learning_rate": 4.738320218785281e-05, |
|
"loss": 0.4362, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.5195393619029639, |
|
"grad_norm": 0.2860543727874756, |
|
"learning_rate": 4.726437307455452e-05, |
|
"loss": 0.3886, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.5202945063243345, |
|
"grad_norm": 0.26604828238487244, |
|
"learning_rate": 4.71455594568616e-05, |
|
"loss": 0.3778, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.5210496507457051, |
|
"grad_norm": 0.29641634225845337, |
|
"learning_rate": 4.702676200777846e-05, |
|
"loss": 0.4318, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5218047951670757, |
|
"grad_norm": 0.30234697461128235, |
|
"learning_rate": 4.6907981400217864e-05, |
|
"loss": 0.3916, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.5225599395884463, |
|
"grad_norm": 0.2846793830394745, |
|
"learning_rate": 4.678921830699724e-05, |
|
"loss": 0.3517, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.5233150840098169, |
|
"grad_norm": 0.305910587310791, |
|
"learning_rate": 4.667047340083481e-05, |
|
"loss": 0.3995, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.5240702284311874, |
|
"grad_norm": 0.2852495014667511, |
|
"learning_rate": 4.655174735434575e-05, |
|
"loss": 0.3481, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.524825372852558, |
|
"grad_norm": 0.32877644896507263, |
|
"learning_rate": 4.643304084003839e-05, |
|
"loss": 0.3678, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5255805172739286, |
|
"grad_norm": 0.3292486071586609, |
|
"learning_rate": 4.631435453031047e-05, |
|
"loss": 0.3611, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.5263356616952992, |
|
"grad_norm": 0.33952105045318604, |
|
"learning_rate": 4.619568909744524e-05, |
|
"loss": 0.3875, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.5270908061166698, |
|
"grad_norm": 0.41025200486183167, |
|
"learning_rate": 4.607704521360776e-05, |
|
"loss": 0.3853, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.5278459505380404, |
|
"grad_norm": 0.3911071717739105, |
|
"learning_rate": 4.595842355084094e-05, |
|
"loss": 0.3416, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.528601094959411, |
|
"grad_norm": 0.46904653310775757, |
|
"learning_rate": 4.583982478106189e-05, |
|
"loss": 0.3506, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5293562393807816, |
|
"grad_norm": 0.14935268461704254, |
|
"learning_rate": 4.5721249576058027e-05, |
|
"loss": 0.3182, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.5301113838021522, |
|
"grad_norm": 0.15800884366035461, |
|
"learning_rate": 4.560269860748325e-05, |
|
"loss": 0.3109, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.5308665282235228, |
|
"grad_norm": 0.16600751876831055, |
|
"learning_rate": 4.5484172546854246e-05, |
|
"loss": 0.3653, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.5316216726448934, |
|
"grad_norm": 0.17389142513275146, |
|
"learning_rate": 4.536567206554656e-05, |
|
"loss": 0.3637, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.5323768170662639, |
|
"grad_norm": 0.16549134254455566, |
|
"learning_rate": 4.5247197834790876e-05, |
|
"loss": 0.3916, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5331319614876345, |
|
"grad_norm": 0.16650977730751038, |
|
"learning_rate": 4.512875052566915e-05, |
|
"loss": 0.3398, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.5338871059090051, |
|
"grad_norm": 0.17638632655143738, |
|
"learning_rate": 4.501033080911086e-05, |
|
"loss": 0.3994, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.5346422503303757, |
|
"grad_norm": 0.1747698038816452, |
|
"learning_rate": 4.489193935588923e-05, |
|
"loss": 0.399, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.5353973947517463, |
|
"grad_norm": 0.1881653517484665, |
|
"learning_rate": 4.477357683661734e-05, |
|
"loss": 0.4072, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.5361525391731169, |
|
"grad_norm": 0.1721341758966446, |
|
"learning_rate": 4.4655243921744374e-05, |
|
"loss": 0.3317, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5369076835944875, |
|
"grad_norm": 0.18106478452682495, |
|
"learning_rate": 4.4536941281551864e-05, |
|
"loss": 0.4081, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.5376628280158581, |
|
"grad_norm": 0.19442430138587952, |
|
"learning_rate": 4.44186695861498e-05, |
|
"loss": 0.4044, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.5384179724372287, |
|
"grad_norm": 0.19563159346580505, |
|
"learning_rate": 4.4300429505472976e-05, |
|
"loss": 0.3849, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.5391731168585993, |
|
"grad_norm": 0.1997566670179367, |
|
"learning_rate": 4.418222170927702e-05, |
|
"loss": 0.432, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.5399282612799698, |
|
"grad_norm": 0.1938876062631607, |
|
"learning_rate": 4.4064046867134756e-05, |
|
"loss": 0.3994, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5406834057013404, |
|
"grad_norm": 0.2057284563779831, |
|
"learning_rate": 4.394590564843226e-05, |
|
"loss": 0.4499, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.5414385501227109, |
|
"grad_norm": 0.1877799779176712, |
|
"learning_rate": 4.3827798722365264e-05, |
|
"loss": 0.3824, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5421936945440815, |
|
"grad_norm": 0.2022031992673874, |
|
"learning_rate": 4.370972675793517e-05, |
|
"loss": 0.4552, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.5429488389654521, |
|
"grad_norm": 0.22796858847141266, |
|
"learning_rate": 4.359169042394536e-05, |
|
"loss": 0.4589, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.5437039833868227, |
|
"grad_norm": 0.19858501851558685, |
|
"learning_rate": 4.347369038899744e-05, |
|
"loss": 0.4075, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5444591278081933, |
|
"grad_norm": 0.20992298424243927, |
|
"learning_rate": 4.33557273214873e-05, |
|
"loss": 0.382, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5452142722295639, |
|
"grad_norm": 0.2098989337682724, |
|
"learning_rate": 4.3237801889601554e-05, |
|
"loss": 0.4268, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5459694166509345, |
|
"grad_norm": 0.2050914168357849, |
|
"learning_rate": 4.3119914761313564e-05, |
|
"loss": 0.3695, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.546724561072305, |
|
"grad_norm": 0.21308262646198273, |
|
"learning_rate": 4.3002066604379746e-05, |
|
"loss": 0.4058, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.5474797054936756, |
|
"grad_norm": 0.22114039957523346, |
|
"learning_rate": 4.288425808633575e-05, |
|
"loss": 0.4412, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5482348499150462, |
|
"grad_norm": 0.2253432720899582, |
|
"learning_rate": 4.276648987449271e-05, |
|
"loss": 0.4339, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5489899943364168, |
|
"grad_norm": 0.22624394297599792, |
|
"learning_rate": 4.2648762635933465e-05, |
|
"loss": 0.4147, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5497451387577874, |
|
"grad_norm": 0.23290926218032837, |
|
"learning_rate": 4.253107703750875e-05, |
|
"loss": 0.4315, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.550500283179158, |
|
"grad_norm": 0.23128274083137512, |
|
"learning_rate": 4.241343374583343e-05, |
|
"loss": 0.4271, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5512554276005286, |
|
"grad_norm": 0.243194580078125, |
|
"learning_rate": 4.2295833427282734e-05, |
|
"loss": 0.4344, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5520105720218992, |
|
"grad_norm": 0.2592814564704895, |
|
"learning_rate": 4.2178276747988446e-05, |
|
"loss": 0.4633, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5527657164432698, |
|
"grad_norm": 0.2409772276878357, |
|
"learning_rate": 4.2060764373835264e-05, |
|
"loss": 0.3903, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.5535208608646404, |
|
"grad_norm": 0.23426584899425507, |
|
"learning_rate": 4.19432969704568e-05, |
|
"loss": 0.3476, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.554276005286011, |
|
"grad_norm": 0.2637597322463989, |
|
"learning_rate": 4.182587520323201e-05, |
|
"loss": 0.4371, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.5550311497073815, |
|
"grad_norm": 0.2537882328033447, |
|
"learning_rate": 4.17084997372813e-05, |
|
"loss": 0.4176, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5557862941287521, |
|
"grad_norm": 0.25388282537460327, |
|
"learning_rate": 4.159117123746286e-05, |
|
"loss": 0.3644, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5565414385501227, |
|
"grad_norm": 0.259795218706131, |
|
"learning_rate": 4.147389036836881e-05, |
|
"loss": 0.3493, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.5572965829714933, |
|
"grad_norm": 0.2668931782245636, |
|
"learning_rate": 4.1356657794321496e-05, |
|
"loss": 0.3802, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5580517273928639, |
|
"grad_norm": 0.28695622086524963, |
|
"learning_rate": 4.12394741793697e-05, |
|
"loss": 0.3657, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.5588068718142345, |
|
"grad_norm": 0.2887243628501892, |
|
"learning_rate": 4.1122340187284846e-05, |
|
"loss": 0.4289, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5595620162356051, |
|
"grad_norm": 0.2813442349433899, |
|
"learning_rate": 4.100525648155731e-05, |
|
"loss": 0.3503, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5603171606569757, |
|
"grad_norm": 0.2727963924407959, |
|
"learning_rate": 4.088822372539263e-05, |
|
"loss": 0.3067, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.5610723050783463, |
|
"grad_norm": 0.2962748408317566, |
|
"learning_rate": 4.077124258170774e-05, |
|
"loss": 0.3492, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.5618274494997169, |
|
"grad_norm": 0.293473482131958, |
|
"learning_rate": 4.06543137131272e-05, |
|
"loss": 0.3261, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.5625825939210874, |
|
"grad_norm": 0.3267402946949005, |
|
"learning_rate": 4.0537437781979506e-05, |
|
"loss": 0.3525, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.563337738342458, |
|
"grad_norm": 0.33864033222198486, |
|
"learning_rate": 4.042061545029323e-05, |
|
"loss": 0.3754, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.5640928827638286, |
|
"grad_norm": 0.38044723868370056, |
|
"learning_rate": 4.0303847379793447e-05, |
|
"loss": 0.4512, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.5648480271851992, |
|
"grad_norm": 0.37810018658638, |
|
"learning_rate": 4.018713423189775e-05, |
|
"loss": 0.359, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.5656031716065698, |
|
"grad_norm": 0.42582571506500244, |
|
"learning_rate": 4.007047666771274e-05, |
|
"loss": 0.4383, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.5663583160279403, |
|
"grad_norm": 0.4589140713214874, |
|
"learning_rate": 3.995387534803006e-05, |
|
"loss": 0.4365, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5671134604493109, |
|
"grad_norm": 0.155814066529274, |
|
"learning_rate": 3.983733093332289e-05, |
|
"loss": 0.3505, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.5678686048706815, |
|
"grad_norm": 0.16563649475574493, |
|
"learning_rate": 3.9720844083741975e-05, |
|
"loss": 0.3567, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.5686237492920521, |
|
"grad_norm": 0.16976523399353027, |
|
"learning_rate": 3.960441545911204e-05, |
|
"loss": 0.3639, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.5693788937134227, |
|
"grad_norm": 0.18034231662750244, |
|
"learning_rate": 3.948804571892799e-05, |
|
"loss": 0.3462, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.5701340381347932, |
|
"grad_norm": 0.1798115074634552, |
|
"learning_rate": 3.937173552235117e-05, |
|
"loss": 0.374, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.5708891825561638, |
|
"grad_norm": 0.18299606442451477, |
|
"learning_rate": 3.925548552820568e-05, |
|
"loss": 0.3865, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.5716443269775344, |
|
"grad_norm": 0.20100784301757812, |
|
"learning_rate": 3.913929639497462e-05, |
|
"loss": 0.4081, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.572399471398905, |
|
"grad_norm": 0.18966151773929596, |
|
"learning_rate": 3.9023168780796294e-05, |
|
"loss": 0.3809, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.5731546158202756, |
|
"grad_norm": 0.2044794112443924, |
|
"learning_rate": 3.890710334346058e-05, |
|
"loss": 0.4423, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.5739097602416462, |
|
"grad_norm": 0.19047518074512482, |
|
"learning_rate": 3.879110074040514e-05, |
|
"loss": 0.3845, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5746649046630168, |
|
"grad_norm": 0.1907937079668045, |
|
"learning_rate": 3.8675161628711776e-05, |
|
"loss": 0.394, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.5754200490843874, |
|
"grad_norm": 0.19930242002010345, |
|
"learning_rate": 3.85592866651026e-05, |
|
"loss": 0.434, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.576175193505758, |
|
"grad_norm": 0.1895764023065567, |
|
"learning_rate": 3.844347650593635e-05, |
|
"loss": 0.3668, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.5769303379271286, |
|
"grad_norm": 0.20043134689331055, |
|
"learning_rate": 3.832773180720475e-05, |
|
"loss": 0.3956, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.5776854823484991, |
|
"grad_norm": 0.2107314020395279, |
|
"learning_rate": 3.821205322452863e-05, |
|
"loss": 0.4535, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.5784406267698697, |
|
"grad_norm": 0.22263993322849274, |
|
"learning_rate": 3.8096441413154464e-05, |
|
"loss": 0.4392, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.5791957711912403, |
|
"grad_norm": 0.2031395435333252, |
|
"learning_rate": 3.798089702795038e-05, |
|
"loss": 0.3902, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.5799509156126109, |
|
"grad_norm": 0.20621052384376526, |
|
"learning_rate": 3.7865420723402634e-05, |
|
"loss": 0.4127, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.5807060600339815, |
|
"grad_norm": 0.20813485980033875, |
|
"learning_rate": 3.775001315361183e-05, |
|
"loss": 0.3858, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.5814612044553521, |
|
"grad_norm": 0.21165066957473755, |
|
"learning_rate": 3.763467497228922e-05, |
|
"loss": 0.4723, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5822163488767227, |
|
"grad_norm": 0.2068208009004593, |
|
"learning_rate": 3.7519406832753085e-05, |
|
"loss": 0.3991, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.5829714932980933, |
|
"grad_norm": 0.22032210230827332, |
|
"learning_rate": 3.740420938792489e-05, |
|
"loss": 0.4052, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.5837266377194639, |
|
"grad_norm": 0.21245840191841125, |
|
"learning_rate": 3.728908329032567e-05, |
|
"loss": 0.3571, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.5844817821408345, |
|
"grad_norm": 0.2291347086429596, |
|
"learning_rate": 3.717402919207234e-05, |
|
"loss": 0.4335, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.585236926562205, |
|
"grad_norm": 0.23557013273239136, |
|
"learning_rate": 3.705904774487396e-05, |
|
"loss": 0.4521, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5859920709835756, |
|
"grad_norm": 0.23025040328502655, |
|
"learning_rate": 3.6944139600028136e-05, |
|
"loss": 0.3855, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.5867472154049462, |
|
"grad_norm": 0.23952262103557587, |
|
"learning_rate": 3.6829305408417166e-05, |
|
"loss": 0.3627, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.5875023598263168, |
|
"grad_norm": 0.21594206988811493, |
|
"learning_rate": 3.6714545820504525e-05, |
|
"loss": 0.3442, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.5882575042476874, |
|
"grad_norm": 0.22128231823444366, |
|
"learning_rate": 3.659986148633107e-05, |
|
"loss": 0.355, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.589012648669058, |
|
"grad_norm": 0.25088027119636536, |
|
"learning_rate": 3.648525305551136e-05, |
|
"loss": 0.4293, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5897677930904286, |
|
"grad_norm": 0.2476910948753357, |
|
"learning_rate": 3.6370721177230116e-05, |
|
"loss": 0.3875, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.5905229375117992, |
|
"grad_norm": 0.24124464392662048, |
|
"learning_rate": 3.625626650023831e-05, |
|
"loss": 0.393, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.5912780819331697, |
|
"grad_norm": 0.2532835900783539, |
|
"learning_rate": 3.6141889672849726e-05, |
|
"loss": 0.418, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.5920332263545403, |
|
"grad_norm": 0.24893806874752045, |
|
"learning_rate": 3.602759134293706e-05, |
|
"loss": 0.3763, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.5927883707759108, |
|
"grad_norm": 0.2560838758945465, |
|
"learning_rate": 3.591337215792852e-05, |
|
"loss": 0.3267, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.5935435151972814, |
|
"grad_norm": 0.26388421654701233, |
|
"learning_rate": 3.579923276480387e-05, |
|
"loss": 0.4026, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.594298659618652, |
|
"grad_norm": 0.27024954557418823, |
|
"learning_rate": 3.568517381009099e-05, |
|
"loss": 0.3871, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.5950538040400226, |
|
"grad_norm": 0.26593852043151855, |
|
"learning_rate": 3.557119593986208e-05, |
|
"loss": 0.3685, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.5958089484613932, |
|
"grad_norm": 0.2777867317199707, |
|
"learning_rate": 3.545729979973005e-05, |
|
"loss": 0.3664, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.5965640928827638, |
|
"grad_norm": 0.28436291217803955, |
|
"learning_rate": 3.5343486034844895e-05, |
|
"loss": 0.3673, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5973192373041344, |
|
"grad_norm": 0.3022597134113312, |
|
"learning_rate": 3.522975528989e-05, |
|
"loss": 0.4292, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.598074381725505, |
|
"grad_norm": 0.3157149851322174, |
|
"learning_rate": 3.511610820907846e-05, |
|
"loss": 0.4385, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.5988295261468756, |
|
"grad_norm": 0.2940748333930969, |
|
"learning_rate": 3.5002545436149474e-05, |
|
"loss": 0.389, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.5995846705682462, |
|
"grad_norm": 0.3128991425037384, |
|
"learning_rate": 3.4889067614364714e-05, |
|
"loss": 0.3999, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.6003398149896167, |
|
"grad_norm": 0.29500824213027954, |
|
"learning_rate": 3.4775675386504656e-05, |
|
"loss": 0.3033, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.6010949594109873, |
|
"grad_norm": 0.36105644702911377, |
|
"learning_rate": 3.466236939486491e-05, |
|
"loss": 0.4132, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.6018501038323579, |
|
"grad_norm": 0.3596034646034241, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 0.335, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.6026052482537285, |
|
"grad_norm": 0.3680574893951416, |
|
"learning_rate": 3.443601868698288e-05, |
|
"loss": 0.3667, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.6033603926750991, |
|
"grad_norm": 0.38893964886665344, |
|
"learning_rate": 3.4322975252874946e-05, |
|
"loss": 0.3922, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.6041155370964697, |
|
"grad_norm": 0.4646053910255432, |
|
"learning_rate": 3.421002061924876e-05, |
|
"loss": 0.3689, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6048706815178403, |
|
"grad_norm": 0.16041673719882965, |
|
"learning_rate": 3.4097155425921254e-05, |
|
"loss": 0.2799, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.6056258259392109, |
|
"grad_norm": 0.14617888629436493, |
|
"learning_rate": 3.398438031220276e-05, |
|
"loss": 0.3073, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.6063809703605815, |
|
"grad_norm": 0.1890193223953247, |
|
"learning_rate": 3.3871695916893314e-05, |
|
"loss": 0.3961, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.6071361147819521, |
|
"grad_norm": 0.17151913046836853, |
|
"learning_rate": 3.375910287827912e-05, |
|
"loss": 0.3757, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.6078912592033227, |
|
"grad_norm": 0.17613713443279266, |
|
"learning_rate": 3.364660183412892e-05, |
|
"loss": 0.3356, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.6086464036246932, |
|
"grad_norm": 0.17522138357162476, |
|
"learning_rate": 3.353419342169035e-05, |
|
"loss": 0.3864, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.6094015480460638, |
|
"grad_norm": 0.17417658865451813, |
|
"learning_rate": 3.3421878277686314e-05, |
|
"loss": 0.3412, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.6101566924674344, |
|
"grad_norm": 0.1665177345275879, |
|
"learning_rate": 3.330965703831146e-05, |
|
"loss": 0.3375, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.610911836888805, |
|
"grad_norm": 0.2024083286523819, |
|
"learning_rate": 3.3197530339228487e-05, |
|
"loss": 0.3916, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.6116669813101756, |
|
"grad_norm": 0.19576655328273773, |
|
"learning_rate": 3.3085498815564645e-05, |
|
"loss": 0.4199, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6124221257315462, |
|
"grad_norm": 0.19635462760925293, |
|
"learning_rate": 3.297356310190797e-05, |
|
"loss": 0.448, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.6131772701529168, |
|
"grad_norm": 0.1944982409477234, |
|
"learning_rate": 3.286172383230388e-05, |
|
"loss": 0.3877, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.6139324145742874, |
|
"grad_norm": 0.1916523575782776, |
|
"learning_rate": 3.274998164025148e-05, |
|
"loss": 0.3653, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.614687558995658, |
|
"grad_norm": 0.20687338709831238, |
|
"learning_rate": 3.263833715869996e-05, |
|
"loss": 0.4505, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.6154427034170286, |
|
"grad_norm": 0.18990960717201233, |
|
"learning_rate": 3.2526791020045086e-05, |
|
"loss": 0.3874, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.6161978478383991, |
|
"grad_norm": 0.18829166889190674, |
|
"learning_rate": 3.2415343856125547e-05, |
|
"loss": 0.3377, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.6169529922597696, |
|
"grad_norm": 0.21549442410469055, |
|
"learning_rate": 3.230399629821942e-05, |
|
"loss": 0.4082, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.6177081366811402, |
|
"grad_norm": 0.21937482059001923, |
|
"learning_rate": 3.219274897704053e-05, |
|
"loss": 0.3876, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.6184632811025108, |
|
"grad_norm": 0.21827095746994019, |
|
"learning_rate": 3.2081602522734986e-05, |
|
"loss": 0.4106, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.6192184255238814, |
|
"grad_norm": 0.2126101851463318, |
|
"learning_rate": 3.197055756487752e-05, |
|
"loss": 0.3958, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.619973569945252, |
|
"grad_norm": 0.22217732667922974, |
|
"learning_rate": 3.1859614732467954e-05, |
|
"loss": 0.414, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.6207287143666226, |
|
"grad_norm": 0.19480924308300018, |
|
"learning_rate": 3.174877465392763e-05, |
|
"loss": 0.3671, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.6214838587879932, |
|
"grad_norm": 0.19732743501663208, |
|
"learning_rate": 3.163803795709583e-05, |
|
"loss": 0.3422, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.6222390032093638, |
|
"grad_norm": 0.2333568036556244, |
|
"learning_rate": 3.1527405269226305e-05, |
|
"loss": 0.3685, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.6229941476307344, |
|
"grad_norm": 0.21316733956336975, |
|
"learning_rate": 3.141687721698363e-05, |
|
"loss": 0.3704, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.6237492920521049, |
|
"grad_norm": 0.20892879366874695, |
|
"learning_rate": 3.130645442643965e-05, |
|
"loss": 0.3474, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.6245044364734755, |
|
"grad_norm": 0.23324836790561676, |
|
"learning_rate": 3.119613752307002e-05, |
|
"loss": 0.3941, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.6252595808948461, |
|
"grad_norm": 0.2080870121717453, |
|
"learning_rate": 3.108592713175056e-05, |
|
"loss": 0.358, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.6260147253162167, |
|
"grad_norm": 0.24301281571388245, |
|
"learning_rate": 3.097582387675385e-05, |
|
"loss": 0.4344, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.6267698697375873, |
|
"grad_norm": 0.2200392484664917, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 0.3661, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6275250141589579, |
|
"grad_norm": 0.24137113988399506, |
|
"learning_rate": 3.075594126978084e-05, |
|
"loss": 0.4266, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.6282801585803285, |
|
"grad_norm": 0.24250073730945587, |
|
"learning_rate": 3.0646163163301186e-05, |
|
"loss": 0.41, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.6290353030016991, |
|
"grad_norm": 0.26428642868995667, |
|
"learning_rate": 3.053649468413043e-05, |
|
"loss": 0.4392, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.6297904474230697, |
|
"grad_norm": 0.2617412805557251, |
|
"learning_rate": 3.0426936453471533e-05, |
|
"loss": 0.4245, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.6305455918444403, |
|
"grad_norm": 0.2729879319667816, |
|
"learning_rate": 3.0317489091902935e-05, |
|
"loss": 0.3944, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6313007362658108, |
|
"grad_norm": 0.261433482170105, |
|
"learning_rate": 3.020815321937509e-05, |
|
"loss": 0.3796, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.6320558806871814, |
|
"grad_norm": 0.2721140682697296, |
|
"learning_rate": 3.0098929455206904e-05, |
|
"loss": 0.395, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.632811025108552, |
|
"grad_norm": 0.26962924003601074, |
|
"learning_rate": 2.998981841808227e-05, |
|
"loss": 0.3807, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.6335661695299226, |
|
"grad_norm": 0.2710483968257904, |
|
"learning_rate": 2.988082072604661e-05, |
|
"loss": 0.3361, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.6343213139512932, |
|
"grad_norm": 0.2910868227481842, |
|
"learning_rate": 2.9771936996503248e-05, |
|
"loss": 0.4196, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6350764583726638, |
|
"grad_norm": 0.2808794379234314, |
|
"learning_rate": 2.9663167846209998e-05, |
|
"loss": 0.3742, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.6358316027940344, |
|
"grad_norm": 0.3063143491744995, |
|
"learning_rate": 2.955451389127567e-05, |
|
"loss": 0.4192, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.636586747215405, |
|
"grad_norm": 0.2972528338432312, |
|
"learning_rate": 2.9445975747156545e-05, |
|
"loss": 0.3618, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.6373418916367756, |
|
"grad_norm": 0.2982980012893677, |
|
"learning_rate": 2.9337554028652952e-05, |
|
"loss": 0.3622, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.6380970360581462, |
|
"grad_norm": 0.29826125502586365, |
|
"learning_rate": 2.9229249349905684e-05, |
|
"loss": 0.3206, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6388521804795168, |
|
"grad_norm": 0.3471531569957733, |
|
"learning_rate": 2.9121062324392623e-05, |
|
"loss": 0.383, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.6396073249008873, |
|
"grad_norm": 0.37260305881500244, |
|
"learning_rate": 2.901299356492516e-05, |
|
"loss": 0.403, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.6403624693222579, |
|
"grad_norm": 0.3672800064086914, |
|
"learning_rate": 2.8905043683644872e-05, |
|
"loss": 0.3851, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.6411176137436285, |
|
"grad_norm": 0.40943288803100586, |
|
"learning_rate": 2.8797213292019926e-05, |
|
"loss": 0.3267, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.641872758164999, |
|
"grad_norm": 0.5209497213363647, |
|
"learning_rate": 2.86895030008416e-05, |
|
"loss": 0.4702, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6426279025863696, |
|
"grad_norm": 0.15170446038246155, |
|
"learning_rate": 2.858191342022095e-05, |
|
"loss": 0.2699, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.6433830470077402, |
|
"grad_norm": 0.16055040061473846, |
|
"learning_rate": 2.8474445159585235e-05, |
|
"loss": 0.3269, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.6441381914291108, |
|
"grad_norm": 0.16934970021247864, |
|
"learning_rate": 2.8367098827674578e-05, |
|
"loss": 0.3698, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.6448933358504814, |
|
"grad_norm": 0.16251638531684875, |
|
"learning_rate": 2.8259875032538407e-05, |
|
"loss": 0.3339, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.645648480271852, |
|
"grad_norm": 0.1724422574043274, |
|
"learning_rate": 2.8152774381532033e-05, |
|
"loss": 0.4104, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6464036246932225, |
|
"grad_norm": 0.17181172966957092, |
|
"learning_rate": 2.8045797481313262e-05, |
|
"loss": 0.3565, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.6471587691145931, |
|
"grad_norm": 0.17750045657157898, |
|
"learning_rate": 2.7938944937838923e-05, |
|
"loss": 0.3573, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.6479139135359637, |
|
"grad_norm": 0.18744954466819763, |
|
"learning_rate": 2.78322173563615e-05, |
|
"loss": 0.3277, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.6486690579573343, |
|
"grad_norm": 0.18920008838176727, |
|
"learning_rate": 2.7725615341425525e-05, |
|
"loss": 0.407, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.6494242023787049, |
|
"grad_norm": 0.19674719870090485, |
|
"learning_rate": 2.7619139496864378e-05, |
|
"loss": 0.4228, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6501793468000755, |
|
"grad_norm": 0.20689047873020172, |
|
"learning_rate": 2.7512790425796718e-05, |
|
"loss": 0.4252, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.6509344912214461, |
|
"grad_norm": 0.18963493406772614, |
|
"learning_rate": 2.740656873062312e-05, |
|
"loss": 0.3796, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.6516896356428167, |
|
"grad_norm": 0.21359539031982422, |
|
"learning_rate": 2.7300475013022663e-05, |
|
"loss": 0.4206, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.6524447800641873, |
|
"grad_norm": 0.2088915854692459, |
|
"learning_rate": 2.7194509873949503e-05, |
|
"loss": 0.4344, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.6531999244855579, |
|
"grad_norm": 0.2131178230047226, |
|
"learning_rate": 2.708867391362948e-05, |
|
"loss": 0.4203, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6539550689069284, |
|
"grad_norm": 0.20361606776714325, |
|
"learning_rate": 2.698296773155673e-05, |
|
"loss": 0.3994, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.654710213328299, |
|
"grad_norm": 0.2058803290128708, |
|
"learning_rate": 2.687739192649026e-05, |
|
"loss": 0.3746, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.6554653577496696, |
|
"grad_norm": 0.22526845335960388, |
|
"learning_rate": 2.6771947096450577e-05, |
|
"loss": 0.4883, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.6562205021710402, |
|
"grad_norm": 0.1975262463092804, |
|
"learning_rate": 2.6666633838716314e-05, |
|
"loss": 0.3258, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.6569756465924108, |
|
"grad_norm": 0.2137899249792099, |
|
"learning_rate": 2.6561452749820807e-05, |
|
"loss": 0.3739, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6577307910137814, |
|
"grad_norm": 0.21968792378902435, |
|
"learning_rate": 2.6456404425548774e-05, |
|
"loss": 0.3703, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.658485935435152, |
|
"grad_norm": 0.21001875400543213, |
|
"learning_rate": 2.6351489460932816e-05, |
|
"loss": 0.4517, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6592410798565226, |
|
"grad_norm": 0.23233747482299805, |
|
"learning_rate": 2.6246708450250256e-05, |
|
"loss": 0.3885, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.6599962242778932, |
|
"grad_norm": 0.23604817688465118, |
|
"learning_rate": 2.6142061987019577e-05, |
|
"loss": 0.4035, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.6607513686992638, |
|
"grad_norm": 0.23978619277477264, |
|
"learning_rate": 2.603755066399718e-05, |
|
"loss": 0.4077, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6615065131206344, |
|
"grad_norm": 0.23582671582698822, |
|
"learning_rate": 2.5933175073173898e-05, |
|
"loss": 0.4175, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.662261657542005, |
|
"grad_norm": 0.24461962282657623, |
|
"learning_rate": 2.5828935805771802e-05, |
|
"loss": 0.4297, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.6630168019633755, |
|
"grad_norm": 0.24499231576919556, |
|
"learning_rate": 2.5724833452240792e-05, |
|
"loss": 0.3868, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.6637719463847461, |
|
"grad_norm": 0.2279515117406845, |
|
"learning_rate": 2.5620868602255197e-05, |
|
"loss": 0.4166, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.6645270908061167, |
|
"grad_norm": 0.24552056193351746, |
|
"learning_rate": 2.5517041844710453e-05, |
|
"loss": 0.4447, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6652822352274873, |
|
"grad_norm": 0.25690439343452454, |
|
"learning_rate": 2.5413353767719805e-05, |
|
"loss": 0.4388, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.6660373796488579, |
|
"grad_norm": 0.27102774381637573, |
|
"learning_rate": 2.5309804958611016e-05, |
|
"loss": 0.4063, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.6667925240702284, |
|
"grad_norm": 0.2607583701610565, |
|
"learning_rate": 2.520639600392295e-05, |
|
"loss": 0.4336, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.667547668491599, |
|
"grad_norm": 0.28491443395614624, |
|
"learning_rate": 2.5103127489402217e-05, |
|
"loss": 0.493, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.6683028129129696, |
|
"grad_norm": 0.27002084255218506, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 0.4069, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.6690579573343401, |
|
"grad_norm": 0.2716214060783386, |
|
"learning_rate": 2.489701411986865e-05, |
|
"loss": 0.4394, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.6698131017557107, |
|
"grad_norm": 0.26700031757354736, |
|
"learning_rate": 2.4794170432358415e-05, |
|
"loss": 0.3797, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.6705682461770813, |
|
"grad_norm": 0.2846347987651825, |
|
"learning_rate": 2.4691469520014025e-05, |
|
"loss": 0.3867, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.6713233905984519, |
|
"grad_norm": 0.32261618971824646, |
|
"learning_rate": 2.4588911964571553e-05, |
|
"loss": 0.4208, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.6720785350198225, |
|
"grad_norm": 0.2805870771408081, |
|
"learning_rate": 2.4486498346955027e-05, |
|
"loss": 0.3418, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6728336794411931, |
|
"grad_norm": 0.2825523912906647, |
|
"learning_rate": 2.4384229247273155e-05, |
|
"loss": 0.399, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.6735888238625637, |
|
"grad_norm": 0.34398892521858215, |
|
"learning_rate": 2.4282105244816045e-05, |
|
"loss": 0.4278, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.6743439682839343, |
|
"grad_norm": 0.31729134917259216, |
|
"learning_rate": 2.418012691805191e-05, |
|
"loss": 0.3586, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.6750991127053049, |
|
"grad_norm": 0.30837181210517883, |
|
"learning_rate": 2.4078294844623816e-05, |
|
"loss": 0.3639, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.6758542571266755, |
|
"grad_norm": 0.33780837059020996, |
|
"learning_rate": 2.3976609601346394e-05, |
|
"loss": 0.3787, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.676609401548046, |
|
"grad_norm": 0.3720472455024719, |
|
"learning_rate": 2.3875071764202563e-05, |
|
"loss": 0.3951, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.6773645459694166, |
|
"grad_norm": 0.3754374384880066, |
|
"learning_rate": 2.3773681908340284e-05, |
|
"loss": 0.3508, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.6781196903907872, |
|
"grad_norm": 0.4134625196456909, |
|
"learning_rate": 2.3672440608069313e-05, |
|
"loss": 0.3392, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.6788748348121578, |
|
"grad_norm": 0.43518126010894775, |
|
"learning_rate": 2.3571348436857904e-05, |
|
"loss": 0.3959, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.6796299792335284, |
|
"grad_norm": 0.46200618147850037, |
|
"learning_rate": 2.3470405967329605e-05, |
|
"loss": 0.3458, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.680385123654899, |
|
"grad_norm": 0.1568119376897812, |
|
"learning_rate": 2.336961377126001e-05, |
|
"loss": 0.2835, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.6811402680762696, |
|
"grad_norm": 0.1660660058259964, |
|
"learning_rate": 2.326897241957348e-05, |
|
"loss": 0.3211, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.6818954124976402, |
|
"grad_norm": 0.1690598577260971, |
|
"learning_rate": 2.3168482482339955e-05, |
|
"loss": 0.3941, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.6826505569190108, |
|
"grad_norm": 0.17117637395858765, |
|
"learning_rate": 2.3068144528771712e-05, |
|
"loss": 0.3738, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.6834057013403814, |
|
"grad_norm": 0.16965438425540924, |
|
"learning_rate": 2.296795912722014e-05, |
|
"loss": 0.3393, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.684160845761752, |
|
"grad_norm": 0.18404419720172882, |
|
"learning_rate": 2.286792684517245e-05, |
|
"loss": 0.357, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.6849159901831225, |
|
"grad_norm": 0.1736985743045807, |
|
"learning_rate": 2.2768048249248648e-05, |
|
"loss": 0.3317, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.6856711346044931, |
|
"grad_norm": 0.18678586184978485, |
|
"learning_rate": 2.2668323905198108e-05, |
|
"loss": 0.3558, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.6864262790258637, |
|
"grad_norm": 0.19678306579589844, |
|
"learning_rate": 2.2568754377896516e-05, |
|
"loss": 0.4089, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.6871814234472343, |
|
"grad_norm": 0.20676788687705994, |
|
"learning_rate": 2.246934023134257e-05, |
|
"loss": 0.3973, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6879365678686049, |
|
"grad_norm": 0.21614781022071838, |
|
"learning_rate": 2.2370082028654866e-05, |
|
"loss": 0.3998, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.6886917122899755, |
|
"grad_norm": 0.2001326084136963, |
|
"learning_rate": 2.22709803320687e-05, |
|
"loss": 0.3733, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.6894468567113461, |
|
"grad_norm": 0.20481480658054352, |
|
"learning_rate": 2.2172035702932825e-05, |
|
"loss": 0.4074, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.6902020011327167, |
|
"grad_norm": 0.21287479996681213, |
|
"learning_rate": 2.207324870170629e-05, |
|
"loss": 0.429, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.6909571455540873, |
|
"grad_norm": 0.21085505187511444, |
|
"learning_rate": 2.1974619887955294e-05, |
|
"loss": 0.3979, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.6917122899754579, |
|
"grad_norm": 0.21903711557388306, |
|
"learning_rate": 2.1876149820350057e-05, |
|
"loss": 0.5028, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.6924674343968283, |
|
"grad_norm": 0.2196279913187027, |
|
"learning_rate": 2.1777839056661554e-05, |
|
"loss": 0.4467, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.6932225788181989, |
|
"grad_norm": 0.2197204977273941, |
|
"learning_rate": 2.167968815375837e-05, |
|
"loss": 0.4149, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.6939777232395695, |
|
"grad_norm": 0.23177607357501984, |
|
"learning_rate": 2.1581697667603633e-05, |
|
"loss": 0.4254, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.6947328676609401, |
|
"grad_norm": 0.22916048765182495, |
|
"learning_rate": 2.148386815325179e-05, |
|
"loss": 0.4336, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6954880120823107, |
|
"grad_norm": 0.2257329225540161, |
|
"learning_rate": 2.1386200164845526e-05, |
|
"loss": 0.3985, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.6962431565036813, |
|
"grad_norm": 0.2298842817544937, |
|
"learning_rate": 2.1288694255612502e-05, |
|
"loss": 0.4249, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.6969983009250519, |
|
"grad_norm": 0.22104403376579285, |
|
"learning_rate": 2.119135097786236e-05, |
|
"loss": 0.3709, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.6977534453464225, |
|
"grad_norm": 0.2183627337217331, |
|
"learning_rate": 2.1094170882983526e-05, |
|
"loss": 0.3912, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.6985085897677931, |
|
"grad_norm": 0.23446856439113617, |
|
"learning_rate": 2.09971545214401e-05, |
|
"loss": 0.3894, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6992637341891637, |
|
"grad_norm": 0.22170968353748322, |
|
"learning_rate": 2.0900302442768715e-05, |
|
"loss": 0.3473, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.7000188786105342, |
|
"grad_norm": 0.2619408071041107, |
|
"learning_rate": 2.0803615195575475e-05, |
|
"loss": 0.4502, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.7007740230319048, |
|
"grad_norm": 0.24339045584201813, |
|
"learning_rate": 2.0707093327532805e-05, |
|
"loss": 0.3812, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.7015291674532754, |
|
"grad_norm": 0.2485007792711258, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 0.374, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.702284311874646, |
|
"grad_norm": 0.252399206161499, |
|
"learning_rate": 2.05145479149019e-05, |
|
"loss": 0.3943, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7030394562960166, |
|
"grad_norm": 0.2727779448032379, |
|
"learning_rate": 2.0418525460962285e-05, |
|
"loss": 0.415, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.7037946007173872, |
|
"grad_norm": 0.24974308907985687, |
|
"learning_rate": 2.03226705674643e-05, |
|
"loss": 0.3446, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.7045497451387578, |
|
"grad_norm": 0.2764071524143219, |
|
"learning_rate": 2.0226983777365604e-05, |
|
"loss": 0.4001, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.7053048895601284, |
|
"grad_norm": 0.25649023056030273, |
|
"learning_rate": 2.0131465632671652e-05, |
|
"loss": 0.3642, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.706060033981499, |
|
"grad_norm": 0.26145219802856445, |
|
"learning_rate": 2.0036116674432654e-05, |
|
"loss": 0.367, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.7068151784028696, |
|
"grad_norm": 0.2874867022037506, |
|
"learning_rate": 1.9940937442740454e-05, |
|
"loss": 0.3967, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.7075703228242401, |
|
"grad_norm": 0.2877250015735626, |
|
"learning_rate": 1.9845928476725524e-05, |
|
"loss": 0.4071, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.7083254672456107, |
|
"grad_norm": 0.2828865647315979, |
|
"learning_rate": 1.9751090314553878e-05, |
|
"loss": 0.3551, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.7090806116669813, |
|
"grad_norm": 0.30538272857666016, |
|
"learning_rate": 1.9656423493424048e-05, |
|
"loss": 0.3898, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.7098357560883519, |
|
"grad_norm": 0.33847618103027344, |
|
"learning_rate": 1.9561928549563968e-05, |
|
"loss": 0.4906, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7105909005097225, |
|
"grad_norm": 0.3496238887310028, |
|
"learning_rate": 1.946760601822809e-05, |
|
"loss": 0.4622, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.7113460449310931, |
|
"grad_norm": 0.3086839020252228, |
|
"learning_rate": 1.9373456433694198e-05, |
|
"loss": 0.3681, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.7121011893524637, |
|
"grad_norm": 0.33560845255851746, |
|
"learning_rate": 1.927948032926047e-05, |
|
"loss": 0.3716, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.7128563337738343, |
|
"grad_norm": 0.34799009561538696, |
|
"learning_rate": 1.9185678237242373e-05, |
|
"loss": 0.3493, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.7136114781952049, |
|
"grad_norm": 0.3494877815246582, |
|
"learning_rate": 1.9092050688969738e-05, |
|
"loss": 0.3688, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.7143666226165755, |
|
"grad_norm": 0.34960487484931946, |
|
"learning_rate": 1.899859821478376e-05, |
|
"loss": 0.3618, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.715121767037946, |
|
"grad_norm": 0.38590875267982483, |
|
"learning_rate": 1.8905321344033898e-05, |
|
"loss": 0.437, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.7158769114593166, |
|
"grad_norm": 0.4215663969516754, |
|
"learning_rate": 1.881222060507492e-05, |
|
"loss": 0.4118, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.7166320558806872, |
|
"grad_norm": 0.36362165212631226, |
|
"learning_rate": 1.8719296525263922e-05, |
|
"loss": 0.3083, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.7173872003020577, |
|
"grad_norm": 0.5022253394126892, |
|
"learning_rate": 1.8626549630957396e-05, |
|
"loss": 0.4431, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7181423447234283, |
|
"grad_norm": 0.14882560074329376, |
|
"learning_rate": 1.8533980447508137e-05, |
|
"loss": 0.3217, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.7188974891447989, |
|
"grad_norm": 0.16775713860988617, |
|
"learning_rate": 1.8441589499262303e-05, |
|
"loss": 0.3601, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.7196526335661695, |
|
"grad_norm": 0.17136643826961517, |
|
"learning_rate": 1.8349377309556486e-05, |
|
"loss": 0.3492, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.7204077779875401, |
|
"grad_norm": 0.17329272627830505, |
|
"learning_rate": 1.8257344400714732e-05, |
|
"loss": 0.3725, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.7211629224089107, |
|
"grad_norm": 0.16661885380744934, |
|
"learning_rate": 1.8165491294045593e-05, |
|
"loss": 0.3236, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.7219180668302813, |
|
"grad_norm": 0.182418555021286, |
|
"learning_rate": 1.8073818509839098e-05, |
|
"loss": 0.4007, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.7226732112516518, |
|
"grad_norm": 0.18849271535873413, |
|
"learning_rate": 1.7982326567363888e-05, |
|
"loss": 0.3725, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.7234283556730224, |
|
"grad_norm": 0.1787819266319275, |
|
"learning_rate": 1.789101598486427e-05, |
|
"loss": 0.3415, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.724183500094393, |
|
"grad_norm": 0.20412220060825348, |
|
"learning_rate": 1.7799887279557237e-05, |
|
"loss": 0.392, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.7249386445157636, |
|
"grad_norm": 0.19325533509254456, |
|
"learning_rate": 1.7708940967629567e-05, |
|
"loss": 0.3702, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7256937889371342, |
|
"grad_norm": 0.19539514183998108, |
|
"learning_rate": 1.7618177564234905e-05, |
|
"loss": 0.3573, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.7264489333585048, |
|
"grad_norm": 0.21055525541305542, |
|
"learning_rate": 1.7527597583490822e-05, |
|
"loss": 0.3993, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.7272040777798754, |
|
"grad_norm": 0.19510890543460846, |
|
"learning_rate": 1.7437201538475916e-05, |
|
"loss": 0.391, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.727959222201246, |
|
"grad_norm": 0.2113380879163742, |
|
"learning_rate": 1.734698994122691e-05, |
|
"loss": 0.4522, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.7287143666226166, |
|
"grad_norm": 0.23572376370429993, |
|
"learning_rate": 1.725696330273575e-05, |
|
"loss": 0.4537, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.7294695110439872, |
|
"grad_norm": 0.3286976218223572, |
|
"learning_rate": 1.7167122132946694e-05, |
|
"loss": 0.3092, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.7302246554653578, |
|
"grad_norm": 0.22728495299816132, |
|
"learning_rate": 1.7077466940753444e-05, |
|
"loss": 0.3905, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.7309797998867283, |
|
"grad_norm": 0.21779009699821472, |
|
"learning_rate": 1.698799823399628e-05, |
|
"loss": 0.402, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.7317349443080989, |
|
"grad_norm": 0.22000010311603546, |
|
"learning_rate": 1.6898716519459074e-05, |
|
"loss": 0.4021, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.7324900887294695, |
|
"grad_norm": 0.21992164850234985, |
|
"learning_rate": 1.6809622302866625e-05, |
|
"loss": 0.3588, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7332452331508401, |
|
"grad_norm": 0.24943576753139496, |
|
"learning_rate": 1.6720716088881594e-05, |
|
"loss": 0.4089, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.7340003775722107, |
|
"grad_norm": 0.22193026542663574, |
|
"learning_rate": 1.6631998381101767e-05, |
|
"loss": 0.379, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.7347555219935813, |
|
"grad_norm": 0.21193212270736694, |
|
"learning_rate": 1.6543469682057106e-05, |
|
"loss": 0.296, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.7355106664149519, |
|
"grad_norm": 0.251336008310318, |
|
"learning_rate": 1.6455130493206987e-05, |
|
"loss": 0.4699, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.7362658108363225, |
|
"grad_norm": 0.23066718876361847, |
|
"learning_rate": 1.6366981314937376e-05, |
|
"loss": 0.4149, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.7370209552576931, |
|
"grad_norm": 0.26630717515945435, |
|
"learning_rate": 1.627902264655788e-05, |
|
"loss": 0.4322, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.7377760996790637, |
|
"grad_norm": 0.263698548078537, |
|
"learning_rate": 1.619125498629904e-05, |
|
"loss": 0.407, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.7385312441004342, |
|
"grad_norm": 0.2588689625263214, |
|
"learning_rate": 1.61036788313094e-05, |
|
"loss": 0.4343, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.7392863885218048, |
|
"grad_norm": 0.23290039598941803, |
|
"learning_rate": 1.601629467765277e-05, |
|
"loss": 0.3627, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.7400415329431754, |
|
"grad_norm": 0.24483409523963928, |
|
"learning_rate": 1.592910302030544e-05, |
|
"loss": 0.3279, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.740796677364546, |
|
"grad_norm": 0.25037166476249695, |
|
"learning_rate": 1.5842104353153287e-05, |
|
"loss": 0.4103, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.7415518217859166, |
|
"grad_norm": 0.2862619459629059, |
|
"learning_rate": 1.5755299168988997e-05, |
|
"loss": 0.433, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.7423069662072871, |
|
"grad_norm": 0.28889885544776917, |
|
"learning_rate": 1.566868795950932e-05, |
|
"loss": 0.426, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.7430621106286577, |
|
"grad_norm": 0.2708713710308075, |
|
"learning_rate": 1.5582271215312294e-05, |
|
"loss": 0.4114, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.7438172550500283, |
|
"grad_norm": 0.269611120223999, |
|
"learning_rate": 1.549604942589441e-05, |
|
"loss": 0.3912, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7445723994713989, |
|
"grad_norm": 0.2908805310726166, |
|
"learning_rate": 1.5410023079647822e-05, |
|
"loss": 0.3776, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.7453275438927695, |
|
"grad_norm": 0.2953813076019287, |
|
"learning_rate": 1.5324192663857674e-05, |
|
"loss": 0.4081, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.74608268831414, |
|
"grad_norm": 0.28919684886932373, |
|
"learning_rate": 1.5238558664699255e-05, |
|
"loss": 0.4329, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.7468378327355106, |
|
"grad_norm": 0.31675902009010315, |
|
"learning_rate": 1.5153121567235335e-05, |
|
"loss": 0.4214, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.7475929771568812, |
|
"grad_norm": 0.3113098442554474, |
|
"learning_rate": 1.5067881855413274e-05, |
|
"loss": 0.4025, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7483481215782518, |
|
"grad_norm": 0.2837510108947754, |
|
"learning_rate": 1.4982840012062426e-05, |
|
"loss": 0.347, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.7491032659996224, |
|
"grad_norm": 0.32457566261291504, |
|
"learning_rate": 1.4897996518891327e-05, |
|
"loss": 0.3995, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.749858410420993, |
|
"grad_norm": 0.3207525908946991, |
|
"learning_rate": 1.481335185648498e-05, |
|
"loss": 0.3603, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.7506135548423636, |
|
"grad_norm": 0.37354031205177307, |
|
"learning_rate": 1.4728906504302153e-05, |
|
"loss": 0.4288, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.7513686992637342, |
|
"grad_norm": 0.3249981105327606, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 0.346, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7521238436851048, |
|
"grad_norm": 0.3194781541824341, |
|
"learning_rate": 1.4560615642794517e-05, |
|
"loss": 0.3108, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.7528789881064754, |
|
"grad_norm": 0.39953500032424927, |
|
"learning_rate": 1.4476771086731567e-05, |
|
"loss": 0.4049, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.753634132527846, |
|
"grad_norm": 0.35916373133659363, |
|
"learning_rate": 1.4393127747410417e-05, |
|
"loss": 0.335, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.7543892769492165, |
|
"grad_norm": 0.3811121881008148, |
|
"learning_rate": 1.4309686098617975e-05, |
|
"loss": 0.3608, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.7551444213705871, |
|
"grad_norm": 0.49154841899871826, |
|
"learning_rate": 1.4226446612998673e-05, |
|
"loss": 0.4254, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7558995657919577, |
|
"grad_norm": 0.14996010065078735, |
|
"learning_rate": 1.414340976205183e-05, |
|
"loss": 0.3259, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.7566547102133283, |
|
"grad_norm": 0.15046906471252441, |
|
"learning_rate": 1.4060576016128974e-05, |
|
"loss": 0.3184, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.7574098546346989, |
|
"grad_norm": 0.1714065670967102, |
|
"learning_rate": 1.3977945844431118e-05, |
|
"loss": 0.3564, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.7581649990560695, |
|
"grad_norm": 0.18413116037845612, |
|
"learning_rate": 1.3895519715006238e-05, |
|
"loss": 0.3889, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.7589201434774401, |
|
"grad_norm": 0.19010891020298004, |
|
"learning_rate": 1.3813298094746491e-05, |
|
"loss": 0.3909, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.7596752878988107, |
|
"grad_norm": 0.1898065060377121, |
|
"learning_rate": 1.373128144938563e-05, |
|
"loss": 0.4388, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.7604304323201813, |
|
"grad_norm": 0.1871468871831894, |
|
"learning_rate": 1.3649470243496326e-05, |
|
"loss": 0.401, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.7611855767415519, |
|
"grad_norm": 0.18679498136043549, |
|
"learning_rate": 1.3567864940487584e-05, |
|
"loss": 0.4038, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.7619407211629224, |
|
"grad_norm": 0.19137872755527496, |
|
"learning_rate": 1.3486466002602133e-05, |
|
"loss": 0.3803, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.762695865584293, |
|
"grad_norm": 0.1978340446949005, |
|
"learning_rate": 1.340527389091374e-05, |
|
"loss": 0.388, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7634510100056636, |
|
"grad_norm": 0.19116266071796417, |
|
"learning_rate": 1.3324289065324608e-05, |
|
"loss": 0.3728, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.7642061544270342, |
|
"grad_norm": 0.21141821146011353, |
|
"learning_rate": 1.3243511984562824e-05, |
|
"loss": 0.4367, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.7649612988484048, |
|
"grad_norm": 0.22845213115215302, |
|
"learning_rate": 1.3162943106179749e-05, |
|
"loss": 0.3907, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.7657164432697754, |
|
"grad_norm": 0.22269576787948608, |
|
"learning_rate": 1.3082582886547395e-05, |
|
"loss": 0.4779, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.766471587691146, |
|
"grad_norm": 0.21351350843906403, |
|
"learning_rate": 1.3002431780855817e-05, |
|
"loss": 0.4206, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.7672267321125166, |
|
"grad_norm": 0.2109295129776001, |
|
"learning_rate": 1.2922490243110614e-05, |
|
"loss": 0.3882, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.767981876533887, |
|
"grad_norm": 0.23167261481285095, |
|
"learning_rate": 1.2842758726130283e-05, |
|
"loss": 0.4386, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.7687370209552576, |
|
"grad_norm": 0.23334629833698273, |
|
"learning_rate": 1.2763237681543732e-05, |
|
"loss": 0.4477, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.7694921653766282, |
|
"grad_norm": 0.23084315657615662, |
|
"learning_rate": 1.2683927559787655e-05, |
|
"loss": 0.423, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.7702473097979988, |
|
"grad_norm": 0.2281108796596527, |
|
"learning_rate": 1.2604828810103957e-05, |
|
"loss": 0.4073, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7710024542193694, |
|
"grad_norm": 0.2173498570919037, |
|
"learning_rate": 1.2525941880537307e-05, |
|
"loss": 0.3423, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.77175759864074, |
|
"grad_norm": 0.21901051700115204, |
|
"learning_rate": 1.2447267217932507e-05, |
|
"loss": 0.3601, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.7725127430621106, |
|
"grad_norm": 0.22421492636203766, |
|
"learning_rate": 1.236880526793207e-05, |
|
"loss": 0.3953, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.7732678874834812, |
|
"grad_norm": 0.2377631813287735, |
|
"learning_rate": 1.2290556474973536e-05, |
|
"loss": 0.3744, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.7740230319048518, |
|
"grad_norm": 0.23121589422225952, |
|
"learning_rate": 1.2212521282287092e-05, |
|
"loss": 0.3976, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.7747781763262224, |
|
"grad_norm": 0.255655437707901, |
|
"learning_rate": 1.2134700131893012e-05, |
|
"loss": 0.4125, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.775533320747593, |
|
"grad_norm": 0.2366100549697876, |
|
"learning_rate": 1.2057093464599157e-05, |
|
"loss": 0.3724, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.7762884651689635, |
|
"grad_norm": 0.2489083856344223, |
|
"learning_rate": 1.1979701719998453e-05, |
|
"loss": 0.4281, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.7770436095903341, |
|
"grad_norm": 0.26776382327079773, |
|
"learning_rate": 1.1902525336466464e-05, |
|
"loss": 0.4041, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.7777987540117047, |
|
"grad_norm": 0.24452626705169678, |
|
"learning_rate": 1.1825564751158823e-05, |
|
"loss": 0.4135, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7785538984330753, |
|
"grad_norm": 0.2541411817073822, |
|
"learning_rate": 1.1748820400008843e-05, |
|
"loss": 0.4086, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.7793090428544459, |
|
"grad_norm": 0.27573496103286743, |
|
"learning_rate": 1.167229271772498e-05, |
|
"loss": 0.3883, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.7800641872758165, |
|
"grad_norm": 0.2743297219276428, |
|
"learning_rate": 1.1595982137788403e-05, |
|
"loss": 0.4083, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.7808193316971871, |
|
"grad_norm": 0.2688688039779663, |
|
"learning_rate": 1.1519889092450542e-05, |
|
"loss": 0.3992, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.7815744761185577, |
|
"grad_norm": 0.28381526470184326, |
|
"learning_rate": 1.144401401273062e-05, |
|
"loss": 0.3882, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.7823296205399283, |
|
"grad_norm": 0.26710647344589233, |
|
"learning_rate": 1.1368357328413242e-05, |
|
"loss": 0.3639, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.7830847649612989, |
|
"grad_norm": 0.3097337782382965, |
|
"learning_rate": 1.1292919468045877e-05, |
|
"loss": 0.4394, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.7838399093826695, |
|
"grad_norm": 0.28815412521362305, |
|
"learning_rate": 1.1217700858936587e-05, |
|
"loss": 0.4298, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.78459505380404, |
|
"grad_norm": 0.30151909589767456, |
|
"learning_rate": 1.1142701927151456e-05, |
|
"loss": 0.393, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.7853501982254106, |
|
"grad_norm": 0.30772241950035095, |
|
"learning_rate": 1.1067923097512256e-05, |
|
"loss": 0.3688, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7861053426467812, |
|
"grad_norm": 0.31982895731925964, |
|
"learning_rate": 1.099336479359398e-05, |
|
"loss": 0.3815, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.7868604870681518, |
|
"grad_norm": 0.3298172950744629, |
|
"learning_rate": 1.0919027437722513e-05, |
|
"loss": 0.4153, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.7876156314895224, |
|
"grad_norm": 0.33412277698516846, |
|
"learning_rate": 1.0844911450972229e-05, |
|
"loss": 0.3972, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.788370775910893, |
|
"grad_norm": 0.3366442322731018, |
|
"learning_rate": 1.0771017253163568e-05, |
|
"loss": 0.3627, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.7891259203322636, |
|
"grad_norm": 0.3680926263332367, |
|
"learning_rate": 1.0697345262860636e-05, |
|
"loss": 0.4297, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.7898810647536342, |
|
"grad_norm": 0.3372995853424072, |
|
"learning_rate": 1.0623895897368913e-05, |
|
"loss": 0.3856, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.7906362091750048, |
|
"grad_norm": 0.34899917244911194, |
|
"learning_rate": 1.0550669572732863e-05, |
|
"loss": 0.2923, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.7913913535963754, |
|
"grad_norm": 0.4165075421333313, |
|
"learning_rate": 1.0477666703733541e-05, |
|
"loss": 0.3788, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.792146498017746, |
|
"grad_norm": 0.44895628094673157, |
|
"learning_rate": 1.0404887703886251e-05, |
|
"loss": 0.373, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.7929016424391164, |
|
"grad_norm": 0.4823060631752014, |
|
"learning_rate": 1.0332332985438248e-05, |
|
"loss": 0.3716, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.793656786860487, |
|
"grad_norm": 0.15826448798179626, |
|
"learning_rate": 1.0260002959366349e-05, |
|
"loss": 0.3269, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.7944119312818576, |
|
"grad_norm": 0.1592281609773636, |
|
"learning_rate": 1.0187898035374682e-05, |
|
"loss": 0.3417, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.7951670757032282, |
|
"grad_norm": 0.18132025003433228, |
|
"learning_rate": 1.0116018621892237e-05, |
|
"loss": 0.3531, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.7959222201245988, |
|
"grad_norm": 0.16262286901474, |
|
"learning_rate": 1.0044365126070682e-05, |
|
"loss": 0.3089, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.7966773645459694, |
|
"grad_norm": 0.17961286008358002, |
|
"learning_rate": 9.972937953781986e-06, |
|
"loss": 0.3534, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.79743250896734, |
|
"grad_norm": 0.17105191946029663, |
|
"learning_rate": 9.901737509616143e-06, |
|
"loss": 0.3361, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.7981876533887106, |
|
"grad_norm": 0.1858292818069458, |
|
"learning_rate": 9.830764196878872e-06, |
|
"loss": 0.354, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.7989427978100812, |
|
"grad_norm": 0.19711358845233917, |
|
"learning_rate": 9.760018417589334e-06, |
|
"loss": 0.3887, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.7996979422314517, |
|
"grad_norm": 0.20733587443828583, |
|
"learning_rate": 9.689500572477855e-06, |
|
"loss": 0.4724, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.8004530866528223, |
|
"grad_norm": 0.2481202483177185, |
|
"learning_rate": 9.619211060983675e-06, |
|
"loss": 0.4828, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8012082310741929, |
|
"grad_norm": 0.191118523478508, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 0.3909, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.8019633754955635, |
|
"grad_norm": 0.19519171118736267, |
|
"learning_rate": 9.479318630134976e-06, |
|
"loss": 0.339, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.8027185199169341, |
|
"grad_norm": 0.19820590317249298, |
|
"learning_rate": 9.409716503183074e-06, |
|
"loss": 0.3476, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.8034736643383047, |
|
"grad_norm": 0.23485320806503296, |
|
"learning_rate": 9.340344294649184e-06, |
|
"loss": 0.4675, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.8042288087596753, |
|
"grad_norm": 0.20198017358779907, |
|
"learning_rate": 9.271202397483215e-06, |
|
"loss": 0.336, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.8049839531810459, |
|
"grad_norm": 0.19426412880420685, |
|
"learning_rate": 9.20229120333052e-06, |
|
"loss": 0.3578, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.8057390976024165, |
|
"grad_norm": 0.2336643636226654, |
|
"learning_rate": 9.133611102529654e-06, |
|
"loss": 0.4355, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.8064942420237871, |
|
"grad_norm": 0.2223149985074997, |
|
"learning_rate": 9.065162484110179e-06, |
|
"loss": 0.4256, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.8072493864451576, |
|
"grad_norm": 0.23664018511772156, |
|
"learning_rate": 8.996945735790447e-06, |
|
"loss": 0.4148, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.8080045308665282, |
|
"grad_norm": 0.22716124355793, |
|
"learning_rate": 8.928961243975437e-06, |
|
"loss": 0.3981, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8087596752878988, |
|
"grad_norm": 0.230534628033638, |
|
"learning_rate": 8.861209393754477e-06, |
|
"loss": 0.4269, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.8095148197092694, |
|
"grad_norm": 0.21818408370018005, |
|
"learning_rate": 8.793690568899216e-06, |
|
"loss": 0.3498, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.81026996413064, |
|
"grad_norm": 0.2515822947025299, |
|
"learning_rate": 8.7264051518613e-06, |
|
"loss": 0.4829, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.8110251085520106, |
|
"grad_norm": 0.23883438110351562, |
|
"learning_rate": 8.659353523770297e-06, |
|
"loss": 0.3792, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.8117802529733812, |
|
"grad_norm": 0.25294432044029236, |
|
"learning_rate": 8.592536064431467e-06, |
|
"loss": 0.3966, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.8125353973947518, |
|
"grad_norm": 0.2528051435947418, |
|
"learning_rate": 8.525953152323684e-06, |
|
"loss": 0.4245, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.8132905418161224, |
|
"grad_norm": 0.25422972440719604, |
|
"learning_rate": 8.459605164597267e-06, |
|
"loss": 0.4256, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.814045686237493, |
|
"grad_norm": 0.2697378098964691, |
|
"learning_rate": 8.393492477071829e-06, |
|
"loss": 0.4137, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.8148008306588636, |
|
"grad_norm": 0.25492045283317566, |
|
"learning_rate": 8.327615464234129e-06, |
|
"loss": 0.4055, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.8155559750802341, |
|
"grad_norm": 0.26645827293395996, |
|
"learning_rate": 8.261974499235987e-06, |
|
"loss": 0.4531, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8163111195016047, |
|
"grad_norm": 0.2661876082420349, |
|
"learning_rate": 8.196569953892202e-06, |
|
"loss": 0.3774, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.8170662639229753, |
|
"grad_norm": 0.2471131682395935, |
|
"learning_rate": 8.131402198678373e-06, |
|
"loss": 0.3474, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.8178214083443458, |
|
"grad_norm": 0.26696231961250305, |
|
"learning_rate": 8.066471602728803e-06, |
|
"loss": 0.3357, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.8185765527657164, |
|
"grad_norm": 0.26743122935295105, |
|
"learning_rate": 8.001778533834487e-06, |
|
"loss": 0.3404, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.819331697187087, |
|
"grad_norm": 0.28732678294181824, |
|
"learning_rate": 7.937323358440935e-06, |
|
"loss": 0.389, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.8200868416084576, |
|
"grad_norm": 0.30629798769950867, |
|
"learning_rate": 7.873106441646205e-06, |
|
"loss": 0.4185, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.8208419860298282, |
|
"grad_norm": 0.2828892469406128, |
|
"learning_rate": 7.809128147198691e-06, |
|
"loss": 0.3792, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.8215971304511988, |
|
"grad_norm": 0.28884345293045044, |
|
"learning_rate": 7.745388837495188e-06, |
|
"loss": 0.369, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.8223522748725693, |
|
"grad_norm": 0.30468007922172546, |
|
"learning_rate": 7.681888873578786e-06, |
|
"loss": 0.4518, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.8231074192939399, |
|
"grad_norm": 0.3138682246208191, |
|
"learning_rate": 7.618628615136825e-06, |
|
"loss": 0.3665, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8238625637153105, |
|
"grad_norm": 0.2910728454589844, |
|
"learning_rate": 7.555608420498872e-06, |
|
"loss": 0.2928, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.8246177081366811, |
|
"grad_norm": 0.3152346611022949, |
|
"learning_rate": 7.4928286466346754e-06, |
|
"loss": 0.3834, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.8253728525580517, |
|
"grad_norm": 0.336488276720047, |
|
"learning_rate": 7.430289649152156e-06, |
|
"loss": 0.3728, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.8261279969794223, |
|
"grad_norm": 0.32753413915634155, |
|
"learning_rate": 7.367991782295391e-06, |
|
"loss": 0.3237, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.8268831414007929, |
|
"grad_norm": 0.33121833205223083, |
|
"learning_rate": 7.305935398942598e-06, |
|
"loss": 0.3403, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.8276382858221635, |
|
"grad_norm": 0.3293071389198303, |
|
"learning_rate": 7.244120850604141e-06, |
|
"loss": 0.3105, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.8283934302435341, |
|
"grad_norm": 0.3871884047985077, |
|
"learning_rate": 7.182548487420554e-06, |
|
"loss": 0.3617, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.8291485746649047, |
|
"grad_norm": 0.4038209915161133, |
|
"learning_rate": 7.121218658160527e-06, |
|
"loss": 0.4204, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.8299037190862752, |
|
"grad_norm": 0.41719168424606323, |
|
"learning_rate": 7.060131710218959e-06, |
|
"loss": 0.299, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.8306588635076458, |
|
"grad_norm": 0.4760392904281616, |
|
"learning_rate": 6.999287989614972e-06, |
|
"loss": 0.3683, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8314140079290164, |
|
"grad_norm": 0.17138616740703583, |
|
"learning_rate": 6.9386878409899715e-06, |
|
"loss": 0.3231, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.832169152350387, |
|
"grad_norm": 0.16634538769721985, |
|
"learning_rate": 6.87833160760567e-06, |
|
"loss": 0.3349, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.8329242967717576, |
|
"grad_norm": 0.17278127372264862, |
|
"learning_rate": 6.818219631342149e-06, |
|
"loss": 0.3614, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.8336794411931282, |
|
"grad_norm": 0.16819556057453156, |
|
"learning_rate": 6.758352252695949e-06, |
|
"loss": 0.3442, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.8344345856144988, |
|
"grad_norm": 0.181631401181221, |
|
"learning_rate": 6.698729810778065e-06, |
|
"loss": 0.3663, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.8351897300358694, |
|
"grad_norm": 0.18373197317123413, |
|
"learning_rate": 6.639352643312164e-06, |
|
"loss": 0.3636, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.83594487445724, |
|
"grad_norm": 0.1827540099620819, |
|
"learning_rate": 6.580221086632516e-06, |
|
"loss": 0.3765, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.8367000188786106, |
|
"grad_norm": 0.1983499974012375, |
|
"learning_rate": 6.521335475682205e-06, |
|
"loss": 0.3806, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.8374551632999812, |
|
"grad_norm": 0.2072797417640686, |
|
"learning_rate": 6.462696144011149e-06, |
|
"loss": 0.4196, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.8382103077213517, |
|
"grad_norm": 0.20540378987789154, |
|
"learning_rate": 6.40430342377426e-06, |
|
"loss": 0.4063, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8389654521427223, |
|
"grad_norm": 0.22013606131076813, |
|
"learning_rate": 6.346157645729589e-06, |
|
"loss": 0.4732, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.8397205965640929, |
|
"grad_norm": 0.2054942101240158, |
|
"learning_rate": 6.2882591392363795e-06, |
|
"loss": 0.3476, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.8404757409854635, |
|
"grad_norm": 0.22685834765434265, |
|
"learning_rate": 6.230608232253227e-06, |
|
"loss": 0.4091, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.8412308854068341, |
|
"grad_norm": 0.22038882970809937, |
|
"learning_rate": 6.173205251336239e-06, |
|
"loss": 0.4229, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.8419860298282047, |
|
"grad_norm": 0.20709578692913055, |
|
"learning_rate": 6.116050521637218e-06, |
|
"loss": 0.4012, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.8427411742495753, |
|
"grad_norm": 0.2158709317445755, |
|
"learning_rate": 6.059144366901736e-06, |
|
"loss": 0.3793, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.8434963186709458, |
|
"grad_norm": 0.21242888271808624, |
|
"learning_rate": 6.002487109467347e-06, |
|
"loss": 0.334, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.8442514630923164, |
|
"grad_norm": 0.23054109513759613, |
|
"learning_rate": 5.946079070261773e-06, |
|
"loss": 0.4508, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.845006607513687, |
|
"grad_norm": 0.22902311384677887, |
|
"learning_rate": 5.889920568801055e-06, |
|
"loss": 0.4533, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.8457617519350575, |
|
"grad_norm": 0.22745831310749054, |
|
"learning_rate": 5.834011923187805e-06, |
|
"loss": 0.4043, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8465168963564281, |
|
"grad_norm": 0.22601962089538574, |
|
"learning_rate": 5.778353450109286e-06, |
|
"loss": 0.4465, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.8472720407777987, |
|
"grad_norm": 0.2339319884777069, |
|
"learning_rate": 5.722945464835749e-06, |
|
"loss": 0.3846, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.8480271851991693, |
|
"grad_norm": 0.23972941935062408, |
|
"learning_rate": 5.667788281218567e-06, |
|
"loss": 0.4077, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.8487823296205399, |
|
"grad_norm": 0.24830228090286255, |
|
"learning_rate": 5.61288221168848e-06, |
|
"loss": 0.4068, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.8495374740419105, |
|
"grad_norm": 0.24531161785125732, |
|
"learning_rate": 5.558227567253832e-06, |
|
"loss": 0.3847, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8502926184632811, |
|
"grad_norm": 0.2520170509815216, |
|
"learning_rate": 5.503824657498785e-06, |
|
"loss": 0.3514, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.8510477628846517, |
|
"grad_norm": 0.24631567299365997, |
|
"learning_rate": 5.449673790581611e-06, |
|
"loss": 0.4191, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.8518029073060223, |
|
"grad_norm": 0.26101672649383545, |
|
"learning_rate": 5.39577527323289e-06, |
|
"loss": 0.4393, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.8525580517273929, |
|
"grad_norm": 0.2639968991279602, |
|
"learning_rate": 5.34212941075381e-06, |
|
"loss": 0.4323, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.8533131961487634, |
|
"grad_norm": 0.2551827132701874, |
|
"learning_rate": 5.288736507014435e-06, |
|
"loss": 0.3638, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.854068340570134, |
|
"grad_norm": 0.24728746712207794, |
|
"learning_rate": 5.235596864451975e-06, |
|
"loss": 0.3579, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.8548234849915046, |
|
"grad_norm": 0.2714408040046692, |
|
"learning_rate": 5.182710784069067e-06, |
|
"loss": 0.4218, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.8555786294128752, |
|
"grad_norm": 0.26579996943473816, |
|
"learning_rate": 5.13007856543209e-06, |
|
"loss": 0.3796, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.8563337738342458, |
|
"grad_norm": 0.2808961868286133, |
|
"learning_rate": 5.077700506669425e-06, |
|
"loss": 0.35, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.8570889182556164, |
|
"grad_norm": 0.30303752422332764, |
|
"learning_rate": 5.025576904469842e-06, |
|
"loss": 0.4059, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.857844062676987, |
|
"grad_norm": 0.30269986391067505, |
|
"learning_rate": 4.97370805408075e-06, |
|
"loss": 0.4018, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.8585992070983576, |
|
"grad_norm": 0.27650511264801025, |
|
"learning_rate": 4.922094249306558e-06, |
|
"loss": 0.3363, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.8593543515197282, |
|
"grad_norm": 0.28319036960601807, |
|
"learning_rate": 4.87073578250698e-06, |
|
"loss": 0.3566, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.8601094959410988, |
|
"grad_norm": 0.31123289465904236, |
|
"learning_rate": 4.819632944595415e-06, |
|
"loss": 0.3427, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.8608646403624693, |
|
"grad_norm": 0.3235510587692261, |
|
"learning_rate": 4.768786025037309e-06, |
|
"loss": 0.3882, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8616197847838399, |
|
"grad_norm": 0.34232163429260254, |
|
"learning_rate": 4.7181953118484556e-06, |
|
"loss": 0.4307, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.8623749292052105, |
|
"grad_norm": 0.3100459575653076, |
|
"learning_rate": 4.667861091593434e-06, |
|
"loss": 0.3515, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.8631300736265811, |
|
"grad_norm": 0.33639565110206604, |
|
"learning_rate": 4.617783649383905e-06, |
|
"loss": 0.4251, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.8638852180479517, |
|
"grad_norm": 0.30844351649284363, |
|
"learning_rate": 4.567963268877079e-06, |
|
"loss": 0.336, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.8646403624693223, |
|
"grad_norm": 0.3265226483345032, |
|
"learning_rate": 4.5184002322740785e-06, |
|
"loss": 0.3545, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.8653955068906929, |
|
"grad_norm": 0.3475089967250824, |
|
"learning_rate": 4.4690948203183255e-06, |
|
"loss": 0.3436, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.8661506513120635, |
|
"grad_norm": 0.3850986957550049, |
|
"learning_rate": 4.4200473122939456e-06, |
|
"loss": 0.4274, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.8669057957334341, |
|
"grad_norm": 0.4068509340286255, |
|
"learning_rate": 4.371257986024202e-06, |
|
"loss": 0.4225, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.8676609401548047, |
|
"grad_norm": 0.41805300116539, |
|
"learning_rate": 4.322727117869951e-06, |
|
"loss": 0.4207, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.8684160845761751, |
|
"grad_norm": 0.4633561670780182, |
|
"learning_rate": 4.274454982728032e-06, |
|
"loss": 0.3865, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8691712289975457, |
|
"grad_norm": 0.13895747065544128, |
|
"learning_rate": 4.2264418540297e-06, |
|
"loss": 0.2752, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.8699263734189163, |
|
"grad_norm": 0.16666154563426971, |
|
"learning_rate": 4.178688003739129e-06, |
|
"loss": 0.3396, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.8706815178402869, |
|
"grad_norm": 0.1707499921321869, |
|
"learning_rate": 4.131193702351827e-06, |
|
"loss": 0.3197, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.8714366622616575, |
|
"grad_norm": 0.176160529255867, |
|
"learning_rate": 4.0839592188931576e-06, |
|
"loss": 0.3482, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.8721918066830281, |
|
"grad_norm": 0.18616865575313568, |
|
"learning_rate": 4.036984820916723e-06, |
|
"loss": 0.3697, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.8729469511043987, |
|
"grad_norm": 0.19371068477630615, |
|
"learning_rate": 3.990270774502941e-06, |
|
"loss": 0.413, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.8737020955257693, |
|
"grad_norm": 0.19405323266983032, |
|
"learning_rate": 3.9438173442575e-06, |
|
"loss": 0.3545, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.8744572399471399, |
|
"grad_norm": 0.19808508455753326, |
|
"learning_rate": 3.897624793309846e-06, |
|
"loss": 0.3991, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.8752123843685105, |
|
"grad_norm": 0.19052360951900482, |
|
"learning_rate": 3.851693383311722e-06, |
|
"loss": 0.3765, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.875967528789881, |
|
"grad_norm": 0.1995311975479126, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 0.4073, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8767226732112516, |
|
"grad_norm": 0.19826874136924744, |
|
"learning_rate": 3.760615025373543e-06, |
|
"loss": 0.3841, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.8774778176326222, |
|
"grad_norm": 0.20532841980457306, |
|
"learning_rate": 3.7154685933350864e-06, |
|
"loss": 0.3538, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.8782329620539928, |
|
"grad_norm": 0.20944344997406006, |
|
"learning_rate": 3.6705843340464286e-06, |
|
"loss": 0.4038, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.8789881064753634, |
|
"grad_norm": 0.22046121954917908, |
|
"learning_rate": 3.625962501748653e-06, |
|
"loss": 0.4242, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.879743250896734, |
|
"grad_norm": 0.22591526806354523, |
|
"learning_rate": 3.581603349196372e-06, |
|
"loss": 0.4469, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.8804983953181046, |
|
"grad_norm": 0.22444604337215424, |
|
"learning_rate": 3.53750712765627e-06, |
|
"loss": 0.3994, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.8812535397394752, |
|
"grad_norm": 0.2247525304555893, |
|
"learning_rate": 3.4936740869057073e-06, |
|
"loss": 0.4276, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.8820086841608458, |
|
"grad_norm": 0.2309032678604126, |
|
"learning_rate": 3.4501044752312582e-06, |
|
"loss": 0.463, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.8827638285822164, |
|
"grad_norm": 0.22152245044708252, |
|
"learning_rate": 3.406798539427386e-06, |
|
"loss": 0.3662, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.883518973003587, |
|
"grad_norm": 0.2504233419895172, |
|
"learning_rate": 3.3637565247949588e-06, |
|
"loss": 0.4318, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8842741174249575, |
|
"grad_norm": 0.23805570602416992, |
|
"learning_rate": 3.3209786751399187e-06, |
|
"loss": 0.4156, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.8850292618463281, |
|
"grad_norm": 0.23133568465709686, |
|
"learning_rate": 3.2784652327718547e-06, |
|
"loss": 0.3695, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.8857844062676987, |
|
"grad_norm": 0.2318771332502365, |
|
"learning_rate": 3.2362164385026706e-06, |
|
"loss": 0.3824, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.8865395506890693, |
|
"grad_norm": 0.22176909446716309, |
|
"learning_rate": 3.194232531645219e-06, |
|
"loss": 0.3575, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.8872946951104399, |
|
"grad_norm": 0.24867790937423706, |
|
"learning_rate": 3.1525137500119207e-06, |
|
"loss": 0.4419, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.8880498395318105, |
|
"grad_norm": 0.252105176448822, |
|
"learning_rate": 3.111060329913401e-06, |
|
"loss": 0.3854, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.8888049839531811, |
|
"grad_norm": 0.24969127774238586, |
|
"learning_rate": 3.069872506157212e-06, |
|
"loss": 0.3825, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.8895601283745517, |
|
"grad_norm": 0.2607312500476837, |
|
"learning_rate": 3.0289505120464743e-06, |
|
"loss": 0.3986, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.8903152727959223, |
|
"grad_norm": 0.2574225664138794, |
|
"learning_rate": 2.9882945793785367e-06, |
|
"loss": 0.3998, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.8910704172172929, |
|
"grad_norm": 0.25807490944862366, |
|
"learning_rate": 2.947904938443663e-06, |
|
"loss": 0.4147, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8918255616386634, |
|
"grad_norm": 0.24891719222068787, |
|
"learning_rate": 2.9077818180237693e-06, |
|
"loss": 0.3648, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.892580706060034, |
|
"grad_norm": 0.27970343828201294, |
|
"learning_rate": 2.8679254453910785e-06, |
|
"loss": 0.4347, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.8933358504814045, |
|
"grad_norm": 0.26613345742225647, |
|
"learning_rate": 2.8283360463068785e-06, |
|
"loss": 0.3969, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.8940909949027751, |
|
"grad_norm": 0.3011374771595001, |
|
"learning_rate": 2.789013845020205e-06, |
|
"loss": 0.389, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.8948461393241457, |
|
"grad_norm": 0.26815304160118103, |
|
"learning_rate": 2.7499590642665774e-06, |
|
"loss": 0.3572, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.8956012837455163, |
|
"grad_norm": 0.27228736877441406, |
|
"learning_rate": 2.7111719252667647e-06, |
|
"loss": 0.3945, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.8963564281668869, |
|
"grad_norm": 0.28344476222991943, |
|
"learning_rate": 2.6726526477254987e-06, |
|
"loss": 0.4098, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.8971115725882575, |
|
"grad_norm": 0.3316936790943146, |
|
"learning_rate": 2.6344014498302704e-06, |
|
"loss": 0.4422, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.8978667170096281, |
|
"grad_norm": 0.3098110556602478, |
|
"learning_rate": 2.596418548250029e-06, |
|
"loss": 0.3844, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.8986218614309986, |
|
"grad_norm": 0.27956005930900574, |
|
"learning_rate": 2.5587041581340233e-06, |
|
"loss": 0.3017, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8993770058523692, |
|
"grad_norm": 0.3119887709617615, |
|
"learning_rate": 2.52125849311054e-06, |
|
"loss": 0.4119, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.9001321502737398, |
|
"grad_norm": 0.3407526910305023, |
|
"learning_rate": 2.4840817652857172e-06, |
|
"loss": 0.4231, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.9008872946951104, |
|
"grad_norm": 0.34797540307044983, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 0.394, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.901642439116481, |
|
"grad_norm": 0.3451668322086334, |
|
"learning_rate": 2.4105359620385847e-06, |
|
"loss": 0.3652, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.9023975835378516, |
|
"grad_norm": 0.3375682532787323, |
|
"learning_rate": 2.3741673032069756e-06, |
|
"loss": 0.3551, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.9031527279592222, |
|
"grad_norm": 0.36490514874458313, |
|
"learning_rate": 2.338068414753075e-06, |
|
"loss": 0.3753, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.9039078723805928, |
|
"grad_norm": 0.3733910322189331, |
|
"learning_rate": 2.3022395011543686e-06, |
|
"loss": 0.4036, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.9046630168019634, |
|
"grad_norm": 0.38239195942878723, |
|
"learning_rate": 2.2666807653591083e-06, |
|
"loss": 0.4228, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.905418161223334, |
|
"grad_norm": 0.46867436170578003, |
|
"learning_rate": 2.2313924087851656e-06, |
|
"loss": 0.4567, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.9061733056447046, |
|
"grad_norm": 0.6003281474113464, |
|
"learning_rate": 2.196374631318876e-06, |
|
"loss": 0.4376, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9069284500660751, |
|
"grad_norm": 0.1415417641401291, |
|
"learning_rate": 2.161627631313923e-06, |
|
"loss": 0.2603, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.9076835944874457, |
|
"grad_norm": 0.1586138904094696, |
|
"learning_rate": 2.1271516055901777e-06, |
|
"loss": 0.3263, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.9084387389088163, |
|
"grad_norm": 0.16962364315986633, |
|
"learning_rate": 2.0929467494326614e-06, |
|
"loss": 0.3294, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.9091938833301869, |
|
"grad_norm": 0.16859561204910278, |
|
"learning_rate": 2.0590132565903476e-06, |
|
"loss": 0.3619, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.9099490277515575, |
|
"grad_norm": 0.1871105134487152, |
|
"learning_rate": 2.0253513192751373e-06, |
|
"loss": 0.3679, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.9107041721729281, |
|
"grad_norm": 0.18284808099269867, |
|
"learning_rate": 1.9919611281607077e-06, |
|
"loss": 0.3423, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.9114593165942987, |
|
"grad_norm": 0.19052286446094513, |
|
"learning_rate": 1.9588428723814946e-06, |
|
"loss": 0.4149, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.9122144610156693, |
|
"grad_norm": 0.20341211557388306, |
|
"learning_rate": 1.925996739531577e-06, |
|
"loss": 0.3938, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.9129696054370399, |
|
"grad_norm": 0.19892559945583344, |
|
"learning_rate": 1.8934229156636452e-06, |
|
"loss": 0.361, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.9137247498584105, |
|
"grad_norm": 0.21719536185264587, |
|
"learning_rate": 1.8611215852879005e-06, |
|
"loss": 0.429, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.914479894279781, |
|
"grad_norm": 0.20925089716911316, |
|
"learning_rate": 1.8290929313710513e-06, |
|
"loss": 0.3961, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.9152350387011516, |
|
"grad_norm": 0.21349644660949707, |
|
"learning_rate": 1.797337135335292e-06, |
|
"loss": 0.3969, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.9159901831225222, |
|
"grad_norm": 0.2184101939201355, |
|
"learning_rate": 1.7658543770572189e-06, |
|
"loss": 0.3583, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.9167453275438928, |
|
"grad_norm": 0.21886181831359863, |
|
"learning_rate": 1.7346448348668443e-06, |
|
"loss": 0.42, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.9175004719652634, |
|
"grad_norm": 0.20769384503364563, |
|
"learning_rate": 1.70370868554659e-06, |
|
"loss": 0.3761, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9182556163866339, |
|
"grad_norm": 0.2125682830810547, |
|
"learning_rate": 1.6730461043302726e-06, |
|
"loss": 0.3481, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.9190107608080045, |
|
"grad_norm": 0.22738561034202576, |
|
"learning_rate": 1.6426572649021476e-06, |
|
"loss": 0.4114, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.9197659052293751, |
|
"grad_norm": 0.22645215690135956, |
|
"learning_rate": 1.612542339395845e-06, |
|
"loss": 0.4581, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.9205210496507457, |
|
"grad_norm": 0.2223154753446579, |
|
"learning_rate": 1.582701498393474e-06, |
|
"loss": 0.4042, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.9212761940721163, |
|
"grad_norm": 0.23326793313026428, |
|
"learning_rate": 1.5531349109246362e-06, |
|
"loss": 0.4387, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9220313384934868, |
|
"grad_norm": 0.23844638466835022, |
|
"learning_rate": 1.523842744465437e-06, |
|
"loss": 0.4143, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.9227864829148574, |
|
"grad_norm": 0.23629891872406006, |
|
"learning_rate": 1.4948251649375745e-06, |
|
"loss": 0.4301, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.923541627336228, |
|
"grad_norm": 0.23292282223701477, |
|
"learning_rate": 1.4660823367073751e-06, |
|
"loss": 0.389, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.9242967717575986, |
|
"grad_norm": 0.25617265701293945, |
|
"learning_rate": 1.437614422584882e-06, |
|
"loss": 0.4615, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.9250519161789692, |
|
"grad_norm": 0.23757751286029816, |
|
"learning_rate": 1.4094215838229176e-06, |
|
"loss": 0.3766, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.9258070606003398, |
|
"grad_norm": 0.2631858289241791, |
|
"learning_rate": 1.3815039801161721e-06, |
|
"loss": 0.4561, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.9265622050217104, |
|
"grad_norm": 0.2697192132472992, |
|
"learning_rate": 1.3538617696003064e-06, |
|
"loss": 0.4365, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.927317349443081, |
|
"grad_norm": 0.23894578218460083, |
|
"learning_rate": 1.3264951088510502e-06, |
|
"loss": 0.3623, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.9280724938644516, |
|
"grad_norm": 0.27340683341026306, |
|
"learning_rate": 1.2994041528833266e-06, |
|
"loss": 0.4602, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.9288276382858222, |
|
"grad_norm": 0.2491341382265091, |
|
"learning_rate": 1.2725890551503472e-06, |
|
"loss": 0.3397, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9295827827071927, |
|
"grad_norm": 0.26839953660964966, |
|
"learning_rate": 1.2460499675427729e-06, |
|
"loss": 0.4095, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.9303379271285633, |
|
"grad_norm": 0.28354331851005554, |
|
"learning_rate": 1.2197870403878375e-06, |
|
"loss": 0.4034, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.9310930715499339, |
|
"grad_norm": 0.2778373062610626, |
|
"learning_rate": 1.1938004224484988e-06, |
|
"loss": 0.3612, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.9318482159713045, |
|
"grad_norm": 0.28623783588409424, |
|
"learning_rate": 1.1680902609225941e-06, |
|
"loss": 0.3763, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.9326033603926751, |
|
"grad_norm": 0.2844613790512085, |
|
"learning_rate": 1.1426567014420297e-06, |
|
"loss": 0.4077, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.9333585048140457, |
|
"grad_norm": 0.2865941524505615, |
|
"learning_rate": 1.1174998880718935e-06, |
|
"loss": 0.3973, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.9341136492354163, |
|
"grad_norm": 0.2908569276332855, |
|
"learning_rate": 1.0926199633097157e-06, |
|
"loss": 0.3688, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.9348687936567869, |
|
"grad_norm": 0.3032941520214081, |
|
"learning_rate": 1.0680170680846259e-06, |
|
"loss": 0.3971, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.9356239380781575, |
|
"grad_norm": 0.29943525791168213, |
|
"learning_rate": 1.0436913417565365e-06, |
|
"loss": 0.4151, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.9363790824995281, |
|
"grad_norm": 0.3232915997505188, |
|
"learning_rate": 1.0196429221153824e-06, |
|
"loss": 0.336, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9371342269208986, |
|
"grad_norm": 0.3355953097343445, |
|
"learning_rate": 9.958719453803278e-07, |
|
"loss": 0.4568, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.9378893713422692, |
|
"grad_norm": 0.35610586404800415, |
|
"learning_rate": 9.723785461990099e-07, |
|
"loss": 0.4446, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.9386445157636398, |
|
"grad_norm": 0.347074031829834, |
|
"learning_rate": 9.491628576467515e-07, |
|
"loss": 0.4065, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.9393996601850104, |
|
"grad_norm": 0.37643951177597046, |
|
"learning_rate": 9.26225011225812e-07, |
|
"loss": 0.3945, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.940154804606381, |
|
"grad_norm": 0.34759992361068726, |
|
"learning_rate": 9.035651368646648e-07, |
|
"loss": 0.3654, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.9409099490277516, |
|
"grad_norm": 0.3707546889781952, |
|
"learning_rate": 8.811833629172428e-07, |
|
"loss": 0.2976, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.9416650934491222, |
|
"grad_norm": 0.3839170038700104, |
|
"learning_rate": 8.590798161622227e-07, |
|
"loss": 0.4161, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.9424202378704928, |
|
"grad_norm": 0.3534461557865143, |
|
"learning_rate": 8.372546218022747e-07, |
|
"loss": 0.3231, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.9431753822918634, |
|
"grad_norm": 0.4794304370880127, |
|
"learning_rate": 8.157079034633974e-07, |
|
"loss": 0.4584, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.9439305267132339, |
|
"grad_norm": 0.5317772030830383, |
|
"learning_rate": 7.944397831941952e-07, |
|
"loss": 0.4143, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9446856711346044, |
|
"grad_norm": 0.16856196522712708, |
|
"learning_rate": 7.734503814651906e-07, |
|
"loss": 0.3159, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.945440815555975, |
|
"grad_norm": 0.16190548241138458, |
|
"learning_rate": 7.527398171681354e-07, |
|
"loss": 0.3332, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.9461959599773456, |
|
"grad_norm": 0.16370098292827606, |
|
"learning_rate": 7.323082076153509e-07, |
|
"loss": 0.331, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.9469511043987162, |
|
"grad_norm": 0.16921286284923553, |
|
"learning_rate": 7.12155668539044e-07, |
|
"loss": 0.2992, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.9477062488200868, |
|
"grad_norm": 0.17997263371944427, |
|
"learning_rate": 6.922823140906753e-07, |
|
"loss": 0.3884, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.9484613932414574, |
|
"grad_norm": 0.18398523330688477, |
|
"learning_rate": 6.726882568402871e-07, |
|
"loss": 0.3779, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.949216537662828, |
|
"grad_norm": 0.19981886446475983, |
|
"learning_rate": 6.533736077758868e-07, |
|
"loss": 0.3687, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.9499716820841986, |
|
"grad_norm": 0.19422514736652374, |
|
"learning_rate": 6.343384763028148e-07, |
|
"loss": 0.3624, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.9507268265055692, |
|
"grad_norm": 0.19577431678771973, |
|
"learning_rate": 6.15582970243117e-07, |
|
"loss": 0.4185, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.9514819709269398, |
|
"grad_norm": 0.2101883441209793, |
|
"learning_rate": 5.971071958349228e-07, |
|
"loss": 0.3948, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9522371153483103, |
|
"grad_norm": 0.19973735511302948, |
|
"learning_rate": 5.78911257731879e-07, |
|
"loss": 0.4095, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.9529922597696809, |
|
"grad_norm": 0.20807954668998718, |
|
"learning_rate": 5.609952590025224e-07, |
|
"loss": 0.3892, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.9537474041910515, |
|
"grad_norm": 0.2160942405462265, |
|
"learning_rate": 5.4335930112972e-07, |
|
"loss": 0.3782, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.9545025486124221, |
|
"grad_norm": 0.1981229931116104, |
|
"learning_rate": 5.260034840100736e-07, |
|
"loss": 0.3768, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.9552576930337927, |
|
"grad_norm": 0.19447720050811768, |
|
"learning_rate": 5.089279059533658e-07, |
|
"loss": 0.3527, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.9560128374551633, |
|
"grad_norm": 0.21603922545909882, |
|
"learning_rate": 4.92132663681999e-07, |
|
"loss": 0.3792, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.9567679818765339, |
|
"grad_norm": 0.22324316203594208, |
|
"learning_rate": 4.756178523304622e-07, |
|
"loss": 0.413, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.9575231262979045, |
|
"grad_norm": 0.21948575973510742, |
|
"learning_rate": 4.593835654447709e-07, |
|
"loss": 0.4481, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.9582782707192751, |
|
"grad_norm": 0.21984027326107025, |
|
"learning_rate": 4.434298949819449e-07, |
|
"loss": 0.3766, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.9590334151406457, |
|
"grad_norm": 0.24279214441776276, |
|
"learning_rate": 4.277569313094809e-07, |
|
"loss": 0.3761, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9597885595620163, |
|
"grad_norm": 0.23110735416412354, |
|
"learning_rate": 4.123647632048644e-07, |
|
"loss": 0.4528, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.9605437039833868, |
|
"grad_norm": 0.2304868847131729, |
|
"learning_rate": 3.972534778550474e-07, |
|
"loss": 0.3633, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.9612988484047574, |
|
"grad_norm": 0.2233697474002838, |
|
"learning_rate": 3.824231608559492e-07, |
|
"loss": 0.397, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.962053992826128, |
|
"grad_norm": 0.23600885272026062, |
|
"learning_rate": 3.6787389621198987e-07, |
|
"loss": 0.4136, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.9628091372474986, |
|
"grad_norm": 0.23501838743686676, |
|
"learning_rate": 3.536057663355852e-07, |
|
"loss": 0.323, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.9635642816688692, |
|
"grad_norm": 0.2491409033536911, |
|
"learning_rate": 3.3961885204673026e-07, |
|
"loss": 0.4763, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.9643194260902398, |
|
"grad_norm": 0.2430865615606308, |
|
"learning_rate": 3.2591323257248893e-07, |
|
"loss": 0.3949, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.9650745705116104, |
|
"grad_norm": 0.2593831717967987, |
|
"learning_rate": 3.124889855465718e-07, |
|
"loss": 0.3738, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.965829714932981, |
|
"grad_norm": 0.24975533783435822, |
|
"learning_rate": 2.993461870088921e-07, |
|
"loss": 0.3614, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.9665848593543516, |
|
"grad_norm": 0.25487789511680603, |
|
"learning_rate": 2.8648491140513266e-07, |
|
"loss": 0.46, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9673400037757222, |
|
"grad_norm": 0.2505668103694916, |
|
"learning_rate": 2.7390523158633554e-07, |
|
"loss": 0.3317, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.9680951481970927, |
|
"grad_norm": 0.261342853307724, |
|
"learning_rate": 2.616072188084628e-07, |
|
"loss": 0.3857, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.9688502926184632, |
|
"grad_norm": 0.2685639262199402, |
|
"learning_rate": 2.4959094273201977e-07, |
|
"loss": 0.3631, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.9696054370398338, |
|
"grad_norm": 0.26906096935272217, |
|
"learning_rate": 2.378564714216547e-07, |
|
"loss": 0.4212, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.9703605814612044, |
|
"grad_norm": 0.28708136081695557, |
|
"learning_rate": 2.2640387134577058e-07, |
|
"loss": 0.5326, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.971115725882575, |
|
"grad_norm": 0.2781200706958771, |
|
"learning_rate": 2.1523320737613095e-07, |
|
"loss": 0.3952, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.9718708703039456, |
|
"grad_norm": 0.2814629077911377, |
|
"learning_rate": 2.0434454278752123e-07, |
|
"loss": 0.3578, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.9726260147253162, |
|
"grad_norm": 0.2876596450805664, |
|
"learning_rate": 1.937379392573768e-07, |
|
"loss": 0.3978, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.9733811591466868, |
|
"grad_norm": 0.3008500635623932, |
|
"learning_rate": 1.8341345686543332e-07, |
|
"loss": 0.3936, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.9741363035680574, |
|
"grad_norm": 0.32256075739860535, |
|
"learning_rate": 1.7337115409338244e-07, |
|
"loss": 0.4423, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.974891447989428, |
|
"grad_norm": 0.3017309010028839, |
|
"learning_rate": 1.6361108782456113e-07, |
|
"loss": 0.4027, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.9756465924107985, |
|
"grad_norm": 0.3096626400947571, |
|
"learning_rate": 1.5413331334360182e-07, |
|
"loss": 0.3962, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.9764017368321691, |
|
"grad_norm": 0.3427668511867523, |
|
"learning_rate": 1.449378843361271e-07, |
|
"loss": 0.4388, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.9771568812535397, |
|
"grad_norm": 0.3453672528266907, |
|
"learning_rate": 1.360248528884611e-07, |
|
"loss": 0.4638, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.9779120256749103, |
|
"grad_norm": 0.33292150497436523, |
|
"learning_rate": 1.2739426948732424e-07, |
|
"loss": 0.3513, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.9786671700962809, |
|
"grad_norm": 0.34688884019851685, |
|
"learning_rate": 1.190461830195333e-07, |
|
"loss": 0.3666, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.9794223145176515, |
|
"grad_norm": 0.41783586144447327, |
|
"learning_rate": 1.109806407717462e-07, |
|
"loss": 0.4518, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.9801774589390221, |
|
"grad_norm": 0.36416101455688477, |
|
"learning_rate": 1.0319768843018996e-07, |
|
"loss": 0.3785, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.9809326033603927, |
|
"grad_norm": 0.4353952407836914, |
|
"learning_rate": 9.56973700803887e-08, |
|
"loss": 0.4347, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.9816877477817633, |
|
"grad_norm": 0.5201441645622253, |
|
"learning_rate": 8.847972820693051e-08, |
|
"loss": 0.3487, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9824428922031339, |
|
"grad_norm": 0.14752991497516632, |
|
"learning_rate": 8.15448036932176e-08, |
|
"loss": 0.3271, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.9831980366245044, |
|
"grad_norm": 0.1663774847984314, |
|
"learning_rate": 7.489263582122763e-08, |
|
"loss": 0.3467, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.983953181045875, |
|
"grad_norm": 0.18630105257034302, |
|
"learning_rate": 6.852326227130834e-08, |
|
"loss": 0.4198, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 0.9847083254672456, |
|
"grad_norm": 0.18574944138526917, |
|
"learning_rate": 6.243671912194993e-08, |
|
"loss": 0.393, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.9854634698886162, |
|
"grad_norm": 0.20900239050388336, |
|
"learning_rate": 5.663304084960186e-08, |
|
"loss": 0.3765, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.9862186143099868, |
|
"grad_norm": 0.20655685663223267, |
|
"learning_rate": 5.111226032843974e-08, |
|
"loss": 0.4079, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.9869737587313574, |
|
"grad_norm": 0.22022481262683868, |
|
"learning_rate": 4.5874408830215434e-08, |
|
"loss": 0.4319, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 0.987728903152728, |
|
"grad_norm": 0.22208468616008759, |
|
"learning_rate": 4.0919516024057195e-08, |
|
"loss": 0.4083, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.9884840475740986, |
|
"grad_norm": 0.24290700256824493, |
|
"learning_rate": 3.624760997631982e-08, |
|
"loss": 0.4351, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.9892391919954692, |
|
"grad_norm": 0.22663183510303497, |
|
"learning_rate": 3.185871715041255e-08, |
|
"loss": 0.3828, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9899943364168398, |
|
"grad_norm": 0.25170832872390747, |
|
"learning_rate": 2.7752862406654757e-08, |
|
"loss": 0.4401, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 0.9907494808382104, |
|
"grad_norm": 0.2576093077659607, |
|
"learning_rate": 2.393006900212047e-08, |
|
"loss": 0.4513, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.9915046252595809, |
|
"grad_norm": 0.2478175014257431, |
|
"learning_rate": 2.0390358590538504e-08, |
|
"loss": 0.3902, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 0.9922597696809515, |
|
"grad_norm": 0.26220056414604187, |
|
"learning_rate": 1.7133751222137007e-08, |
|
"loss": 0.4562, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.9930149141023221, |
|
"grad_norm": 0.272030234336853, |
|
"learning_rate": 1.4160265343549083e-08, |
|
"loss": 0.4128, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.9937700585236926, |
|
"grad_norm": 0.282482385635376, |
|
"learning_rate": 1.1469917797696239e-08, |
|
"loss": 0.4678, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.9945252029450632, |
|
"grad_norm": 0.29758918285369873, |
|
"learning_rate": 9.06272382371065e-09, |
|
"loss": 0.4549, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 0.9952803473664338, |
|
"grad_norm": 0.2753942012786865, |
|
"learning_rate": 6.9386970568297014e-09, |
|
"loss": 0.3572, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.9960354917878044, |
|
"grad_norm": 0.3179655075073242, |
|
"learning_rate": 5.097849528334919e-09, |
|
"loss": 0.4328, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 0.996790636209175, |
|
"grad_norm": 0.30263400077819824, |
|
"learning_rate": 3.540191665457604e-09, |
|
"loss": 0.3691, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9975457806305456, |
|
"grad_norm": 0.3153160810470581, |
|
"learning_rate": 2.265732291356626e-09, |
|
"loss": 0.3449, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 0.9983009250519161, |
|
"grad_norm": 0.33442163467407227, |
|
"learning_rate": 1.2744786250407092e-09, |
|
"loss": 0.3951, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.9990560694732867, |
|
"grad_norm": 0.3670744299888611, |
|
"learning_rate": 5.664362813406765e-10, |
|
"loss": 0.3497, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.9998112138946573, |
|
"grad_norm": 0.5614824891090393, |
|
"learning_rate": 1.416092708650396e-10, |
|
"loss": 0.4963, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.9998112138946573, |
|
"eval_loss": 0.3858806788921356, |
|
"eval_runtime": 92.8566, |
|
"eval_samples_per_second": 12.008, |
|
"eval_steps_per_second": 3.005, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 1.000566358316028, |
|
"grad_norm": 3.6732022762298584, |
|
"learning_rate": 0.0, |
|
"loss": 1.1897, |
|
"step": 1325 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1325, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 332, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7073098196516864e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|