|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9055484126199189,
  "eval_steps": 500,
  "global_step": 27468,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0010219892526291499, "grad_norm": 52.572940826416016, "learning_rate": 1.0157273918741808e-06, "loss": 9.6136, "step": 31 },
    { "epoch": 0.0020439785052582997, "grad_norm": 26.900487899780273, "learning_rate": 2.0314547837483616e-06, "loss": 7.35, "step": 62 },
    { "epoch": 0.003065967757887449, "grad_norm": 15.413382530212402, "learning_rate": 3.0471821756225426e-06, "loss": 5.872, "step": 93 },
    { "epoch": 0.0040879570105165994, "grad_norm": 9.475220680236816, "learning_rate": 4.062909567496723e-06, "loss": 4.8148, "step": 124 },
    { "epoch": 0.005109946263145749, "grad_norm": 16.33511734008789, "learning_rate": 5.078636959370905e-06, "loss": 4.2357, "step": 155 },
    { "epoch": 0.006131935515774898, "grad_norm": 17.79788589477539, "learning_rate": 6.094364351245085e-06, "loss": 3.9005, "step": 186 },
    { "epoch": 0.007153924768404049, "grad_norm": 7.385252952575684, "learning_rate": 7.110091743119267e-06, "loss": 3.6349, "step": 217 },
    { "epoch": 0.008175914021033199, "grad_norm": 7.6574835777282715, "learning_rate": 8.125819134993446e-06, "loss": 3.4197, "step": 248 },
    { "epoch": 0.009197903273662348, "grad_norm": 10.027671813964844, "learning_rate": 9.141546526867629e-06, "loss": 3.2883, "step": 279 },
    { "epoch": 0.010219892526291498, "grad_norm": 7.447092056274414, "learning_rate": 1.015727391874181e-05, "loss": 3.1606, "step": 310 },
    { "epoch": 0.011241881778920647, "grad_norm": 8.796594619750977, "learning_rate": 1.117300131061599e-05, "loss": 3.0724, "step": 341 },
    { "epoch": 0.012263871031549797, "grad_norm": 8.47999382019043, "learning_rate": 1.218872870249017e-05, "loss": 3.0249, "step": 372 },
    { "epoch": 0.013285860284178948, "grad_norm": 11.703062057495117, "learning_rate": 1.3204456094364351e-05, "loss": 2.9383, "step": 403 },
    { "epoch": 0.014307849536808097, "grad_norm": 7.270478248596191, "learning_rate": 1.4220183486238533e-05, "loss": 2.8753, "step": 434 },
    { "epoch": 0.015329838789437247, "grad_norm": 5.699134349822998, "learning_rate": 1.5235910878112714e-05, "loss": 2.8226, "step": 465 },
    { "epoch": 0.016351828042066398, "grad_norm": 6.3912529945373535, "learning_rate": 1.6251638269986893e-05, "loss": 2.7754, "step": 496 },
    { "epoch": 0.017373817294695545, "grad_norm": 8.240257263183594, "learning_rate": 1.7267365661861077e-05, "loss": 2.7351, "step": 527 },
    { "epoch": 0.018395806547324697, "grad_norm": 9.017704010009766, "learning_rate": 1.8283093053735257e-05, "loss": 2.7084, "step": 558 },
    { "epoch": 0.019417795799953844, "grad_norm": 6.017426013946533, "learning_rate": 1.9298820445609438e-05, "loss": 2.6741, "step": 589 },
    { "epoch": 0.020439785052582995, "grad_norm": 3.789416790008545, "learning_rate": 2.031454783748362e-05, "loss": 2.6605, "step": 620 },
    { "epoch": 0.021461774305212147, "grad_norm": 4.747366428375244, "learning_rate": 2.13302752293578e-05, "loss": 2.6245, "step": 651 },
    { "epoch": 0.022483763557841294, "grad_norm": 3.8344268798828125, "learning_rate": 2.234600262123198e-05, "loss": 2.5791, "step": 682 },
    { "epoch": 0.023505752810470446, "grad_norm": 4.5539116859436035, "learning_rate": 2.336173001310616e-05, "loss": 2.5648, "step": 713 },
    { "epoch": 0.024527742063099593, "grad_norm": 4.216089248657227, "learning_rate": 2.437745740498034e-05, "loss": 2.5125, "step": 744 },
    { "epoch": 0.025549731315728744, "grad_norm": 4.225554466247559, "learning_rate": 2.5393184796854525e-05, "loss": 2.4973, "step": 775 },
    { "epoch": 0.026571720568357896, "grad_norm": 3.357351541519165, "learning_rate": 2.6408912188728702e-05, "loss": 2.4532, "step": 806 },
    { "epoch": 0.027593709820987043, "grad_norm": 4.109170436859131, "learning_rate": 2.7424639580602886e-05, "loss": 2.4504, "step": 837 },
    { "epoch": 0.028615699073616194, "grad_norm": 17.247940063476562, "learning_rate": 2.8440366972477066e-05, "loss": 2.4226, "step": 868 },
    { "epoch": 0.029637688326245342, "grad_norm": 3.2851662635803223, "learning_rate": 2.9456094364351244e-05, "loss": 2.4053, "step": 899 },
    { "epoch": 0.030659677578874493, "grad_norm": 3.3796141147613525, "learning_rate": 3.0471821756225428e-05, "loss": 2.3977, "step": 930 },
    { "epoch": 0.03168166683150364, "grad_norm": 3.0984764099121094, "learning_rate": 3.148754914809961e-05, "loss": 2.3622, "step": 961 },
    { "epoch": 0.032703656084132796, "grad_norm": 2.5464305877685547, "learning_rate": 3.2503276539973785e-05, "loss": 2.348, "step": 992 },
    { "epoch": 0.03372564533676194, "grad_norm": 2.848860263824463, "learning_rate": 3.351900393184797e-05, "loss": 2.3208, "step": 1023 },
    { "epoch": 0.03474763458939109, "grad_norm": 2.5870606899261475, "learning_rate": 3.453473132372215e-05, "loss": 2.3023, "step": 1054 },
    { "epoch": 0.03576962384202024, "grad_norm": 2.6556804180145264, "learning_rate": 3.555045871559633e-05, "loss": 2.2698, "step": 1085 },
    { "epoch": 0.03679161309464939, "grad_norm": 2.779650926589966, "learning_rate": 3.6566186107470514e-05, "loss": 2.2614, "step": 1116 },
    { "epoch": 0.03781360234727854, "grad_norm": 2.624191999435425, "learning_rate": 3.7581913499344695e-05, "loss": 2.2422, "step": 1147 },
    { "epoch": 0.03883559159990769, "grad_norm": 2.5255484580993652, "learning_rate": 3.8597640891218876e-05, "loss": 2.2419, "step": 1178 },
    { "epoch": 0.03985758085253684, "grad_norm": 2.3195745944976807, "learning_rate": 3.9613368283093056e-05, "loss": 2.2468, "step": 1209 },
    { "epoch": 0.04087957010516599, "grad_norm": 2.4712162017822266, "learning_rate": 4.062909567496724e-05, "loss": 2.2072, "step": 1240 },
    { "epoch": 0.04190155935779514, "grad_norm": 4.451296806335449, "learning_rate": 4.164482306684142e-05, "loss": 2.2136, "step": 1271 },
    { "epoch": 0.04292354861042429, "grad_norm": 2.4180150032043457, "learning_rate": 4.26605504587156e-05, "loss": 2.1843, "step": 1302 },
    { "epoch": 0.04394553786305344, "grad_norm": 2.395840883255005, "learning_rate": 4.367627785058978e-05, "loss": 2.1447, "step": 1333 },
    { "epoch": 0.04496752711568259, "grad_norm": 3.072429895401001, "learning_rate": 4.469200524246396e-05, "loss": 2.1405, "step": 1364 },
    { "epoch": 0.045989516368311736, "grad_norm": 1.9566724300384521, "learning_rate": 4.570773263433814e-05, "loss": 2.1321, "step": 1395 },
    { "epoch": 0.04701150562094089, "grad_norm": 23.60377311706543, "learning_rate": 4.672346002621232e-05, "loss": 2.1289, "step": 1426 },
    { "epoch": 0.04803349487357004, "grad_norm": 2.117250919342041, "learning_rate": 4.77391874180865e-05, "loss": 2.1239, "step": 1457 },
    { "epoch": 0.049055484126199186, "grad_norm": 2.361362934112549, "learning_rate": 4.875491480996068e-05, "loss": 2.0838, "step": 1488 },
    { "epoch": 0.05007747337882834, "grad_norm": 2.1867992877960205, "learning_rate": 4.977064220183487e-05, "loss": 2.0622, "step": 1519 },
    { "epoch": 0.05109946263145749, "grad_norm": 1.9326456785202026, "learning_rate": 4.9999915451558777e-05, "loss": 2.0441, "step": 1550 },
    { "epoch": 0.052121451884086636, "grad_norm": 2.076503038406372, "learning_rate": 4.999955597496219e-05, "loss": 2.0503, "step": 1581 },
    { "epoch": 0.05314344113671579, "grad_norm": 3.051212787628174, "learning_rate": 4.9998914381774255e-05, "loss": 2.0357, "step": 1612 },
    { "epoch": 0.05416543038934494, "grad_norm": 1.7924102544784546, "learning_rate": 4.999799067923527e-05, "loss": 2.0326, "step": 1643 },
    { "epoch": 0.055187419641974086, "grad_norm": 1.8103365898132324, "learning_rate": 4.999678487776908e-05, "loss": 2.0023, "step": 1674 },
    { "epoch": 0.056209408894603234, "grad_norm": 1.913725733757019, "learning_rate": 4.9995296990983006e-05, "loss": 2.0035, "step": 1705 },
    { "epoch": 0.05723139814723239, "grad_norm": 1.7415859699249268, "learning_rate": 4.999352703566763e-05, "loss": 2.0063, "step": 1736 },
    { "epoch": 0.058253387399861536, "grad_norm": 1.8145827054977417, "learning_rate": 4.999147503179668e-05, "loss": 1.9731, "step": 1767 },
    { "epoch": 0.059275376652490684, "grad_norm": 1.8761731386184692, "learning_rate": 4.998914100252672e-05, "loss": 1.9883, "step": 1798 },
    { "epoch": 0.06029736590511984, "grad_norm": 6.824073791503906, "learning_rate": 4.998652497419696e-05, "loss": 1.9713, "step": 1829 },
    { "epoch": 0.061319355157748986, "grad_norm": 1.7231906652450562, "learning_rate": 4.9983626976328927e-05, "loss": 1.9492, "step": 1860 },
    { "epoch": 0.062341344410378134, "grad_norm": 1.6888827085494995, "learning_rate": 4.998044704162613e-05, "loss": 1.9449, "step": 1891 },
    { "epoch": 0.06336333366300728, "grad_norm": 1.5924124717712402, "learning_rate": 4.9976985205973705e-05, "loss": 1.938, "step": 1922 },
    { "epoch": 0.06438532291563644, "grad_norm": 1.7029409408569336, "learning_rate": 4.997324150843799e-05, "loss": 1.9285, "step": 1953 },
    { "epoch": 0.06540731216826559, "grad_norm": 1.5554330348968506, "learning_rate": 4.99692159912661e-05, "loss": 1.9091, "step": 1984 },
    { "epoch": 0.06642930142089473, "grad_norm": 1.5639121532440186, "learning_rate": 4.996490869988546e-05, "loss": 1.9105, "step": 2015 },
    { "epoch": 0.06745129067352389, "grad_norm": 1.6262747049331665, "learning_rate": 4.996031968290326e-05, "loss": 1.9164, "step": 2046 },
    { "epoch": 0.06847327992615304, "grad_norm": 1.5408495664596558, "learning_rate": 4.995544899210594e-05, "loss": 1.8728, "step": 2077 },
    { "epoch": 0.06949526917878218, "grad_norm": 1.5183970928192139, "learning_rate": 4.9950296682458583e-05, "loss": 1.8854, "step": 2108 },
    { "epoch": 0.07051725843141134, "grad_norm": 1.9362810850143433, "learning_rate": 4.994486281210429e-05, "loss": 1.8811, "step": 2139 },
    { "epoch": 0.07153924768404048, "grad_norm": 1.5646640062332153, "learning_rate": 4.9939147442363566e-05, "loss": 1.8744, "step": 2170 },
    { "epoch": 0.07256123693666963, "grad_norm": 1.5929124355316162, "learning_rate": 4.9933150637733574e-05, "loss": 1.867, "step": 2201 },
    { "epoch": 0.07358322618929879, "grad_norm": 1.7689995765686035, "learning_rate": 4.992687246588743e-05, "loss": 1.8659, "step": 2232 },
    { "epoch": 0.07460521544192793, "grad_norm": 1.375406265258789, "learning_rate": 4.992031299767347e-05, "loss": 1.8477, "step": 2263 },
    { "epoch": 0.07562720469455708, "grad_norm": 1.3596042394638062, "learning_rate": 4.9913472307114386e-05, "loss": 1.8498, "step": 2294 },
    { "epoch": 0.07664919394718624, "grad_norm": 1.3918544054031372, "learning_rate": 4.9906350471406446e-05, "loss": 1.8538, "step": 2325 },
    { "epoch": 0.07767118319981538, "grad_norm": 1.4614112377166748, "learning_rate": 4.989894757091861e-05, "loss": 1.8359, "step": 2356 },
    { "epoch": 0.07869317245244453, "grad_norm": 1.563186764717102, "learning_rate": 4.989126368919158e-05, "loss": 1.821, "step": 2387 },
    { "epoch": 0.07971516170507369, "grad_norm": 1.3217226266860962, "learning_rate": 4.988329891293693e-05, "loss": 1.8345, "step": 2418 },
    { "epoch": 0.08073715095770283, "grad_norm": 1.3906276226043701, "learning_rate": 4.987505333203608e-05, "loss": 1.8313, "step": 2449 },
    { "epoch": 0.08175914021033198, "grad_norm": 1.2736890316009521, "learning_rate": 4.9866527039539276e-05, "loss": 1.8234, "step": 2480 },
    { "epoch": 0.08278112946296114, "grad_norm": 1.3050850629806519, "learning_rate": 4.9857720131664594e-05, "loss": 1.8181, "step": 2511 },
    { "epoch": 0.08380311871559028, "grad_norm": 1.3847019672393799, "learning_rate": 4.9848632707796773e-05, "loss": 1.8128, "step": 2542 },
    { "epoch": 0.08482510796821943, "grad_norm": 1.3893433809280396, "learning_rate": 4.9839264870486155e-05, "loss": 1.7871, "step": 2573 },
    { "epoch": 0.08584709722084859, "grad_norm": 1.3469080924987793, "learning_rate": 4.9829616725447526e-05, "loss": 1.8212, "step": 2604 },
    { "epoch": 0.08686908647347773, "grad_norm": 1.269865870475769, "learning_rate": 4.981968838155888e-05, "loss": 1.7868, "step": 2635 },
    { "epoch": 0.08789107572610688, "grad_norm": 1.2972242832183838, "learning_rate": 4.980947995086024e-05, "loss": 1.7987, "step": 2666 },
    { "epoch": 0.08891306497873604, "grad_norm": 1.3488340377807617, "learning_rate": 4.979899154855234e-05, "loss": 1.7859, "step": 2697 },
    { "epoch": 0.08993505423136518, "grad_norm": 1.3487133979797363, "learning_rate": 4.9788223292995386e-05, "loss": 1.7851, "step": 2728 },
    { "epoch": 0.09095704348399433, "grad_norm": 1.2412410974502563, "learning_rate": 4.977717530570768e-05, "loss": 1.7802, "step": 2759 },
    { "epoch": 0.09197903273662347, "grad_norm": 1.3017562627792358, "learning_rate": 4.976584771136425e-05, "loss": 1.775, "step": 2790 },
    { "epoch": 0.09300102198925263, "grad_norm": 1.2923076152801514, "learning_rate": 4.975424063779547e-05, "loss": 1.76, "step": 2821 },
    { "epoch": 0.09402301124188178, "grad_norm": 1.2735223770141602, "learning_rate": 4.974235421598557e-05, "loss": 1.7604, "step": 2852 },
    { "epoch": 0.09504500049451092, "grad_norm": 1.303673505783081, "learning_rate": 4.973018858007122e-05, "loss": 1.7685, "step": 2883 },
    { "epoch": 0.09606698974714008, "grad_norm": 1.5336410999298096, "learning_rate": 4.9717743867339963e-05, "loss": 1.7682, "step": 2914 },
    { "epoch": 0.09708897899976923, "grad_norm": 5.014227867126465, "learning_rate": 4.9705020218228695e-05, "loss": 1.7617, "step": 2945 },
    { "epoch": 0.09811096825239837, "grad_norm": 1.2603938579559326, "learning_rate": 4.969201777632205e-05, "loss": 1.7621, "step": 2976 },
    { "epoch": 0.09913295750502753, "grad_norm": 1.32491934299469, "learning_rate": 4.9678736688350846e-05, "loss": 1.7411, "step": 3007 },
    { "epoch": 0.10015494675765668, "grad_norm": 1.206735372543335, "learning_rate": 4.966517710419033e-05, "loss": 1.7262, "step": 3038 },
    { "epoch": 0.10117693601028582, "grad_norm": 1.254231572151184, "learning_rate": 4.965133917685858e-05, "loss": 1.7454, "step": 3069 },
    { "epoch": 0.10219892526291498, "grad_norm": 1.3085408210754395, "learning_rate": 4.9637223062514714e-05, "loss": 1.7517, "step": 3100 },
    { "epoch": 0.10322091451554413, "grad_norm": 1.3184605836868286, "learning_rate": 4.962282892045718e-05, "loss": 1.7542, "step": 3131 },
    { "epoch": 0.10424290376817327, "grad_norm": 1.2284983396530151, "learning_rate": 4.9608156913121904e-05, "loss": 1.738, "step": 3162 },
    { "epoch": 0.10526489302080243, "grad_norm": 1.3870880603790283, "learning_rate": 4.959320720608049e-05, "loss": 1.7358, "step": 3193 },
    { "epoch": 0.10628688227343158, "grad_norm": 1.1335322856903076, "learning_rate": 4.9577979968038354e-05, "loss": 1.742, "step": 3224 },
    { "epoch": 0.10730887152606072, "grad_norm": 1.1794465780258179, "learning_rate": 4.956247537083282e-05, "loss": 1.7357, "step": 3255 },
    { "epoch": 0.10833086077868988, "grad_norm": 1.1701149940490723, "learning_rate": 4.9546693589431145e-05, "loss": 1.7289, "step": 3286 },
    { "epoch": 0.10935285003131903, "grad_norm": 1.169094443321228, "learning_rate": 4.9530634801928595e-05, "loss": 1.7238, "step": 3317 },
    { "epoch": 0.11037483928394817, "grad_norm": 1.2735379934310913, "learning_rate": 4.9514299189546395e-05, "loss": 1.7275, "step": 3348 },
    { "epoch": 0.11139682853657733, "grad_norm": 1.2806981801986694, "learning_rate": 4.949768693662973e-05, "loss": 1.7135, "step": 3379 },
    { "epoch": 0.11241881778920647, "grad_norm": 1.208024263381958, "learning_rate": 4.948079823064559e-05, "loss": 1.7251, "step": 3410 },
    { "epoch": 0.11344080704183562, "grad_norm": 1.275516152381897, "learning_rate": 4.946363326218074e-05, "loss": 1.7259, "step": 3441 },
    { "epoch": 0.11446279629446478, "grad_norm": 1.1761465072631836, "learning_rate": 4.9446192224939525e-05, "loss": 1.7024, "step": 3472 },
    { "epoch": 0.11548478554709392, "grad_norm": 1.1216075420379639, "learning_rate": 4.942847531574167e-05, "loss": 1.6849, "step": 3503 },
    { "epoch": 0.11650677479972307, "grad_norm": 1.1286563873291016, "learning_rate": 4.941048273452008e-05, "loss": 1.7004, "step": 3534 },
    { "epoch": 0.11752876405235223, "grad_norm": 1.1531614065170288, "learning_rate": 4.9392214684318605e-05, "loss": 1.7101, "step": 3565 },
    { "epoch": 0.11855075330498137, "grad_norm": 1.143051266670227, "learning_rate": 4.93736713712897e-05, "loss": 1.7127, "step": 3596 },
    { "epoch": 0.11957274255761052, "grad_norm": 1.1483272314071655, "learning_rate": 4.9354853004692124e-05, "loss": 1.6968, "step": 3627 },
    { "epoch": 0.12059473181023968, "grad_norm": 1.1267555952072144, "learning_rate": 4.93357597968886e-05, "loss": 1.7023, "step": 3658 },
    { "epoch": 0.12161672106286882, "grad_norm": 1.1552249193191528, "learning_rate": 4.931639196334338e-05, "loss": 1.7171, "step": 3689 },
    { "epoch": 0.12263871031549797, "grad_norm": 1.1388061046600342, "learning_rate": 4.9296749722619826e-05, "loss": 1.7061, "step": 3720 },
    { "epoch": 0.12366069956812713, "grad_norm": 1.127455234527588, "learning_rate": 4.9276833296377966e-05, "loss": 1.6879, "step": 3751 },
    { "epoch": 0.12468268882075627, "grad_norm": 1.1534373760223389, "learning_rate": 4.925664290937196e-05, "loss": 1.7091, "step": 3782 },
    { "epoch": 0.12570467807338542, "grad_norm": 1.2369154691696167, "learning_rate": 4.9236178789447576e-05, "loss": 1.6923, "step": 3813 },
    { "epoch": 0.12672666732601456, "grad_norm": 1.074436068534851, "learning_rate": 4.921544116753962e-05, "loss": 1.6805, "step": 3844 },
    { "epoch": 0.12774865657864373, "grad_norm": 1.112226963043213, "learning_rate": 4.919443027766935e-05, "loss": 1.6527, "step": 3875 },
    { "epoch": 0.12877064583127287, "grad_norm": 1.0999863147735596, "learning_rate": 4.91731463569418e-05, "loss": 1.6614, "step": 3906 },
    { "epoch": 0.129792635083902, "grad_norm": 1.098036527633667, "learning_rate": 4.915158964554312e-05, "loss": 1.6826, "step": 3937 },
    { "epoch": 0.13081462433653118, "grad_norm": 1.1108450889587402, "learning_rate": 4.912976038673786e-05, "loss": 1.6886, "step": 3968 },
    { "epoch": 0.13183661358916032, "grad_norm": 1.0916872024536133, "learning_rate": 4.9107658826866254e-05, "loss": 1.6782, "step": 3999 },
    { "epoch": 0.13285860284178946, "grad_norm": 1.0818581581115723, "learning_rate": 4.908528521534139e-05, "loss": 1.6796, "step": 4030 },
    { "epoch": 0.13388059209441863, "grad_norm": 1.0908610820770264, "learning_rate": 4.906263980464644e-05, "loss": 1.6662, "step": 4061 },
    { "epoch": 0.13490258134704777, "grad_norm": 1.0384143590927124, "learning_rate": 4.903972285033178e-05, "loss": 1.6811, "step": 4092 },
    { "epoch": 0.1359245705996769, "grad_norm": 1.0998533964157104, "learning_rate": 4.901653461101213e-05, "loss": 1.6817, "step": 4123 },
    { "epoch": 0.13694655985230608, "grad_norm": 1.1726231575012207, "learning_rate": 4.8993075348363626e-05, "loss": 1.6532, "step": 4154 },
    { "epoch": 0.13796854910493522, "grad_norm": 1.075464129447937, "learning_rate": 4.896934532712084e-05, "loss": 1.671, "step": 4185 },
    { "epoch": 0.13899053835756436, "grad_norm": 1.0557868480682373, "learning_rate": 4.8945344815073846e-05, "loss": 1.6548, "step": 4216 },
    { "epoch": 0.14001252761019353, "grad_norm": 1.0531095266342163, "learning_rate": 4.892107408306516e-05, "loss": 1.6526, "step": 4247 },
    { "epoch": 0.14103451686282267, "grad_norm": 1.138203501701355, "learning_rate": 4.889653340498669e-05, "loss": 1.678, "step": 4278 },
    { "epoch": 0.1420565061154518, "grad_norm": 1.0668121576309204, "learning_rate": 4.8871723057776664e-05, "loss": 1.6552, "step": 4309 },
    { "epoch": 0.14307849536808095, "grad_norm": 1.0312261581420898, "learning_rate": 4.8846643321416476e-05, "loss": 1.6731, "step": 4340 },
    { "epoch": 0.14410048462071012, "grad_norm": 1.0868667364120483, "learning_rate": 4.882129447892753e-05, "loss": 1.6713, "step": 4371 },
    { "epoch": 0.14512247387333926, "grad_norm": 1.0213130712509155, "learning_rate": 4.8795676816368076e-05, "loss": 1.6515, "step": 4402 },
    { "epoch": 0.1461444631259684, "grad_norm": 1.190875768661499, "learning_rate": 4.876979062282995e-05, "loss": 1.651, "step": 4433 },
    { "epoch": 0.14716645237859757, "grad_norm": 1.0551568269729614, "learning_rate": 4.8743636190435325e-05, "loss": 1.6751, "step": 4464 },
    { "epoch": 0.1481884416312267, "grad_norm": 1.0654323101043701, "learning_rate": 4.871721381433344e-05, "loss": 1.6499, "step": 4495 },
    { "epoch": 0.14921043088385585, "grad_norm": 1.04425048828125, "learning_rate": 4.869052379269719e-05, "loss": 1.6401, "step": 4526 },
    { "epoch": 0.15023242013648502, "grad_norm": 1.3416290283203125, "learning_rate": 4.866356642671985e-05, "loss": 1.6553, "step": 4557 },
    { "epoch": 0.15125440938911416, "grad_norm": 1.073529601097107, "learning_rate": 4.8636342020611634e-05, "loss": 1.6413, "step": 4588 },
    { "epoch": 0.1522763986417433, "grad_norm": 1.0386462211608887, "learning_rate": 4.860885088159626e-05, "loss": 1.6595, "step": 4619 },
    { "epoch": 0.15329838789437247, "grad_norm": 1.0544514656066895, "learning_rate": 4.858109331990751e-05, "loss": 1.6387, "step": 4650 },
    { "epoch": 0.15432037714700161, "grad_norm": 1.088112473487854, "learning_rate": 4.855306964878567e-05, "loss": 1.628, "step": 4681 },
    { "epoch": 0.15534236639963075, "grad_norm": 0.9930492639541626, "learning_rate": 4.8524780184474084e-05, "loss": 1.6554, "step": 4712 },
    { "epoch": 0.15636435565225992, "grad_norm": 0.9864984154701233, "learning_rate": 4.8496225246215496e-05, "loss": 1.6558, "step": 4743 },
    { "epoch": 0.15738634490488906, "grad_norm": 1.0170128345489502, "learning_rate": 4.8467405156248505e-05, "loss": 1.6289, "step": 4774 },
    { "epoch": 0.1584083341575182, "grad_norm": 1.0360223054885864, "learning_rate": 4.843832023980392e-05, "loss": 1.6314, "step": 4805 },
    { "epoch": 0.15943032341014737, "grad_norm": 1.0165129899978638, "learning_rate": 4.840897082510106e-05, "loss": 1.6294, "step": 4836 },
    { "epoch": 0.16045231266277651, "grad_norm": 1.079991102218628, "learning_rate": 4.8379357243344084e-05, "loss": 1.6204, "step": 4867 },
    { "epoch": 0.16147430191540565, "grad_norm": 1.0515645742416382, "learning_rate": 4.8349479828718236e-05, "loss": 1.6322, "step": 4898 },
    { "epoch": 0.16249629116803482, "grad_norm": 1.0226655006408691, "learning_rate": 4.8319338918386075e-05, "loss": 1.6476, "step": 4929 },
    { "epoch": 0.16351828042066396, "grad_norm": 1.0581114292144775, "learning_rate": 4.828893485248369e-05, "loss": 1.6302, "step": 4960 },
    { "epoch": 0.1645402696732931, "grad_norm": 1.0156742334365845, "learning_rate": 4.825826797411682e-05, "loss": 1.6292, "step": 4991 },
    { "epoch": 0.16556225892592227, "grad_norm": 1.0153559446334839, "learning_rate": 4.822733862935702e-05, "loss": 1.6367, "step": 5022 },
    { "epoch": 0.16658424817855141, "grad_norm": 1.0488505363464355, "learning_rate": 4.819614716723775e-05, "loss": 1.644, "step": 5053 },
    { "epoch": 0.16760623743118055, "grad_norm": 1.092781901359558, "learning_rate": 4.8164693939750425e-05, "loss": 1.6318, "step": 5084 },
    { "epoch": 0.16862822668380972, "grad_norm": 0.9972744584083557, "learning_rate": 4.813297930184042e-05, "loss": 1.6206, "step": 5115 },
    { "epoch": 0.16965021593643886, "grad_norm": 1.0455750226974487, "learning_rate": 4.810100361140314e-05, "loss": 1.6296, "step": 5146 },
    { "epoch": 0.170672205189068, "grad_norm": 0.9624122977256775, "learning_rate": 4.8068767229279885e-05, "loss": 1.6307, "step": 5177 },
    { "epoch": 0.17169419444169717, "grad_norm": 1.4828526973724365, "learning_rate": 4.8036270519253854e-05, "loss": 1.6377, "step": 5208 },
    { "epoch": 0.17271618369432631, "grad_norm": 1.0732771158218384, "learning_rate": 4.8003513848046e-05, "loss": 1.6148, "step": 5239 },
    { "epoch": 0.17373817294695545, "grad_norm": 1.0065757036209106, "learning_rate": 4.79704975853109e-05, "loss": 1.6192, "step": 5270 },
    { "epoch": 0.17476016219958462, "grad_norm": 1.7071099281311035, "learning_rate": 4.793722210363262e-05, "loss": 1.6205, "step": 5301 },
    { "epoch": 0.17578215145221376, "grad_norm": 1.009507417678833, "learning_rate": 4.7903687778520414e-05, "loss": 1.6261, "step": 5332 },
    { "epoch": 0.1768041407048429, "grad_norm": 1.0593280792236328, "learning_rate": 4.7869894988404593e-05, "loss": 1.6286, "step": 5363 },
    { "epoch": 0.17782612995747207, "grad_norm": 1.0053679943084717, "learning_rate": 4.783584411463221e-05, "loss": 1.6424, "step": 5394 },
    { "epoch": 0.17884811921010121, "grad_norm": 0.983214795589447, "learning_rate": 4.780153554146274e-05, "loss": 1.6292, "step": 5425 },
    { "epoch": 0.17987010846273035, "grad_norm": 0.9617491960525513, "learning_rate": 4.7766969656063766e-05, "loss": 1.6182, "step": 5456 },
    { "epoch": 0.18089209771535952, "grad_norm": 1.0862797498703003, "learning_rate": 4.773214684850662e-05, "loss": 1.6213, "step": 5487 },
    { "epoch": 0.18191408696798866, "grad_norm": 1.0468218326568604, "learning_rate": 4.769706751176193e-05, "loss": 1.6176, "step": 5518 },
    { "epoch": 0.1829360762206178, "grad_norm": 0.9474911093711853, "learning_rate": 4.7661732041695264e-05, "loss": 1.5864, "step": 5549 },
    { "epoch": 0.18395806547324695, "grad_norm": 0.9839109778404236, "learning_rate": 4.762614083706258e-05, "loss": 1.6177, "step": 5580 },
    { "epoch": 0.18498005472587611, "grad_norm": 1.9305601119995117, "learning_rate": 4.759029429950581e-05, "loss": 1.6132, "step": 5611 },
    { "epoch": 0.18600204397850525, "grad_norm": 0.9609850645065308, "learning_rate": 4.7554192833548235e-05, "loss": 1.6115, "step": 5642 },
    { "epoch": 0.1870240332311344, "grad_norm": 1.0501559972763062, "learning_rate": 4.751783684659e-05, "loss": 1.6017, "step": 5673 },
    { "epoch": 0.18804602248376356, "grad_norm": 0.9933464527130127, "learning_rate": 4.748122674890348e-05, "loss": 1.6136, "step": 5704 },
    { "epoch": 0.1890680117363927, "grad_norm": 0.9463350772857666, "learning_rate": 4.7444362953628654e-05, "loss": 1.6102, "step": 5735 },
    { "epoch": 0.19009000098902185, "grad_norm": 0.9974256753921509, "learning_rate": 4.7407245876768424e-05, "loss": 1.6101, "step": 5766 },
    { "epoch": 0.19111199024165101, "grad_norm": 0.9747878313064575, "learning_rate": 4.736987593718397e-05, "loss": 1.5967, "step": 5797 },
    { "epoch": 0.19213397949428015, "grad_norm": 0.9755719900131226, "learning_rate": 4.733225355658999e-05, "loss": 1.5987, "step": 5828 },
    { "epoch": 0.1931559687469093, "grad_norm": 0.9605233669281006, "learning_rate": 4.7294379159549926e-05, "loss": 1.6238, "step": 5859 },
    { "epoch": 0.19417795799953846, "grad_norm": 0.9777940511703491, "learning_rate": 4.725625317347119e-05, "loss": 1.5976, "step": 5890 },
    { "epoch": 0.1951999472521676, "grad_norm": 0.9619265198707581, "learning_rate": 4.7217876028600374e-05, "loss": 1.5985, "step": 5921 },
    { "epoch": 0.19622193650479675, "grad_norm": 0.970813512802124, "learning_rate": 4.717924815801832e-05, "loss": 1.6097, "step": 5952 },
    { "epoch": 0.19724392575742591, "grad_norm": 1.1910721063613892, "learning_rate": 4.714036999763532e-05, "loss": 1.6156, "step": 5983 },
    { "epoch": 0.19826591501005505, "grad_norm": 1.1106246709823608, "learning_rate": 4.7101241986186116e-05, "loss": 1.5761, "step": 6014 },
    { "epoch": 0.1992879042626842, "grad_norm": 1.0065436363220215, "learning_rate": 4.7061864565225e-05, "loss": 1.5857, "step": 6045 },
    { "epoch": 0.20030989351531336, "grad_norm": 0.9245477914810181, "learning_rate": 4.702223817912081e-05, "loss": 1.6099, "step": 6076 },
    { "epoch": 0.2013318827679425, "grad_norm": 0.9705063104629517, "learning_rate": 4.698236327505195e-05, "loss": 1.5995, "step": 6107 },
    { "epoch": 0.20235387202057165, "grad_norm": 0.9455100893974304, "learning_rate": 4.694224030300127e-05, "loss": 1.6302, "step": 6138 },
    { "epoch": 0.20337586127320081, "grad_norm": 0.9505909085273743, "learning_rate": 4.690186971575107e-05, "loss": 1.5799, "step": 6169 },
    { "epoch": 0.20439785052582995, "grad_norm": 0.9440078139305115, "learning_rate": 4.6861251968877916e-05, "loss": 1.5889, "step": 6200 },
    { "epoch": 0.2054198397784591, "grad_norm": 0.9610021710395813, "learning_rate": 4.68203875207476e-05, "loss": 1.6049, "step": 6231 },
    { "epoch": 0.20644182903108826, "grad_norm": 0.9686371684074402, "learning_rate": 4.677927683250983e-05, "loss": 1.5985, "step": 6262 },
    { "epoch": 0.2074638182837174, "grad_norm": 0.9532095789909363, "learning_rate": 4.6737920368093156e-05, "loss": 1.5763, "step": 6293 },
    { "epoch": 0.20848580753634655, "grad_norm": 0.9185531139373779, "learning_rate": 4.669631859419965e-05, "loss": 1.5719, "step": 6324 },
    { "epoch": 0.20950779678897571, "grad_norm": 0.917314350605011, "learning_rate": 4.6654471980299676e-05, "loss": 1.5634, "step": 6355 },
    { "epoch": 0.21052978604160485, "grad_norm": 0.9497798681259155, "learning_rate": 4.661238099862658e-05, "loss": 1.5836, "step": 6386 },
    { "epoch": 0.211551775294234, "grad_norm": 0.939011812210083, "learning_rate": 4.657004612417138e-05, "loss": 1.601, "step": 6417 },
    { "epoch": 0.21257376454686316, "grad_norm": 0.981315016746521, "learning_rate": 4.6527467834677374e-05, "loss": 1.5896, "step": 6448 },
    { "epoch": 0.2135957537994923, "grad_norm": 0.9485774636268616, "learning_rate": 4.648464661063478e-05, "loss": 1.5912, "step": 6479 },
    { "epoch": 0.21461774305212145, "grad_norm": 0.9504795670509338, "learning_rate": 4.6441582935275264e-05, "loss": 1.5827, "step": 6510 },
    { "epoch": 0.21563973230475061, "grad_norm": 0.9627436399459839, "learning_rate": 4.6398277294566586e-05, "loss": 1.5858, "step": 6541 },
    { "epoch": 0.21666172155737976, "grad_norm": 0.9468591809272766, "learning_rate": 4.6354730177207e-05, "loss": 1.5884, "step": 6572 },
    { "epoch": 0.2176837108100089, "grad_norm": 0.9847991466522217, "learning_rate": 4.6310942074619787e-05, "loss": 1.5744, "step": 6603 },
    { "epoch": 0.21870570006263806, "grad_norm": 0.9207347631454468, "learning_rate": 4.626691348094777e-05, "loss": 1.5675, "step": 6634 },
    { "epoch": 0.2197276893152672, "grad_norm": 0.9114487171173096, "learning_rate": 4.622264489304762e-05, "loss": 1.5726, "step": 6665 },
    { "epoch": 0.22074967856789635, "grad_norm": 0.9141913652420044, "learning_rate": 4.617813681048434e-05, "loss": 1.5757, "step": 6696 },
    { "epoch": 0.2217716678205255, "grad_norm": 0.9770637154579163, "learning_rate": 4.61333897355256e-05, "loss": 1.5819, "step": 6727 },
    { "epoch": 0.22279365707315466, "grad_norm": 0.9790964126586914, "learning_rate": 4.608840417313604e-05, "loss": 1.5793, "step": 6758 },
    { "epoch": 0.2238156463257838, "grad_norm": 0.9311193227767944, "learning_rate": 4.6043180630971646e-05, "loss": 1.5823, "step": 6789 },
    { "epoch": 0.22483763557841294, "grad_norm": 0.934339165687561, "learning_rate": 4.599771961937391e-05, "loss": 1.5643, "step": 6820 },
    { "epoch": 0.2258596248310421, "grad_norm": 0.9052058458328247, "learning_rate": 4.5952021651364204e-05, "loss": 1.5752, "step": 6851 },
    { "epoch": 0.22688161408367125, "grad_norm": 0.9528570175170898, "learning_rate": 4.590608724263786e-05, "loss": 1.5603, "step": 6882 },
    { "epoch": 0.2279036033363004, "grad_norm": 0.9073063135147095, "learning_rate": 4.585991691155845e-05, "loss": 1.555, "step": 6913 },
    { "epoch": 0.22892559258892956, "grad_norm": 0.9537662267684937, "learning_rate": 4.581351117915188e-05, "loss": 1.5662, "step": 6944 },
    { "epoch": 0.2299475818415587, "grad_norm": 0.9296181201934814, "learning_rate": 4.5766870569100534e-05, "loss": 1.5649, "step": 6975 },
    { "epoch": 0.23096957109418784, "grad_norm": 0.947211503982544, "learning_rate": 4.571999560773736e-05, "loss": 1.5834, "step": 7006 },
    { "epoch": 0.231991560346817, "grad_norm": 0.9705089330673218, "learning_rate": 4.5672886824039915e-05, "loss": 1.5817, "step": 7037 },
    { "epoch": 0.23301354959944615, "grad_norm": 0.9289253950119019, "learning_rate": 4.5625544749624435e-05, "loss": 1.5792, "step": 7068 },
    { "epoch": 0.2340355388520753, "grad_norm": 0.9166892766952515, "learning_rate": 4.5577969918739794e-05, "loss": 1.559, "step": 7099 },
    { "epoch": 0.23505752810470446, "grad_norm": 0.9205060601234436, "learning_rate": 4.5530162868261486e-05, "loss": 1.5564, "step": 7130 },
    { "epoch": 0.2360795173573336, "grad_norm": 0.9769343733787537, "learning_rate": 4.548212413768558e-05, "loss": 1.5499, "step": 7161 },
    { "epoch": 0.23710150660996274, "grad_norm": 1.0154651403427124, "learning_rate": 4.543385426912261e-05, "loss": 1.5832, "step": 7192 },
    { "epoch": 0.2381234958625919, "grad_norm": 0.9247255325317383, "learning_rate": 4.53853538072915e-05, "loss": 1.5844, "step": 7223 },
    { "epoch": 0.23914548511522105, "grad_norm": 0.8840001225471497, "learning_rate": 4.533662329951336e-05, "loss": 1.5456, "step": 7254 },
    { "epoch": 0.2401674743678502, "grad_norm": 0.986392080783844, "learning_rate": 4.528766329570536e-05, "loss": 1.5743, "step": 7285 },
    { "epoch": 0.24118946362047936, "grad_norm": 0.8750962615013123, "learning_rate": 4.523847434837447e-05, "loss": 1.5751, "step": 7316 },
    { "epoch": 0.2422114528731085, "grad_norm": 0.9039379954338074, "learning_rate": 4.518905701261128e-05, "loss": 1.5603, "step": 7347 },
    { "epoch": 0.24323344212573764, "grad_norm": 0.9081151485443115, "learning_rate": 4.5139411846083715e-05, "loss": 1.5542, "step": 7378 },
    { "epoch": 0.2442554313783668, "grad_norm": 1.532193899154663, "learning_rate": 4.508953940903073e-05, "loss": 1.5693, "step": 7409 },
    { "epoch": 0.24527742063099595, "grad_norm": 0.9598657488822937, "learning_rate": 4.5039440264255994e-05, "loss": 1.5672, "step": 7440 },
    { "epoch": 0.2462994098836251, "grad_norm": 1.3318407535552979, "learning_rate": 4.498911497712155e-05, "loss": 1.5739, "step": 7471 },
    { "epoch": 0.24732139913625426, "grad_norm": 0.9309579730033875, "learning_rate": 4.493856411554142e-05, "loss": 1.5504, "step": 7502 },
    { "epoch": 0.2483433883888834, "grad_norm": 0.9426462650299072, "learning_rate": 4.4887788249975206e-05, "loss": 1.5529, "step": 7533 },
    { "epoch": 0.24936537764151254, "grad_norm": 0.9388718605041504, "learning_rate": 4.4836787953421656e-05, "loss": 1.5589, "step": 7564 },
    { "epoch": 0.2503873668941417, "grad_norm": 0.8922486901283264, "learning_rate": 4.478556380141218e-05, "loss": 1.5453, "step": 7595 },
    { "epoch": 0.25140935614677085, "grad_norm": 0.9597366452217102, "learning_rate": 4.4734116372004375e-05, "loss": 1.5565, "step": 7626 },
    { "epoch": 0.2524313453994, "grad_norm": 0.880445659160614, "learning_rate": 4.4682446245775477e-05, "loss": 1.5429, "step": 7657 },
    { "epoch": 0.2534533346520291, "grad_norm": 0.8895862102508545, "learning_rate": 4.463055400581586e-05, "loss": 1.5462, "step": 7688 },
    { "epoch": 0.25447532390465827, "grad_norm": 0.8859049677848816, "learning_rate": 4.4578440237722374e-05, "loss": 1.5678, "step": 7719 },
    { "epoch": 0.25549731315728746, "grad_norm": 0.9074852466583252, "learning_rate": 4.452610552959183e-05, "loss": 1.5695, "step": 7750 },
    { "epoch": 0.2565193024099166, "grad_norm": 0.9472444653511047, "learning_rate": 4.447355047201428e-05, "loss": 1.5527, "step": 7781 },
    { "epoch": 0.25754129166254575, "grad_norm": 0.901016891002655, "learning_rate": 4.4420775658066414e-05, "loss": 1.5523, "step": 7812 },
    { "epoch": 0.2585632809151749, "grad_norm": 0.8963896632194519, "learning_rate": 4.436778168330484e-05, "loss": 1.5623, "step": 7843 },
    { "epoch": 0.259585270167804, "grad_norm": 0.9571655988693237, "learning_rate": 4.4314569145759353e-05, "loss": 1.5446, "step": 7874 },
    { "epoch": 0.26060725942043317, "grad_norm": 0.9321922659873962, "learning_rate": 4.42611386459262e-05, "loss": 1.5514, "step": 7905 },
    { "epoch": 0.26162924867306236, "grad_norm": 0.8753949403762817, "learning_rate": 4.420749078676133e-05, "loss": 1.5524, "step": 7936 },
    { "epoch": 0.2626512379256915, "grad_norm": 0.8667870759963989, "learning_rate": 4.4153626173673516e-05, "loss": 1.5435, "step": 7967 },
    { "epoch": 0.26367322717832065, "grad_norm": 0.926670491695404, "learning_rate": 4.409954541451762e-05, "loss": 1.5448, "step": 7998 },
    { "epoch": 0.2646952164309498, "grad_norm": 0.9438245892524719, "learning_rate": 4.404524911958764e-05, "loss": 1.5706, "step": 8029 },
    { "epoch": 0.2657172056835789, "grad_norm": 0.9131088256835938, "learning_rate": 4.399073790160989e-05, "loss": 1.5361, "step": 8060 },
    { "epoch": 0.26673919493620807, "grad_norm": 0.914857804775238, "learning_rate": 4.393601237573607e-05, "loss": 1.5588, "step": 8091 },
    { "epoch": 0.26776118418883726, "grad_norm": 0.9113429188728333, "learning_rate": 4.388107315953628e-05, "loss": 1.5603, "step": 8122 },
    { "epoch": 0.2687831734414664, "grad_norm": 0.8804867267608643, "learning_rate": 4.382592087299212e-05, "loss": 1.5577, "step": 8153 },
    { "epoch": 0.26980516269409555, "grad_norm": 0.8368428349494934, "learning_rate": 4.377055613848964e-05, "loss": 1.5369, "step": 8184 },
    { "epoch": 0.2708271519467247, "grad_norm": 0.9133582711219788, "learning_rate": 4.3714979580812355e-05, "loss": 1.5522, "step": 8215 },
    { "epoch": 0.2718491411993538, "grad_norm": 0.902574360370636, "learning_rate": 4.365919182713416e-05, "loss": 1.5572, "step": 8246 },
    { "epoch": 0.27287113045198297, "grad_norm": 0.9236746430397034, "learning_rate": 4.360319350701226e-05, "loss": 1.5477, "step": 8277 },
    { "epoch": 0.27389311970461216, "grad_norm": 0.8866250514984131, "learning_rate": 4.3546985252380115e-05, "loss": 1.5576, "step": 8308 },
    { "epoch": 0.2749151089572413, "grad_norm": 0.8597050905227661, "learning_rate": 4.349056769754021e-05, "loss": 1.5454, "step": 8339 },
    { "epoch": 0.27593709820987045, "grad_norm": 0.9166654348373413, "learning_rate": 4.3433941479156994e-05, "loss": 1.5342, "step": 8370 },
    { "epoch": 0.2769590874624996, "grad_norm": 0.8913152813911438, "learning_rate": 4.3377107236249647e-05, "loss": 1.5489, "step": 8401 },
    { "epoch": 0.2779810767151287, "grad_norm": 0.9009787440299988, "learning_rate": 4.332006561018488e-05, "loss": 1.5617, "step": 8432 },
    { "epoch": 0.27900306596775787, "grad_norm": 0.8625615239143372, "learning_rate": 4.3262817244669683e-05, "loss": 1.5545, "step": 8463 },
    { "epoch": 0.28002505522038706, "grad_norm": 0.8483917713165283, "learning_rate": 4.3205362785744083e-05, "loss": 1.5287, "step": 8494 },
    { "epoch": 0.2810470444730162, "grad_norm": 0.8959261178970337, "learning_rate": 4.314770288177384e-05, "loss": 1.5503, "step": 8525 },
    { "epoch": 0.28206903372564535, "grad_norm": 0.9090222716331482, "learning_rate": 4.308983818344313e-05, "loss": 1.5469, "step": 8556 },
    { "epoch": 0.2830910229782745, "grad_norm": 0.9215665459632874, "learning_rate": 4.3031769343747206e-05, "loss": 1.519, "step": 8587 },
    { "epoch": 0.2841130122309036, "grad_norm": 0.8699467778205872, "learning_rate": 4.297349701798505e-05, "loss": 1.5507, "step": 8618 },
    { "epoch": 0.28513500148353277, "grad_norm": 0.9755619764328003, "learning_rate": 4.2915021863751916e-05, "loss": 1.5542, "step": 8649 },
    { "epoch": 0.2861569907361619, "grad_norm": 0.8612878918647766, "learning_rate": 4.285634454093198e-05, "loss": 1.55, "step": 8680 },
    { "epoch": 0.2871789799887911, "grad_norm": 0.8708077073097229, "learning_rate": 4.279746571169086e-05, "loss": 1.5239, "step": 8711 },
    { "epoch": 0.28820096924142025, "grad_norm": 0.8952695727348328, "learning_rate": 4.2738386040468136e-05, "loss": 1.5275, "step": 8742 },
    { "epoch": 0.2892229584940494, "grad_norm": 0.8305310010910034, "learning_rate": 4.2679106193969866e-05, "loss": 1.5419, "step": 8773 },
    { "epoch": 0.2902449477466785, "grad_norm": 0.9172886610031128, "learning_rate": 4.261962684116106e-05, "loss": 1.5266, "step": 8804 },
    { "epoch": 0.29126693699930767, "grad_norm": 0.8972066044807434, "learning_rate": 4.2559948653258145e-05, "loss": 1.5428, "step": 8835 },
    { "epoch": 0.2922889262519368, "grad_norm": 0.8838576078414917, "learning_rate": 4.250007230372134e-05, "loss": 1.5685, "step": 8866 },
    { "epoch": 0.293310915504566, "grad_norm": 0.8614609241485596, "learning_rate": 4.2439998468247126e-05, "loss": 1.5624, "step": 8897 },
    { "epoch": 0.29433290475719515, "grad_norm": 0.8305181860923767, "learning_rate": 4.2379727824760566e-05, "loss": 1.5424, "step": 8928 },
    { "epoch": 0.2953548940098243, "grad_norm": 0.8790427446365356, "learning_rate": 4.231926105340768e-05, "loss": 1.5316, "step": 8959 },
    { "epoch": 0.2963768832624534, "grad_norm": 0.8905590772628784, "learning_rate": 4.225859883654776e-05, "loss": 1.5136, "step": 8990 },
    { "epoch": 0.29739887251508257, "grad_norm": 0.8998729586601257, "learning_rate": 4.219774185874569e-05, "loss": 1.5372, "step": 9021 },
    { "epoch": 0.2984208617677117, "grad_norm": 0.8840643763542175, "learning_rate": 4.213669080676418e-05, "loss": 1.5371, "step": 9052 },
    { "epoch": 0.2994428510203409, "grad_norm": 0.9000579714775085, "learning_rate": 4.2075446369556056e-05, "loss": 1.5369, "step": 9083 },
    { "epoch": 0.30046484027297005, "grad_norm": 0.9541018009185791, "learning_rate": 4.201400923825648e-05, "loss": 1.5537, "step": 9114 },
    { "epoch": 0.3014868295255992, "grad_norm": 0.8957076072692871, "learning_rate": 4.195238010617511e-05, "loss": 1.5409, "step": 9145 },
    { "epoch": 0.30250881877822833, "grad_norm": 0.8320883512496948, "learning_rate": 4.1890559668788344e-05, "loss": 1.5178, "step": 9176 },
    { "epoch": 0.30353080803085747, "grad_norm": 0.8729486465454102, "learning_rate": 4.1828548623731405e-05, "loss": 1.5498, "step": 9207 },
    { "epoch": 0.3045527972834866, "grad_norm": 0.8582361936569214, "learning_rate": 4.1766347670790506e-05, "loss": 1.5209, "step": 9238 },
    { "epoch": 0.3055747865361158, "grad_norm": 0.890997052192688, "learning_rate": 4.170395751189495e-05, "loss": 1.5423, "step": 9269 },
    { "epoch": 0.30659677578874495, "grad_norm": 0.9070558547973633, "learning_rate": 4.164137885110921e-05, "loss": 1.5519, "step": 9300 },
    { "epoch": 0.3076187650413741, "grad_norm": 0.9573651552200317, "learning_rate": 4.157861239462495e-05, "loss": 1.5439, "step": 9331 },
    { "epoch": 0.30864075429400323, "grad_norm": 0.9029926061630249, "learning_rate": 4.1515658850753114e-05, "loss": 1.539, "step": 9362 },
    { "epoch": 0.30966274354663237, "grad_norm": 0.8370001316070557, "learning_rate": 4.145251892991588e-05, "loss": 1.5401, "step": 9393 },
    { "epoch": 0.3106847327992615, "grad_norm": 0.8818012475967407, "learning_rate": 4.138919334463868e-05, "loss": 1.5166, "step": 9424 },
    { "epoch": 0.3117067220518907, "grad_norm": 0.8851699233055115, "learning_rate": 4.1325682809542124e-05, "loss": 1.5334, "step": 9455 },
    { "epoch": 0.31272871130451985, "grad_norm": 0.8938205242156982, "learning_rate": 4.126198804133398e-05, "loss": 1.5408, "step": 9486 },
    { "epoch": 0.313750700557149, "grad_norm": 0.8480469584465027, "learning_rate": 4.1198109758801055e-05, "loss": 1.5149, "step": 9517 },
    { "epoch": 0.31477268980977813, "grad_norm": 0.8756515383720398, "learning_rate": 4.113404868280107e-05, "loss": 1.5174, "step": 9548 },
    { "epoch": 0.31579467906240727, "grad_norm": 0.8261292576789856, "learning_rate": 4.106980553625457e-05, "loss": 1.5295, "step": 9579 },
    { "epoch": 0.3168166683150364, "grad_norm": 0.9329167604446411, "learning_rate": 4.100538104413674e-05, "loss": 1.5151, "step": 9610 },
    { "epoch": 0.3178386575676656, "grad_norm": 0.8843585848808289, "learning_rate": 4.09407759334692e-05, "loss": 1.5241, "step": 9641 },
    { "epoch": 0.31886064682029475, "grad_norm": 0.8441824316978455, "learning_rate": 4.087599093331186e-05, "loss": 1.5293, "step": 9672 },
    { "epoch": 0.3198826360729239, "grad_norm": 0.8727023005485535, "learning_rate": 4.081102677475462e-05, "loss": 1.5332, "step": 9703 },
    { "epoch": 0.32090462532555303, "grad_norm": 0.8603296279907227, "learning_rate": 4.0745884190909194e-05, "loss": 1.5205, "step": 9734 },
    { "epoch": 0.32192661457818217, "grad_norm": 0.8445816040039062, "learning_rate": 4.0680563916900796e-05, "loss": 1.541, "step": 9765 },
    { "epoch": 0.3229486038308113, "grad_norm": 0.8408164978027344, "learning_rate": 4.0615066689859815e-05, "loss": 1.5106, "step": 9796 },
    { "epoch": 0.32397059308344045, "grad_norm": 0.901888370513916, "learning_rate": 4.0549393248913584e-05, "loss": 1.5376, "step": 9827 },
    { "epoch": 0.32499258233606965, "grad_norm": 0.878149688243866, "learning_rate": 4.048354433517794e-05, "loss": 1.5197, "step": 9858 },
    { "epoch": 0.3260145715886988, "grad_norm": 0.8681669235229492, "learning_rate": 4.0417520691748916e-05, "loss": 1.524, "step": 9889 },
    { "epoch": 0.32703656084132793, "grad_norm": 0.8651006817817688, "learning_rate": 4.035132306369438e-05, "loss": 1.5257, "step": 9920 },
    { "epoch": 0.32805855009395707, "grad_norm": 0.8485890030860901, "learning_rate": 4.028495219804555e-05, "loss": 1.5325, "step": 9951 },
    { "epoch": 0.3290805393465862, "grad_norm": 0.8644028902053833, "learning_rate": 4.021840884378864e-05, "loss": 1.5432, "step": 9982 },
    { "epoch": 0.33010252859921535, "grad_norm": 0.9188100695610046, "learning_rate": 4.015169375185633e-05, "loss": 1.5381, "step": 10013 },
    { "epoch": 0.33112451785184455, "grad_norm": 0.9121026396751404, "learning_rate": 4.0084807675119396e-05, "loss": 1.5157, "step": 10044 },
    { "epoch": 0.3321465071044737, "grad_norm": 0.8835768699645996, "learning_rate": 4.0017751368378106e-05, "loss": 1.527, "step": 10075 },
    { "epoch": 0.33316849635710283, "grad_norm": 0.8608120679855347, "learning_rate": 3.995052558835377e-05, "loss": 1.5326, "step": 10106 },
    { "epoch": 0.33419048560973197, "grad_norm": 0.8970103859901428, "learning_rate": 3.988313109368017e-05, "loss": 1.5241, "step": 10137 },
    { "epoch": 0.3352124748623611, "grad_norm": 0.832877516746521, "learning_rate": 3.981556864489504e-05, "loss": 1.5093, "step": 10168 },
    { "epoch": 0.33623446411499025, "grad_norm": 0.8215609788894653, "learning_rate": 3.974783900443142e-05, "loss": 1.5243, "step": 10199 },
    { "epoch": 0.33725645336761945, "grad_norm": 0.8891729712486267, "learning_rate": 3.9679942936609095e-05, "loss": 1.527, "step": 10230 },
    { "epoch": 0.3382784426202486, "grad_norm": 0.8482518196105957, "learning_rate": 3.961188120762596e-05, "loss": 1.5171, "step": 10261 },
    { "epoch": 0.33930043187287773, "grad_norm": 0.8347421288490295, "learning_rate": 3.954365458554938e-05, "loss": 1.5209, "step": 10292 },
    { "epoch": 0.34032242112550687, "grad_norm": 0.8662514686584473, "learning_rate": 3.947526384030751e-05, "loss": 1.5225, "step": 10323 },
    { "epoch": 0.341344410378136, "grad_norm": 0.8288858532905579, "learning_rate": 3.9406709743680624e-05, "loss": 1.4988, "step": 10354 },
    { "epoch": 0.34236639963076515, "grad_norm": 0.8532800674438477, "learning_rate": 3.9337993069292366e-05, "loss": 1.5292, "step": 10385 },
    { "epoch": 0.34338838888339435, "grad_norm": 0.8530318140983582, "learning_rate": 3.926911459260109e-05, "loss": 1.5439, "step": 10416 },
    { "epoch": 0.3444103781360235, "grad_norm": 0.8556480407714844, "learning_rate": 3.920007509089102e-05, "loss": 1.502, "step": 10447 },
    { "epoch": 0.34543236738865263, "grad_norm": 0.834280252456665, "learning_rate": 3.913087534326357e-05, "loss": 1.5359, "step": 10478 },
    { "epoch": 0.34645435664128177, "grad_norm": 0.8753178119659424, "learning_rate": 3.9061516130628475e-05, "loss": 1.5247, "step": 10509 },
    { "epoch": 0.3474763458939109, "grad_norm": 0.8129472732543945, "learning_rate": 3.8991998235695025e-05, "loss": 1.509, "step": 10540 },
    { "epoch": 0.34849833514654005, "grad_norm": 0.8485814929008484, "learning_rate": 3.8922322442963224e-05, "loss": 1.5155, "step": 10571 },
    { "epoch": 0.34952032439916925, "grad_norm": 0.8839988708496094, "learning_rate": 3.885248953871491e-05, "loss": 1.5076, "step": 10602 },
    { "epoch": 0.3505423136517984, "grad_norm": 0.8462734818458557, "learning_rate": 3.8782500311004915e-05, "loss": 1.5016, "step": 10633 },
    { "epoch": 0.35156430290442753, "grad_norm": 0.8573621511459351, "learning_rate": 3.871235554965218e-05, "loss": 1.5418, "step": 10664 },
    { "epoch": 0.35258629215705667, "grad_norm": 0.8314201235771179, "learning_rate": 3.864205604623078e-05, "loss": 1.5167, "step": 10695 },
    { "epoch": 0.3536082814096858, "grad_norm": 0.8317237496376038, "learning_rate": 3.857160259406107e-05, "loss": 1.5121, "step": 10726 },
    { "epoch": 0.35463027066231495, "grad_norm": 0.8582017421722412, "learning_rate": 3.8500995988200674e-05, "loss": 1.5137, "step": 10757 },
    { "epoch": 0.35565225991494415, "grad_norm": 0.8427022695541382, "learning_rate": 3.843023702543556e-05, "loss": 1.5248, "step": 10788 },
    { "epoch": 0.3566742491675733, "grad_norm": 0.8414435386657715, "learning_rate": 3.8359326504270984e-05, "loss": 1.4923, "step": 10819 },
    { "epoch": 0.35769623842020243, "grad_norm": 0.8638574481010437, "learning_rate": 3.828826522492255e-05, "loss": 1.4947, "step": 10850 },
    { "epoch": 0.35871822767283157, "grad_norm": 0.8254904747009277, "learning_rate": 3.821705398930713e-05, "loss": 1.524, "step": 10881 },
    { "epoch": 0.3597402169254607, "grad_norm": 0.8651305437088013, "learning_rate": 3.814569360103385e-05, "loss": 1.5034, "step": 10912 },
    { "epoch": 0.36076220617808985, "grad_norm": 0.8567565679550171, "learning_rate": 3.807418486539499e-05, "loss": 1.4971, "step": 10943 },
    { "epoch": 0.36178419543071905, "grad_norm": 0.8213040828704834, "learning_rate": 3.80025285893569e-05, "loss": 1.5095, "step": 10974 },
    { "epoch": 0.3628061846833482, "grad_norm": 0.8153424859046936, "learning_rate": 3.793072558155093e-05, "loss": 1.5168, "step": 11005 },
    { "epoch": 0.36382817393597733, "grad_norm": 0.8211629390716553, "learning_rate": 3.785877665226426e-05, "loss": 1.5208, "step": 11036 },
    { "epoch": 0.36485016318860647, "grad_norm": 0.8744972348213196, "learning_rate": 3.778668261343079e-05, "loss": 1.5133, "step": 11067 },
    { "epoch": 0.3658721524412356, "grad_norm": 0.8211522698402405, "learning_rate": 3.771444427862192e-05, "loss": 1.5042, "step": 11098 },
    { "epoch": 0.36689414169386475, "grad_norm": 0.8390249609947205, "learning_rate": 3.7642062463037465e-05, "loss": 1.5214, "step": 11129 },
    { "epoch": 0.3679161309464939, "grad_norm": 0.8379174470901489, "learning_rate": 3.7569537983496373e-05, "loss": 1.5164, "step": 11160 },
    { "epoch": 0.3689381201991231, "grad_norm": 0.8449585437774658, "learning_rate": 3.749687165842753e-05, "loss": 1.5049, "step": 11191 },
    { "epoch": 0.36996010945175223, "grad_norm": 0.7959738969802856, "learning_rate": 3.7424064307860536e-05, "loss": 1.4941, "step": 11222 },
    { "epoch": 0.37098209870438137, "grad_norm": 0.8311371207237244, "learning_rate": 3.735111675341645e-05, "loss": 1.513, "step": 11253 },
    { "epoch": 0.3720040879570105, "grad_norm": 0.828087329864502, "learning_rate": 3.7278029818298524e-05, "loss": 1.4983, "step": 11284 },
    {
      "epoch": 0.37302607720963965,
      "grad_norm": 0.8467016220092773,
      "learning_rate": 3.720480432728287e-05,
      "loss": 1.5167,
      "step": 11315
|
}, |
|
{ |
|
"epoch": 0.3740480664622688, |
|
"grad_norm": 0.8619351983070374, |
|
"learning_rate": 3.71314411067092e-05, |
|
"loss": 1.5065, |
|
"step": 11346 |
|
}, |
|
{ |
|
"epoch": 0.375070055714898, |
|
"grad_norm": 0.8230463862419128, |
|
"learning_rate": 3.70579409844715e-05, |
|
"loss": 1.4987, |
|
"step": 11377 |
|
}, |
|
{ |
|
"epoch": 0.37609204496752713, |
|
"grad_norm": 0.8631263971328735, |
|
"learning_rate": 3.698430479000865e-05, |
|
"loss": 1.5023, |
|
"step": 11408 |
|
}, |
|
{ |
|
"epoch": 0.37711403422015627, |
|
"grad_norm": 0.8831419348716736, |
|
"learning_rate": 3.691053335429509e-05, |
|
"loss": 1.5046, |
|
"step": 11439 |
|
}, |
|
{ |
|
"epoch": 0.3781360234727854, |
|
"grad_norm": 0.8332011103630066, |
|
"learning_rate": 3.683662750983147e-05, |
|
"loss": 1.5002, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.37915801272541455, |
|
"grad_norm": 0.8661298751831055, |
|
"learning_rate": 3.676258809063518e-05, |
|
"loss": 1.51, |
|
"step": 11501 |
|
}, |
|
{ |
|
"epoch": 0.3801800019780437, |
|
"grad_norm": 0.8714830279350281, |
|
"learning_rate": 3.6688415932231004e-05, |
|
"loss": 1.5031, |
|
"step": 11532 |
|
}, |
|
{ |
|
"epoch": 0.3812019912306729, |
|
"grad_norm": 0.8633294701576233, |
|
"learning_rate": 3.661411187164166e-05, |
|
"loss": 1.4854, |
|
"step": 11563 |
|
}, |
|
{ |
|
"epoch": 0.38222398048330203, |
|
"grad_norm": 0.8436555862426758, |
|
"learning_rate": 3.65396767473784e-05, |
|
"loss": 1.5213, |
|
"step": 11594 |
|
}, |
|
{ |
|
"epoch": 0.38324596973593117, |
|
"grad_norm": 0.8612047433853149, |
|
"learning_rate": 3.6465111399431465e-05, |
|
"loss": 1.5002, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 0.3842679589885603, |
|
"grad_norm": 0.8069844245910645, |
|
"learning_rate": 3.6390416669260674e-05, |
|
"loss": 1.5034, |
|
"step": 11656 |
|
}, |
|
{ |
|
"epoch": 0.38528994824118945, |
|
"grad_norm": 0.8230804204940796, |
|
"learning_rate": 3.63155933997859e-05, |
|
"loss": 1.5155, |
|
"step": 11687 |
|
}, |
|
{ |
|
"epoch": 0.3863119374938186, |
|
"grad_norm": 0.8166376352310181, |
|
"learning_rate": 3.624064243537758e-05, |
|
"loss": 1.487, |
|
"step": 11718 |
|
}, |
|
{ |
|
"epoch": 0.3873339267464478, |
|
"grad_norm": 0.8213214874267578, |
|
"learning_rate": 3.616556462184716e-05, |
|
"loss": 1.5178, |
|
"step": 11749 |
|
}, |
|
{ |
|
"epoch": 0.38835591599907693, |
|
"grad_norm": 0.8418950438499451, |
|
"learning_rate": 3.609036080643755e-05, |
|
"loss": 1.5173, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.38937790525170607, |
|
"grad_norm": 0.839227020740509, |
|
"learning_rate": 3.60150318378136e-05, |
|
"loss": 1.51, |
|
"step": 11811 |
|
}, |
|
{ |
|
"epoch": 0.3903998945043352, |
|
"grad_norm": 0.8407977819442749, |
|
"learning_rate": 3.5939578566052465e-05, |
|
"loss": 1.4967, |
|
"step": 11842 |
|
}, |
|
{ |
|
"epoch": 0.39142188375696435, |
|
"grad_norm": 0.8286086320877075, |
|
"learning_rate": 3.586400184263408e-05, |
|
"loss": 1.4817, |
|
"step": 11873 |
|
}, |
|
{ |
|
"epoch": 0.3924438730095935, |
|
"grad_norm": 0.8609039783477783, |
|
"learning_rate": 3.578830252043148e-05, |
|
"loss": 1.4842, |
|
"step": 11904 |
|
}, |
|
{ |
|
"epoch": 0.3934658622622227, |
|
"grad_norm": 0.8497804403305054, |
|
"learning_rate": 3.571248145370125e-05, |
|
"loss": 1.509, |
|
"step": 11935 |
|
}, |
|
{ |
|
"epoch": 0.39448785151485183, |
|
"grad_norm": 0.8288097977638245, |
|
"learning_rate": 3.5636539498073794e-05, |
|
"loss": 1.4914, |
|
"step": 11966 |
|
}, |
|
{ |
|
"epoch": 0.39550984076748097, |
|
"grad_norm": 0.8199227452278137, |
|
"learning_rate": 3.556047751054378e-05, |
|
"loss": 1.4754, |
|
"step": 11997 |
|
}, |
|
{ |
|
"epoch": 0.3965318300201101, |
|
"grad_norm": 0.8441981077194214, |
|
"learning_rate": 3.548429634946039e-05, |
|
"loss": 1.4826, |
|
"step": 12028 |
|
}, |
|
{ |
|
"epoch": 0.39755381927273925, |
|
"grad_norm": 0.7997297048568726, |
|
"learning_rate": 3.540799687451768e-05, |
|
"loss": 1.4804, |
|
"step": 12059 |
|
}, |
|
{ |
|
"epoch": 0.3985758085253684, |
|
"grad_norm": 0.8018672466278076, |
|
"learning_rate": 3.533157994674485e-05, |
|
"loss": 1.4995, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 0.3995977977779976, |
|
"grad_norm": 0.8712153434753418, |
|
"learning_rate": 3.5255046428496546e-05, |
|
"loss": 1.5044, |
|
"step": 12121 |
|
}, |
|
{ |
|
"epoch": 0.40061978703062673, |
|
"grad_norm": 0.852215588092804, |
|
"learning_rate": 3.517839718344311e-05, |
|
"loss": 1.4931, |
|
"step": 12152 |
|
}, |
|
{ |
|
"epoch": 0.40164177628325587, |
|
"grad_norm": 0.8421558141708374, |
|
"learning_rate": 3.510163307656086e-05, |
|
"loss": 1.4967, |
|
"step": 12183 |
|
}, |
|
{ |
|
"epoch": 0.402663765535885, |
|
"grad_norm": 0.8428821563720703, |
|
"learning_rate": 3.5024754974122324e-05, |
|
"loss": 1.5042, |
|
"step": 12214 |
|
}, |
|
{ |
|
"epoch": 0.40368575478851415, |
|
"grad_norm": 0.8397454023361206, |
|
"learning_rate": 3.494776374368643e-05, |
|
"loss": 1.4987, |
|
"step": 12245 |
|
}, |
|
{ |
|
"epoch": 0.4047077440411433, |
|
"grad_norm": 0.8511505126953125, |
|
"learning_rate": 3.4870660254088724e-05, |
|
"loss": 1.5062, |
|
"step": 12276 |
|
}, |
|
{ |
|
"epoch": 0.40572973329377243, |
|
"grad_norm": 0.8236075639724731, |
|
"learning_rate": 3.479344537543164e-05, |
|
"loss": 1.4907, |
|
"step": 12307 |
|
}, |
|
{ |
|
"epoch": 0.40675172254640163, |
|
"grad_norm": 0.8540985584259033, |
|
"learning_rate": 3.4716119979074565e-05, |
|
"loss": 1.4898, |
|
"step": 12338 |
|
}, |
|
{ |
|
"epoch": 0.40777371179903077, |
|
"grad_norm": 0.8223760724067688, |
|
"learning_rate": 3.463868493762412e-05, |
|
"loss": 1.4794, |
|
"step": 12369 |
|
}, |
|
{ |
|
"epoch": 0.4087957010516599, |
|
"grad_norm": 0.8553142547607422, |
|
"learning_rate": 3.456114112492418e-05, |
|
"loss": 1.5021, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.40981769030428905, |
|
"grad_norm": 0.83010333776474, |
|
"learning_rate": 3.4483489416046164e-05, |
|
"loss": 1.4776, |
|
"step": 12431 |
|
}, |
|
{ |
|
"epoch": 0.4108396795569182, |
|
"grad_norm": 0.8293822407722473, |
|
"learning_rate": 3.440573068727905e-05, |
|
"loss": 1.4883, |
|
"step": 12462 |
|
}, |
|
{ |
|
"epoch": 0.41186166880954733, |
|
"grad_norm": 0.8343706727027893, |
|
"learning_rate": 3.4327865816119495e-05, |
|
"loss": 1.4919, |
|
"step": 12493 |
|
}, |
|
{ |
|
"epoch": 0.41288365806217653, |
|
"grad_norm": 0.8471996784210205, |
|
"learning_rate": 3.4249895681262025e-05, |
|
"loss": 1.4999, |
|
"step": 12524 |
|
}, |
|
{ |
|
"epoch": 0.41390564731480567, |
|
"grad_norm": 0.8553738594055176, |
|
"learning_rate": 3.417182116258899e-05, |
|
"loss": 1.4816, |
|
"step": 12555 |
|
}, |
|
{ |
|
"epoch": 0.4149276365674348, |
|
"grad_norm": 0.8174490332603455, |
|
"learning_rate": 3.409364314116074e-05, |
|
"loss": 1.471, |
|
"step": 12586 |
|
}, |
|
{ |
|
"epoch": 0.41594962582006395, |
|
"grad_norm": 0.797466516494751, |
|
"learning_rate": 3.401536249920559e-05, |
|
"loss": 1.4922, |
|
"step": 12617 |
|
}, |
|
{ |
|
"epoch": 0.4169716150726931, |
|
"grad_norm": 0.8381075263023376, |
|
"learning_rate": 3.393698012010998e-05, |
|
"loss": 1.4955, |
|
"step": 12648 |
|
}, |
|
{ |
|
"epoch": 0.41799360432532223, |
|
"grad_norm": 0.8211097717285156, |
|
"learning_rate": 3.385849688840839e-05, |
|
"loss": 1.5054, |
|
"step": 12679 |
|
}, |
|
{ |
|
"epoch": 0.41901559357795143, |
|
"grad_norm": 0.8614106178283691, |
|
"learning_rate": 3.3779913689773414e-05, |
|
"loss": 1.4906, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 0.42003758283058057, |
|
"grad_norm": 0.8300454616546631, |
|
"learning_rate": 3.370123141100578e-05, |
|
"loss": 1.5008, |
|
"step": 12741 |
|
}, |
|
{ |
|
"epoch": 0.4210595720832097, |
|
"grad_norm": 0.8297508955001831, |
|
"learning_rate": 3.3622450940024305e-05, |
|
"loss": 1.4885, |
|
"step": 12772 |
|
}, |
|
{ |
|
"epoch": 0.42208156133583885, |
|
"grad_norm": 0.8315346837043762, |
|
"learning_rate": 3.35435731658559e-05, |
|
"loss": 1.4839, |
|
"step": 12803 |
|
}, |
|
{ |
|
"epoch": 0.423103550588468, |
|
"grad_norm": 0.8035879135131836, |
|
"learning_rate": 3.346459897862552e-05, |
|
"loss": 1.4849, |
|
"step": 12834 |
|
}, |
|
{ |
|
"epoch": 0.42412553984109713, |
|
"grad_norm": 0.8297046422958374, |
|
"learning_rate": 3.338552926954613e-05, |
|
"loss": 1.4874, |
|
"step": 12865 |
|
}, |
|
{ |
|
"epoch": 0.42514752909372633, |
|
"grad_norm": 0.8357899188995361, |
|
"learning_rate": 3.330636493090868e-05, |
|
"loss": 1.4894, |
|
"step": 12896 |
|
}, |
|
{ |
|
"epoch": 0.42616951834635547, |
|
"grad_norm": 0.8307090997695923, |
|
"learning_rate": 3.322710685607193e-05, |
|
"loss": 1.4906, |
|
"step": 12927 |
|
}, |
|
{ |
|
"epoch": 0.4271915075989846, |
|
"grad_norm": 0.8082265853881836, |
|
"learning_rate": 3.314775593945251e-05, |
|
"loss": 1.4879, |
|
"step": 12958 |
|
}, |
|
{ |
|
"epoch": 0.42821349685161375, |
|
"grad_norm": 0.8400994539260864, |
|
"learning_rate": 3.3068313076514714e-05, |
|
"loss": 1.4902, |
|
"step": 12989 |
|
}, |
|
{ |
|
"epoch": 0.4292354861042429, |
|
"grad_norm": 0.8030775785446167, |
|
"learning_rate": 3.298877916376047e-05, |
|
"loss": 1.4844, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.43025747535687203, |
|
"grad_norm": 0.8701795935630798, |
|
"learning_rate": 3.290915509871915e-05, |
|
"loss": 1.4803, |
|
"step": 13051 |
|
}, |
|
{ |
|
"epoch": 0.43127946460950123, |
|
"grad_norm": 0.8453795313835144, |
|
"learning_rate": 3.282944177993753e-05, |
|
"loss": 1.4841, |
|
"step": 13082 |
|
}, |
|
{ |
|
"epoch": 0.43230145386213037, |
|
"grad_norm": 0.8204758167266846, |
|
"learning_rate": 3.274964010696957e-05, |
|
"loss": 1.4879, |
|
"step": 13113 |
|
}, |
|
{ |
|
"epoch": 0.4333234431147595, |
|
"grad_norm": 0.8047789931297302, |
|
"learning_rate": 3.266975098036629e-05, |
|
"loss": 1.4869, |
|
"step": 13144 |
|
}, |
|
{ |
|
"epoch": 0.43434543236738865, |
|
"grad_norm": 0.8196751475334167, |
|
"learning_rate": 3.258977530166562e-05, |
|
"loss": 1.487, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 0.4353674216200178, |
|
"grad_norm": 0.8294420838356018, |
|
"learning_rate": 3.250971397338227e-05, |
|
"loss": 1.4876, |
|
"step": 13206 |
|
}, |
|
{ |
|
"epoch": 0.43638941087264693, |
|
"grad_norm": 0.7943994998931885, |
|
"learning_rate": 3.2429567898997404e-05, |
|
"loss": 1.4601, |
|
"step": 13237 |
|
}, |
|
{ |
|
"epoch": 0.43741140012527613, |
|
"grad_norm": 0.8354049921035767, |
|
"learning_rate": 3.234933798294859e-05, |
|
"loss": 1.4914, |
|
"step": 13268 |
|
}, |
|
{ |
|
"epoch": 0.43843338937790527, |
|
"grad_norm": 0.8477530479431152, |
|
"learning_rate": 3.2269025130619535e-05, |
|
"loss": 1.4559, |
|
"step": 13299 |
|
}, |
|
{ |
|
"epoch": 0.4394553786305344, |
|
"grad_norm": 0.8300078511238098, |
|
"learning_rate": 3.218863024832985e-05, |
|
"loss": 1.4727, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 0.44047736788316355, |
|
"grad_norm": 0.83632892370224, |
|
"learning_rate": 3.2108154243324864e-05, |
|
"loss": 1.4822, |
|
"step": 13361 |
|
}, |
|
{ |
|
"epoch": 0.4414993571357927, |
|
"grad_norm": 0.8274350166320801, |
|
"learning_rate": 3.2027598023765345e-05, |
|
"loss": 1.482, |
|
"step": 13392 |
|
}, |
|
{ |
|
"epoch": 0.44252134638842183, |
|
"grad_norm": 0.8118026852607727, |
|
"learning_rate": 3.194696249871729e-05, |
|
"loss": 1.4947, |
|
"step": 13423 |
|
}, |
|
{ |
|
"epoch": 0.443543335641051, |
|
"grad_norm": 0.7988345623016357, |
|
"learning_rate": 3.186624857814164e-05, |
|
"loss": 1.4634, |
|
"step": 13454 |
|
}, |
|
{ |
|
"epoch": 0.44456532489368017, |
|
"grad_norm": 0.8391137719154358, |
|
"learning_rate": 3.178545717288401e-05, |
|
"loss": 1.4715, |
|
"step": 13485 |
|
}, |
|
{ |
|
"epoch": 0.4455873141463093, |
|
"grad_norm": 0.8533878326416016, |
|
"learning_rate": 3.170458919466444e-05, |
|
"loss": 1.4667, |
|
"step": 13516 |
|
}, |
|
{ |
|
"epoch": 0.44660930339893845, |
|
"grad_norm": 0.8210632801055908, |
|
"learning_rate": 3.1623645556067063e-05, |
|
"loss": 1.475, |
|
"step": 13547 |
|
}, |
|
{ |
|
"epoch": 0.4476312926515676, |
|
"grad_norm": 0.8421019911766052, |
|
"learning_rate": 3.154262717052985e-05, |
|
"loss": 1.4814, |
|
"step": 13578 |
|
}, |
|
{ |
|
"epoch": 0.44865328190419673, |
|
"grad_norm": 0.8044966459274292, |
|
"learning_rate": 3.146153495233426e-05, |
|
"loss": 1.4739, |
|
"step": 13609 |
|
}, |
|
{ |
|
"epoch": 0.4496752711568259, |
|
"grad_norm": 0.8291748762130737, |
|
"learning_rate": 3.1380369816594944e-05, |
|
"loss": 1.4723, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 0.45069726040945507, |
|
"grad_norm": 0.8362712264060974, |
|
"learning_rate": 3.129913267924946e-05, |
|
"loss": 1.4783, |
|
"step": 13671 |
|
}, |
|
{ |
|
"epoch": 0.4517192496620842, |
|
"grad_norm": 0.809481143951416, |
|
"learning_rate": 3.121782445704782e-05, |
|
"loss": 1.4771, |
|
"step": 13702 |
|
}, |
|
{ |
|
"epoch": 0.45274123891471335, |
|
"grad_norm": 0.8484416007995605, |
|
"learning_rate": 3.11364460675423e-05, |
|
"loss": 1.4615, |
|
"step": 13733 |
|
}, |
|
{ |
|
"epoch": 0.4537632281673425, |
|
"grad_norm": 0.8735551238059998, |
|
"learning_rate": 3.1054998429076934e-05, |
|
"loss": 1.4754, |
|
"step": 13764 |
|
}, |
|
{ |
|
"epoch": 0.45478521741997163, |
|
"grad_norm": 0.8091564774513245, |
|
"learning_rate": 3.097348246077728e-05, |
|
"loss": 1.4853, |
|
"step": 13795 |
|
}, |
|
{ |
|
"epoch": 0.4558072066726008, |
|
"grad_norm": 0.8234131932258606, |
|
"learning_rate": 3.0891899082539924e-05, |
|
"loss": 1.4786, |
|
"step": 13826 |
|
}, |
|
{ |
|
"epoch": 0.45682919592522997, |
|
"grad_norm": 0.8537734746932983, |
|
"learning_rate": 3.0810249215022233e-05, |
|
"loss": 1.4757, |
|
"step": 13857 |
|
}, |
|
{ |
|
"epoch": 0.4578511851778591, |
|
"grad_norm": 0.8478782773017883, |
|
"learning_rate": 3.0728533779631865e-05, |
|
"loss": 1.4621, |
|
"step": 13888 |
|
}, |
|
{ |
|
"epoch": 0.45887317443048825, |
|
"grad_norm": 0.8195151090621948, |
|
"learning_rate": 3.064675369851637e-05, |
|
"loss": 1.4727, |
|
"step": 13919 |
|
}, |
|
{ |
|
"epoch": 0.4598951636831174, |
|
"grad_norm": 0.8208017349243164, |
|
"learning_rate": 3.056490989455289e-05, |
|
"loss": 1.4744, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.46091715293574653, |
|
"grad_norm": 0.7893416285514832, |
|
"learning_rate": 3.0483003291337596e-05, |
|
"loss": 1.4713, |
|
"step": 13981 |
|
}, |
|
{ |
|
"epoch": 0.4619391421883757, |
|
"grad_norm": 2.002336263656616, |
|
"learning_rate": 3.040103481317539e-05, |
|
"loss": 1.4523, |
|
"step": 14012 |
|
}, |
|
{ |
|
"epoch": 0.46296113144100487, |
|
"grad_norm": 0.7943160533905029, |
|
"learning_rate": 3.03190053850694e-05, |
|
"loss": 1.4635, |
|
"step": 14043 |
|
}, |
|
{ |
|
"epoch": 0.463983120693634, |
|
"grad_norm": 0.8262372612953186, |
|
"learning_rate": 3.0236915932710573e-05, |
|
"loss": 1.47, |
|
"step": 14074 |
|
}, |
|
{ |
|
"epoch": 0.46500510994626315, |
|
"grad_norm": 0.8457150459289551, |
|
"learning_rate": 3.0154767382467232e-05, |
|
"loss": 1.4757, |
|
"step": 14105 |
|
}, |
|
{ |
|
"epoch": 0.4660270991988923, |
|
"grad_norm": 0.8377997279167175, |
|
"learning_rate": 3.0072560661374582e-05, |
|
"loss": 1.4752, |
|
"step": 14136 |
|
}, |
|
{ |
|
"epoch": 0.46704908845152143, |
|
"grad_norm": 0.7971871495246887, |
|
"learning_rate": 2.999029669712431e-05, |
|
"loss": 1.4744, |
|
"step": 14167 |
|
}, |
|
{ |
|
"epoch": 0.4680710777041506, |
|
"grad_norm": 0.8239099383354187, |
|
"learning_rate": 2.990797641805408e-05, |
|
"loss": 1.4683, |
|
"step": 14198 |
|
}, |
|
{ |
|
"epoch": 0.46909306695677977, |
|
"grad_norm": 0.8354623317718506, |
|
"learning_rate": 2.982560075313704e-05, |
|
"loss": 1.4859, |
|
"step": 14229 |
|
}, |
|
{ |
|
"epoch": 0.4701150562094089, |
|
"grad_norm": 0.8527148962020874, |
|
"learning_rate": 2.9743170631971368e-05, |
|
"loss": 1.4569, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 0.47113704546203805, |
|
"grad_norm": 0.8640620112419128, |
|
"learning_rate": 2.9660686984769792e-05, |
|
"loss": 1.4711, |
|
"step": 14291 |
|
}, |
|
{ |
|
"epoch": 0.4721590347146672, |
|
"grad_norm": 0.8033869862556458, |
|
"learning_rate": 2.9578150742349047e-05, |
|
"loss": 1.4803, |
|
"step": 14322 |
|
}, |
|
{ |
|
"epoch": 0.47318102396729633, |
|
"grad_norm": 0.8113462924957275, |
|
"learning_rate": 2.949556283611942e-05, |
|
"loss": 1.4644, |
|
"step": 14353 |
|
}, |
|
{ |
|
"epoch": 0.4742030132199255, |
|
"grad_norm": 0.8008654117584229, |
|
"learning_rate": 2.9412924198074206e-05, |
|
"loss": 1.4591, |
|
"step": 14384 |
|
}, |
|
{ |
|
"epoch": 0.47522500247255467, |
|
"grad_norm": 0.8297123312950134, |
|
"learning_rate": 2.9330235760779208e-05, |
|
"loss": 1.4772, |
|
"step": 14415 |
|
}, |
|
{ |
|
"epoch": 0.4762469917251838, |
|
"grad_norm": 0.8336069583892822, |
|
"learning_rate": 2.9247498457362188e-05, |
|
"loss": 1.4735, |
|
"step": 14446 |
|
}, |
|
{ |
|
"epoch": 0.47726898097781295, |
|
"grad_norm": 0.7794061899185181, |
|
"learning_rate": 2.9164713221502373e-05, |
|
"loss": 1.4655, |
|
"step": 14477 |
|
}, |
|
{ |
|
"epoch": 0.4782909702304421, |
|
"grad_norm": 0.8473496437072754, |
|
"learning_rate": 2.9081880987419912e-05, |
|
"loss": 1.4764, |
|
"step": 14508 |
|
}, |
|
{ |
|
"epoch": 0.47931295948307123, |
|
"grad_norm": 0.8163958191871643, |
|
"learning_rate": 2.8999002689865296e-05, |
|
"loss": 1.4735, |
|
"step": 14539 |
|
}, |
|
{ |
|
"epoch": 0.4803349487357004, |
|
"grad_norm": 0.8134874701499939, |
|
"learning_rate": 2.8916079264108852e-05, |
|
"loss": 1.4725, |
|
"step": 14570 |
|
}, |
|
{ |
|
"epoch": 0.48135693798832957, |
|
"grad_norm": 0.788411557674408, |
|
"learning_rate": 2.883311164593017e-05, |
|
"loss": 1.4641, |
|
"step": 14601 |
|
}, |
|
{ |
|
"epoch": 0.4823789272409587, |
|
"grad_norm": 0.8075402975082397, |
|
"learning_rate": 2.875010077160754e-05, |
|
"loss": 1.473, |
|
"step": 14632 |
|
}, |
|
{ |
|
"epoch": 0.48340091649358785, |
|
"grad_norm": 0.8053046464920044, |
|
"learning_rate": 2.866704757790741e-05, |
|
"loss": 1.4642, |
|
"step": 14663 |
|
}, |
|
{ |
|
"epoch": 0.484422905746217, |
|
"grad_norm": 0.8402507901191711, |
|
"learning_rate": 2.858395300207376e-05, |
|
"loss": 1.4768, |
|
"step": 14694 |
|
}, |
|
{ |
|
"epoch": 0.48544489499884613, |
|
"grad_norm": 0.8206636905670166, |
|
"learning_rate": 2.8500817981817607e-05, |
|
"loss": 1.448, |
|
"step": 14725 |
|
}, |
|
{ |
|
"epoch": 0.4864668842514753, |
|
"grad_norm": 0.8397791385650635, |
|
"learning_rate": 2.8417643455306336e-05, |
|
"loss": 1.4611, |
|
"step": 14756 |
|
}, |
|
{ |
|
"epoch": 0.4874888735041044, |
|
"grad_norm": 0.8360273838043213, |
|
"learning_rate": 2.8334430361153185e-05, |
|
"loss": 1.4681, |
|
"step": 14787 |
|
}, |
|
{ |
|
"epoch": 0.4885108627567336, |
|
"grad_norm": 0.7953571677207947, |
|
"learning_rate": 2.8251179638406612e-05, |
|
"loss": 1.4749, |
|
"step": 14818 |
|
}, |
|
{ |
|
"epoch": 0.48953285200936275, |
|
"grad_norm": 0.8476656079292297, |
|
"learning_rate": 2.8167892226539704e-05, |
|
"loss": 1.4774, |
|
"step": 14849 |
|
}, |
|
{ |
|
"epoch": 0.4905548412619919, |
|
"grad_norm": 0.7990041375160217, |
|
"learning_rate": 2.8084569065439588e-05, |
|
"loss": 1.4599, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 0.49157683051462103, |
|
"grad_norm": 0.8252399563789368, |
|
"learning_rate": 2.8001211095396807e-05, |
|
"loss": 1.482, |
|
"step": 14911 |
|
}, |
|
{ |
|
"epoch": 0.4925988197672502, |
|
"grad_norm": 0.8177118897438049, |
|
"learning_rate": 2.791781925709473e-05, |
|
"loss": 1.476, |
|
"step": 14942 |
|
}, |
|
{ |
|
"epoch": 0.4936208090198793, |
|
"grad_norm": 0.8130631446838379, |
|
"learning_rate": 2.7834394491598908e-05, |
|
"loss": 1.4584, |
|
"step": 14973 |
|
}, |
|
{ |
|
"epoch": 0.4946427982725085, |
|
"grad_norm": 0.8575690388679504, |
|
"learning_rate": 2.7750937740346485e-05, |
|
"loss": 1.4567, |
|
"step": 15004 |
|
}, |
|
{ |
|
"epoch": 0.49566478752513765, |
|
"grad_norm": 0.8180646300315857, |
|
"learning_rate": 2.7667449945135564e-05, |
|
"loss": 1.4838, |
|
"step": 15035 |
|
}, |
|
{ |
|
"epoch": 0.4966867767777668, |
|
"grad_norm": 0.8085652589797974, |
|
"learning_rate": 2.7583932048114557e-05, |
|
"loss": 1.4473, |
|
"step": 15066 |
|
}, |
|
{ |
|
"epoch": 0.49770876603039593, |
|
"grad_norm": 0.7771590352058411, |
|
"learning_rate": 2.7500384991771587e-05, |
|
"loss": 1.4613, |
|
"step": 15097 |
|
}, |
|
{ |
|
"epoch": 0.4987307552830251, |
|
"grad_norm": 0.8312346339225769, |
|
"learning_rate": 2.7416809718923825e-05, |
|
"loss": 1.4561, |
|
"step": 15128 |
|
}, |
|
{ |
|
"epoch": 0.4997527445356542, |
|
"grad_norm": 0.8133281469345093, |
|
"learning_rate": 2.7333207172706864e-05, |
|
"loss": 1.4617, |
|
"step": 15159 |
|
}, |
|
{ |
|
"epoch": 0.5007747337882834, |
|
"grad_norm": 0.8043552041053772, |
|
"learning_rate": 2.7249578296564088e-05, |
|
"loss": 1.4814, |
|
"step": 15190 |
|
}, |
|
{ |
|
"epoch": 0.5017967230409125, |
|
"grad_norm": 0.8398190140724182, |
|
"learning_rate": 2.7165924034235973e-05, |
|
"loss": 1.4604, |
|
"step": 15221 |
|
}, |
|
{ |
|
"epoch": 0.5028187122935417, |
|
"grad_norm": 0.8101472854614258, |
|
"learning_rate": 2.708224532974953e-05, |
|
"loss": 1.4655, |
|
"step": 15252 |
|
}, |
|
{ |
|
"epoch": 0.5038407015461709, |
|
"grad_norm": 0.8169765472412109, |
|
"learning_rate": 2.6998543127407538e-05, |
|
"loss": 1.4649, |
|
"step": 15283 |
|
}, |
|
{ |
|
"epoch": 0.5048626907988, |
|
"grad_norm": 0.8130091428756714, |
|
"learning_rate": 2.6914818371777988e-05, |
|
"loss": 1.4532, |
|
"step": 15314 |
|
}, |
|
{ |
|
"epoch": 0.5058846800514292, |
|
"grad_norm": 0.8258066177368164, |
|
"learning_rate": 2.6831072007683373e-05, |
|
"loss": 1.4645, |
|
"step": 15345 |
|
}, |
|
{ |
|
"epoch": 0.5069066693040583, |
|
"grad_norm": 0.8129923939704895, |
|
"learning_rate": 2.6747304980190018e-05, |
|
"loss": 1.4486, |
|
"step": 15376 |
|
}, |
|
{ |
|
"epoch": 0.5079286585566875, |
|
"grad_norm": 0.8236402273178101, |
|
"learning_rate": 2.6663518234597453e-05, |
|
"loss": 1.4655, |
|
"step": 15407 |
|
}, |
|
{ |
|
"epoch": 0.5089506478093165, |
|
"grad_norm": 0.8337636590003967, |
|
"learning_rate": 2.6579712716427696e-05, |
|
"loss": 1.4617, |
|
"step": 15438 |
|
}, |
|
{ |
|
"epoch": 0.5099726370619457, |
|
"grad_norm": 0.8363469839096069, |
|
"learning_rate": 2.6495889371414652e-05, |
|
"loss": 1.4635, |
|
"step": 15469 |
|
}, |
|
{ |
|
"epoch": 0.5109946263145749, |
|
"grad_norm": 0.8415119647979736, |
|
"learning_rate": 2.6412049145493367e-05, |
|
"loss": 1.4664, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.512016615567204, |
|
"grad_norm": 0.8207569718360901, |
|
"learning_rate": 2.632819298478939e-05, |
|
"loss": 1.4513, |
|
"step": 15531 |
|
}, |
|
{ |
|
"epoch": 0.5130386048198332, |
|
"grad_norm": 0.840300440788269, |
|
"learning_rate": 2.6244321835608105e-05, |
|
"loss": 1.4657, |
|
"step": 15562 |
|
}, |
|
{ |
|
"epoch": 0.5140605940724623, |
|
"grad_norm": 0.8326119780540466, |
|
"learning_rate": 2.6160436644424024e-05, |
|
"loss": 1.454, |
|
"step": 15593 |
|
}, |
|
{ |
|
"epoch": 0.5150825833250915, |
|
"grad_norm": 0.8397566080093384, |
|
"learning_rate": 2.6076538357870133e-05, |
|
"loss": 1.4827, |
|
"step": 15624 |
|
}, |
|
{ |
|
"epoch": 0.5161045725777207, |
|
"grad_norm": 0.8247369527816772, |
|
"learning_rate": 2.5992627922727196e-05, |
|
"loss": 1.4503, |
|
"step": 15655 |
|
}, |
|
{ |
|
"epoch": 0.5171265618303498, |
|
"grad_norm": 0.7943838238716125, |
|
"learning_rate": 2.5908706285913066e-05, |
|
"loss": 1.463, |
|
"step": 15686 |
|
}, |
|
{ |
|
"epoch": 0.518148551082979, |
|
"grad_norm": 0.8546155095100403, |
|
"learning_rate": 2.5824774394472008e-05, |
|
"loss": 1.4511, |
|
"step": 15717 |
|
}, |
|
{ |
|
"epoch": 0.519170540335608, |
|
"grad_norm": 0.8017929792404175, |
|
"learning_rate": 2.5740833195563996e-05, |
|
"loss": 1.4583, |
|
"step": 15748 |
|
}, |
|
{ |
|
"epoch": 0.5201925295882373, |
|
"grad_norm": 0.8227274417877197, |
|
"learning_rate": 2.5656883636454067e-05, |
|
"loss": 1.4633, |
|
"step": 15779 |
|
}, |
|
{ |
|
"epoch": 0.5212145188408663, |
|
"grad_norm": 0.8150655031204224, |
|
"learning_rate": 2.557292666450159e-05, |
|
"loss": 1.4549, |
|
"step": 15810 |
|
}, |
|
{ |
|
"epoch": 0.5222365080934955, |
|
"grad_norm": 0.8417273163795471, |
|
"learning_rate": 2.5488963227149566e-05, |
|
"loss": 1.4525, |
|
"step": 15841 |
|
}, |
|
{ |
|
"epoch": 0.5232584973461247, |
|
"grad_norm": 0.8203164935112, |
|
"learning_rate": 2.5404994271913983e-05, |
|
"loss": 1.4664, |
|
"step": 15872 |
|
}, |
|
{ |
|
"epoch": 0.5242804865987538, |
|
"grad_norm": 0.781090497970581, |
|
"learning_rate": 2.5321020746373085e-05, |
|
"loss": 1.4529, |
|
"step": 15903 |
|
}, |
|
{ |
|
"epoch": 0.525302475851383, |
|
"grad_norm": 0.7886222004890442, |
|
"learning_rate": 2.52370435981567e-05, |
|
"loss": 1.449, |
|
"step": 15934 |
|
}, |
|
{ |
|
"epoch": 0.5263244651040121, |
|
"grad_norm": 0.8068331480026245, |
|
"learning_rate": 2.5153063774935533e-05, |
|
"loss": 1.4468, |
|
"step": 15965 |
|
}, |
|
{ |
|
"epoch": 0.5273464543566413, |
|
"grad_norm": 0.8176882863044739, |
|
"learning_rate": 2.506908222441045e-05, |
|
"loss": 1.4431, |
|
"step": 15996 |
|
}, |
|
{ |
|
"epoch": 0.5283684436092704, |
|
"grad_norm": 0.8044219017028809, |
|
"learning_rate": 2.498509989430187e-05, |
|
"loss": 1.4565, |
|
"step": 16027 |
|
}, |
|
{ |
|
"epoch": 0.5293904328618996, |
|
"grad_norm": 0.8286380767822266, |
|
"learning_rate": 2.4901117732338958e-05, |
|
"loss": 1.438, |
|
"step": 16058 |
|
}, |
|
{ |
|
"epoch": 0.5304124221145288, |
|
"grad_norm": 0.7985462546348572, |
|
"learning_rate": 2.481713668624899e-05, |
|
"loss": 1.4596, |
|
"step": 16089 |
|
}, |
|
{ |
|
"epoch": 0.5314344113671579, |
|
"grad_norm": 0.8129749894142151, |
|
"learning_rate": 2.4733157703746663e-05, |
|
"loss": 1.4643, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 0.532456400619787, |
|
"grad_norm": 0.818228542804718, |
|
"learning_rate": 2.4649181732523392e-05, |
|
"loss": 1.459, |
|
"step": 16151 |
|
}, |
|
{ |
|
"epoch": 0.5334783898724161, |
|
"grad_norm": 0.8294692039489746, |
|
"learning_rate": 2.4565209720236582e-05, |
|
"loss": 1.4608, |
|
"step": 16182 |
|
}, |
|
{ |
|
"epoch": 0.5345003791250453, |
|
"grad_norm": 0.8209260106086731, |
|
"learning_rate": 2.4481242614498975e-05, |
|
"loss": 1.4615, |
|
"step": 16213 |
|
}, |
|
{ |
|
"epoch": 0.5355223683776745, |
|
"grad_norm": 0.8328977227210999, |
|
"learning_rate": 2.439728136286796e-05, |
|
"loss": 1.4407, |
|
"step": 16244 |
|
}, |
|
{ |
|
"epoch": 0.5365443576303036, |
|
"grad_norm": 0.8058875799179077, |
|
"learning_rate": 2.4313326912834852e-05, |
|
"loss": 1.4548, |
|
"step": 16275 |
|
}, |
|
{ |
|
"epoch": 0.5375663468829328, |
|
"grad_norm": 0.8151506185531616, |
|
"learning_rate": 2.4229380211814206e-05, |
|
"loss": 1.4673, |
|
"step": 16306 |
|
}, |
|
{ |
|
"epoch": 0.5385883361355619, |
|
"grad_norm": 0.7918756008148193, |
|
"learning_rate": 2.4145442207133124e-05, |
|
"loss": 1.4552, |
|
"step": 16337 |
|
}, |
|
{ |
|
"epoch": 0.5396103253881911, |
|
"grad_norm": 0.8043615818023682, |
|
"learning_rate": 2.406151384602059e-05, |
|
"loss": 1.4384, |
|
"step": 16368 |
|
}, |
|
{ |
|
"epoch": 0.5406323146408202, |
|
"grad_norm": 0.7996934652328491, |
|
"learning_rate": 2.3977596075596747e-05, |
|
"loss": 1.4498, |
|
"step": 16399 |
|
}, |
|
{ |
|
"epoch": 0.5416543038934494, |
|
"grad_norm": 0.8233758211135864, |
|
"learning_rate": 2.3893689842862223e-05, |
|
"loss": 1.4576, |
|
"step": 16430 |
|
}, |
|
{ |
|
"epoch": 0.5426762931460786, |
|
"grad_norm": 0.8136016726493835, |
|
"learning_rate": 2.3809796094687475e-05, |
|
"loss": 1.4599, |
|
"step": 16461 |
|
}, |
|
{ |
|
"epoch": 0.5436982823987077, |
|
"grad_norm": 0.8084482550621033, |
|
"learning_rate": 2.372591577780202e-05, |
|
"loss": 1.4462, |
|
"step": 16492 |
|
}, |
|
{ |
|
"epoch": 0.5447202716513369, |
|
"grad_norm": 0.8046656847000122, |
|
"learning_rate": 2.3642049838783838e-05, |
|
"loss": 1.4618, |
|
"step": 16523 |
|
}, |
|
{ |
|
"epoch": 0.5457422609039659, |
|
"grad_norm": 0.8591692447662354, |
|
"learning_rate": 2.3558199224048666e-05, |
|
"loss": 1.4407, |
|
"step": 16554 |
|
}, |
|
{ |
|
"epoch": 0.5467642501565951, |
|
"grad_norm": 0.8005566000938416, |
|
"learning_rate": 2.347436487983929e-05, |
|
"loss": 1.4473, |
|
"step": 16585 |
|
}, |
|
{ |
|
"epoch": 0.5477862394092243, |
|
"grad_norm": 0.8062754273414612, |
|
"learning_rate": 2.3390547752214888e-05, |
|
"loss": 1.4475, |
|
"step": 16616 |
|
}, |
|
{ |
|
"epoch": 0.5488082286618534, |
|
"grad_norm": 0.7946265935897827, |
|
"learning_rate": 2.330674878704035e-05, |
|
"loss": 1.4386, |
|
"step": 16647 |
|
}, |
|
{ |
|
"epoch": 0.5498302179144826, |
|
"grad_norm": 0.7979186177253723, |
|
"learning_rate": 2.322296892997561e-05, |
|
"loss": 1.4294, |
|
"step": 16678 |
|
}, |
|
{ |
|
"epoch": 0.5508522071671117, |
|
"grad_norm": 0.8142803907394409, |
|
"learning_rate": 2.313920912646497e-05, |
|
"loss": 1.4448, |
|
"step": 16709 |
|
}, |
|
{ |
|
"epoch": 0.5518741964197409, |
|
"grad_norm": 0.8202729821205139, |
|
"learning_rate": 2.305547032172643e-05, |
|
"loss": 1.4665, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 0.55289618567237, |
|
"grad_norm": 0.7969973087310791, |
|
"learning_rate": 2.2971753460741014e-05, |
|
"loss": 1.4441, |
|
"step": 16771 |
|
}, |
|
{ |
|
"epoch": 0.5539181749249992, |
|
"grad_norm": 0.7817745208740234, |
|
"learning_rate": 2.288805948824212e-05, |
|
"loss": 1.4618, |
|
"step": 16802 |
|
}, |
|
{ |
|
"epoch": 0.5549401641776284, |
|
"grad_norm": 0.8136980533599854, |
|
"learning_rate": 2.2804389348704858e-05, |
|
"loss": 1.44, |
|
"step": 16833 |
|
}, |
|
{ |
|
"epoch": 0.5559621534302575, |
|
"grad_norm": 0.8196117281913757, |
|
"learning_rate": 2.2720743986335374e-05, |
|
"loss": 1.4387, |
|
"step": 16864 |
|
}, |
|
{ |
|
"epoch": 0.5569841426828867, |
|
"grad_norm": 0.8157840371131897, |
|
"learning_rate": 2.2637124345060233e-05, |
|
"loss": 1.4549, |
|
"step": 16895 |
|
}, |
|
{ |
|
"epoch": 0.5580061319355157, |
|
"grad_norm": 0.8260380029678345, |
|
"learning_rate": 2.2553531368515695e-05, |
|
"loss": 1.4694, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 0.5590281211881449, |
|
"grad_norm": 0.8016270399093628, |
|
"learning_rate": 2.2469966000037144e-05, |
|
"loss": 1.4579, |
|
"step": 16957 |
|
}, |
|
{ |
|
"epoch": 0.5600501104407741, |
|
"grad_norm": 0.816955029964447, |
|
"learning_rate": 2.2386429182648417e-05, |
|
"loss": 1.4503, |
|
"step": 16988 |
|
}, |
|
{ |
|
"epoch": 0.5610720996934032, |
|
"grad_norm": 0.7901566624641418, |
|
"learning_rate": 2.230292185905114e-05, |
|
"loss": 1.446, |
|
"step": 17019 |
|
}, |
|
{ |
|
"epoch": 0.5620940889460324, |
|
"grad_norm": 0.8029798865318298, |
|
"learning_rate": 2.2219444971614116e-05, |
|
"loss": 1.4631, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.5631160781986615, |
|
"grad_norm": 0.8106251358985901, |
|
"learning_rate": 2.2135999462362655e-05, |
|
"loss": 1.4553, |
|
"step": 17081 |
|
}, |
|
{ |
|
"epoch": 0.5641380674512907, |
|
"grad_norm": 0.8000419735908508, |
|
"learning_rate": 2.2052586272968003e-05, |
|
"loss": 1.4503, |
|
"step": 17112 |
|
}, |
|
{ |
|
"epoch": 0.5651600567039198, |
|
"grad_norm": 0.7883618474006653, |
|
"learning_rate": 2.196920634473666e-05, |
|
"loss": 1.446, |
|
"step": 17143 |
|
}, |
|
{ |
|
"epoch": 0.566182045956549, |
|
"grad_norm": 0.7922521829605103, |
|
"learning_rate": 2.1885860618599787e-05, |
|
"loss": 1.4349, |
|
"step": 17174 |
|
}, |
|
{ |
|
"epoch": 0.5672040352091782, |
|
"grad_norm": 0.8302006721496582, |
|
"learning_rate": 2.1802550035102577e-05, |
|
"loss": 1.4421, |
|
"step": 17205 |
|
}, |
|
{ |
|
"epoch": 0.5682260244618073, |
|
"grad_norm": 0.8268263339996338, |
|
"learning_rate": 2.171927553439363e-05, |
|
"loss": 1.4415, |
|
"step": 17236 |
|
}, |
|
{ |
|
"epoch": 0.5692480137144365, |
|
"grad_norm": 0.815686047077179, |
|
"learning_rate": 2.1636038056214376e-05, |
|
"loss": 1.4394, |
|
"step": 17267 |
|
}, |
|
{ |
|
"epoch": 0.5702700029670655, |
|
"grad_norm": 0.8233394622802734, |
|
"learning_rate": 2.155283853988844e-05, |
|
"loss": 1.4607, |
|
"step": 17298 |
|
}, |
|
{ |
|
"epoch": 0.5712919922196947, |
|
"grad_norm": 0.8064000010490417, |
|
"learning_rate": 2.146967792431106e-05, |
|
"loss": 1.448, |
|
"step": 17329 |
|
}, |
|
{ |
|
"epoch": 0.5723139814723238, |
|
"grad_norm": 0.8143057823181152, |
|
"learning_rate": 2.138655714793849e-05, |
|
"loss": 1.4253, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 0.573335970724953, |
|
"grad_norm": 0.8112401366233826, |
|
"learning_rate": 2.1303477148777367e-05, |
|
"loss": 1.4321, |
|
"step": 17391 |
|
}, |
|
{ |
|
"epoch": 0.5743579599775822, |
|
"grad_norm": 0.8024718761444092, |
|
"learning_rate": 2.122043886437421e-05, |
|
"loss": 1.445, |
|
"step": 17422 |
|
}, |
|
{ |
|
"epoch": 0.5753799492302113, |
|
"grad_norm": 0.8294973373413086, |
|
"learning_rate": 2.1137443231804765e-05, |
|
"loss": 1.4713, |
|
"step": 17453 |
|
}, |
|
{ |
|
"epoch": 0.5764019384828405, |
|
"grad_norm": 0.8058011531829834, |
|
"learning_rate": 2.105449118766347e-05, |
|
"loss": 1.4374, |
|
"step": 17484 |
|
}, |
|
{ |
|
"epoch": 0.5774239277354696, |
|
"grad_norm": 0.8186344504356384, |
|
"learning_rate": 2.097158366805287e-05, |
|
"loss": 1.4488, |
|
"step": 17515 |
|
}, |
|
{ |
|
"epoch": 0.5784459169880988, |
|
"grad_norm": 0.8200953602790833, |
|
"learning_rate": 2.0888721608573047e-05, |
|
"loss": 1.4464, |
|
"step": 17546 |
|
}, |
|
{ |
|
"epoch": 0.579467906240728, |
|
"grad_norm": 0.8406069278717041, |
|
"learning_rate": 2.0805905944311087e-05, |
|
"loss": 1.4484, |
|
"step": 17577 |
|
}, |
|
{ |
|
"epoch": 0.580489895493357, |
|
"grad_norm": 0.8079460859298706, |
|
"learning_rate": 2.0723137609830497e-05, |
|
"loss": 1.4437, |
|
"step": 17608 |
|
}, |
|
{ |
|
"epoch": 0.5815118847459863, |
|
"grad_norm": 0.7628518342971802, |
|
"learning_rate": 2.0640417539160686e-05, |
|
"loss": 1.398, |
|
"step": 17639 |
|
}, |
|
{ |
|
"epoch": 0.5825338739986153, |
|
"grad_norm": 0.8117976188659668, |
|
"learning_rate": 2.0557746665786427e-05, |
|
"loss": 1.4394, |
|
"step": 17670 |
|
}, |
|
{ |
|
"epoch": 0.5835558632512445, |
|
"grad_norm": 0.8591923713684082, |
|
"learning_rate": 2.0475125922637256e-05, |
|
"loss": 1.4379, |
|
"step": 17701 |
|
}, |
|
{ |
|
"epoch": 0.5845778525038736, |
|
"grad_norm": 0.8447268009185791, |
|
"learning_rate": 2.0392556242077047e-05, |
|
"loss": 1.4388, |
|
"step": 17732 |
|
}, |
|
{ |
|
"epoch": 0.5855998417565028, |
|
"grad_norm": 0.8177095055580139, |
|
"learning_rate": 2.031003855589343e-05, |
|
"loss": 1.4258, |
|
"step": 17763 |
|
}, |
|
{ |
|
"epoch": 0.586621831009132, |
|
"grad_norm": 0.8127830028533936, |
|
"learning_rate": 2.022757379528727e-05, |
|
"loss": 1.4361, |
|
"step": 17794 |
|
}, |
|
{ |
|
"epoch": 0.5876438202617611, |
|
"grad_norm": 0.8459563255310059, |
|
"learning_rate": 2.0145162890862184e-05, |
|
"loss": 1.425, |
|
"step": 17825 |
|
}, |
|
{ |
|
"epoch": 0.5886658095143903, |
|
"grad_norm": 0.7994370460510254, |
|
"learning_rate": 2.0062806772614022e-05, |
|
"loss": 1.4407, |
|
"step": 17856 |
|
}, |
|
{ |
|
"epoch": 0.5896877987670194, |
|
"grad_norm": 0.8096714019775391, |
|
"learning_rate": 1.9980506369920392e-05, |
|
"loss": 1.4396, |
|
"step": 17887 |
|
}, |
|
{ |
|
"epoch": 0.5907097880196486, |
|
"grad_norm": 0.760986864566803, |
|
"learning_rate": 1.989826261153015e-05, |
|
"loss": 1.4394, |
|
"step": 17918 |
|
}, |
|
{ |
|
"epoch": 0.5917317772722778, |
|
"grad_norm": 0.7998522520065308, |
|
"learning_rate": 1.9816076425552923e-05, |
|
"loss": 1.4465, |
|
"step": 17949 |
|
}, |
|
{ |
|
"epoch": 0.5927537665249069, |
|
"grad_norm": 0.78706294298172, |
|
"learning_rate": 1.9733948739448676e-05, |
|
"loss": 1.4537, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 0.593775755777536, |
|
"grad_norm": 0.792362630367279, |
|
"learning_rate": 1.9651880480017155e-05, |
|
"loss": 1.4264, |
|
"step": 18011 |
|
}, |
|
{ |
|
"epoch": 0.5947977450301651, |
|
"grad_norm": 0.8106920123100281, |
|
"learning_rate": 1.9569872573387516e-05, |
|
"loss": 1.4326, |
|
"step": 18042 |
|
}, |
|
{ |
|
"epoch": 0.5958197342827943, |
|
"grad_norm": 0.8342007994651794, |
|
"learning_rate": 1.9487925945007854e-05, |
|
"loss": 1.4437, |
|
"step": 18073 |
|
}, |
|
{ |
|
"epoch": 0.5968417235354234, |
|
"grad_norm": 0.8332077860832214, |
|
"learning_rate": 1.9406041519634726e-05, |
|
"loss": 1.4551, |
|
"step": 18104 |
|
}, |
|
{ |
|
"epoch": 0.5978637127880526, |
|
"grad_norm": 0.7965781092643738, |
|
"learning_rate": 1.932422022132275e-05, |
|
"loss": 1.4367, |
|
"step": 18135 |
|
}, |
|
{ |
|
"epoch": 0.5988857020406818, |
|
"grad_norm": 0.8394030928611755, |
|
"learning_rate": 1.924246297341414e-05, |
|
"loss": 1.422, |
|
"step": 18166 |
|
}, |
|
{ |
|
"epoch": 0.5999076912933109, |
|
"grad_norm": 0.8186848163604736, |
|
"learning_rate": 1.9160770698528338e-05, |
|
"loss": 1.4482, |
|
"step": 18197 |
|
}, |
|
{ |
|
"epoch": 0.6009296805459401, |
|
"grad_norm": 0.7956410646438599, |
|
"learning_rate": 1.907914431855156e-05, |
|
"loss": 1.4269, |
|
"step": 18228 |
|
}, |
|
{ |
|
"epoch": 0.6019516697985692, |
|
"grad_norm": 0.8348777890205383, |
|
"learning_rate": 1.8997584754626412e-05, |
|
"loss": 1.4342, |
|
"step": 18259 |
|
}, |
|
{ |
|
"epoch": 0.6029736590511984, |
|
"grad_norm": 0.8190683126449585, |
|
"learning_rate": 1.8916092927141486e-05, |
|
"loss": 1.4463, |
|
"step": 18290 |
|
}, |
|
{ |
|
"epoch": 0.6039956483038275, |
|
"grad_norm": 0.7944843769073486, |
|
"learning_rate": 1.883466975572098e-05, |
|
"loss": 1.4435, |
|
"step": 18321 |
|
}, |
|
{ |
|
"epoch": 0.6050176375564567, |
|
"grad_norm": 0.8158681988716125, |
|
"learning_rate": 1.8753316159214312e-05, |
|
"loss": 1.4355, |
|
"step": 18352 |
|
}, |
|
{ |
|
"epoch": 0.6060396268090859, |
|
"grad_norm": 0.8052075505256653, |
|
"learning_rate": 1.8672033055685766e-05, |
|
"loss": 1.4379, |
|
"step": 18383 |
|
}, |
|
{ |
|
"epoch": 0.6070616160617149, |
|
"grad_norm": 0.7949001789093018, |
|
"learning_rate": 1.8590821362404116e-05, |
|
"loss": 1.4289, |
|
"step": 18414 |
|
}, |
|
{ |
|
"epoch": 0.6080836053143441, |
|
"grad_norm": 0.8260155916213989, |
|
"learning_rate": 1.8509681995832294e-05, |
|
"loss": 1.4317, |
|
"step": 18445 |
|
}, |
|
{ |
|
"epoch": 0.6091055945669732, |
|
"grad_norm": 0.8101741075515747, |
|
"learning_rate": 1.8428615871617004e-05, |
|
"loss": 1.4341, |
|
"step": 18476 |
|
}, |
|
{ |
|
"epoch": 0.6101275838196024, |
|
"grad_norm": 0.8563366532325745, |
|
"learning_rate": 1.8347623904578448e-05, |
|
"loss": 1.4432, |
|
"step": 18507 |
|
}, |
|
{ |
|
"epoch": 0.6111495730722316, |
|
"grad_norm": 0.7924138307571411, |
|
"learning_rate": 1.8266707008699975e-05, |
|
"loss": 1.4341, |
|
"step": 18538 |
|
}, |
|
{ |
|
"epoch": 0.6121715623248607, |
|
"grad_norm": 0.7919387221336365, |
|
"learning_rate": 1.818586609711774e-05, |
|
"loss": 1.4613, |
|
"step": 18569 |
|
}, |
|
{ |
|
"epoch": 0.6131935515774899, |
|
"grad_norm": 0.7915093302726746, |
|
"learning_rate": 1.8105102082110462e-05, |
|
"loss": 1.4332, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.614215540830119, |
|
"grad_norm": 0.7878425717353821, |
|
"learning_rate": 1.8024415875089058e-05, |
|
"loss": 1.4372, |
|
"step": 18631 |
|
}, |
|
{ |
|
"epoch": 0.6152375300827482, |
|
"grad_norm": 0.7944186329841614, |
|
"learning_rate": 1.7943808386586407e-05, |
|
"loss": 1.431, |
|
"step": 18662 |
|
}, |
|
{ |
|
"epoch": 0.6162595193353773, |
|
"grad_norm": 0.7792430520057678, |
|
"learning_rate": 1.7863280526247073e-05, |
|
"loss": 1.43, |
|
"step": 18693 |
|
}, |
|
{ |
|
"epoch": 0.6172815085880065, |
|
"grad_norm": 0.838062047958374, |
|
"learning_rate": 1.7782833202817003e-05, |
|
"loss": 1.4377, |
|
"step": 18724 |
|
}, |
|
{ |
|
"epoch": 0.6183034978406357, |
|
"grad_norm": 0.8279454112052917, |
|
"learning_rate": 1.7702467324133327e-05, |
|
"loss": 1.4248, |
|
"step": 18755 |
|
}, |
|
{ |
|
"epoch": 0.6193254870932647, |
|
"grad_norm": 0.8460195064544678, |
|
"learning_rate": 1.7622183797114042e-05, |
|
"loss": 1.4058, |
|
"step": 18786 |
|
}, |
|
{ |
|
"epoch": 0.6203474763458939, |
|
"grad_norm": 0.8065218925476074, |
|
"learning_rate": 1.7541983527747838e-05, |
|
"loss": 1.419, |
|
"step": 18817 |
|
}, |
|
{ |
|
"epoch": 0.621369465598523, |
|
"grad_norm": 0.8114182353019714, |
|
"learning_rate": 1.746186742108387e-05, |
|
"loss": 1.4305, |
|
"step": 18848 |
|
}, |
|
{ |
|
"epoch": 0.6223914548511522, |
|
"grad_norm": 0.8157764673233032, |
|
"learning_rate": 1.73818363812215e-05, |
|
"loss": 1.4362, |
|
"step": 18879 |
|
}, |
|
{ |
|
"epoch": 0.6234134441037814, |
|
"grad_norm": 0.8182953596115112, |
|
"learning_rate": 1.7301891311300153e-05, |
|
"loss": 1.4292, |
|
"step": 18910 |
|
}, |
|
{ |
|
"epoch": 0.6244354333564105, |
|
"grad_norm": 0.8444055914878845, |
|
"learning_rate": 1.7222033113489055e-05, |
|
"loss": 1.4339, |
|
"step": 18941 |
|
}, |
|
{ |
|
"epoch": 0.6254574226090397, |
|
"grad_norm": 0.8451635837554932, |
|
"learning_rate": 1.7142262688977127e-05, |
|
"loss": 1.4272, |
|
"step": 18972 |
|
}, |
|
{ |
|
"epoch": 0.6264794118616688, |
|
"grad_norm": 0.7921797633171082, |
|
"learning_rate": 1.7062580937962764e-05, |
|
"loss": 1.4249, |
|
"step": 19003 |
|
}, |
|
{ |
|
"epoch": 0.627501401114298, |
|
"grad_norm": 0.8066070675849915, |
|
"learning_rate": 1.698298875964369e-05, |
|
"loss": 1.422, |
|
"step": 19034 |
|
}, |
|
{ |
|
"epoch": 0.6285233903669271, |
|
"grad_norm": 0.7984006404876709, |
|
"learning_rate": 1.690348705220684e-05, |
|
"loss": 1.4279, |
|
"step": 19065 |
|
}, |
|
{ |
|
"epoch": 0.6295453796195563, |
|
"grad_norm": 0.8582960367202759, |
|
"learning_rate": 1.6824076712818156e-05, |
|
"loss": 1.4224, |
|
"step": 19096 |
|
}, |
|
{ |
|
"epoch": 0.6305673688721855, |
|
"grad_norm": 0.8283519148826599, |
|
"learning_rate": 1.6744758637612533e-05, |
|
"loss": 1.4509, |
|
"step": 19127 |
|
}, |
|
{ |
|
"epoch": 0.6315893581248145, |
|
"grad_norm": 0.8150811791419983, |
|
"learning_rate": 1.6665533721683664e-05, |
|
"loss": 1.4216, |
|
"step": 19158 |
|
}, |
|
{ |
|
"epoch": 0.6326113473774437, |
|
"grad_norm": 0.8012325167655945, |
|
"learning_rate": 1.6586402859073974e-05, |
|
"loss": 1.4263, |
|
"step": 19189 |
|
}, |
|
{ |
|
"epoch": 0.6336333366300728, |
|
"grad_norm": 0.8255190849304199, |
|
"learning_rate": 1.6507366942764463e-05, |
|
"loss": 1.432, |
|
"step": 19220 |
|
}, |
|
{ |
|
"epoch": 0.634655325882702, |
|
"grad_norm": 0.8162024021148682, |
|
"learning_rate": 1.6428426864664732e-05, |
|
"loss": 1.4308, |
|
"step": 19251 |
|
}, |
|
{ |
|
"epoch": 0.6356773151353312, |
|
"grad_norm": 0.9059862494468689, |
|
"learning_rate": 1.6349583515602816e-05, |
|
"loss": 1.4258, |
|
"step": 19282 |
|
}, |
|
{ |
|
"epoch": 0.6366993043879603, |
|
"grad_norm": 0.8172095417976379, |
|
"learning_rate": 1.6270837785315208e-05, |
|
"loss": 1.426, |
|
"step": 19313 |
|
}, |
|
{ |
|
"epoch": 0.6377212936405895, |
|
"grad_norm": 0.8332613110542297, |
|
"learning_rate": 1.619219056243676e-05, |
|
"loss": 1.4275, |
|
"step": 19344 |
|
}, |
|
{ |
|
"epoch": 0.6387432828932186, |
|
"grad_norm": 0.8458919525146484, |
|
"learning_rate": 1.6113642734490698e-05, |
|
"loss": 1.4133, |
|
"step": 19375 |
|
}, |
|
{ |
|
"epoch": 0.6397652721458478, |
|
"grad_norm": 0.8014411926269531, |
|
"learning_rate": 1.6035195187878577e-05, |
|
"loss": 1.4323, |
|
"step": 19406 |
|
}, |
|
{ |
|
"epoch": 0.6407872613984769, |
|
"grad_norm": 0.8090296387672424, |
|
"learning_rate": 1.5956848807870305e-05, |
|
"loss": 1.4314, |
|
"step": 19437 |
|
}, |
|
{ |
|
"epoch": 0.6418092506511061, |
|
"grad_norm": 0.8195688128471375, |
|
"learning_rate": 1.587860447859413e-05, |
|
"loss": 1.4371, |
|
"step": 19468 |
|
}, |
|
{ |
|
"epoch": 0.6428312399037353, |
|
"grad_norm": 0.7982098460197449, |
|
"learning_rate": 1.5800463083026686e-05, |
|
"loss": 1.4314, |
|
"step": 19499 |
|
}, |
|
{ |
|
"epoch": 0.6438532291563643, |
|
"grad_norm": 0.8345074653625488, |
|
"learning_rate": 1.572242550298298e-05, |
|
"loss": 1.4289, |
|
"step": 19530 |
|
}, |
|
{ |
|
"epoch": 0.6448752184089935, |
|
"grad_norm": 0.8276827335357666, |
|
"learning_rate": 1.56444926191065e-05, |
|
"loss": 1.4358, |
|
"step": 19561 |
|
}, |
|
{ |
|
"epoch": 0.6458972076616226, |
|
"grad_norm": 0.8199188709259033, |
|
"learning_rate": 1.5566665310859257e-05, |
|
"loss": 1.4291, |
|
"step": 19592 |
|
}, |
|
{ |
|
"epoch": 0.6469191969142518, |
|
"grad_norm": 0.8202061057090759, |
|
"learning_rate": 1.5488944456511846e-05, |
|
"loss": 1.4319, |
|
"step": 19623 |
|
}, |
|
{ |
|
"epoch": 0.6479411861668809, |
|
"grad_norm": 0.8072242140769958, |
|
"learning_rate": 1.5411330933133546e-05, |
|
"loss": 1.428, |
|
"step": 19654 |
|
}, |
|
{ |
|
"epoch": 0.6489631754195101, |
|
"grad_norm": 0.8337129354476929, |
|
"learning_rate": 1.533382561658241e-05, |
|
"loss": 1.4317, |
|
"step": 19685 |
|
}, |
|
{ |
|
"epoch": 0.6499851646721393, |
|
"grad_norm": 0.8070532083511353, |
|
"learning_rate": 1.525642938149541e-05, |
|
"loss": 1.4263, |
|
"step": 19716 |
|
}, |
|
{ |
|
"epoch": 0.6510071539247684, |
|
"grad_norm": 0.7998501658439636, |
|
"learning_rate": 1.5179143101278536e-05, |
|
"loss": 1.4231, |
|
"step": 19747 |
|
}, |
|
{ |
|
"epoch": 0.6520291431773976, |
|
"grad_norm": 0.7996141314506531, |
|
"learning_rate": 1.5101967648096955e-05, |
|
"loss": 1.4295, |
|
"step": 19778 |
|
}, |
|
{ |
|
"epoch": 0.6530511324300267, |
|
"grad_norm": 0.8256941437721252, |
|
"learning_rate": 1.5024903892865172e-05, |
|
"loss": 1.4426, |
|
"step": 19809 |
|
}, |
|
{ |
|
"epoch": 0.6540731216826559, |
|
"grad_norm": 0.8443533182144165, |
|
"learning_rate": 1.4947952705237184e-05, |
|
"loss": 1.4206, |
|
"step": 19840 |
|
}, |
|
{ |
|
"epoch": 0.655095110935285, |
|
"grad_norm": 0.8303670287132263, |
|
"learning_rate": 1.4871114953596682e-05, |
|
"loss": 1.4442, |
|
"step": 19871 |
|
}, |
|
{ |
|
"epoch": 0.6561171001879141, |
|
"grad_norm": 0.8437788486480713, |
|
"learning_rate": 1.4794391505047256e-05, |
|
"loss": 1.4321, |
|
"step": 19902 |
|
}, |
|
{ |
|
"epoch": 0.6571390894405433, |
|
"grad_norm": 0.8075599670410156, |
|
"learning_rate": 1.4717783225402596e-05, |
|
"loss": 1.4304, |
|
"step": 19933 |
|
}, |
|
{ |
|
"epoch": 0.6581610786931724, |
|
"grad_norm": 0.8010841608047485, |
|
"learning_rate": 1.4641290979176735e-05, |
|
"loss": 1.4357, |
|
"step": 19964 |
|
}, |
|
{ |
|
"epoch": 0.6591830679458016, |
|
"grad_norm": 0.8034616112709045, |
|
"learning_rate": 1.4564915629574246e-05, |
|
"loss": 1.4194, |
|
"step": 19995 |
|
}, |
|
{ |
|
"epoch": 0.6602050571984307, |
|
"grad_norm": 0.8247554302215576, |
|
"learning_rate": 1.4488658038480601e-05, |
|
"loss": 1.4316, |
|
"step": 20026 |
|
}, |
|
{ |
|
"epoch": 0.6612270464510599, |
|
"grad_norm": 0.8289808034896851, |
|
"learning_rate": 1.4412519066452323e-05, |
|
"loss": 1.4277, |
|
"step": 20057 |
|
}, |
|
{ |
|
"epoch": 0.6622490357036891, |
|
"grad_norm": 0.8197374939918518, |
|
"learning_rate": 1.4336499572707373e-05, |
|
"loss": 1.421, |
|
"step": 20088 |
|
}, |
|
{ |
|
"epoch": 0.6632710249563182, |
|
"grad_norm": 0.832967221736908, |
|
"learning_rate": 1.4260600415115433e-05, |
|
"loss": 1.4165, |
|
"step": 20119 |
|
}, |
|
{ |
|
"epoch": 0.6642930142089474, |
|
"grad_norm": 0.8057476282119751, |
|
"learning_rate": 1.4184822450188137e-05, |
|
"loss": 1.4348, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 0.6653150034615765, |
|
"grad_norm": 0.8722820281982422, |
|
"learning_rate": 1.410916653306954e-05, |
|
"loss": 1.4171, |
|
"step": 20181 |
|
}, |
|
{ |
|
"epoch": 0.6663369927142057, |
|
"grad_norm": 0.8184958696365356, |
|
"learning_rate": 1.403363351752639e-05, |
|
"loss": 1.4232, |
|
"step": 20212 |
|
}, |
|
{ |
|
"epoch": 0.6673589819668349, |
|
"grad_norm": 0.828702449798584, |
|
"learning_rate": 1.3958224255938485e-05, |
|
"loss": 1.4066, |
|
"step": 20243 |
|
}, |
|
{ |
|
"epoch": 0.6683809712194639, |
|
"grad_norm": 0.826805055141449, |
|
"learning_rate": 1.388293959928911e-05, |
|
"loss": 1.4177, |
|
"step": 20274 |
|
}, |
|
{ |
|
"epoch": 0.6694029604720931, |
|
"grad_norm": 0.8016971349716187, |
|
"learning_rate": 1.3807780397155379e-05, |
|
"loss": 1.4298, |
|
"step": 20305 |
|
}, |
|
{ |
|
"epoch": 0.6704249497247222, |
|
"grad_norm": 0.8369885683059692, |
|
"learning_rate": 1.3732747497698655e-05, |
|
"loss": 1.4286, |
|
"step": 20336 |
|
}, |
|
{ |
|
"epoch": 0.6714469389773514, |
|
"grad_norm": 0.8385952115058899, |
|
"learning_rate": 1.3657841747655038e-05, |
|
"loss": 1.4056, |
|
"step": 20367 |
|
}, |
|
{ |
|
"epoch": 0.6724689282299805, |
|
"grad_norm": 0.7958812713623047, |
|
"learning_rate": 1.3583063992325706e-05, |
|
"loss": 1.4153, |
|
"step": 20398 |
|
}, |
|
{ |
|
"epoch": 0.6734909174826097, |
|
"grad_norm": 0.815703809261322, |
|
"learning_rate": 1.3508415075567496e-05, |
|
"loss": 1.4171, |
|
"step": 20429 |
|
}, |
|
{ |
|
"epoch": 0.6745129067352389, |
|
"grad_norm": 0.8329752087593079, |
|
"learning_rate": 1.343389583978327e-05, |
|
"loss": 1.433, |
|
"step": 20460 |
|
}, |
|
{ |
|
"epoch": 0.675534895987868, |
|
"grad_norm": 0.8523135185241699, |
|
"learning_rate": 1.3359507125912468e-05, |
|
"loss": 1.4223, |
|
"step": 20491 |
|
}, |
|
{ |
|
"epoch": 0.6765568852404972, |
|
"grad_norm": 0.835186243057251, |
|
"learning_rate": 1.3285249773421627e-05, |
|
"loss": 1.4194, |
|
"step": 20522 |
|
}, |
|
{ |
|
"epoch": 0.6775788744931263, |
|
"grad_norm": 0.8810424208641052, |
|
"learning_rate": 1.3211124620294884e-05, |
|
"loss": 1.4261, |
|
"step": 20553 |
|
}, |
|
{ |
|
"epoch": 0.6786008637457555, |
|
"grad_norm": 0.841140627861023, |
|
"learning_rate": 1.313713250302451e-05, |
|
"loss": 1.4256, |
|
"step": 20584 |
|
}, |
|
{ |
|
"epoch": 0.6796228529983847, |
|
"grad_norm": 1.0968244075775146, |
|
"learning_rate": 1.3063274256601479e-05, |
|
"loss": 1.4261, |
|
"step": 20615 |
|
}, |
|
{ |
|
"epoch": 0.6806448422510137, |
|
"grad_norm": 0.8303719162940979, |
|
"learning_rate": 1.2989550714506086e-05, |
|
"loss": 1.4134, |
|
"step": 20646 |
|
}, |
|
{ |
|
"epoch": 0.6816668315036429, |
|
"grad_norm": 0.8236658573150635, |
|
"learning_rate": 1.291596270869846e-05, |
|
"loss": 1.4236, |
|
"step": 20677 |
|
}, |
|
{ |
|
"epoch": 0.682688820756272, |
|
"grad_norm": 0.8341703414916992, |
|
"learning_rate": 1.284251106960927e-05, |
|
"loss": 1.4296, |
|
"step": 20708 |
|
}, |
|
{ |
|
"epoch": 0.6837108100089012, |
|
"grad_norm": 0.8321917057037354, |
|
"learning_rate": 1.2769196626130263e-05, |
|
"loss": 1.4403, |
|
"step": 20739 |
|
}, |
|
{ |
|
"epoch": 0.6847327992615303, |
|
"grad_norm": 0.81299889087677, |
|
"learning_rate": 1.2696020205604969e-05, |
|
"loss": 1.4295, |
|
"step": 20770 |
|
}, |
|
{ |
|
"epoch": 0.6857547885141595, |
|
"grad_norm": 0.8226946592330933, |
|
"learning_rate": 1.2622982633819359e-05, |
|
"loss": 1.4248, |
|
"step": 20801 |
|
}, |
|
{ |
|
"epoch": 0.6867767777667887, |
|
"grad_norm": 0.8110581040382385, |
|
"learning_rate": 1.2550084734992484e-05, |
|
"loss": 1.4173, |
|
"step": 20832 |
|
}, |
|
{ |
|
"epoch": 0.6877987670194178, |
|
"grad_norm": 0.8225431442260742, |
|
"learning_rate": 1.247732733176724e-05, |
|
"loss": 1.4294, |
|
"step": 20863 |
|
}, |
|
{ |
|
"epoch": 0.688820756272047, |
|
"grad_norm": 0.8062965273857117, |
|
"learning_rate": 1.2404711245201044e-05, |
|
"loss": 1.4381, |
|
"step": 20894 |
|
}, |
|
{ |
|
"epoch": 0.6898427455246761, |
|
"grad_norm": 0.8408602476119995, |
|
"learning_rate": 1.2332237294756535e-05, |
|
"loss": 1.4233, |
|
"step": 20925 |
|
}, |
|
{ |
|
"epoch": 0.6908647347773053, |
|
"grad_norm": 0.8272174000740051, |
|
"learning_rate": 1.225990629829241e-05, |
|
"loss": 1.4153, |
|
"step": 20956 |
|
}, |
|
{ |
|
"epoch": 0.6918867240299343, |
|
"grad_norm": 0.8655986785888672, |
|
"learning_rate": 1.2187719072054136e-05, |
|
"loss": 1.4211, |
|
"step": 20987 |
|
}, |
|
{ |
|
"epoch": 0.6929087132825635, |
|
"grad_norm": 0.8234033584594727, |
|
"learning_rate": 1.2115676430664735e-05, |
|
"loss": 1.4125, |
|
"step": 21018 |
|
}, |
|
{ |
|
"epoch": 0.6939307025351927, |
|
"grad_norm": 0.812782883644104, |
|
"learning_rate": 1.2043779187115647e-05, |
|
"loss": 1.4384, |
|
"step": 21049 |
|
}, |
|
{ |
|
"epoch": 0.6949526917878218, |
|
"grad_norm": 0.8280140161514282, |
|
"learning_rate": 1.1972028152757476e-05, |
|
"loss": 1.4291, |
|
"step": 21080 |
|
}, |
|
{ |
|
"epoch": 0.695974681040451, |
|
"grad_norm": 0.8197391629219055, |
|
"learning_rate": 1.1900424137290889e-05, |
|
"loss": 1.418, |
|
"step": 21111 |
|
}, |
|
{ |
|
"epoch": 0.6969966702930801, |
|
"grad_norm": 0.8022619485855103, |
|
"learning_rate": 1.1828967948757482e-05, |
|
"loss": 1.421, |
|
"step": 21142 |
|
}, |
|
{ |
|
"epoch": 0.6980186595457093, |
|
"grad_norm": 0.8374767303466797, |
|
"learning_rate": 1.175766039353062e-05, |
|
"loss": 1.4228, |
|
"step": 21173 |
|
}, |
|
{ |
|
"epoch": 0.6990406487983385, |
|
"grad_norm": 0.8427351117134094, |
|
"learning_rate": 1.1686502276306382e-05, |
|
"loss": 1.4456, |
|
"step": 21204 |
|
}, |
|
{ |
|
"epoch": 0.7000626380509676, |
|
"grad_norm": 0.8131812214851379, |
|
"learning_rate": 1.1615494400094445e-05, |
|
"loss": 1.4265, |
|
"step": 21235 |
|
}, |
|
{ |
|
"epoch": 0.7010846273035968, |
|
"grad_norm": 0.7899516224861145, |
|
"learning_rate": 1.1544637566209029e-05, |
|
"loss": 1.4189, |
|
"step": 21266 |
|
}, |
|
{ |
|
"epoch": 0.7021066165562259, |
|
"grad_norm": 0.802736222743988, |
|
"learning_rate": 1.1473932574259886e-05, |
|
"loss": 1.4245, |
|
"step": 21297 |
|
}, |
|
{ |
|
"epoch": 0.7031286058088551, |
|
"grad_norm": 0.825494647026062, |
|
"learning_rate": 1.1403380222143247e-05, |
|
"loss": 1.4246, |
|
"step": 21328 |
|
}, |
|
{ |
|
"epoch": 0.7041505950614841, |
|
"grad_norm": 0.8523460626602173, |
|
"learning_rate": 1.1332981306032808e-05, |
|
"loss": 1.4244, |
|
"step": 21359 |
|
}, |
|
{ |
|
"epoch": 0.7051725843141133, |
|
"grad_norm": 0.7921078205108643, |
|
"learning_rate": 1.1262736620370762e-05, |
|
"loss": 1.4, |
|
"step": 21390 |
|
}, |
|
{ |
|
"epoch": 0.7061945735667425, |
|
"grad_norm": 0.833227813243866, |
|
"learning_rate": 1.1192646957858854e-05, |
|
"loss": 1.4373, |
|
"step": 21421 |
|
}, |
|
{ |
|
"epoch": 0.7072165628193716, |
|
"grad_norm": 0.8187243938446045, |
|
"learning_rate": 1.1122713109449381e-05, |
|
"loss": 1.41, |
|
"step": 21452 |
|
}, |
|
{ |
|
"epoch": 0.7082385520720008, |
|
"grad_norm": 0.8406710028648376, |
|
"learning_rate": 1.105293586433634e-05, |
|
"loss": 1.4207, |
|
"step": 21483 |
|
}, |
|
{ |
|
"epoch": 0.7092605413246299, |
|
"grad_norm": 0.8350772857666016, |
|
"learning_rate": 1.0983316009946446e-05, |
|
"loss": 1.4088, |
|
"step": 21514 |
|
}, |
|
{ |
|
"epoch": 0.7102825305772591, |
|
"grad_norm": 0.8365492820739746, |
|
"learning_rate": 1.0913854331930282e-05, |
|
"loss": 1.4168, |
|
"step": 21545 |
|
}, |
|
{ |
|
"epoch": 0.7113045198298883, |
|
"grad_norm": 0.822426974773407, |
|
"learning_rate": 1.0844551614153456e-05, |
|
"loss": 1.397, |
|
"step": 21576 |
|
}, |
|
{ |
|
"epoch": 0.7123265090825174, |
|
"grad_norm": 0.8318968415260315, |
|
"learning_rate": 1.0775408638687725e-05, |
|
"loss": 1.4036, |
|
"step": 21607 |
|
}, |
|
{ |
|
"epoch": 0.7133484983351466, |
|
"grad_norm": 0.8235137462615967, |
|
"learning_rate": 1.0706426185802165e-05, |
|
"loss": 1.429, |
|
"step": 21638 |
|
}, |
|
{ |
|
"epoch": 0.7143704875877757, |
|
"grad_norm": 0.8110867738723755, |
|
"learning_rate": 1.0637605033954371e-05, |
|
"loss": 1.4002, |
|
"step": 21669 |
|
}, |
|
{ |
|
"epoch": 0.7153924768404049, |
|
"grad_norm": 0.8007574081420898, |
|
"learning_rate": 1.05689459597817e-05, |
|
"loss": 1.4224, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.7164144660930339, |
|
"grad_norm": 0.802810549736023, |
|
"learning_rate": 1.050044973809246e-05, |
|
"loss": 1.4197, |
|
"step": 21731 |
|
}, |
|
{ |
|
"epoch": 0.7174364553456631, |
|
"grad_norm": 0.8145677447319031, |
|
"learning_rate": 1.043211714185722e-05, |
|
"loss": 1.4084, |
|
"step": 21762 |
|
}, |
|
{ |
|
"epoch": 0.7184584445982923, |
|
"grad_norm": 0.8142935037612915, |
|
"learning_rate": 1.036394894220003e-05, |
|
"loss": 1.4101, |
|
"step": 21793 |
|
}, |
|
{ |
|
"epoch": 0.7194804338509214, |
|
"grad_norm": 0.8112974166870117, |
|
"learning_rate": 1.0295945908389751e-05, |
|
"loss": 1.4229, |
|
"step": 21824 |
|
}, |
|
{ |
|
"epoch": 0.7205024231035506, |
|
"grad_norm": 0.8134146332740784, |
|
"learning_rate": 1.0228108807831393e-05, |
|
"loss": 1.4103, |
|
"step": 21855 |
|
}, |
|
{ |
|
"epoch": 0.7215244123561797, |
|
"grad_norm": 0.8121820092201233, |
|
"learning_rate": 1.01604384060574e-05, |
|
"loss": 1.4103, |
|
"step": 21886 |
|
}, |
|
{ |
|
"epoch": 0.7225464016088089, |
|
"grad_norm": 0.8066170811653137, |
|
"learning_rate": 1.009293546671907e-05, |
|
"loss": 1.3994, |
|
"step": 21917 |
|
}, |
|
{ |
|
"epoch": 0.7235683908614381, |
|
"grad_norm": 0.8202908635139465, |
|
"learning_rate": 1.002560075157791e-05, |
|
"loss": 1.4099, |
|
"step": 21948 |
|
}, |
|
{ |
|
"epoch": 0.7245903801140672, |
|
"grad_norm": 0.8023785352706909, |
|
"learning_rate": 9.958435020496995e-06, |
|
"loss": 1.4208, |
|
"step": 21979 |
|
}, |
|
{ |
|
"epoch": 0.7256123693666964, |
|
"grad_norm": 0.8268675804138184, |
|
"learning_rate": 9.89143903143249e-06, |
|
"loss": 1.4068, |
|
"step": 22010 |
|
}, |
|
{ |
|
"epoch": 0.7266343586193255, |
|
"grad_norm": 0.8300354480743408, |
|
"learning_rate": 9.824613540425038e-06, |
|
"loss": 1.4195, |
|
"step": 22041 |
|
}, |
|
{ |
|
"epoch": 0.7276563478719547, |
|
"grad_norm": 0.8289726376533508, |
|
"learning_rate": 9.757959301591197e-06, |
|
"loss": 1.4189, |
|
"step": 22072 |
|
}, |
|
{ |
|
"epoch": 0.7286783371245837, |
|
"grad_norm": 0.8267973065376282, |
|
"learning_rate": 9.691477067115017e-06, |
|
"loss": 1.4047, |
|
"step": 22103 |
|
}, |
|
{ |
|
"epoch": 0.7297003263772129, |
|
"grad_norm": 0.8307402729988098, |
|
"learning_rate": 9.625167587239467e-06, |
|
"loss": 1.4101, |
|
"step": 22134 |
|
}, |
|
{ |
|
"epoch": 0.7307223156298421, |
|
"grad_norm": 0.8467045426368713, |
|
"learning_rate": 9.559031610258007e-06, |
|
"loss": 1.4119, |
|
"step": 22165 |
|
}, |
|
{ |
|
"epoch": 0.7317443048824712, |
|
"grad_norm": 0.842170774936676, |
|
"learning_rate": 9.493069882506164e-06, |
|
"loss": 1.408, |
|
"step": 22196 |
|
}, |
|
{ |
|
"epoch": 0.7327662941351004, |
|
"grad_norm": 0.80977863073349, |
|
"learning_rate": 9.427283148353056e-06, |
|
"loss": 1.4115, |
|
"step": 22227 |
|
}, |
|
{ |
|
"epoch": 0.7337882833877295, |
|
"grad_norm": 0.8142424821853638, |
|
"learning_rate": 9.361672150193052e-06, |
|
"loss": 1.4188, |
|
"step": 22258 |
|
}, |
|
{ |
|
"epoch": 0.7348102726403587, |
|
"grad_norm": 0.8256626725196838, |
|
"learning_rate": 9.29623762843734e-06, |
|
"loss": 1.4097, |
|
"step": 22289 |
|
}, |
|
{ |
|
"epoch": 0.7358322618929878, |
|
"grad_norm": 0.9075655341148376, |
|
"learning_rate": 9.230980321505594e-06, |
|
"loss": 1.412, |
|
"step": 22320 |
|
}, |
|
{ |
|
"epoch": 0.736854251145617, |
|
"grad_norm": 0.8277102708816528, |
|
"learning_rate": 9.165900965817668e-06, |
|
"loss": 1.3973, |
|
"step": 22351 |
|
}, |
|
{ |
|
"epoch": 0.7378762403982462, |
|
"grad_norm": 0.8417026400566101, |
|
"learning_rate": 9.101000295785245e-06, |
|
"loss": 1.4091, |
|
"step": 22382 |
|
}, |
|
{ |
|
"epoch": 0.7388982296508753, |
|
"grad_norm": 0.8252837657928467, |
|
"learning_rate": 9.036279043803565e-06, |
|
"loss": 1.4057, |
|
"step": 22413 |
|
}, |
|
{ |
|
"epoch": 0.7399202189035045, |
|
"grad_norm": 0.8327083587646484, |
|
"learning_rate": 8.971737940243147e-06, |
|
"loss": 1.4044, |
|
"step": 22444 |
|
}, |
|
{ |
|
"epoch": 0.7409422081561335, |
|
"grad_norm": 0.8233001232147217, |
|
"learning_rate": 8.907377713441592e-06, |
|
"loss": 1.4142, |
|
"step": 22475 |
|
}, |
|
{ |
|
"epoch": 0.7419641974087627, |
|
"grad_norm": 0.7973216772079468, |
|
"learning_rate": 8.843199089695293e-06, |
|
"loss": 1.4074, |
|
"step": 22506 |
|
}, |
|
{ |
|
"epoch": 0.7429861866613919, |
|
"grad_norm": 0.8216118812561035, |
|
"learning_rate": 8.779202793251311e-06, |
|
"loss": 1.3943, |
|
"step": 22537 |
|
}, |
|
{ |
|
"epoch": 0.744008175914021, |
|
"grad_norm": 0.8081651329994202, |
|
"learning_rate": 8.715389546299149e-06, |
|
"loss": 1.4008, |
|
"step": 22568 |
|
}, |
|
{ |
|
"epoch": 0.7450301651666502, |
|
"grad_norm": 0.8267298340797424, |
|
"learning_rate": 8.651760068962617e-06, |
|
"loss": 1.4318, |
|
"step": 22599 |
|
}, |
|
{ |
|
"epoch": 0.7460521544192793, |
|
"grad_norm": 0.8131210803985596, |
|
"learning_rate": 8.588315079291733e-06, |
|
"loss": 1.3889, |
|
"step": 22630 |
|
}, |
|
{ |
|
"epoch": 0.7470741436719085, |
|
"grad_norm": 0.8630332350730896, |
|
"learning_rate": 8.52505529325457e-06, |
|
"loss": 1.4196, |
|
"step": 22661 |
|
}, |
|
{ |
|
"epoch": 0.7480961329245376, |
|
"grad_norm": 0.8226453065872192, |
|
"learning_rate": 8.461981424729216e-06, |
|
"loss": 1.4265, |
|
"step": 22692 |
|
}, |
|
{ |
|
"epoch": 0.7491181221771668, |
|
"grad_norm": 0.8107773661613464, |
|
"learning_rate": 8.399094185495725e-06, |
|
"loss": 1.4202, |
|
"step": 22723 |
|
}, |
|
{ |
|
"epoch": 0.750140111429796, |
|
"grad_norm": 0.8276836276054382, |
|
"learning_rate": 8.336394285228017e-06, |
|
"loss": 1.4148, |
|
"step": 22754 |
|
}, |
|
{ |
|
"epoch": 0.7511621006824251, |
|
"grad_norm": 0.823788583278656, |
|
"learning_rate": 8.273882431485952e-06, |
|
"loss": 1.404, |
|
"step": 22785 |
|
}, |
|
{ |
|
"epoch": 0.7521840899350543, |
|
"grad_norm": 0.8203224539756775, |
|
"learning_rate": 8.211559329707316e-06, |
|
"loss": 1.4057, |
|
"step": 22816 |
|
}, |
|
{ |
|
"epoch": 0.7532060791876833, |
|
"grad_norm": 0.8357372283935547, |
|
"learning_rate": 8.149425683199823e-06, |
|
"loss": 1.4132, |
|
"step": 22847 |
|
}, |
|
{ |
|
"epoch": 0.7542280684403125, |
|
"grad_norm": 0.8468825221061707, |
|
"learning_rate": 8.08748219313325e-06, |
|
"loss": 1.4009, |
|
"step": 22878 |
|
}, |
|
{ |
|
"epoch": 0.7552500576929417, |
|
"grad_norm": 0.9028123617172241, |
|
"learning_rate": 8.025729558531453e-06, |
|
"loss": 1.4324, |
|
"step": 22909 |
|
}, |
|
{ |
|
"epoch": 0.7562720469455708, |
|
"grad_norm": 0.8219141960144043, |
|
"learning_rate": 7.964168476264508e-06, |
|
"loss": 1.4122, |
|
"step": 22940 |
|
}, |
|
{ |
|
"epoch": 0.7572940361982, |
|
"grad_norm": 0.8215070366859436, |
|
"learning_rate": 7.902799641040884e-06, |
|
"loss": 1.417, |
|
"step": 22971 |
|
}, |
|
{ |
|
"epoch": 0.7583160254508291, |
|
"grad_norm": 0.8290405869483948, |
|
"learning_rate": 7.841623745399523e-06, |
|
"loss": 1.4146, |
|
"step": 23002 |
|
}, |
|
{ |
|
"epoch": 0.7593380147034583, |
|
"grad_norm": 0.8678696751594543, |
|
"learning_rate": 7.780641479702114e-06, |
|
"loss": 1.42, |
|
"step": 23033 |
|
}, |
|
{ |
|
"epoch": 0.7603600039560874, |
|
"grad_norm": 0.8347179889678955, |
|
"learning_rate": 7.719853532125227e-06, |
|
"loss": 1.409, |
|
"step": 23064 |
|
}, |
|
{ |
|
"epoch": 0.7613819932087166, |
|
"grad_norm": 0.8424118757247925, |
|
"learning_rate": 7.65926058865258e-06, |
|
"loss": 1.4034, |
|
"step": 23095 |
|
}, |
|
{ |
|
"epoch": 0.7624039824613458, |
|
"grad_norm": 0.8058504462242126, |
|
"learning_rate": 7.598863333067313e-06, |
|
"loss": 1.3934, |
|
"step": 23126 |
|
}, |
|
{ |
|
"epoch": 0.7634259717139749, |
|
"grad_norm": 0.8032201528549194, |
|
"learning_rate": 7.538662446944253e-06, |
|
"loss": 1.4358, |
|
"step": 23157 |
|
}, |
|
{ |
|
"epoch": 0.7644479609666041, |
|
"grad_norm": 0.8517287373542786, |
|
"learning_rate": 7.478658609642211e-06, |
|
"loss": 1.4018, |
|
"step": 23188 |
|
}, |
|
{ |
|
"epoch": 0.7654699502192331, |
|
"grad_norm": 0.8032835125923157, |
|
"learning_rate": 7.418852498296327e-06, |
|
"loss": 1.4139, |
|
"step": 23219 |
|
}, |
|
{ |
|
"epoch": 0.7664919394718623, |
|
"grad_norm": 0.806354820728302, |
|
"learning_rate": 7.359244787810457e-06, |
|
"loss": 1.405, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 0.7675139287244914, |
|
"grad_norm": 0.8073768615722656, |
|
"learning_rate": 7.299836150849493e-06, |
|
"loss": 1.4082, |
|
"step": 23281 |
|
}, |
|
{ |
|
"epoch": 0.7685359179771206, |
|
"grad_norm": 0.8103781938552856, |
|
"learning_rate": 7.240627257831847e-06, |
|
"loss": 1.3994, |
|
"step": 23312 |
|
}, |
|
{ |
|
"epoch": 0.7695579072297498, |
|
"grad_norm": 0.8298788666725159, |
|
"learning_rate": 7.1816187769218195e-06, |
|
"loss": 1.3971, |
|
"step": 23343 |
|
}, |
|
{ |
|
"epoch": 0.7705798964823789, |
|
"grad_norm": 0.8138905763626099, |
|
"learning_rate": 7.1228113740220895e-06, |
|
"loss": 1.3956, |
|
"step": 23374 |
|
}, |
|
{ |
|
"epoch": 0.7716018857350081, |
|
"grad_norm": 0.8347598314285278, |
|
"learning_rate": 7.064205712766226e-06, |
|
"loss": 1.4, |
|
"step": 23405 |
|
}, |
|
{ |
|
"epoch": 0.7726238749876372, |
|
"grad_norm": 0.8050975799560547, |
|
"learning_rate": 7.005802454511129e-06, |
|
"loss": 1.4171, |
|
"step": 23436 |
|
}, |
|
{ |
|
"epoch": 0.7736458642402664, |
|
"grad_norm": 0.8573086261749268, |
|
"learning_rate": 6.947602258329639e-06, |
|
"loss": 1.4181, |
|
"step": 23467 |
|
}, |
|
{ |
|
"epoch": 0.7746678534928956, |
|
"grad_norm": 0.7980222105979919, |
|
"learning_rate": 6.889605781003078e-06, |
|
"loss": 1.423, |
|
"step": 23498 |
|
}, |
|
{ |
|
"epoch": 0.7756898427455247, |
|
"grad_norm": 0.7984530329704285, |
|
"learning_rate": 6.831813677013776e-06, |
|
"loss": 1.4164, |
|
"step": 23529 |
|
}, |
|
{ |
|
"epoch": 0.7767118319981539, |
|
"grad_norm": 0.838580846786499, |
|
"learning_rate": 6.774226598537792e-06, |
|
"loss": 1.4215, |
|
"step": 23560 |
|
}, |
|
{ |
|
"epoch": 0.7777338212507829, |
|
"grad_norm": 0.8243645429611206, |
|
"learning_rate": 6.716845195437482e-06, |
|
"loss": 1.4311, |
|
"step": 23591 |
|
}, |
|
{ |
|
"epoch": 0.7787558105034121, |
|
"grad_norm": 0.8245717883110046, |
|
"learning_rate": 6.659670115254168e-06, |
|
"loss": 1.4217, |
|
"step": 23622 |
|
}, |
|
{ |
|
"epoch": 0.7797777997560412, |
|
"grad_norm": 0.8594714999198914, |
|
"learning_rate": 6.602702003200872e-06, |
|
"loss": 1.4103, |
|
"step": 23653 |
|
}, |
|
{ |
|
"epoch": 0.7807997890086704, |
|
"grad_norm": 0.8197819590568542, |
|
"learning_rate": 6.545941502154992e-06, |
|
"loss": 1.3962, |
|
"step": 23684 |
|
}, |
|
{ |
|
"epoch": 0.7818217782612996, |
|
"grad_norm": 0.8505415320396423, |
|
"learning_rate": 6.489389252651057e-06, |
|
"loss": 1.415, |
|
"step": 23715 |
|
}, |
|
{ |
|
"epoch": 0.7828437675139287, |
|
"grad_norm": 0.8418869376182556, |
|
"learning_rate": 6.4330458928735325e-06, |
|
"loss": 1.395, |
|
"step": 23746 |
|
}, |
|
{ |
|
"epoch": 0.7838657567665579, |
|
"grad_norm": 0.8336290717124939, |
|
"learning_rate": 6.376912058649559e-06, |
|
"loss": 1.4035, |
|
"step": 23777 |
|
}, |
|
{ |
|
"epoch": 0.784887746019187, |
|
"grad_norm": 0.8226044178009033, |
|
"learning_rate": 6.320988383441845e-06, |
|
"loss": 1.4041, |
|
"step": 23808 |
|
}, |
|
{ |
|
"epoch": 0.7859097352718162, |
|
"grad_norm": 0.821552038192749, |
|
"learning_rate": 6.265275498341452e-06, |
|
"loss": 1.3928, |
|
"step": 23839 |
|
}, |
|
{ |
|
"epoch": 0.7869317245244454, |
|
"grad_norm": 0.862799882888794, |
|
"learning_rate": 6.209774032060714e-06, |
|
"loss": 1.4082, |
|
"step": 23870 |
|
}, |
|
{ |
|
"epoch": 0.7879537137770745, |
|
"grad_norm": 0.8196310997009277, |
|
"learning_rate": 6.1544846109261365e-06, |
|
"loss": 1.41, |
|
"step": 23901 |
|
}, |
|
{ |
|
"epoch": 0.7889757030297037, |
|
"grad_norm": 0.8420688509941101, |
|
"learning_rate": 6.099407858871342e-06, |
|
"loss": 1.4144, |
|
"step": 23932 |
|
}, |
|
{ |
|
"epoch": 0.7899976922823327, |
|
"grad_norm": 0.8264843225479126, |
|
"learning_rate": 6.044544397429958e-06, |
|
"loss": 1.4082, |
|
"step": 23963 |
|
}, |
|
{ |
|
"epoch": 0.7910196815349619, |
|
"grad_norm": 0.8335662484169006, |
|
"learning_rate": 5.989894845728708e-06, |
|
"loss": 1.392, |
|
"step": 23994 |
|
}, |
|
{ |
|
"epoch": 0.792041670787591, |
|
"grad_norm": 0.8109620213508606, |
|
"learning_rate": 5.9354598204803605e-06, |
|
"loss": 1.4103, |
|
"step": 24025 |
|
}, |
|
{ |
|
"epoch": 0.7930636600402202, |
|
"grad_norm": 0.8195939064025879, |
|
"learning_rate": 5.881239935976762e-06, |
|
"loss": 1.4049, |
|
"step": 24056 |
|
}, |
|
{ |
|
"epoch": 0.7940856492928494, |
|
"grad_norm": 0.8132045865058899, |
|
"learning_rate": 5.827235804081954e-06, |
|
"loss": 1.4137, |
|
"step": 24087 |
|
}, |
|
{ |
|
"epoch": 0.7951076385454785, |
|
"grad_norm": 0.8163465261459351, |
|
"learning_rate": 5.773448034225221e-06, |
|
"loss": 1.3913, |
|
"step": 24118 |
|
}, |
|
{ |
|
"epoch": 0.7961296277981077, |
|
"grad_norm": 0.8209108710289001, |
|
"learning_rate": 5.719877233394228e-06, |
|
"loss": 1.3912, |
|
"step": 24149 |
|
}, |
|
{ |
|
"epoch": 0.7971516170507368, |
|
"grad_norm": 0.8075419664382935, |
|
"learning_rate": 5.666524006128191e-06, |
|
"loss": 1.3968, |
|
"step": 24180 |
|
}, |
|
{ |
|
"epoch": 0.798173606303366, |
|
"grad_norm": 0.8444731831550598, |
|
"learning_rate": 5.613388954511015e-06, |
|
"loss": 1.3818, |
|
"step": 24211 |
|
}, |
|
{ |
|
"epoch": 0.7991955955559952, |
|
"grad_norm": 0.8630134463310242, |
|
"learning_rate": 5.560472678164552e-06, |
|
"loss": 1.4201, |
|
"step": 24242 |
|
}, |
|
{ |
|
"epoch": 0.8002175848086243, |
|
"grad_norm": 0.8386521339416504, |
|
"learning_rate": 5.507775774241775e-06, |
|
"loss": 1.3984, |
|
"step": 24273 |
|
}, |
|
{ |
|
"epoch": 0.8012395740612535, |
|
"grad_norm": 0.8426861763000488, |
|
"learning_rate": 5.4552988374200945e-06, |
|
"loss": 1.4069, |
|
"step": 24304 |
|
}, |
|
{ |
|
"epoch": 0.8022615633138825, |
|
"grad_norm": 0.7985621690750122, |
|
"learning_rate": 5.403042459894597e-06, |
|
"loss": 1.4115, |
|
"step": 24335 |
|
}, |
|
{ |
|
"epoch": 0.8032835525665117, |
|
"grad_norm": 0.8200878500938416, |
|
"learning_rate": 5.3510072313714135e-06, |
|
"loss": 1.4055, |
|
"step": 24366 |
|
}, |
|
{ |
|
"epoch": 0.8043055418191408, |
|
"grad_norm": 0.816440761089325, |
|
"learning_rate": 5.2991937390610205e-06, |
|
"loss": 1.4062, |
|
"step": 24397 |
|
}, |
|
{ |
|
"epoch": 0.80532753107177, |
|
"grad_norm": 0.8369777798652649, |
|
"learning_rate": 5.247602567671625e-06, |
|
"loss": 1.3964, |
|
"step": 24428 |
|
}, |
|
{ |
|
"epoch": 0.8063495203243992, |
|
"grad_norm": 0.8133443593978882, |
|
"learning_rate": 5.196234299402603e-06, |
|
"loss": 1.4207, |
|
"step": 24459 |
|
}, |
|
{ |
|
"epoch": 0.8073715095770283, |
|
"grad_norm": 0.8752077221870422, |
|
"learning_rate": 5.145089513937865e-06, |
|
"loss": 1.3898, |
|
"step": 24490 |
|
}, |
|
{ |
|
"epoch": 0.8083934988296575, |
|
"grad_norm": 0.8062163591384888, |
|
"learning_rate": 5.094168788439369e-06, |
|
"loss": 1.4005, |
|
"step": 24521 |
|
}, |
|
{ |
|
"epoch": 0.8094154880822866, |
|
"grad_norm": 0.8645463585853577, |
|
"learning_rate": 5.043472697540594e-06, |
|
"loss": 1.3918, |
|
"step": 24552 |
|
}, |
|
{ |
|
"epoch": 0.8104374773349158, |
|
"grad_norm": 0.8500407934188843, |
|
"learning_rate": 4.993001813340012e-06, |
|
"loss": 1.4103, |
|
"step": 24583 |
|
}, |
|
{ |
|
"epoch": 0.8114594665875449, |
|
"grad_norm": 0.8387671113014221, |
|
"learning_rate": 4.942756705394702e-06, |
|
"loss": 1.4072, |
|
"step": 24614 |
|
}, |
|
{ |
|
"epoch": 0.8124814558401741, |
|
"grad_norm": 0.8170669674873352, |
|
"learning_rate": 4.892737940713884e-06, |
|
"loss": 1.3783, |
|
"step": 24645 |
|
}, |
|
{ |
|
"epoch": 0.8135034450928033, |
|
"grad_norm": 0.873763382434845, |
|
"learning_rate": 4.842946083752511e-06, |
|
"loss": 1.3995, |
|
"step": 24676 |
|
}, |
|
{ |
|
"epoch": 0.8145254343454323, |
|
"grad_norm": 0.8309612274169922, |
|
"learning_rate": 4.79338169640493e-06, |
|
"loss": 1.4044, |
|
"step": 24707 |
|
}, |
|
{ |
|
"epoch": 0.8155474235980615, |
|
"grad_norm": 0.8278691172599792, |
|
"learning_rate": 4.74404533799851e-06, |
|
"loss": 1.4097, |
|
"step": 24738 |
|
}, |
|
{ |
|
"epoch": 0.8165694128506906, |
|
"grad_norm": 0.7878281474113464, |
|
"learning_rate": 4.694937565287344e-06, |
|
"loss": 1.4075, |
|
"step": 24769 |
|
}, |
|
{ |
|
"epoch": 0.8175914021033198, |
|
"grad_norm": 0.8145809769630432, |
|
"learning_rate": 4.646058932445985e-06, |
|
"loss": 1.4032, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.818613391355949, |
|
"grad_norm": 0.8413559794425964, |
|
"learning_rate": 4.597409991063148e-06, |
|
"loss": 1.4097, |
|
"step": 24831 |
|
}, |
|
{ |
|
"epoch": 0.8196353806085781, |
|
"grad_norm": 0.8196817636489868, |
|
"learning_rate": 4.5489912901355375e-06, |
|
"loss": 1.3989, |
|
"step": 24862 |
|
}, |
|
{ |
|
"epoch": 0.8206573698612073, |
|
"grad_norm": 0.8410577774047852, |
|
"learning_rate": 4.500803376061608e-06, |
|
"loss": 1.4059, |
|
"step": 24893 |
|
}, |
|
{ |
|
"epoch": 0.8216793591138364, |
|
"grad_norm": 0.8097809553146362, |
|
"learning_rate": 4.45284679263541e-06, |
|
"loss": 1.3922, |
|
"step": 24924 |
|
}, |
|
{ |
|
"epoch": 0.8227013483664656, |
|
"grad_norm": 0.8499380350112915, |
|
"learning_rate": 4.4051220810404775e-06, |
|
"loss": 1.4004, |
|
"step": 24955 |
|
}, |
|
{ |
|
"epoch": 0.8237233376190947, |
|
"grad_norm": 0.8219107985496521, |
|
"learning_rate": 4.3576297798437025e-06, |
|
"loss": 1.3987, |
|
"step": 24986 |
|
}, |
|
{ |
|
"epoch": 0.8247453268717239, |
|
"grad_norm": 0.850868284702301, |
|
"learning_rate": 4.3103704249892436e-06, |
|
"loss": 1.3938, |
|
"step": 25017 |
|
}, |
|
{ |
|
"epoch": 0.8257673161243531, |
|
"grad_norm": 0.8800864815711975, |
|
"learning_rate": 4.263344549792487e-06, |
|
"loss": 1.4216, |
|
"step": 25048 |
|
}, |
|
{ |
|
"epoch": 0.8267893053769821, |
|
"grad_norm": 0.8150456547737122, |
|
"learning_rate": 4.216552684934056e-06, |
|
"loss": 1.4157, |
|
"step": 25079 |
|
}, |
|
{ |
|
"epoch": 0.8278112946296113, |
|
"grad_norm": 0.9749488234519958, |
|
"learning_rate": 4.169995358453777e-06, |
|
"loss": 1.4015, |
|
"step": 25110 |
|
}, |
|
{ |
|
"epoch": 0.8288332838822404, |
|
"grad_norm": 0.8335748910903931, |
|
"learning_rate": 4.123673095744757e-06, |
|
"loss": 1.4005, |
|
"step": 25141 |
|
}, |
|
{ |
|
"epoch": 0.8298552731348696, |
|
"grad_norm": 0.8542735576629639, |
|
"learning_rate": 4.077586419547435e-06, |
|
"loss": 1.4014, |
|
"step": 25172 |
|
}, |
|
{ |
|
"epoch": 0.8308772623874988, |
|
"grad_norm": 0.8510357141494751, |
|
"learning_rate": 4.03173584994368e-06, |
|
"loss": 1.3915, |
|
"step": 25203 |
|
}, |
|
{ |
|
"epoch": 0.8318992516401279, |
|
"grad_norm": 0.884564995765686, |
|
"learning_rate": 3.986121904350948e-06, |
|
"loss": 1.4081, |
|
"step": 25234 |
|
}, |
|
{ |
|
"epoch": 0.8329212408927571, |
|
"grad_norm": 0.8385151624679565, |
|
"learning_rate": 3.940745097516407e-06, |
|
"loss": 1.4065, |
|
"step": 25265 |
|
}, |
|
{ |
|
"epoch": 0.8339432301453862, |
|
"grad_norm": 0.8125067353248596, |
|
"learning_rate": 3.89560594151116e-06, |
|
"loss": 1.4129, |
|
"step": 25296 |
|
}, |
|
{ |
|
"epoch": 0.8349652193980154, |
|
"grad_norm": 0.8358818292617798, |
|
"learning_rate": 3.850704945724456e-06, |
|
"loss": 1.4278, |
|
"step": 25327 |
|
}, |
|
{ |
|
"epoch": 0.8359872086506445, |
|
"grad_norm": 0.8372805714607239, |
|
"learning_rate": 3.8060426168579077e-06, |
|
"loss": 1.3907, |
|
"step": 25358 |
|
}, |
|
{ |
|
"epoch": 0.8370091979032737, |
|
"grad_norm": 0.8480520248413086, |
|
"learning_rate": 3.7616194589198407e-06, |
|
"loss": 1.3941, |
|
"step": 25389 |
|
}, |
|
{ |
|
"epoch": 0.8380311871559029, |
|
"grad_norm": 0.8412033319473267, |
|
"learning_rate": 3.7174359732195574e-06, |
|
"loss": 1.404, |
|
"step": 25420 |
|
}, |
|
{ |
|
"epoch": 0.8390531764085319, |
|
"grad_norm": 0.8560240268707275, |
|
"learning_rate": 3.673492658361677e-06, |
|
"loss": 1.4272, |
|
"step": 25451 |
|
}, |
|
{ |
|
"epoch": 0.8400751656611611, |
|
"grad_norm": 0.8140300512313843, |
|
"learning_rate": 3.6297900102405467e-06, |
|
"loss": 1.414, |
|
"step": 25482 |
|
}, |
|
{ |
|
"epoch": 0.8410971549137902, |
|
"grad_norm": 0.8061606884002686, |
|
"learning_rate": 3.586328522034607e-06, |
|
"loss": 1.414, |
|
"step": 25513 |
|
}, |
|
{ |
|
"epoch": 0.8421191441664194, |
|
"grad_norm": 0.8134278059005737, |
|
"learning_rate": 3.543108684200838e-06, |
|
"loss": 1.4152, |
|
"step": 25544 |
|
}, |
|
{ |
|
"epoch": 0.8431411334190486, |
|
"grad_norm": 0.8117665648460388, |
|
"learning_rate": 3.5001309844692464e-06, |
|
"loss": 1.3981, |
|
"step": 25575 |
|
}, |
|
{ |
|
"epoch": 0.8441631226716777, |
|
"grad_norm": 0.834642767906189, |
|
"learning_rate": 3.4573959078373215e-06, |
|
"loss": 1.381, |
|
"step": 25606 |
|
}, |
|
{ |
|
"epoch": 0.8451851119243069, |
|
"grad_norm": 0.8252747058868408, |
|
"learning_rate": 3.4149039365646063e-06, |
|
"loss": 1.4088, |
|
"step": 25637 |
|
}, |
|
{ |
|
"epoch": 0.846207101176936, |
|
"grad_norm": 0.8141157627105713, |
|
"learning_rate": 3.3726555501672143e-06, |
|
"loss": 1.4095, |
|
"step": 25668 |
|
}, |
|
{ |
|
"epoch": 0.8472290904295652, |
|
"grad_norm": 0.8327375054359436, |
|
"learning_rate": 3.33065122541244e-06, |
|
"loss": 1.3955, |
|
"step": 25699 |
|
}, |
|
{ |
|
"epoch": 0.8482510796821943, |
|
"grad_norm": 0.8337382078170776, |
|
"learning_rate": 3.288891436313385e-06, |
|
"loss": 1.4118, |
|
"step": 25730 |
|
}, |
|
{ |
|
"epoch": 0.8492730689348235, |
|
"grad_norm": 0.855179488658905, |
|
"learning_rate": 3.2473766541235963e-06, |
|
"loss": 1.3819, |
|
"step": 25761 |
|
}, |
|
{ |
|
"epoch": 0.8502950581874527, |
|
"grad_norm": 0.8489012718200684, |
|
"learning_rate": 3.2061073473317466e-06, |
|
"loss": 1.3896, |
|
"step": 25792 |
|
}, |
|
{ |
|
"epoch": 0.8513170474400817, |
|
"grad_norm": 0.8339117765426636, |
|
"learning_rate": 3.1650839816563444e-06, |
|
"loss": 1.4018, |
|
"step": 25823 |
|
}, |
|
{ |
|
"epoch": 0.8523390366927109, |
|
"grad_norm": 0.8457059264183044, |
|
"learning_rate": 3.1243070200405093e-06, |
|
"loss": 1.4051, |
|
"step": 25854 |
|
}, |
|
{ |
|
"epoch": 0.85336102594534, |
|
"grad_norm": 0.8139636516571045, |
|
"learning_rate": 3.0837769226467e-06, |
|
"loss": 1.4022, |
|
"step": 25885 |
|
}, |
|
{ |
|
"epoch": 0.8543830151979692, |
|
"grad_norm": 0.8181917071342468, |
|
"learning_rate": 3.0434941468515666e-06, |
|
"loss": 1.3857, |
|
"step": 25916 |
|
}, |
|
{ |
|
"epoch": 0.8554050044505983, |
|
"grad_norm": 0.827022910118103, |
|
"learning_rate": 3.003459147240753e-06, |
|
"loss": 1.4097, |
|
"step": 25947 |
|
}, |
|
{ |
|
"epoch": 0.8564269937032275, |
|
"grad_norm": 0.8474435210227966, |
|
"learning_rate": 2.9636723756037875e-06, |
|
"loss": 1.3959, |
|
"step": 25978 |
|
}, |
|
{ |
|
"epoch": 0.8574489829558567, |
|
"grad_norm": 0.8524008393287659, |
|
"learning_rate": 2.9241342809289833e-06, |
|
"loss": 1.4108, |
|
"step": 26009 |
|
}, |
|
{ |
|
"epoch": 0.8584709722084858, |
|
"grad_norm": 0.8360846638679504, |
|
"learning_rate": 2.8848453093983594e-06, |
|
"loss": 1.3946, |
|
"step": 26040 |
|
}, |
|
{ |
|
"epoch": 0.859492961461115, |
|
"grad_norm": 0.8294342160224915, |
|
"learning_rate": 2.8458059043826257e-06, |
|
"loss": 1.4134, |
|
"step": 26071 |
|
}, |
|
{ |
|
"epoch": 0.8605149507137441, |
|
"grad_norm": 0.85257887840271, |
|
"learning_rate": 2.807016506436172e-06, |
|
"loss": 1.3838, |
|
"step": 26102 |
|
}, |
|
{ |
|
"epoch": 0.8615369399663733, |
|
"grad_norm": 0.8425765633583069, |
|
"learning_rate": 2.7684775532920566e-06, |
|
"loss": 1.3965, |
|
"step": 26133 |
|
}, |
|
{ |
|
"epoch": 0.8625589292190025, |
|
"grad_norm": 0.8341211080551147, |
|
"learning_rate": 2.7301894798571425e-06, |
|
"loss": 1.4012, |
|
"step": 26164 |
|
}, |
|
{ |
|
"epoch": 0.8635809184716315, |
|
"grad_norm": 0.8640623688697815, |
|
"learning_rate": 2.6921527182071386e-06, |
|
"loss": 1.386, |
|
"step": 26195 |
|
}, |
|
{ |
|
"epoch": 0.8646029077242607, |
|
"grad_norm": 0.8298767805099487, |
|
"learning_rate": 2.654367697581725e-06, |
|
"loss": 1.3828, |
|
"step": 26226 |
|
}, |
|
{ |
|
"epoch": 0.8656248969768898, |
|
"grad_norm": 0.8219887018203735, |
|
"learning_rate": 2.6168348443797175e-06, |
|
"loss": 1.3955, |
|
"step": 26257 |
|
}, |
|
{ |
|
"epoch": 0.866646886229519, |
|
"grad_norm": 0.8416712284088135, |
|
"learning_rate": 2.5795545821542757e-06, |
|
"loss": 1.4063, |
|
"step": 26288 |
|
}, |
|
{ |
|
"epoch": 0.8676688754821481, |
|
"grad_norm": 0.823794960975647, |
|
"learning_rate": 2.54252733160808e-06, |
|
"loss": 1.3832, |
|
"step": 26319 |
|
}, |
|
{ |
|
"epoch": 0.8686908647347773, |
|
"grad_norm": 0.8214231133460999, |
|
"learning_rate": 2.5057535105886294e-06, |
|
"loss": 1.3872, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 0.8697128539874065, |
|
"grad_norm": 0.8258060812950134, |
|
"learning_rate": 2.4692335340834953e-06, |
|
"loss": 1.3815, |
|
"step": 26381 |
|
}, |
|
{ |
|
"epoch": 0.8707348432400356, |
|
"grad_norm": 0.8566368222236633, |
|
"learning_rate": 2.432967814215639e-06, |
|
"loss": 1.4109, |
|
"step": 26412 |
|
}, |
|
{ |
|
"epoch": 0.8717568324926648, |
|
"grad_norm": 0.8273147344589233, |
|
"learning_rate": 2.396956760238794e-06, |
|
"loss": 1.4059, |
|
"step": 26443 |
|
}, |
|
{ |
|
"epoch": 0.8727788217452939, |
|
"grad_norm": 0.7971722483634949, |
|
"learning_rate": 2.361200778532796e-06, |
|
"loss": 1.3898, |
|
"step": 26474 |
|
}, |
|
{ |
|
"epoch": 0.8738008109979231, |
|
"grad_norm": 0.9347776174545288, |
|
"learning_rate": 2.325700272599049e-06, |
|
"loss": 1.42, |
|
"step": 26505 |
|
}, |
|
{ |
|
"epoch": 0.8748228002505523, |
|
"grad_norm": 0.8639031052589417, |
|
"learning_rate": 2.2904556430559415e-06, |
|
"loss": 1.4068, |
|
"step": 26536 |
|
}, |
|
{ |
|
"epoch": 0.8758447895031813, |
|
"grad_norm": 0.8582850098609924, |
|
"learning_rate": 2.2554672876343106e-06, |
|
"loss": 1.4089, |
|
"step": 26567 |
|
}, |
|
{ |
|
"epoch": 0.8768667787558105, |
|
"grad_norm": 0.8343849182128906, |
|
"learning_rate": 2.220735601173002e-06, |
|
"loss": 1.3902, |
|
"step": 26598 |
|
}, |
|
{ |
|
"epoch": 0.8778887680084396, |
|
"grad_norm": 0.8293885588645935, |
|
"learning_rate": 2.186260975614382e-06, |
|
"loss": 1.4027, |
|
"step": 26629 |
|
}, |
|
{ |
|
"epoch": 0.8789107572610688, |
|
"grad_norm": 0.8358698487281799, |
|
"learning_rate": 2.1520437999999034e-06, |
|
"loss": 1.3941, |
|
"step": 26660 |
|
}, |
|
{ |
|
"epoch": 0.8799327465136979, |
|
"grad_norm": 0.8251244425773621, |
|
"learning_rate": 2.1180844604657526e-06, |
|
"loss": 1.4191, |
|
"step": 26691 |
|
}, |
|
{ |
|
"epoch": 0.8809547357663271, |
|
"grad_norm": 0.8112486600875854, |
|
"learning_rate": 2.084383340238455e-06, |
|
"loss": 1.4123, |
|
"step": 26722 |
|
}, |
|
{ |
|
"epoch": 0.8819767250189563, |
|
"grad_norm": 0.833122193813324, |
|
"learning_rate": 2.0509408196305704e-06, |
|
"loss": 1.3796, |
|
"step": 26753 |
|
}, |
|
{ |
|
"epoch": 0.8829987142715854, |
|
"grad_norm": 0.8029480576515198, |
|
"learning_rate": 2.017757276036403e-06, |
|
"loss": 1.3847, |
|
"step": 26784 |
|
}, |
|
{ |
|
"epoch": 0.8840207035242146, |
|
"grad_norm": 0.8319761753082275, |
|
"learning_rate": 1.984833083927726e-06, |
|
"loss": 1.4015, |
|
"step": 26815 |
|
}, |
|
{ |
|
"epoch": 0.8850426927768437, |
|
"grad_norm": 0.8120732307434082, |
|
"learning_rate": 1.952168614849581e-06, |
|
"loss": 1.4064, |
|
"step": 26846 |
|
}, |
|
{ |
|
"epoch": 0.8860646820294729, |
|
"grad_norm": 0.8120958209037781, |
|
"learning_rate": 1.919764237416058e-06, |
|
"loss": 1.3988, |
|
"step": 26877 |
|
}, |
|
{ |
|
"epoch": 0.887086671282102, |
|
"grad_norm": 0.8618322014808655, |
|
"learning_rate": 1.8876203173061463e-06, |
|
"loss": 1.4116, |
|
"step": 26908 |
|
}, |
|
{ |
|
"epoch": 0.8881086605347311, |
|
"grad_norm": 0.8309136033058167, |
|
"learning_rate": 1.8557372172596206e-06, |
|
"loss": 1.4013, |
|
"step": 26939 |
|
}, |
|
{ |
|
"epoch": 0.8891306497873603, |
|
"grad_norm": 0.8589499592781067, |
|
"learning_rate": 1.8241152970729341e-06, |
|
"loss": 1.3942, |
|
"step": 26970 |
|
}, |
|
{ |
|
"epoch": 0.8901526390399894, |
|
"grad_norm": 0.8159814476966858, |
|
"learning_rate": 1.7927549135951572e-06, |
|
"loss": 1.3885, |
|
"step": 27001 |
|
}, |
|
{ |
|
"epoch": 0.8911746282926186, |
|
"grad_norm": 0.854448676109314, |
|
"learning_rate": 1.7616564207239477e-06, |
|
"loss": 1.3914, |
|
"step": 27032 |
|
}, |
|
{ |
|
"epoch": 0.8921966175452477, |
|
"grad_norm": 0.8111308217048645, |
|
"learning_rate": 1.730820169401584e-06, |
|
"loss": 1.3782, |
|
"step": 27063 |
|
}, |
|
{ |
|
"epoch": 0.8932186067978769, |
|
"grad_norm": 0.8496434092521667, |
|
"learning_rate": 1.7002465076109558e-06, |
|
"loss": 1.3927, |
|
"step": 27094 |
|
}, |
|
{ |
|
"epoch": 0.8942405960505061, |
|
"grad_norm": 0.8473883271217346, |
|
"learning_rate": 1.6699357803716898e-06, |
|
"loss": 1.3887, |
|
"step": 27125 |
|
}, |
|
{ |
|
"epoch": 0.8952625853031352, |
|
"grad_norm": 0.8557064533233643, |
|
"learning_rate": 1.6398883297362305e-06, |
|
"loss": 1.3985, |
|
"step": 27156 |
|
}, |
|
{ |
|
"epoch": 0.8962845745557644, |
|
"grad_norm": 0.8828888535499573, |
|
"learning_rate": 1.6101044947859606e-06, |
|
"loss": 1.3875, |
|
"step": 27187 |
|
}, |
|
{ |
|
"epoch": 0.8973065638083935, |
|
"grad_norm": 0.8234705328941345, |
|
"learning_rate": 1.5805846116274114e-06, |
|
"loss": 1.4123, |
|
"step": 27218 |
|
}, |
|
{ |
|
"epoch": 0.8983285530610227, |
|
"grad_norm": 0.814002513885498, |
|
"learning_rate": 1.5513290133884611e-06, |
|
"loss": 1.4021, |
|
"step": 27249 |
|
}, |
|
{ |
|
"epoch": 0.8993505423136517, |
|
"grad_norm": 0.8468136191368103, |
|
"learning_rate": 1.5223380302145512e-06, |
|
"loss": 1.4031, |
|
"step": 27280 |
|
}, |
|
{ |
|
"epoch": 0.9003725315662809, |
|
"grad_norm": 0.823250949382782, |
|
"learning_rate": 1.4936119892649925e-06, |
|
"loss": 1.3922, |
|
"step": 27311 |
|
}, |
|
{ |
|
"epoch": 0.9013945208189101, |
|
"grad_norm": 0.8171223998069763, |
|
"learning_rate": 1.4651512147092482e-06, |
|
"loss": 1.3878, |
|
"step": 27342 |
|
}, |
|
{ |
|
"epoch": 0.9024165100715392, |
|
"grad_norm": 0.823663055896759, |
|
"learning_rate": 1.4369560277232908e-06, |
|
"loss": 1.4193, |
|
"step": 27373 |
|
}, |
|
{ |
|
"epoch": 0.9034384993241684, |
|
"grad_norm": 0.8320732712745667, |
|
"learning_rate": 1.409026746485978e-06, |
|
"loss": 1.3795, |
|
"step": 27404 |
|
}, |
|
{ |
|
"epoch": 0.9044604885767975, |
|
"grad_norm": 0.8476362228393555, |
|
"learning_rate": 1.3813636861754464e-06, |
|
"loss": 1.3872, |
|
"step": 27435 |
|
}, |
|
{ |
|
"epoch": 0.9054824778294267, |
|
"grad_norm": 0.8781965374946594, |
|
"learning_rate": 1.3539671589655773e-06, |
|
"loss": 1.3957, |
|
"step": 27466 |
|
} |
|
], |
|
"logging_steps": 31, |
|
"max_steps": 30517, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 3052, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.9185168775775257e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|