{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.9850107066381155,
  "eval_steps": 500,
  "global_step": 932,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004282655246252677,
      "grad_norm": 5.760822510102549,
      "learning_rate": 6.25e-07,
      "loss": 1.9283,
      "step": 1
    },
    {
      "epoch": 0.008565310492505354,
      "grad_norm": 9.541601401962827,
      "learning_rate": 1.25e-06,
      "loss": 1.9234,
      "step": 2
    },
    {
      "epoch": 0.01284796573875803,
      "grad_norm": 6.538784535229611,
      "learning_rate": 1.875e-06,
      "loss": 1.8408,
      "step": 3
    },
    {
      "epoch": 0.017130620985010708,
      "grad_norm": 7.213295700964454,
      "learning_rate": 2.5e-06,
      "loss": 1.9104,
      "step": 4
    },
    {
      "epoch": 0.021413276231263382,
      "grad_norm": 6.314491554982707,
      "learning_rate": 3.125e-06,
      "loss": 1.9888,
      "step": 5
    },
    {
      "epoch": 0.02569593147751606,
      "grad_norm": 2.828604192300556,
      "learning_rate": 3.75e-06,
      "loss": 1.8774,
      "step": 6
    },
    {
      "epoch": 0.029978586723768737,
      "grad_norm": 1.9892932329122277,
      "learning_rate": 4.375e-06,
      "loss": 1.8232,
      "step": 7
    },
    {
      "epoch": 0.034261241970021415,
      "grad_norm": 2.301797599943145,
      "learning_rate": 5e-06,
      "loss": 1.7929,
      "step": 8
    },
    {
      "epoch": 0.03854389721627409,
      "grad_norm": 2.642325145637469,
      "learning_rate": 5.625e-06,
      "loss": 1.6999,
      "step": 9
    },
    {
      "epoch": 0.042826552462526764,
      "grad_norm": 1.54457145688109,
      "learning_rate": 6.25e-06,
      "loss": 1.797,
      "step": 10
    },
    {
      "epoch": 0.047109207708779445,
      "grad_norm": 0.6231435902899881,
      "learning_rate": 6.875000000000001e-06,
      "loss": 1.8327,
      "step": 11
    },
    {
      "epoch": 0.05139186295503212,
      "grad_norm": 1.9278562906780023,
      "learning_rate": 7.5e-06,
      "loss": 1.8551,
      "step": 12
    },
    {
      "epoch": 0.055674518201284794,
      "grad_norm": 2.1201436104190123,
      "learning_rate": 8.125000000000001e-06,
      "loss": 1.713,
      "step": 13
    },
    {
      "epoch": 0.059957173447537475,
      "grad_norm": 1.6833290019534897,
      "learning_rate": 8.75e-06,
      "loss": 1.7723,
      "step": 14
    },
    {
      "epoch": 0.06423982869379015,
      "grad_norm": 1.7592896430688332,
      "learning_rate": 9.375000000000001e-06,
      "loss": 1.7786,
      "step": 15
    },
    {
      "epoch": 0.06852248394004283,
      "grad_norm": 1.3527651484156915,
      "learning_rate": 1e-05,
      "loss": 1.8679,
      "step": 16
    },
    {
      "epoch": 0.0728051391862955,
      "grad_norm": 0.487619361955017,
      "learning_rate": 1.0625e-05,
      "loss": 1.7012,
      "step": 17
    },
    {
      "epoch": 0.07708779443254818,
      "grad_norm": 1.429523434604011,
      "learning_rate": 1.125e-05,
      "loss": 1.6743,
      "step": 18
    },
    {
      "epoch": 0.08137044967880086,
      "grad_norm": 1.1313491131725162,
      "learning_rate": 1.1875e-05,
      "loss": 1.7325,
      "step": 19
    },
    {
      "epoch": 0.08565310492505353,
      "grad_norm": 0.7296310910231132,
      "learning_rate": 1.25e-05,
      "loss": 1.6433,
      "step": 20
    },
    {
      "epoch": 0.08993576017130621,
      "grad_norm": 1.0873378233145565,
      "learning_rate": 1.3125e-05,
      "loss": 1.8281,
      "step": 21
    },
    {
      "epoch": 0.09421841541755889,
      "grad_norm": 0.4193072086415473,
      "learning_rate": 1.3750000000000002e-05,
      "loss": 1.7653,
      "step": 22
    },
    {
      "epoch": 0.09850107066381156,
      "grad_norm": 0.6318751609453201,
      "learning_rate": 1.4374999999999999e-05,
      "loss": 1.806,
      "step": 23
    },
    {
      "epoch": 0.10278372591006424,
      "grad_norm": 0.7998125137748582,
      "learning_rate": 1.5e-05,
      "loss": 1.7682,
      "step": 24
    },
    {
      "epoch": 0.10706638115631692,
      "grad_norm": 0.4607962108022345,
      "learning_rate": 1.5625e-05,
      "loss": 1.7188,
      "step": 25
    },
    {
      "epoch": 0.11134903640256959,
      "grad_norm": 0.6691122325494083,
      "learning_rate": 1.6250000000000002e-05,
      "loss": 1.7694,
      "step": 26
    },
    {
      "epoch": 0.11563169164882227,
      "grad_norm": 0.5966739618315919,
      "learning_rate": 1.6875000000000004e-05,
      "loss": 1.709,
      "step": 27
    },
    {
      "epoch": 0.11991434689507495,
      "grad_norm": 0.5572079216591783,
      "learning_rate": 1.75e-05,
      "loss": 1.7757,
      "step": 28
    },
    {
      "epoch": 0.12419700214132762,
      "grad_norm": 0.5765542135816161,
      "learning_rate": 1.8125e-05,
      "loss": 1.6293,
      "step": 29
    },
    {
      "epoch": 0.1284796573875803,
      "grad_norm": 0.43151210853354827,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 1.7835,
      "step": 30
    },
    {
      "epoch": 0.13276231263383298,
      "grad_norm": 0.6485552906377018,
      "learning_rate": 1.9375e-05,
      "loss": 1.5803,
      "step": 31
    },
    {
      "epoch": 0.13704496788008566,
      "grad_norm": 0.4482132894148142,
      "learning_rate": 2e-05,
      "loss": 1.6477,
      "step": 32
    },
    {
      "epoch": 0.14132762312633834,
      "grad_norm": 0.611457842177946,
      "learning_rate": 2.0625e-05,
      "loss": 1.6426,
      "step": 33
    },
    {
      "epoch": 0.145610278372591,
      "grad_norm": 0.38871825671554094,
      "learning_rate": 2.125e-05,
      "loss": 1.5813,
      "step": 34
    },
    {
      "epoch": 0.14989293361884368,
      "grad_norm": 0.5250386322112104,
      "learning_rate": 2.1875e-05,
      "loss": 1.6655,
      "step": 35
    },
    {
      "epoch": 0.15417558886509636,
      "grad_norm": 0.41266647411410223,
      "learning_rate": 2.25e-05,
      "loss": 1.7505,
      "step": 36
    },
    {
      "epoch": 0.15845824411134904,
      "grad_norm": 0.4161967413290072,
      "learning_rate": 2.3125000000000003e-05,
      "loss": 1.6562,
      "step": 37
    },
    {
      "epoch": 0.16274089935760172,
      "grad_norm": 0.4023472892837264,
      "learning_rate": 2.375e-05,
      "loss": 1.5572,
      "step": 38
    },
    {
      "epoch": 0.1670235546038544,
      "grad_norm": 0.4716743391777267,
      "learning_rate": 2.4375e-05,
      "loss": 1.654,
      "step": 39
    },
    {
      "epoch": 0.17130620985010706,
      "grad_norm": 0.41899791933237945,
      "learning_rate": 2.5e-05,
      "loss": 1.6262,
      "step": 40
    },
    {
      "epoch": 0.17558886509635974,
      "grad_norm": 0.4691927568541724,
      "learning_rate": 2.4999922473602244e-05,
      "loss": 1.7226,
      "step": 41
    },
    {
      "epoch": 0.17987152034261242,
      "grad_norm": 0.3788848931279516,
      "learning_rate": 2.499968989537063e-05,
      "loss": 1.5837,
      "step": 42
    },
    {
      "epoch": 0.1841541755888651,
      "grad_norm": 0.4140681800260881,
      "learning_rate": 2.4999302268190118e-05,
      "loss": 1.6646,
      "step": 43
    },
    {
      "epoch": 0.18843683083511778,
      "grad_norm": 0.44590521442058745,
      "learning_rate": 2.4998759596868908e-05,
      "loss": 1.6526,
      "step": 44
    },
    {
      "epoch": 0.19271948608137046,
      "grad_norm": 0.42731019400707126,
      "learning_rate": 2.499806188813843e-05,
      "loss": 1.6234,
      "step": 45
    },
    {
      "epoch": 0.19700214132762311,
      "grad_norm": 0.41939827714864014,
      "learning_rate": 2.4997209150653212e-05,
      "loss": 1.6093,
      "step": 46
    },
    {
      "epoch": 0.2012847965738758,
      "grad_norm": 0.4139349892587555,
      "learning_rate": 2.4996201394990805e-05,
      "loss": 1.6361,
      "step": 47
    },
    {
      "epoch": 0.20556745182012848,
      "grad_norm": 0.4278900632702424,
      "learning_rate": 2.4995038633651627e-05,
      "loss": 1.6978,
      "step": 48
    },
    {
      "epoch": 0.20985010706638116,
      "grad_norm": 0.3616303314921443,
      "learning_rate": 2.499372088105884e-05,
      "loss": 1.4552,
      "step": 49
    },
    {
      "epoch": 0.21413276231263384,
      "grad_norm": 0.42854114017469835,
      "learning_rate": 2.4992248153558134e-05,
      "loss": 1.7517,
      "step": 50
    },
    {
      "epoch": 0.21841541755888652,
      "grad_norm": 0.526378364056868,
      "learning_rate": 2.4990620469417554e-05,
      "loss": 1.6058,
      "step": 51
    },
    {
      "epoch": 0.22269807280513917,
      "grad_norm": 0.4059991411502784,
      "learning_rate": 2.498883784882726e-05,
      "loss": 1.5755,
      "step": 52
    },
    {
      "epoch": 0.22698072805139186,
      "grad_norm": 0.4066265032195638,
      "learning_rate": 2.4986900313899273e-05,
      "loss": 1.6502,
      "step": 53
    },
    {
      "epoch": 0.23126338329764454,
      "grad_norm": 0.4679382599100356,
      "learning_rate": 2.498480788866721e-05,
      "loss": 1.5904,
      "step": 54
    },
    {
      "epoch": 0.23554603854389722,
      "grad_norm": 0.4146983639334321,
      "learning_rate": 2.4982560599085984e-05,
      "loss": 1.7578,
      "step": 55
    },
    {
      "epoch": 0.2398286937901499,
      "grad_norm": 0.5011668519133488,
      "learning_rate": 2.4980158473031472e-05,
      "loss": 1.6348,
      "step": 56
    },
    {
      "epoch": 0.24411134903640258,
      "grad_norm": 0.39760241005080976,
      "learning_rate": 2.4977601540300188e-05,
      "loss": 1.6521,
      "step": 57
    },
    {
      "epoch": 0.24839400428265523,
      "grad_norm": 0.3842980574877057,
      "learning_rate": 2.49748898326089e-05,
      "loss": 1.5468,
      "step": 58
    },
    {
      "epoch": 0.25267665952890794,
      "grad_norm": 0.5027132906331951,
      "learning_rate": 2.497202338359423e-05,
      "loss": 1.6786,
      "step": 59
    },
    {
      "epoch": 0.2569593147751606,
      "grad_norm": 0.42843610006674887,
      "learning_rate": 2.4969002228812256e-05,
      "loss": 1.5481,
      "step": 60
    },
    {
      "epoch": 0.26124197002141325,
      "grad_norm": 0.418048089271474,
      "learning_rate": 2.4965826405738054e-05,
      "loss": 1.5,
      "step": 61
    },
    {
      "epoch": 0.26552462526766596,
      "grad_norm": 0.38187537005853855,
      "learning_rate": 2.4962495953765248e-05,
      "loss": 1.5241,
      "step": 62
    },
    {
      "epoch": 0.2698072805139186,
      "grad_norm": 0.41994694502120095,
      "learning_rate": 2.495901091420551e-05,
      "loss": 1.5668,
      "step": 63
    },
    {
      "epoch": 0.2740899357601713,
      "grad_norm": 0.41737827691699964,
      "learning_rate": 2.4955371330288045e-05,
      "loss": 1.6447,
      "step": 64
    },
    {
      "epoch": 0.278372591006424,
      "grad_norm": 0.42240231204308587,
      "learning_rate": 2.4951577247159068e-05,
      "loss": 1.5265,
      "step": 65
    },
    {
      "epoch": 0.2826552462526767,
      "grad_norm": 0.4053695799459516,
      "learning_rate": 2.494762871188124e-05,
      "loss": 1.6668,
      "step": 66
    },
    {
      "epoch": 0.28693790149892934,
      "grad_norm": 0.3755341743631125,
      "learning_rate": 2.4943525773433063e-05,
      "loss": 1.4097,
      "step": 67
    },
    {
      "epoch": 0.291220556745182,
      "grad_norm": 0.3698075541817392,
      "learning_rate": 2.4939268482708318e-05,
      "loss": 1.7374,
      "step": 68
    },
    {
      "epoch": 0.2955032119914347,
      "grad_norm": 0.35883020884289013,
      "learning_rate": 2.4934856892515378e-05,
      "loss": 1.7297,
      "step": 69
    },
    {
      "epoch": 0.29978586723768735,
      "grad_norm": 0.42882530161941707,
      "learning_rate": 2.4930291057576603e-05,
      "loss": 1.6139,
      "step": 70
    },
    {
      "epoch": 0.30406852248394006,
      "grad_norm": 0.43658211064964164,
      "learning_rate": 2.4925571034527633e-05,
      "loss": 1.6844,
      "step": 71
    },
    {
      "epoch": 0.3083511777301927,
      "grad_norm": 0.375017122269398,
      "learning_rate": 2.492069688191668e-05,
      "loss": 1.5154,
      "step": 72
    },
    {
      "epoch": 0.31263383297644537,
      "grad_norm": 0.4040881085038259,
      "learning_rate": 2.4915668660203827e-05,
      "loss": 1.6869,
      "step": 73
    },
    {
      "epoch": 0.3169164882226981,
      "grad_norm": 0.5315682285098243,
      "learning_rate": 2.4910486431760266e-05,
      "loss": 1.6036,
      "step": 74
    },
    {
      "epoch": 0.32119914346895073,
      "grad_norm": 0.6819504727092934,
      "learning_rate": 2.490515026086751e-05,
      "loss": 1.7321,
      "step": 75
    },
    {
      "epoch": 0.32548179871520344,
      "grad_norm": 0.4459093235436832,
      "learning_rate": 2.489966021371662e-05,
      "loss": 1.6316,
      "step": 76
    },
    {
      "epoch": 0.3297644539614561,
      "grad_norm": 0.41265976791945247,
      "learning_rate": 2.4894016358407368e-05,
      "loss": 1.6822,
      "step": 77
    },
    {
      "epoch": 0.3340471092077088,
      "grad_norm": 0.40455952502188075,
      "learning_rate": 2.4888218764947397e-05,
      "loss": 1.6279,
      "step": 78
    },
    {
      "epoch": 0.33832976445396146,
      "grad_norm": 0.39048708108607677,
      "learning_rate": 2.488226750525135e-05,
      "loss": 1.67,
      "step": 79
    },
    {
      "epoch": 0.3426124197002141,
      "grad_norm": 0.37437295904257595,
      "learning_rate": 2.487616265313999e-05,
      "loss": 1.6237,
      "step": 80
    },
    {
      "epoch": 0.3468950749464668,
      "grad_norm": 0.5090276930456816,
      "learning_rate": 2.486990428433926e-05,
      "loss": 1.6003,
      "step": 81
    },
    {
      "epoch": 0.3511777301927195,
      "grad_norm": 0.46215544091371435,
      "learning_rate": 2.486349247647938e-05,
      "loss": 1.6227,
      "step": 82
    },
    {
      "epoch": 0.3554603854389722,
      "grad_norm": 0.41822651733354704,
      "learning_rate": 2.485692730909383e-05,
      "loss": 1.669,
      "step": 83
    },
    {
      "epoch": 0.35974304068522484,
      "grad_norm": 0.441655220825228,
      "learning_rate": 2.4850208863618425e-05,
      "loss": 1.4542,
      "step": 84
    },
    {
      "epoch": 0.3640256959314775,
      "grad_norm": 0.3786999347152407,
      "learning_rate": 2.4843337223390267e-05,
      "loss": 1.4966,
      "step": 85
    },
    {
      "epoch": 0.3683083511777302,
      "grad_norm": 0.363991509035686,
      "learning_rate": 2.483631247364671e-05,
      "loss": 1.4573,
      "step": 86
    },
    {
      "epoch": 0.37259100642398285,
      "grad_norm": 0.36392542721746446,
      "learning_rate": 2.482913470152433e-05,
      "loss": 1.5823,
      "step": 87
    },
    {
      "epoch": 0.37687366167023556,
      "grad_norm": 0.3644244797395943,
      "learning_rate": 2.482180399605781e-05,
      "loss": 1.5918,
      "step": 88
    },
    {
      "epoch": 0.3811563169164882,
      "grad_norm": 0.3517233506762531,
      "learning_rate": 2.481432044817887e-05,
      "loss": 1.6118,
      "step": 89
    },
    {
      "epoch": 0.3854389721627409,
      "grad_norm": 0.44516577454752954,
      "learning_rate": 2.4806684150715097e-05,
      "loss": 1.5337,
      "step": 90
    },
    {
      "epoch": 0.3897216274089936,
      "grad_norm": 0.4170570804452654,
      "learning_rate": 2.4798895198388845e-05,
      "loss": 1.6465,
      "step": 91
    },
    {
      "epoch": 0.39400428265524623,
      "grad_norm": 0.36685661291454347,
      "learning_rate": 2.4790953687816017e-05,
      "loss": 1.6072,
      "step": 92
    },
    {
      "epoch": 0.39828693790149894,
      "grad_norm": 0.4465652273066297,
      "learning_rate": 2.4782859717504883e-05,
      "loss": 1.648,
      "step": 93
    },
    {
      "epoch": 0.4025695931477516,
      "grad_norm": 0.551115294286871,
      "learning_rate": 2.4774613387854866e-05,
      "loss": 1.6789,
      "step": 94
    },
    {
      "epoch": 0.4068522483940043,
      "grad_norm": 0.4583820418024637,
      "learning_rate": 2.4766214801155276e-05,
      "loss": 1.5697,
      "step": 95
    },
    {
      "epoch": 0.41113490364025695,
      "grad_norm": 0.41125366081563586,
      "learning_rate": 2.475766406158407e-05,
      "loss": 1.5489,
      "step": 96
    },
    {
      "epoch": 0.41541755888650966,
      "grad_norm": 0.5258069733050229,
      "learning_rate": 2.4748961275206527e-05,
      "loss": 1.5782,
      "step": 97
    },
    {
      "epoch": 0.4197002141327623,
      "grad_norm": 0.5370435285656707,
      "learning_rate": 2.4740106549973953e-05,
      "loss": 1.4463,
      "step": 98
    },
    {
      "epoch": 0.42398286937901497,
      "grad_norm": 0.3656167120256986,
      "learning_rate": 2.4731099995722353e-05,
      "loss": 1.503,
      "step": 99
    },
    {
      "epoch": 0.4282655246252677,
      "grad_norm": 0.37413674489512233,
      "learning_rate": 2.4721941724171025e-05,
      "loss": 1.5299,
      "step": 100
    },
    {
      "epoch": 0.43254817987152033,
      "grad_norm": 0.5442125290315152,
      "learning_rate": 2.4712631848921224e-05,
      "loss": 1.47,
      "step": 101
    },
    {
      "epoch": 0.43683083511777304,
      "grad_norm": 0.41142510883515865,
      "learning_rate": 2.470317048545473e-05,
      "loss": 1.6346,
      "step": 102
    },
    {
      "epoch": 0.4411134903640257,
      "grad_norm": 0.36843680594934913,
      "learning_rate": 2.4693557751132405e-05,
      "loss": 1.5707,
      "step": 103
    },
    {
      "epoch": 0.44539614561027835,
      "grad_norm": 0.40580526926230925,
      "learning_rate": 2.4683793765192753e-05,
      "loss": 1.611,
      "step": 104
    },
    {
      "epoch": 0.44967880085653106,
      "grad_norm": 0.43640536110068956,
      "learning_rate": 2.4673878648750446e-05,
      "loss": 1.6646,
      "step": 105
    },
    {
      "epoch": 0.4539614561027837,
      "grad_norm": 0.36401706952184854,
      "learning_rate": 2.4663812524794803e-05,
      "loss": 1.501,
      "step": 106
    },
    {
      "epoch": 0.4582441113490364,
      "grad_norm": 0.4597685050788604,
      "learning_rate": 2.4653595518188276e-05,
      "loss": 1.4702,
      "step": 107
    },
    {
      "epoch": 0.4625267665952891,
      "grad_norm": 0.4198721125351821,
      "learning_rate": 2.4643227755664898e-05,
      "loss": 1.5426,
      "step": 108
    },
    {
      "epoch": 0.4668094218415418,
      "grad_norm": 0.4415892969218905,
      "learning_rate": 2.463270936582872e-05,
      "loss": 1.5348,
      "step": 109
    },
    {
      "epoch": 0.47109207708779444,
      "grad_norm": 0.468616016936323,
      "learning_rate": 2.4622040479152195e-05,
      "loss": 1.5948,
      "step": 110
    },
    {
      "epoch": 0.4753747323340471,
      "grad_norm": 0.7486771610195644,
      "learning_rate": 2.4611221227974584e-05,
      "loss": 1.683,
      "step": 111
    },
    {
      "epoch": 0.4796573875802998,
      "grad_norm": 0.779148114510847,
      "learning_rate": 2.4600251746500296e-05,
      "loss": 1.4869,
      "step": 112
    },
    {
      "epoch": 0.48394004282655245,
      "grad_norm": 0.4689233006931303,
      "learning_rate": 2.4589132170797234e-05,
      "loss": 1.561,
      "step": 113
    },
    {
      "epoch": 0.48822269807280516,
      "grad_norm": 0.7040850097227628,
      "learning_rate": 2.4577862638795098e-05,
      "loss": 1.6254,
      "step": 114
    },
    {
      "epoch": 0.4925053533190578,
      "grad_norm": 0.4769716615847163,
      "learning_rate": 2.456644329028369e-05,
      "loss": 1.5774,
      "step": 115
    },
    {
      "epoch": 0.49678800856531047,
      "grad_norm": 0.6632040162872483,
      "learning_rate": 2.4554874266911157e-05,
      "loss": 1.5463,
      "step": 116
    },
    {
      "epoch": 0.5010706638115632,
      "grad_norm": 1.1457031665772415,
      "learning_rate": 2.4543155712182252e-05,
      "loss": 1.642,
      "step": 117
    },
    {
      "epoch": 0.5053533190578159,
      "grad_norm": 0.5148470344333809,
      "learning_rate": 2.4531287771456556e-05,
      "loss": 1.5455,
      "step": 118
    },
    {
      "epoch": 0.5096359743040685,
      "grad_norm": 0.9100598002476826,
      "learning_rate": 2.4519270591946653e-05,
      "loss": 1.555,
      "step": 119
    },
    {
      "epoch": 0.5139186295503212,
      "grad_norm": 1.3104358729746841,
      "learning_rate": 2.4507104322716326e-05,
      "loss": 1.5604,
      "step": 120
    },
    {
      "epoch": 0.5182012847965739,
      "grad_norm": 0.5181336432498789,
      "learning_rate": 2.44947891146787e-05,
      "loss": 1.5029,
      "step": 121
    },
    {
      "epoch": 0.5224839400428265,
      "grad_norm": 1.0934800951662504,
      "learning_rate": 2.4482325120594374e-05,
      "loss": 1.5449,
      "step": 122
    },
    {
      "epoch": 0.5267665952890792,
      "grad_norm": 0.6352277468903285,
      "learning_rate": 2.4469712495069507e-05,
      "loss": 1.588,
      "step": 123
    },
    {
      "epoch": 0.5310492505353319,
      "grad_norm": 1.4867821380058142,
      "learning_rate": 2.445695139455394e-05,
      "loss": 1.6408,
      "step": 124
    },
    {
      "epoch": 0.5353319057815846,
      "grad_norm": 0.6556668793792217,
      "learning_rate": 2.444404197733921e-05,
      "loss": 1.5059,
      "step": 125
    },
    {
      "epoch": 0.5396145610278372,
      "grad_norm": 1.3366811637363765,
      "learning_rate": 2.4430984403556613e-05,
      "loss": 1.6334,
      "step": 126
    },
    {
      "epoch": 0.5438972162740899,
      "grad_norm": 0.8391779284464247,
      "learning_rate": 2.441777883517522e-05,
      "loss": 1.5342,
      "step": 127
    },
    {
      "epoch": 0.5481798715203426,
      "grad_norm": 1.5151547233227163,
      "learning_rate": 2.4404425435999857e-05,
      "loss": 1.4767,
      "step": 128
    },
    {
      "epoch": 0.5524625267665952,
      "grad_norm": 0.8456634115358744,
      "learning_rate": 2.4390924371669065e-05,
      "loss": 1.3985,
      "step": 129
    },
    {
      "epoch": 0.556745182012848,
      "grad_norm": 0.9899617700169978,
      "learning_rate": 2.437727580965307e-05,
      "loss": 1.547,
      "step": 130
    },
    {
      "epoch": 0.5610278372591007,
      "grad_norm": 0.8748009025292892,
      "learning_rate": 2.436347991925169e-05,
      "loss": 1.5895,
      "step": 131
    },
    {
      "epoch": 0.5653104925053534,
      "grad_norm": 0.8284458411110256,
      "learning_rate": 2.4349536871592227e-05,
      "loss": 1.5536,
      "step": 132
    },
    {
      "epoch": 0.569593147751606,
      "grad_norm": 0.918716629707354,
      "learning_rate": 2.4335446839627375e-05,
      "loss": 1.6851,
      "step": 133
    },
    {
      "epoch": 0.5738758029978587,
      "grad_norm": 1.0628279716423659,
      "learning_rate": 2.4321209998133025e-05,
      "loss": 1.6705,
      "step": 134
    },
    {
      "epoch": 0.5781584582441114,
      "grad_norm": 0.73934998026875,
      "learning_rate": 2.430682652370616e-05,
      "loss": 1.6545,
      "step": 135
    },
    {
      "epoch": 0.582441113490364,
      "grad_norm": 0.8203168050853737,
      "learning_rate": 2.4292296594762602e-05,
      "loss": 1.6182,
      "step": 136
    },
    {
      "epoch": 0.5867237687366167,
      "grad_norm": 0.6116686513293031,
      "learning_rate": 2.4277620391534845e-05,
      "loss": 1.6446,
      "step": 137
    },
    {
      "epoch": 0.5910064239828694,
      "grad_norm": 0.9614170914314591,
      "learning_rate": 2.4262798096069788e-05,
      "loss": 1.494,
      "step": 138
    },
    {
      "epoch": 0.5952890792291221,
      "grad_norm": 0.3676545315742134,
      "learning_rate": 2.424782989222651e-05,
      "loss": 1.595,
      "step": 139
    },
    {
      "epoch": 0.5995717344753747,
      "grad_norm": 0.7408509458451011,
      "learning_rate": 2.4232715965673952e-05,
      "loss": 1.6386,
      "step": 140
    },
    {
      "epoch": 0.6038543897216274,
      "grad_norm": 0.40233518217652775,
      "learning_rate": 2.421745650388864e-05,
      "loss": 1.5558,
      "step": 141
    },
    {
      "epoch": 0.6081370449678801,
      "grad_norm": 0.5148389113634867,
      "learning_rate": 2.4202051696152353e-05,
      "loss": 1.5015,
      "step": 142
    },
    {
      "epoch": 0.6124197002141327,
      "grad_norm": 0.3807057141875052,
      "learning_rate": 2.418650173354977e-05,
      "loss": 1.6467,
      "step": 143
    },
    {
      "epoch": 0.6167023554603854,
      "grad_norm": 0.3896991503471914,
      "learning_rate": 2.41708068089661e-05,
      "loss": 1.6053,
      "step": 144
    },
    {
      "epoch": 0.6209850107066381,
      "grad_norm": 0.35553342191514337,
      "learning_rate": 2.4154967117084705e-05,
      "loss": 1.5364,
      "step": 145
    },
    {
      "epoch": 0.6252676659528907,
      "grad_norm": 0.36567417738111496,
      "learning_rate": 2.4138982854384663e-05,
      "loss": 1.6348,
      "step": 146
    },
    {
      "epoch": 0.6295503211991434,
      "grad_norm": 0.38816867602696453,
      "learning_rate": 2.412285421913834e-05,
      "loss": 1.4694,
      "step": 147
    },
    {
      "epoch": 0.6338329764453962,
      "grad_norm": 0.35173888322190433,
      "learning_rate": 2.410658141140894e-05,
      "loss": 1.646,
      "step": 148
    },
    {
      "epoch": 0.6381156316916489,
      "grad_norm": 0.36815567692224666,
      "learning_rate": 2.4090164633048e-05,
      "loss": 1.6168,
      "step": 149
    },
    {
      "epoch": 0.6423982869379015,
      "grad_norm": 0.3789787801030716,
      "learning_rate": 2.4073604087692925e-05,
      "loss": 1.5451,
      "step": 150
    },
    {
      "epoch": 0.6466809421841542,
      "grad_norm": 0.4139676112725167,
      "learning_rate": 2.4056899980764407e-05,
      "loss": 1.5772,
      "step": 151
    },
    {
      "epoch": 0.6509635974304069,
      "grad_norm": 0.4317710716550067,
      "learning_rate": 2.404005251946394e-05,
      "loss": 1.5901,
      "step": 152
    },
    {
      "epoch": 0.6552462526766595,
      "grad_norm": 0.3793107950355877,
      "learning_rate": 2.4023061912771188e-05,
      "loss": 1.4831,
      "step": 153
    },
    {
      "epoch": 0.6595289079229122,
      "grad_norm": 0.36255246115756395,
      "learning_rate": 2.4005928371441444e-05,
      "loss": 1.5417,
      "step": 154
    },
    {
      "epoch": 0.6638115631691649,
      "grad_norm": 0.35515016194574406,
      "learning_rate": 2.3988652108002984e-05,
      "loss": 1.4822,
      "step": 155
    },
    {
      "epoch": 0.6680942184154176,
      "grad_norm": 0.3462285743933349,
      "learning_rate": 2.3971233336754444e-05,
      "loss": 1.5157,
      "step": 156
    },
    {
      "epoch": 0.6723768736616702,
      "grad_norm": 0.3669326112622935,
      "learning_rate": 2.395367227376216e-05,
      "loss": 1.5652,
      "step": 157
    },
    {
      "epoch": 0.6766595289079229,
      "grad_norm": 0.3704783452888347,
      "learning_rate": 2.393596913685748e-05,
      "loss": 1.5836,
      "step": 158
    },
    {
      "epoch": 0.6809421841541756,
      "grad_norm": 0.3829979392497551,
      "learning_rate": 2.391812414563408e-05,
      "loss": 1.5023,
      "step": 159
    },
    {
      "epoch": 0.6852248394004282,
      "grad_norm": 0.3630273112296912,
      "learning_rate": 2.390013752144521e-05,
      "loss": 1.6907,
      "step": 160
    },
    {
      "epoch": 0.6895074946466809,
      "grad_norm": 0.3351207679536815,
      "learning_rate": 2.3882009487400993e-05,
      "loss": 1.4393,
      "step": 161
    },
    {
      "epoch": 0.6937901498929336,
      "grad_norm": 0.3497511991840534,
      "learning_rate": 2.386374026836561e-05,
      "loss": 1.598,
      "step": 162
    },
    {
      "epoch": 0.6980728051391863,
      "grad_norm": 0.3337980565250301,
      "learning_rate": 2.3845330090954542e-05,
      "loss": 1.4704,
      "step": 163
    },
    {
      "epoch": 0.702355460385439,
      "grad_norm": 0.36707456896757124,
      "learning_rate": 2.3826779183531744e-05,
      "loss": 1.5851,
      "step": 164
    },
    {
      "epoch": 0.7066381156316917,
      "grad_norm": 0.37164461991634257,
      "learning_rate": 2.380808777620682e-05,
      "loss": 1.531,
      "step": 165
    },
    {
      "epoch": 0.7109207708779444,
      "grad_norm": 0.3505467917592193,
      "learning_rate": 2.3789256100832173e-05,
      "loss": 1.4713,
      "step": 166
    },
    {
      "epoch": 0.715203426124197,
      "grad_norm": 0.47758470060633207,
      "learning_rate": 2.3770284391000113e-05,
      "loss": 1.5102,
      "step": 167
    },
    {
      "epoch": 0.7194860813704497,
      "grad_norm": 0.33053663778093284,
      "learning_rate": 2.375117288203997e-05,
      "loss": 1.4791,
      "step": 168
    },
    {
      "epoch": 0.7237687366167024,
      "grad_norm": 0.3393970208208402,
      "learning_rate": 2.3731921811015175e-05,
      "loss": 1.6291,
      "step": 169
    },
    {
      "epoch": 0.728051391862955,
      "grad_norm": 0.35855145219326184,
      "learning_rate": 2.3712531416720317e-05,
      "loss": 1.5539,
      "step": 170
    },
    {
      "epoch": 0.7323340471092077,
      "grad_norm": 0.369911611756327,
      "learning_rate": 2.3693001939678183e-05,
      "loss": 1.4999,
      "step": 171
    },
    {
      "epoch": 0.7366167023554604,
      "grad_norm": 0.5505558187826747,
      "learning_rate": 2.367333362213678e-05,
      "loss": 1.5852,
      "step": 172
    },
    {
      "epoch": 0.7408993576017131,
      "grad_norm": 0.3528456056150531,
      "learning_rate": 2.3653526708066314e-05,
      "loss": 1.5358,
      "step": 173
    },
    {
      "epoch": 0.7451820128479657,
      "grad_norm": 0.3968210406914177,
      "learning_rate": 2.3633581443156178e-05,
      "loss": 1.5028,
      "step": 174
    },
    {
      "epoch": 0.7494646680942184,
      "grad_norm": 0.47087903951900106,
      "learning_rate": 2.361349807481189e-05,
      "loss": 1.6258,
      "step": 175
    },
    {
      "epoch": 0.7537473233404711,
      "grad_norm": 0.380904082979793,
      "learning_rate": 2.3593276852152056e-05,
      "loss": 1.5982,
      "step": 176
    },
    {
      "epoch": 0.7580299785867237,
      "grad_norm": 0.40302033351805244,
      "learning_rate": 2.3572918026005235e-05,
      "loss": 1.6539,
      "step": 177
    },
    {
      "epoch": 0.7623126338329764,
      "grad_norm": 0.41272981679464077,
      "learning_rate": 2.355242184890686e-05,
      "loss": 1.4144,
      "step": 178
    },
    {
      "epoch": 0.7665952890792291,
      "grad_norm": 0.3606085218359927,
      "learning_rate": 2.35317885750961e-05,
      "loss": 1.5244,
      "step": 179
    },
    {
      "epoch": 0.7708779443254818,
      "grad_norm": 0.34295993724517143,
      "learning_rate": 2.3511018460512696e-05,
      "loss": 1.4102,
      "step": 180
    },
    {
      "epoch": 0.7751605995717344,
      "grad_norm": 0.4192738060845751,
      "learning_rate": 2.349011176279379e-05,
      "loss": 1.5336,
      "step": 181
    },
    {
      "epoch": 0.7794432548179872,
      "grad_norm": 0.3651804066614457,
      "learning_rate": 2.3469068741270744e-05,
      "loss": 1.5337,
      "step": 182
    },
    {
      "epoch": 0.7837259100642399,
      "grad_norm": 0.3621028477405051,
      "learning_rate": 2.3447889656965896e-05,
      "loss": 1.6515,
      "step": 183
    },
    {
      "epoch": 0.7880085653104925,
      "grad_norm": 0.4192540795103203,
      "learning_rate": 2.342657477258935e-05,
      "loss": 1.6674,
      "step": 184
    },
    {
      "epoch": 0.7922912205567452,
      "grad_norm": 0.40348346440086696,
      "learning_rate": 2.340512435253569e-05,
      "loss": 1.5162,
      "step": 185
    },
    {
      "epoch": 0.7965738758029979,
      "grad_norm": 0.39794985457766996,
      "learning_rate": 2.3383538662880732e-05,
      "loss": 1.4518,
      "step": 186
    },
    {
      "epoch": 0.8008565310492506,
      "grad_norm": 0.40289663289027905,
      "learning_rate": 2.3361817971378197e-05,
      "loss": 1.6306,
      "step": 187
    },
    {
      "epoch": 0.8051391862955032,
      "grad_norm": 0.35469529427153196,
      "learning_rate": 2.3339962547456397e-05,
      "loss": 1.3989,
      "step": 188
    },
    {
      "epoch": 0.8094218415417559,
      "grad_norm": 0.3727961066406737,
      "learning_rate": 2.3317972662214898e-05,
      "loss": 1.5999,
      "step": 189
    },
    {
      "epoch": 0.8137044967880086,
      "grad_norm": 0.4333692956220233,
      "learning_rate": 2.329584858842116e-05,
      "loss": 1.5081,
      "step": 190
    },
    {
      "epoch": 0.8179871520342612,
      "grad_norm": 0.3789155310493327,
      "learning_rate": 2.3273590600507135e-05,
      "loss": 1.4586,
      "step": 191
    },
    {
      "epoch": 0.8222698072805139,
      "grad_norm": 0.3768886247305229,
      "learning_rate": 2.3251198974565887e-05,
      "loss": 1.5521,
      "step": 192
    },
    {
      "epoch": 0.8265524625267666,
      "grad_norm": 0.45287917232766545,
      "learning_rate": 2.322867398834815e-05,
      "loss": 1.6411,
      "step": 193
    },
    {
      "epoch": 0.8308351177730193,
      "grad_norm": 0.36693401699800615,
      "learning_rate": 2.320601592125889e-05,
      "loss": 1.5276,
      "step": 194
    },
    {
      "epoch": 0.8351177730192719,
      "grad_norm": 0.44127614034536217,
      "learning_rate": 2.318322505435384e-05,
      "loss": 1.4782,
      "step": 195
    },
    {
      "epoch": 0.8394004282655246,
      "grad_norm": 0.3608904748036842,
      "learning_rate": 2.316030167033601e-05,
      "loss": 1.5273,
      "step": 196
    },
    {
      "epoch": 0.8436830835117773,
      "grad_norm": 0.38846305560083205,
      "learning_rate": 2.313724605355218e-05,
      "loss": 1.4738,
      "step": 197
    },
    {
      "epoch": 0.8479657387580299,
      "grad_norm": 0.3509606665662544,
      "learning_rate": 2.3114058489989378e-05,
      "loss": 1.4431,
      "step": 198
    },
    {
      "epoch": 0.8522483940042827,
      "grad_norm": 0.3858549090015476,
      "learning_rate": 2.3090739267271332e-05,
      "loss": 1.515,
      "step": 199
    },
    {
      "epoch": 0.8565310492505354,
      "grad_norm": 0.4113088516859706,
      "learning_rate": 2.306728867465489e-05,
      "loss": 1.5244,
      "step": 200
    },
    {
      "epoch": 0.860813704496788,
      "grad_norm": 0.44586677646136047,
      "learning_rate": 2.3043707003026452e-05,
      "loss": 1.4043,
      "step": 201
    },
    {
      "epoch": 0.8650963597430407,
      "grad_norm": 0.43850720329826914,
      "learning_rate": 2.3019994544898345e-05,
      "loss": 1.5149,
      "step": 202
    },
    {
      "epoch": 0.8693790149892934,
      "grad_norm": 0.4409370304445262,
      "learning_rate": 2.2996151594405196e-05,
      "loss": 1.5645,
      "step": 203
    },
    {
      "epoch": 0.8736616702355461,
      "grad_norm": 0.5520670678955565,
      "learning_rate": 2.2972178447300305e-05,
      "loss": 1.5525,
      "step": 204
    },
    {
      "epoch": 0.8779443254817987,
      "grad_norm": 0.44992372464956326,
      "learning_rate": 2.2948075400951946e-05,
      "loss": 1.5927,
      "step": 205
    },
    {
      "epoch": 0.8822269807280514,
      "grad_norm": 0.5250810847046828,
      "learning_rate": 2.2923842754339696e-05,
      "loss": 1.5617,
      "step": 206
    },
    {
      "epoch": 0.8865096359743041,
      "grad_norm": 0.43126340615021524,
      "learning_rate": 2.2899480808050724e-05,
      "loss": 1.6348,
      "step": 207
    },
    {
      "epoch": 0.8907922912205567,
      "grad_norm": 0.5913654606733179,
      "learning_rate": 2.2874989864276058e-05,
      "loss": 1.5646,
      "step": 208
    },
    {
      "epoch": 0.8950749464668094,
      "grad_norm": 0.5253786434201022,
      "learning_rate": 2.2850370226806846e-05,
      "loss": 1.5984,
      "step": 209
    },
    {
      "epoch": 0.8993576017130621,
      "grad_norm": 0.4009456934819743,
      "learning_rate": 2.2825622201030572e-05,
      "loss": 1.5283,
      "step": 210
    },
    {
      "epoch": 0.9036402569593148,
      "grad_norm": 0.5333990945105044,
      "learning_rate": 2.280074609392729e-05,
      "loss": 1.5867,
      "step": 211
    },
    {
      "epoch": 0.9079229122055674,
      "grad_norm": 0.3887789131541451,
      "learning_rate": 2.2775742214065786e-05,
      "loss": 1.3414,
      "step": 212
    },
    {
      "epoch": 0.9122055674518201,
      "grad_norm": 0.5198803692192113,
      "learning_rate": 2.2750610871599782e-05,
      "loss": 1.5405,
      "step": 213
    },
    {
      "epoch": 0.9164882226980728,
      "grad_norm": 0.3926454337534817,
      "learning_rate": 2.2725352378264074e-05,
      "loss": 1.509,
      "step": 214
    },
    {
      "epoch": 0.9207708779443254,
      "grad_norm": 0.42675935243666635,
      "learning_rate": 2.2699967047370656e-05,
      "loss": 1.5438,
      "step": 215
    },
    {
      "epoch": 0.9250535331905781,
      "grad_norm": 0.3709378032432874,
      "learning_rate": 2.2674455193804857e-05,
      "loss": 1.6725,
      "step": 216
    },
    {
      "epoch": 0.9293361884368309,
      "grad_norm": 0.40669761633617474,
      "learning_rate": 2.26488171340214e-05,
      "loss": 1.485,
      "step": 217
    },
    {
      "epoch": 0.9336188436830836,
      "grad_norm": 0.41102950360303664,
      "learning_rate": 2.2623053186040533e-05,
      "loss": 1.6809,
      "step": 218
    },
    {
      "epoch": 0.9379014989293362,
      "grad_norm": 0.40461859144094875,
      "learning_rate": 2.259716366944401e-05,
      "loss": 1.4951,
      "step": 219
    },
    {
      "epoch": 0.9421841541755889,
      "grad_norm": 0.3897126856825778,
      "learning_rate": 2.25711489053712e-05,
      "loss": 1.5844,
      "step": 220
    },
    {
      "epoch": 0.9464668094218416,
      "grad_norm": 0.42222904373725634,
      "learning_rate": 2.2545009216515038e-05,
      "loss": 1.4944,
      "step": 221
    },
    {
      "epoch": 0.9507494646680942,
      "grad_norm": 0.40547118703731166,
      "learning_rate": 2.2518744927118085e-05,
      "loss": 1.5574,
      "step": 222
    },
    {
      "epoch": 0.9550321199143469,
      "grad_norm": 0.3513543405028927,
      "learning_rate": 2.2492356362968452e-05,
      "loss": 1.4118,
      "step": 223
    },
    {
      "epoch": 0.9593147751605996,
      "grad_norm": 0.48633500004889796,
      "learning_rate": 2.2465843851395796e-05,
      "loss": 1.5477,
      "step": 224
    },
    {
      "epoch": 0.9635974304068522,
      "grad_norm": 0.3590985254593397,
      "learning_rate": 2.2439207721267236e-05,
      "loss": 1.4816,
      "step": 225
    },
    {
      "epoch": 0.9678800856531049,
      "grad_norm": 0.3702932493860504,
      "learning_rate": 2.2412448302983286e-05,
      "loss": 1.5548,
      "step": 226
    },
    {
      "epoch": 0.9721627408993576,
      "grad_norm": 0.40425531625329014,
      "learning_rate": 2.2385565928473758e-05,
      "loss": 1.6429,
      "step": 227
    },
    {
      "epoch": 0.9764453961456103,
      "grad_norm": 0.4058276769467583,
      "learning_rate": 2.2358560931193636e-05,
      "loss": 1.4335,
      "step": 228
    },
    {
      "epoch": 0.9807280513918629,
      "grad_norm": 0.3312315245440172,
      "learning_rate": 2.2331433646118946e-05,
      "loss": 1.3716,
      "step": 229
    },
    {
      "epoch": 0.9850107066381156,
      "grad_norm": 0.45936537843711933,
      "learning_rate": 2.2304184409742602e-05,
      "loss": 1.6051,
      "step": 230
    },
    {
      "epoch": 0.9892933618843683,
      "grad_norm": 0.41972232909317975,
      "learning_rate": 2.227681356007022e-05,
      "loss": 1.5685,
      "step": 231
    },
    {
      "epoch": 0.9935760171306209,
      "grad_norm": 0.3634109524654273,
      "learning_rate": 2.224932143661594e-05,
      "loss": 1.5598,
      "step": 232
    },
    {
      "epoch": 0.9978586723768736,
      "grad_norm": 0.45907719960230176,
      "learning_rate": 2.222170838039822e-05,
      "loss": 1.5116,
      "step": 233
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.45907719960230176,
      "learning_rate": 2.2193974733935573e-05,
      "loss": 1.6087,
      "step": 234
    },
    {
      "epoch": 1.0042826552462527,
      "grad_norm": 0.5861411036899304,
      "learning_rate": 2.216612084124236e-05,
      "loss": 1.3689,
      "step": 235
    },
    {
      "epoch": 1.0085653104925054,
      "grad_norm": 0.4289041901369656,
      "learning_rate": 2.213814704782449e-05,
      "loss": 1.5579,
      "step": 236
    },
    {
      "epoch": 1.0128479657387581,
      "grad_norm": 0.6259476055605661,
      "learning_rate": 2.2110053700675153e-05,
      "loss": 1.4052,
      "step": 237
    },
    {
      "epoch": 1.0171306209850106,
      "grad_norm": 0.38820446634590455,
      "learning_rate": 2.2081841148270517e-05,
      "loss": 1.4333,
      "step": 238
    },
    {
      "epoch": 1.0214132762312633,
      "grad_norm": 0.5061006213518089,
      "learning_rate": 2.205350974056538e-05,
      "loss": 1.356,
      "step": 239
    },
    {
      "epoch": 1.025695931477516,
      "grad_norm": 0.3610425739202918,
      "learning_rate": 2.2025059828988873e-05,
      "loss": 1.3948,
      "step": 240
    },
    {
      "epoch": 1.0299785867237687,
      "grad_norm": 0.423679381495652,
      "learning_rate": 2.1996491766440047e-05,
      "loss": 1.3546,
      "step": 241
    },
    {
      "epoch": 1.0342612419700214,
      "grad_norm": 0.34897540177436914,
      "learning_rate": 2.196780590728355e-05,
      "loss": 1.4721,
      "step": 242
    },
    {
      "epoch": 1.0385438972162742,
      "grad_norm": 0.425385319438199,
      "learning_rate": 2.193900260734519e-05,
      "loss": 1.4658,
      "step": 243
    },
    {
      "epoch": 1.0428265524625269,
      "grad_norm": 0.3792487113919495,
      "learning_rate": 2.191008222390754e-05,
      "loss": 1.4699,
      "step": 244
    },
    {
      "epoch": 1.0471092077087794,
      "grad_norm": 0.40281504819932906,
      "learning_rate": 2.188104511570551e-05,
      "loss": 1.3331,
      "step": 245
    },
    {
      "epoch": 1.051391862955032,
      "grad_norm": 0.395699301044668,
      "learning_rate": 2.1851891642921875e-05,
      "loss": 1.4023,
      "step": 246
    },
    {
      "epoch": 1.0556745182012848,
      "grad_norm": 0.37492910340499946,
      "learning_rate": 2.1822622167182837e-05,
      "loss": 1.4737,
      "step": 247
    },
    {
      "epoch": 1.0599571734475375,
      "grad_norm": 0.3952955885524941,
      "learning_rate": 2.1793237051553516e-05,
      "loss": 1.3771,
      "step": 248
    },
    {
      "epoch": 1.0642398286937902,
      "grad_norm": 0.3870229140110392,
      "learning_rate": 2.176373666053346e-05,
      "loss": 1.4438,
      "step": 249
    },
    {
      "epoch": 1.068522483940043,
      "grad_norm": 0.40050568793681735,
      "learning_rate": 2.1734121360052117e-05,
      "loss": 1.3037,
      "step": 250
    },
    {
      "epoch": 1.0728051391862956,
      "grad_norm": 0.36180001178651866,
      "learning_rate": 2.1704391517464297e-05,
      "loss": 1.4278,
      "step": 251
    },
    {
      "epoch": 1.077087794432548,
      "grad_norm": 0.4411737907590586,
      "learning_rate": 2.1674547501545615e-05,
      "loss": 1.3945,
      "step": 252
    },
    {
      "epoch": 1.0813704496788008,
      "grad_norm": 0.49343649178046994,
      "learning_rate": 2.164458968248792e-05,
      "loss": 1.3915,
      "step": 253
    },
    {
      "epoch": 1.0856531049250535,
      "grad_norm": 0.3296867039273728,
      "learning_rate": 2.16145184318947e-05,
      "loss": 1.3265,
      "step": 254
    },
    {
      "epoch": 1.0899357601713062,
      "grad_norm": 0.39840035584346023,
      "learning_rate": 2.158433412277647e-05,
      "loss": 1.3751,
      "step": 255
    },
    {
      "epoch": 1.094218415417559,
      "grad_norm": 0.3633584286546075,
      "learning_rate": 2.1554037129546153e-05,
      "loss": 1.354,
      "step": 256
    },
    {
      "epoch": 1.0985010706638116,
      "grad_norm": 0.4160505299653988,
      "learning_rate": 2.152362782801443e-05,
      "loss": 1.4007,
      "step": 257
    },
    {
      "epoch": 1.1027837259100641,
      "grad_norm": 0.41007015982955497,
      "learning_rate": 2.1493106595385075e-05,
      "loss": 1.5213,
      "step": 258
    },
    {
      "epoch": 1.1070663811563168,
      "grad_norm": 0.4650280917344183,
      "learning_rate": 2.1462473810250283e-05,
      "loss": 1.3312,
      "step": 259
    },
    {
      "epoch": 1.1113490364025695,
      "grad_norm": 0.4266636624788006,
      "learning_rate": 2.1431729852585973e-05,
      "loss": 1.4889,
      "step": 260
    },
    {
      "epoch": 1.1156316916488223,
      "grad_norm": 0.3484736446907606,
      "learning_rate": 2.140087510374707e-05,
      "loss": 1.3312,
      "step": 261
    },
    {
      "epoch": 1.119914346895075,
      "grad_norm": 0.41911843923802033,
      "learning_rate": 2.1369909946462785e-05,
      "loss": 1.3692,
      "step": 262
    },
    {
      "epoch": 1.1241970021413277,
      "grad_norm": 0.3732407300025524,
      "learning_rate": 2.1338834764831845e-05,
      "loss": 1.3838,
      "step": 263
    },
    {
      "epoch": 1.1284796573875804,
      "grad_norm": 0.38178586641917484,
      "learning_rate": 2.1307649944317757e-05,
      "loss": 1.2793,
      "step": 264
    },
    {
      "epoch": 1.132762312633833,
      "grad_norm": 0.3673713909731938,
      "learning_rate": 2.1276355871744014e-05,
      "loss": 1.4399,
      "step": 265
    },
    {
      "epoch": 1.1370449678800856,
      "grad_norm": 0.3901268012108484,
      "learning_rate": 2.124495293528928e-05,
      "loss": 1.4587,
      "step": 266
    },
    {
      "epoch": 1.1413276231263383,
      "grad_norm": 0.3360533239959902,
      "learning_rate": 2.121344152448261e-05,
      "loss": 1.243,
      "step": 267
    },
    {
      "epoch": 1.145610278372591,
      "grad_norm": 0.3771399946534415,
      "learning_rate": 2.118182203019859e-05,
      "loss": 1.3957,
      "step": 268
    },
    {
      "epoch": 1.1498929336188437,
      "grad_norm": 0.4880244995913143,
      "learning_rate": 2.1150094844652493e-05,
      "loss": 1.3888,
      "step": 269
    },
    {
      "epoch": 1.1541755888650964,
      "grad_norm": 0.3578978890422881,
      "learning_rate": 2.1118260361395428e-05,
      "loss": 1.4619,
      "step": 270
    },
    {
      "epoch": 1.1584582441113491,
      "grad_norm": 0.4201377835773034,
      "learning_rate": 2.108631897530945e-05,
      "loss": 1.4785,
      "step": 271
    },
    {
      "epoch": 1.1627408993576016,
      "grad_norm": 0.4499980376910688,
      "learning_rate": 2.1054271082602646e-05,
      "loss": 1.4159,
      "step": 272
    },
    {
      "epoch": 1.1670235546038543,
      "grad_norm": 0.3320870014261129,
      "learning_rate": 2.102211708080425e-05,
      "loss": 1.3894,
      "step": 273
    },
    {
      "epoch": 1.171306209850107,
      "grad_norm": 0.42013650446350975,
      "learning_rate": 2.0989857368759686e-05,
      "loss": 1.3316,
      "step": 274
    },
    {
      "epoch": 1.1755888650963597,
      "grad_norm": 0.35386203059819066,
      "learning_rate": 2.0957492346625647e-05,
      "loss": 1.4005,
      "step": 275
    },
    {
      "epoch": 1.1798715203426124,
      "grad_norm": 0.3484835954332615,
      "learning_rate": 2.0925022415865093e-05,
      "loss": 1.275,
      "step": 276
    },
    {
      "epoch": 1.1841541755888652,
      "grad_norm": 0.4266307426695914,
      "learning_rate": 2.0892447979242314e-05,
      "loss": 1.3413,
      "step": 277
    },
    {
      "epoch": 1.1884368308351179,
      "grad_norm": 0.4145417718791916,
      "learning_rate": 2.085976944081791e-05,
      "loss": 1.4286,
      "step": 278
    },
    {
      "epoch": 1.1927194860813706,
      "grad_norm": 0.4464633405061637,
      "learning_rate": 2.0826987205943772e-05,
      "loss": 1.4146,
      "step": 279
    },
    {
      "epoch": 1.197002141327623,
      "grad_norm": 0.3813440974126778,
      "learning_rate": 2.0794101681258077e-05,
      "loss": 1.4651,
      "step": 280
    },
    {
      "epoch": 1.2012847965738758,
      "grad_norm": 0.37367647405069787,
      "learning_rate": 2.0761113274680227e-05,
      "loss": 1.3905,
      "step": 281
    },
    {
      "epoch": 1.2055674518201285,
      "grad_norm": 0.4209973043589035,
      "learning_rate": 2.0728022395405794e-05,
      "loss": 1.3164,
      "step": 282
    },
    {
      "epoch": 1.2098501070663812,
      "grad_norm": 0.35285764889842397,
      "learning_rate": 2.069482945390145e-05,
      "loss": 1.3184,
      "step": 283
    },
    {
      "epoch": 1.214132762312634,
      "grad_norm": 0.6553038505857459,
      "learning_rate": 2.0661534861899858e-05,
      "loss": 1.2821,
      "step": 284
    },
    {
      "epoch": 1.2184154175588866,
      "grad_norm": 0.4444549917679711,
      "learning_rate": 2.0628139032394582e-05,
      "loss": 1.3502,
      "step": 285
    },
    {
      "epoch": 1.222698072805139,
      "grad_norm": 0.3352896065598441,
      "learning_rate": 2.0594642379634972e-05,
      "loss": 1.4577,
      "step": 286
    },
    {
      "epoch": 1.2269807280513918,
      "grad_norm": 0.47069617049270435,
      "learning_rate": 2.0561045319120986e-05,
      "loss": 1.4025,
      "step": 287
    },
    {
      "epoch": 1.2312633832976445,
      "grad_norm": 0.3991774380744109,
      "learning_rate": 2.0527348267598085e-05,
      "loss": 1.3674,
      "step": 288
    },
    {
      "epoch": 1.2355460385438972,
      "grad_norm": 0.45298444147723504,
      "learning_rate": 2.049355164305203e-05,
      "loss": 1.2552,
      "step": 289
    },
    {
      "epoch": 1.23982869379015,
      "grad_norm": 0.33638821026760457,
      "learning_rate": 2.0459655864703708e-05,
      "loss": 1.2414,
      "step": 290
    },
    {
      "epoch": 1.2441113490364026,
      "grad_norm": 0.4270670356767359,
      "learning_rate": 2.0425661353003932e-05,
      "loss": 1.261,
      "step": 291
    },
    {
      "epoch": 1.2483940042826553,
      "grad_norm": 0.40636537980947196,
      "learning_rate": 2.0391568529628237e-05,
      "loss": 1.3725,
      "step": 292
    },
    {
      "epoch": 1.252676659528908,
      "grad_norm": 0.36195547030323016,
      "learning_rate": 2.035737781747162e-05,
      "loss": 1.3342,
      "step": 293
    },
    {
      "epoch": 1.2569593147751605,
      "grad_norm": 0.3539734470288324,
      "learning_rate": 2.0323089640643326e-05,
      "loss": 1.2697,
      "step": 294
    },
    {
      "epoch": 1.2612419700214133,
      "grad_norm": 0.3540155063008326,
      "learning_rate": 2.0288704424461565e-05,
      "loss": 1.3329,
      "step": 295
    },
    {
      "epoch": 1.265524625267666,
      "grad_norm": 0.4090169739563911,
      "learning_rate": 2.0254222595448248e-05,
      "loss": 1.4402,
      "step": 296
    },
    {
      "epoch": 1.2698072805139187,
      "grad_norm": 0.4193574818141074,
      "learning_rate": 2.0219644581323698e-05,
      "loss": 1.3086,
      "step": 297
    },
    {
      "epoch": 1.2740899357601714,
      "grad_norm": 0.38365729947629434,
      "learning_rate": 2.0184970811001337e-05,
      "loss": 1.4018,
      "step": 298
    },
    {
      "epoch": 1.2783725910064239,
      "grad_norm": 0.4219737883083424,
      "learning_rate": 2.0150201714582356e-05,
      "loss": 1.3844,
      "step": 299
    },
    {
      "epoch": 1.2826552462526766,
      "grad_norm": 0.43507834104776355,
      "learning_rate": 2.011533772335041e-05,
      "loss": 1.3706,
      "step": 300
    },
    {
      "epoch": 1.2869379014989293,
      "grad_norm": 0.4133280809903553,
      "learning_rate": 2.008037926976625e-05,
      "loss": 1.376,
      "step": 301
    },
    {
      "epoch": 1.291220556745182,
      "grad_norm": 0.36852825890998525,
      "learning_rate": 2.0045326787462333e-05,
      "loss": 1.328,
      "step": 302
    },
    {
      "epoch": 1.2955032119914347,
      "grad_norm": 0.4205230066377953,
      "learning_rate": 2.001018071123751e-05,
      "loss": 1.2974,
      "step": 303
    },
    {
      "epoch": 1.2997858672376874,
      "grad_norm": 0.4329679857419846,
      "learning_rate": 1.9974941477051558e-05,
      "loss": 1.3526,
      "step": 304
    },
    {
      "epoch": 1.3040685224839401,
      "grad_norm": 0.3705004730863205,
      "learning_rate": 1.9939609522019818e-05,
      "loss": 1.2298,
      "step": 305
    },
    {
      "epoch": 1.3083511777301928,
      "grad_norm": 0.39436925521218896,
      "learning_rate": 1.9904185284407772e-05,
      "loss": 1.3945,
      "step": 306
    },
    {
      "epoch": 1.3126338329764453,
      "grad_norm": 0.35298924796738734,
      "learning_rate": 1.986866920362558e-05,
      "loss": 1.3016,
      "step": 307
    },
    {
      "epoch": 1.316916488222698,
      "grad_norm": 0.3894071215590034,
      "learning_rate": 1.9833061720222647e-05,
      "loss": 1.2325,
      "step": 308
    },
    {
      "epoch": 1.3211991434689507,
      "grad_norm": 0.3213378234068627,
      "learning_rate": 1.9797363275882165e-05,
      "loss": 1.2817,
      "step": 309
    },
    {
      "epoch": 1.3254817987152034,
      "grad_norm": 0.4084287292776311,
      "learning_rate": 1.9761574313415617e-05,
      "loss": 1.4881,
      "step": 310
    },
    {
      "epoch": 1.3297644539614561,
      "grad_norm": 0.40532300063738275,
      "learning_rate": 1.9725695276757302e-05,
      "loss": 1.4029,
      "step": 311
    },
    {
      "epoch": 1.3340471092077089,
      "grad_norm": 0.3507190637097869,
      "learning_rate": 1.9689726610958814e-05,
      "loss": 1.4194,
      "step": 312
    },
    {
      "epoch": 1.3383297644539613,
      "grad_norm": 0.3805072033067047,
      "learning_rate": 1.9653668762183526e-05,
      "loss": 1.3264,
      "step": 313
    },
    {
      "epoch": 1.342612419700214,
      "grad_norm": 0.3367128120964735,
      "learning_rate": 1.9617522177701058e-05,
      "loss": 1.3298,
      "step": 314
    },
    {
      "epoch": 1.3468950749464668,
      "grad_norm": 0.3977736636900147,
      "learning_rate": 1.9581287305881733e-05,
      "loss": 1.3487,
      "step": 315
    },
    {
      "epoch": 1.3511777301927195,
      "grad_norm": 0.3236399137428874,
      "learning_rate": 1.9544964596190996e-05,
      "loss": 1.2795,
      "step": 316
    },
    {
      "epoch": 1.3554603854389722,
      "grad_norm": 0.4410261852426088,
      "learning_rate": 1.9508554499183867e-05,
      "loss": 1.2954,
      "step": 317
    },
    {
      "epoch": 1.359743040685225,
      "grad_norm": 0.33824185574060495,
      "learning_rate": 1.9472057466499332e-05,
      "loss": 1.2966,
      "step": 318
    },
    {
      "epoch": 1.3640256959314776,
      "grad_norm": 0.5560403035800862,
      "learning_rate": 1.9435473950854745e-05,
      "loss": 1.4434,
      "step": 319
    },
    {
      "epoch": 1.3683083511777303,
      "grad_norm": 0.36625625108883125,
      "learning_rate": 1.939880440604021e-05,
      "loss": 1.2226,
      "step": 320
    },
    {
      "epoch": 1.3725910064239828,
      "grad_norm": 0.35699181136533303,
      "learning_rate": 1.9362049286912976e-05,
      "loss": 1.2464,
      "step": 321
    },
    {
      "epoch": 1.3768736616702355,
      "grad_norm": 0.3813490989402076,
      "learning_rate": 1.9325209049391745e-05,
      "loss": 1.3279,
      "step": 322
    },
    {
      "epoch": 1.3811563169164882,
      "grad_norm": 0.37459529309165335,
      "learning_rate": 1.9288284150451075e-05,
      "loss": 1.4422,
      "step": 323
    },
    {
      "epoch": 1.385438972162741,
      "grad_norm": 0.39667372726355776,
      "learning_rate": 1.9251275048115664e-05,
      "loss": 1.5061,
      "step": 324
    },
    {
      "epoch": 1.3897216274089936,
      "grad_norm": 0.34082355171490486,
      "learning_rate": 1.9214182201454695e-05,
      "loss": 1.3049,
      "step": 325
    },
    {
      "epoch": 1.3940042826552461,
      "grad_norm": 0.4260735758035037,
      "learning_rate": 1.917700607057613e-05,
      "loss": 1.3912,
      "step": 326
    },
    {
      "epoch": 1.3982869379014988,
      "grad_norm": 0.4021033157629882,
      "learning_rate": 1.9139747116621015e-05,
      "loss": 1.4421,
      "step": 327
    },
    {
      "epoch": 1.4025695931477515,
      "grad_norm": 0.4034799522400383,
      "learning_rate": 1.910240580175775e-05,
      "loss": 1.3598,
      "step": 328
    },
    {
      "epoch": 1.4068522483940042,
      "grad_norm": 0.44358114185104625,
      "learning_rate": 1.906498258917635e-05,
      "loss": 1.4136,
      "step": 329
    },
    {
      "epoch": 1.411134903640257,
      "grad_norm": 0.3945332504871927,
      "learning_rate": 1.9027477943082713e-05,
      "loss": 1.2517,
      "step": 330
    },
    {
      "epoch": 1.4154175588865097,
      "grad_norm": 0.3778742839914516,
      "learning_rate": 1.8989892328692864e-05,
      "loss": 1.333,
      "step": 331
    },
    {
      "epoch": 1.4197002141327624,
      "grad_norm": 0.3796237837136356,
      "learning_rate": 1.895222621222716e-05,
      "loss": 1.3931,
      "step": 332
    },
    {
      "epoch": 1.423982869379015,
      "grad_norm": 0.38301575785071823,
      "learning_rate": 1.8914480060904537e-05,
      "loss": 1.424,
      "step": 333
    },
    {
      "epoch": 1.4282655246252678,
      "grad_norm": 0.421930928101693,
      "learning_rate": 1.88766543429367e-05,
      "loss": 1.402,
      "step": 334
    },
    {
      "epoch": 1.4325481798715203,
      "grad_norm": 0.3699757863435036,
      "learning_rate": 1.8838749527522315e-05,
      "loss": 1.4079,
      "step": 335
    },
    {
      "epoch": 1.436830835117773,
      "grad_norm": 0.42666319657235885,
      "learning_rate": 1.8800766084841183e-05,
      "loss": 1.3614,
      "step": 336
    },
    {
      "epoch": 1.4411134903640257,
      "grad_norm": 0.35291694731273704,
      "learning_rate": 1.8762704486048427e-05,
      "loss": 1.3407,
      "step": 337
    },
    {
      "epoch": 1.4453961456102784,
      "grad_norm": 0.37044240049931565,
      "learning_rate": 1.872456520326863e-05,
      "loss": 1.3531,
      "step": 338
    },
    {
      "epoch": 1.4496788008565311,
      "grad_norm": 0.374037870809853,
      "learning_rate": 1.8686348709589982e-05,
      "loss": 1.4962,
      "step": 339
    },
    {
      "epoch": 1.4539614561027836,
      "grad_norm": 0.39143283644429916,
      "learning_rate": 1.8648055479058422e-05,
      "loss": 1.3451,
      "step": 340
    },
    {
      "epoch": 1.4582441113490363,
      "grad_norm": 0.3862274046133055,
      "learning_rate": 1.8609685986671744e-05,
      "loss": 1.4157,
      "step": 341
    },
    {
      "epoch": 1.462526766595289,
      "grad_norm": 0.32589359289541453,
      "learning_rate": 1.8571240708373707e-05,
      "loss": 1.3611,
      "step": 342
    },
    {
      "epoch": 1.4668094218415417,
      "grad_norm": 0.38467743700470014,
      "learning_rate": 1.853272012104815e-05,
      "loss": 1.4441,
      "step": 343
    },
    {
      "epoch": 1.4710920770877944,
      "grad_norm": 0.3740956575298423,
      "learning_rate": 1.849412470251305e-05,
      "loss": 1.4004,
      "step": 344
    },
    {
      "epoch": 1.4753747323340471,
      "grad_norm": 0.30848423646912154,
      "learning_rate": 1.8455454931514605e-05,
      "loss": 1.262,
      "step": 345
    },
    {
      "epoch": 1.4796573875802999,
      "grad_norm": 0.3740097120746422,
      "learning_rate": 1.8416711287721303e-05,
      "loss": 1.2179,
      "step": 346
    },
    {
      "epoch": 1.4839400428265526,
      "grad_norm": 0.4082863839360843,
      "learning_rate": 1.8377894251717974e-05,
      "loss": 1.4259,
      "step": 347
    },
    {
      "epoch": 1.4882226980728053,
      "grad_norm": 0.3948652596870541,
      "learning_rate": 1.8339004304999806e-05,
      "loss": 1.3442,
      "step": 348
    },
    {
      "epoch": 1.4925053533190578,
      "grad_norm": 0.4678512487151559,
      "learning_rate": 1.8300041929966404e-05,
      "loss": 1.4306,
      "step": 349
    },
    {
      "epoch": 1.4967880085653105,
      "grad_norm": 0.45548221851750526,
      "learning_rate": 1.8261007609915773e-05,
      "loss": 1.3257,
      "step": 350
    },
{ |
|
"epoch": 1.5010706638115632, |
|
"grad_norm": 0.3961504677246392, |
|
"learning_rate": 1.8221901829038347e-05, |
|
"loss": 1.4226, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.5053533190578159, |
|
"grad_norm": 0.48575304661026586, |
|
"learning_rate": 1.818272507241099e-05, |
|
"loss": 1.3101, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.5096359743040684, |
|
"grad_norm": 0.4223474689775986, |
|
"learning_rate": 1.8143477825990938e-05, |
|
"loss": 1.3738, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.513918629550321, |
|
"grad_norm": 0.4328835573924883, |
|
"learning_rate": 1.8104160576609828e-05, |
|
"loss": 1.4613, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.5182012847965738, |
|
"grad_norm": 0.36894215625076815, |
|
"learning_rate": 1.80647738119676e-05, |
|
"loss": 1.4421, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.5224839400428265, |
|
"grad_norm": 0.42960329602264624, |
|
"learning_rate": 1.8025318020626497e-05, |
|
"loss": 1.4449, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.5267665952890792, |
|
"grad_norm": 0.4381808830561339, |
|
"learning_rate": 1.7985793692004983e-05, |
|
"loss": 1.3895, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.531049250535332, |
|
"grad_norm": 0.511639740310659, |
|
"learning_rate": 1.7946201316371665e-05, |
|
"loss": 1.5033, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.5353319057815846, |
|
"grad_norm": 0.30935207991898406, |
|
"learning_rate": 1.7906541384839226e-05, |
|
"loss": 1.2179, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.5396145610278373, |
|
"grad_norm": 0.5149363491855712, |
|
"learning_rate": 1.7866814389358323e-05, |
|
"loss": 1.3692, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.54389721627409, |
|
"grad_norm": 0.3768568355085642, |
|
"learning_rate": 1.7827020822711493e-05, |
|
"loss": 1.4404, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.5481798715203428, |
|
"grad_norm": 0.5075668454602467, |
|
"learning_rate": 1.7787161178507045e-05, |
|
"loss": 1.4351, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.5524625267665952, |
|
"grad_norm": 0.429005671047687, |
|
"learning_rate": 1.7747235951172908e-05, |
|
"loss": 1.2954, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.556745182012848, |
|
"grad_norm": 0.4773307561454311, |
|
"learning_rate": 1.7707245635950536e-05, |
|
"loss": 1.3229, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.5610278372591007, |
|
"grad_norm": 0.46224461269568345, |
|
"learning_rate": 1.7667190728888743e-05, |
|
"loss": 1.4701, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.5653104925053534, |
|
"grad_norm": 0.4398714446841838, |
|
"learning_rate": 1.7627071726837556e-05, |
|
"loss": 1.3617, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.5695931477516059, |
|
"grad_norm": 0.3774107684610511, |
|
"learning_rate": 1.7586889127442045e-05, |
|
"loss": 1.3137, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.5738758029978586, |
|
"grad_norm": 0.4646696934362882, |
|
"learning_rate": 1.754664342913616e-05, |
|
"loss": 1.3487, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.5781584582441113, |
|
"grad_norm": 0.3570064846109861, |
|
"learning_rate": 1.7506335131136548e-05, |
|
"loss": 1.3087, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.582441113490364, |
|
"grad_norm": 0.4493705452348863, |
|
"learning_rate": 1.7465964733436342e-05, |
|
"loss": 1.5064, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.5867237687366167, |
|
"grad_norm": 0.35347935083263654, |
|
"learning_rate": 1.7425532736798994e-05, |
|
"loss": 1.354, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.5910064239828694, |
|
"grad_norm": 0.38802945271200445, |
|
"learning_rate": 1.7385039642752026e-05, |
|
"loss": 1.3905, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.595289079229122, |
|
"grad_norm": 0.3971847941983123, |
|
"learning_rate": 1.7344485953580834e-05, |
|
"loss": 1.3172, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.5995717344753748, |
|
"grad_norm": 0.4063900151850949, |
|
"learning_rate": 1.730387217232245e-05, |
|
"loss": 1.3902, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.6038543897216275, |
|
"grad_norm": 0.3482101582890047, |
|
"learning_rate": 1.72631988027593e-05, |
|
"loss": 1.4267, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.6081370449678802, |
|
"grad_norm": 0.3907023409634497, |
|
"learning_rate": 1.7222466349412953e-05, |
|
"loss": 1.3657, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.6124197002141327, |
|
"grad_norm": 0.39648365466974855, |
|
"learning_rate": 1.718167531753787e-05, |
|
"loss": 1.3757, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.6167023554603854, |
|
"grad_norm": 0.3482003705389042, |
|
"learning_rate": 1.7140826213115134e-05, |
|
"loss": 1.3889, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.6209850107066381, |
|
"grad_norm": 0.43357670792552266, |
|
"learning_rate": 1.7099919542846174e-05, |
|
"loss": 1.3975, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.6252676659528906, |
|
"grad_norm": 0.344012746609685, |
|
"learning_rate": 1.705895581414647e-05, |
|
"loss": 1.3761, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.6295503211991433, |
|
"grad_norm": 0.3912736883863624, |
|
"learning_rate": 1.7017935535139286e-05, |
|
"loss": 1.2256, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.633832976445396, |
|
"grad_norm": 0.32389309159432333, |
|
"learning_rate": 1.697685921464932e-05, |
|
"loss": 1.2611, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.6381156316916488, |
|
"grad_norm": 0.3808112089261434, |
|
"learning_rate": 1.6935727362196453e-05, |
|
"loss": 1.3773, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.6423982869379015, |
|
"grad_norm": 0.3815707909378436, |
|
"learning_rate": 1.6894540487989374e-05, |
|
"loss": 1.4341, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.6466809421841542, |
|
"grad_norm": 0.3707311578105496, |
|
"learning_rate": 1.6853299102919278e-05, |
|
"loss": 1.3912, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.6509635974304069, |
|
"grad_norm": 0.3477881955581895, |
|
"learning_rate": 1.681200371855354e-05, |
|
"loss": 1.4454, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.6552462526766596, |
|
"grad_norm": 0.3749155440303463, |
|
"learning_rate": 1.6770654847129336e-05, |
|
"loss": 1.3565, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.6595289079229123, |
|
"grad_norm": 0.37356126951976065, |
|
"learning_rate": 1.6729253001547313e-05, |
|
"loss": 1.2841, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.663811563169165, |
|
"grad_norm": 0.3479511050011833, |
|
"learning_rate": 1.6687798695365224e-05, |
|
"loss": 1.3371, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.6680942184154177, |
|
"grad_norm": 0.3581912213414331, |
|
"learning_rate": 1.6646292442791557e-05, |
|
"loss": 1.232, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.6723768736616702, |
|
"grad_norm": 0.3215446113048358, |
|
"learning_rate": 1.6604734758679147e-05, |
|
"loss": 1.3963, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.676659528907923, |
|
"grad_norm": 0.4376359515021747, |
|
"learning_rate": 1.6563126158518806e-05, |
|
"loss": 1.3747, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.6809421841541756, |
|
"grad_norm": 0.3060677115981459, |
|
"learning_rate": 1.6521467158432916e-05, |
|
"loss": 1.3455, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.685224839400428, |
|
"grad_norm": 0.39842372210368826, |
|
"learning_rate": 1.647975827516902e-05, |
|
"loss": 1.3162, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.6895074946466808, |
|
"grad_norm": 0.32860459996161495, |
|
"learning_rate": 1.6438000026093447e-05, |
|
"loss": 1.4114, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.6937901498929335, |
|
"grad_norm": 0.42177195772773357, |
|
"learning_rate": 1.6396192929184852e-05, |
|
"loss": 1.3835, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.6980728051391862, |
|
"grad_norm": 0.37483985613490883, |
|
"learning_rate": 1.6354337503027817e-05, |
|
"loss": 1.4495, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.702355460385439, |
|
"grad_norm": 0.3287442844969753, |
|
"learning_rate": 1.6312434266806406e-05, |
|
"loss": 1.3417, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.7066381156316917, |
|
"grad_norm": 0.3409487933679222, |
|
"learning_rate": 1.627048374029773e-05, |
|
"loss": 1.3727, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.7109207708779444, |
|
"grad_norm": 0.364966633180017, |
|
"learning_rate": 1.622848644386551e-05, |
|
"loss": 1.3445, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.715203426124197, |
|
"grad_norm": 0.40782880089567125, |
|
"learning_rate": 1.6186442898453593e-05, |
|
"loss": 1.4314, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.7194860813704498, |
|
"grad_norm": 0.35338981155106325, |
|
"learning_rate": 1.614435362557953e-05, |
|
"loss": 1.2992, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.7237687366167025, |
|
"grad_norm": 0.3458710703190408, |
|
"learning_rate": 1.6102219147328064e-05, |
|
"loss": 1.2444, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.728051391862955, |
|
"grad_norm": 0.34047208337511875, |
|
"learning_rate": 1.6060039986344692e-05, |
|
"loss": 1.3841, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.7323340471092077, |
|
"grad_norm": 0.34973667960604016, |
|
"learning_rate": 1.601781666582916e-05, |
|
"loss": 1.3197, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.7366167023554604, |
|
"grad_norm": 0.3619484642212399, |
|
"learning_rate": 1.5975549709528977e-05, |
|
"loss": 1.3597, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.740899357601713, |
|
"grad_norm": 0.3485323431598921, |
|
"learning_rate": 1.593323964173292e-05, |
|
"loss": 1.3541, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.7451820128479656, |
|
"grad_norm": 0.3722079995799495, |
|
"learning_rate": 1.5890886987264536e-05, |
|
"loss": 1.3639, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.7494646680942183, |
|
"grad_norm": 0.32734387518519825, |
|
"learning_rate": 1.5848492271475622e-05, |
|
"loss": 1.4136, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.753747323340471, |
|
"grad_norm": 0.3864261811647076, |
|
"learning_rate": 1.5806056020239714e-05, |
|
"loss": 1.4231, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.7580299785867237, |
|
"grad_norm": 0.341163146089911, |
|
"learning_rate": 1.576357875994556e-05, |
|
"loss": 1.3912, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.7623126338329764, |
|
"grad_norm": 0.4322424139588224, |
|
"learning_rate": 1.5721061017490594e-05, |
|
"loss": 1.3543, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.7665952890792291, |
|
"grad_norm": 0.3430090140811513, |
|
"learning_rate": 1.5678503320274407e-05, |
|
"loss": 1.4195, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.7708779443254818, |
|
"grad_norm": 0.39442054888019096, |
|
"learning_rate": 1.5635906196192194e-05, |
|
"loss": 1.3609, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.7751605995717346, |
|
"grad_norm": 0.39246818337147305, |
|
"learning_rate": 1.5593270173628208e-05, |
|
"loss": 1.3496, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.7794432548179873, |
|
"grad_norm": 0.3896357465642991, |
|
"learning_rate": 1.5550595781449205e-05, |
|
"loss": 1.2962, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.78372591006424, |
|
"grad_norm": 0.40875227853762397, |
|
"learning_rate": 1.550788354899789e-05, |
|
"loss": 1.2827, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.7880085653104925, |
|
"grad_norm": 0.32384312840403434, |
|
"learning_rate": 1.5465134006086347e-05, |
|
"loss": 1.4018, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.7922912205567452, |
|
"grad_norm": 0.4319845932792659, |
|
"learning_rate": 1.5422347682989467e-05, |
|
"loss": 1.2837, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.7965738758029979, |
|
"grad_norm": 0.4015204521770257, |
|
"learning_rate": 1.5379525110438374e-05, |
|
"loss": 1.445, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.8008565310492506, |
|
"grad_norm": 0.3636542581207264, |
|
"learning_rate": 1.5336666819613832e-05, |
|
"loss": 1.3278, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.805139186295503, |
|
"grad_norm": 0.42635584079656125, |
|
"learning_rate": 1.5293773342139662e-05, |
|
"loss": 1.3899, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.8094218415417558, |
|
"grad_norm": 0.3796172113574308, |
|
"learning_rate": 1.5250845210076151e-05, |
|
"loss": 1.2944, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.8137044967880085, |
|
"grad_norm": 0.4222877528683101, |
|
"learning_rate": 1.5207882955913457e-05, |
|
"loss": 1.4121, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.8179871520342612, |
|
"grad_norm": 0.6206094866942423, |
|
"learning_rate": 1.5164887112564985e-05, |
|
"loss": 1.3037, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.822269807280514, |
|
"grad_norm": 0.338186939979986, |
|
"learning_rate": 1.5121858213360793e-05, |
|
"loss": 1.4515, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.8265524625267666, |
|
"grad_norm": 0.42085883637300137, |
|
"learning_rate": 1.507879679204096e-05, |
|
"loss": 1.3801, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.8308351177730193, |
|
"grad_norm": 0.33029638552346774, |
|
"learning_rate": 1.5035703382749e-05, |
|
"loss": 1.3197, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.835117773019272, |
|
"grad_norm": 0.3796212349112593, |
|
"learning_rate": 1.4992578520025194e-05, |
|
"loss": 1.3341, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.8394004282655247, |
|
"grad_norm": 0.3416764792743133, |
|
"learning_rate": 1.4949422738799982e-05, |
|
"loss": 1.2933, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.8436830835117775, |
|
"grad_norm": 0.37923918821239594, |
|
"learning_rate": 1.4906236574387326e-05, |
|
"loss": 1.3359, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.84796573875803, |
|
"grad_norm": 0.30907027792758374, |
|
"learning_rate": 1.4863020562478064e-05, |
|
"loss": 1.2737, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.8522483940042827, |
|
"grad_norm": 0.3903264898543205, |
|
"learning_rate": 1.4819775239133283e-05, |
|
"loss": 1.3131, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.8565310492505354, |
|
"grad_norm": 0.3841336756186868, |
|
"learning_rate": 1.4776501140777637e-05, |
|
"loss": 1.3649, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.8608137044967878, |
|
"grad_norm": 0.4074493999576374, |
|
"learning_rate": 1.4733198804192724e-05, |
|
"loss": 1.2991, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.8650963597430406, |
|
"grad_norm": 0.3855125688098399, |
|
"learning_rate": 1.4689868766510406e-05, |
|
"loss": 1.3823, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.8693790149892933, |
|
"grad_norm": 0.37126874922918807, |
|
"learning_rate": 1.4646511565206164e-05, |
|
"loss": 1.3426, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.873661670235546, |
|
"grad_norm": 0.3714258164077467, |
|
"learning_rate": 1.4603127738092423e-05, |
|
"loss": 1.2718, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.8779443254817987, |
|
"grad_norm": 0.3429261958678687, |
|
"learning_rate": 1.455971782331187e-05, |
|
"loss": 1.3858, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.8822269807280514, |
|
"grad_norm": 0.38495602247470384, |
|
"learning_rate": 1.4516282359330801e-05, |
|
"loss": 1.2777, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.886509635974304, |
|
"grad_norm": 0.3699329784967151, |
|
"learning_rate": 1.4472821884932426e-05, |
|
"loss": 1.3578, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.8907922912205568, |
|
"grad_norm": 0.3599785136664482, |
|
"learning_rate": 1.442933693921018e-05, |
|
"loss": 1.416, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.8950749464668095, |
|
"grad_norm": 0.33538664994930595, |
|
"learning_rate": 1.4385828061561066e-05, |
|
"loss": 1.3407, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.8993576017130622, |
|
"grad_norm": 0.36336031298257154, |
|
"learning_rate": 1.434229579167893e-05, |
|
"loss": 1.2169, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.903640256959315, |
|
"grad_norm": 0.31518334287029476, |
|
"learning_rate": 1.429874066954778e-05, |
|
"loss": 1.3974, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.9079229122055674, |
|
"grad_norm": 0.380470589989531, |
|
"learning_rate": 1.425516323543509e-05, |
|
"loss": 1.3915, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.9122055674518201, |
|
"grad_norm": 0.3510136894640434, |
|
"learning_rate": 1.4211564029885102e-05, |
|
"loss": 1.3113, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.9164882226980728, |
|
"grad_norm": 0.34050831451001196, |
|
"learning_rate": 1.4167943593712113e-05, |
|
"loss": 1.3751, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.9207708779443253, |
|
"grad_norm": 0.3583661125603097, |
|
"learning_rate": 1.4124302467993769e-05, |
|
"loss": 1.3255, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.925053533190578, |
|
"grad_norm": 0.3389101579476846, |
|
"learning_rate": 1.4080641194064348e-05, |
|
"loss": 1.4168, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.9293361884368307, |
|
"grad_norm": 0.3834913291170707, |
|
"learning_rate": 1.403696031350806e-05, |
|
"loss": 1.3644, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.9336188436830835, |
|
"grad_norm": 0.4308322141053784, |
|
"learning_rate": 1.3993260368152317e-05, |
|
"loss": 1.4786, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.9379014989293362, |
|
"grad_norm": 0.3537841876121041, |
|
"learning_rate": 1.3949541900061014e-05, |
|
"loss": 1.2849, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.9421841541755889, |
|
"grad_norm": 0.3739024334028022, |
|
"learning_rate": 1.3905805451527806e-05, |
|
"loss": 1.2974, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.9464668094218416, |
|
"grad_norm": 0.3756096151923131, |
|
"learning_rate": 1.386205156506938e-05, |
|
"loss": 1.2532, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.9507494646680943, |
|
"grad_norm": 0.3642163049913141, |
|
"learning_rate": 1.381828078341873e-05, |
|
"loss": 1.3066, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.955032119914347, |
|
"grad_norm": 0.4016856878315503, |
|
"learning_rate": 1.3774493649518424e-05, |
|
"loss": 1.3514, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.9593147751605997, |
|
"grad_norm": 0.3570908964430489, |
|
"learning_rate": 1.373069070651386e-05, |
|
"loss": 1.3798, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.9635974304068522, |
|
"grad_norm": 0.4546768723455663, |
|
"learning_rate": 1.3686872497746539e-05, |
|
"loss": 1.2297, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.967880085653105, |
|
"grad_norm": 0.39770363928777963, |
|
"learning_rate": 1.364303956674732e-05, |
|
"loss": 1.3251, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.9721627408993576, |
|
"grad_norm": 0.4625841972208585, |
|
"learning_rate": 1.359919245722969e-05, |
|
"loss": 1.4199, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.9764453961456103, |
|
"grad_norm": 0.4133274366928544, |
|
"learning_rate": 1.3555331713082991e-05, |
|
"loss": 1.3047, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.9807280513918628, |
|
"grad_norm": 0.4108939632332837, |
|
"learning_rate": 1.351145787836571e-05, |
|
"loss": 1.3929, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.9850107066381155, |
|
"grad_norm": 0.37835291483581496, |
|
"learning_rate": 1.3467571497298703e-05, |
|
"loss": 1.1941, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.9892933618843682, |
|
"grad_norm": 0.37813972695047565, |
|
"learning_rate": 1.342367311425845e-05, |
|
"loss": 1.4973, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.993576017130621, |
|
"grad_norm": 1.6403016895398341, |
|
"learning_rate": 1.3379763273770324e-05, |
|
"loss": 1.3624, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.9978586723768736, |
|
"grad_norm": 0.4830892612436795, |
|
"learning_rate": 1.3335842520501795e-05, |
|
"loss": 1.302, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.6829899377473765, |
|
"learning_rate": 1.3291911399255713e-05, |
|
"loss": 1.2285, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 2.0042826552462527, |
|
"grad_norm": 0.8136378650415125, |
|
"learning_rate": 1.3247970454963531e-05, |
|
"loss": 1.1863, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 2.0085653104925054, |
|
"grad_norm": 0.6124913953543332, |
|
"learning_rate": 1.3204020232678549e-05, |
|
"loss": 1.1323, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.012847965738758, |
|
"grad_norm": 0.9415264304617837, |
|
"learning_rate": 1.3160061277569156e-05, |
|
"loss": 1.1341, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 2.017130620985011, |
|
"grad_norm": 0.5598470498427739, |
|
"learning_rate": 1.3116094134912055e-05, |
|
"loss": 1.0978, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 2.0214132762312635, |
|
"grad_norm": 0.5199782381878686, |
|
"learning_rate": 1.3072119350085524e-05, |
|
"loss": 1.15, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 2.0256959314775163, |
|
"grad_norm": 0.4796395014344232, |
|
"learning_rate": 1.3028137468562624e-05, |
|
"loss": 1.2802, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 2.0299785867237685, |
|
"grad_norm": 0.4542325665519593, |
|
"learning_rate": 1.2984149035904447e-05, |
|
"loss": 1.0659, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.0342612419700212, |
|
"grad_norm": 0.4431903012032383, |
|
"learning_rate": 1.2940154597753356e-05, |
|
"loss": 1.0986, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 2.038543897216274, |
|
"grad_norm": 0.46952279850037054, |
|
"learning_rate": 1.2896154699826201e-05, |
|
"loss": 1.1216, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 2.0428265524625266, |
|
"grad_norm": 0.45033430393074514, |
|
"learning_rate": 1.2852149887907553e-05, |
|
"loss": 1.1881, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 2.0471092077087794, |
|
"grad_norm": 0.4606628838219141, |
|
"learning_rate": 1.2808140707842936e-05, |
|
"loss": 1.0762, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 2.051391862955032, |
|
"grad_norm": 0.4522706754261223, |
|
"learning_rate": 1.276412770553207e-05, |
|
"loss": 1.1182, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.0556745182012848, |
|
"grad_norm": 0.4275410449005914, |
|
"learning_rate": 1.2720111426922072e-05, |
|
"loss": 1.1262, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 2.0599571734475375, |
|
"grad_norm": 0.4117922922818347, |
|
"learning_rate": 1.2676092418000709e-05, |
|
"loss": 1.0937, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 2.06423982869379, |
|
"grad_norm": 0.4076420511090681, |
|
"learning_rate": 1.2632071224789613e-05, |
|
"loss": 1.1588, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 2.068522483940043, |
|
"grad_norm": 0.39985814020478855, |
|
"learning_rate": 1.2588048393337503e-05, |
|
"loss": 1.2315, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 2.0728051391862956, |
|
"grad_norm": 0.41357202132909343, |
|
"learning_rate": 1.2544024469713437e-05, |
|
"loss": 1.1924, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.0770877944325483, |
|
"grad_norm": 0.39780940223532485, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.1816, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 2.081370449678801, |
|
"grad_norm": 0.42899527932620385, |
|
"learning_rate": 1.245597553028657e-05, |
|
"loss": 1.1841, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 2.0856531049250537, |
|
"grad_norm": 0.39083398721432966, |
|
"learning_rate": 1.2411951606662498e-05, |
|
"loss": 1.1098, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 2.089935760171306, |
|
"grad_norm": 0.43420822774302814, |
|
"learning_rate": 1.2367928775210393e-05, |
|
"loss": 1.1627, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 2.0942184154175587, |
|
"grad_norm": 0.3732705280561028, |
|
"learning_rate": 1.2323907581999292e-05, |
|
"loss": 1.129, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.0985010706638114, |
|
"grad_norm": 0.41632399144455645, |
|
"learning_rate": 1.2279888573077935e-05, |
|
"loss": 0.9738, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 2.102783725910064, |
|
"grad_norm": 0.38659287989811325, |
|
"learning_rate": 1.2235872294467934e-05, |
|
"loss": 1.1593, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 2.107066381156317, |
|
"grad_norm": 0.3920026187084851, |
|
"learning_rate": 1.2191859292157066e-05, |
|
"loss": 1.0827, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 2.1113490364025695, |
|
"grad_norm": 0.3994514767198869, |
|
"learning_rate": 1.2147850112092448e-05, |
|
"loss": 1.1405, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 2.1156316916488223, |
|
"grad_norm": 0.43445357298460374, |
|
"learning_rate": 1.2103845300173801e-05, |
|
"loss": 1.0986, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.119914346895075, |
|
"grad_norm": 0.4042400771293127, |
|
"learning_rate": 1.2059845402246642e-05, |
|
"loss": 1.1418, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 2.1241970021413277, |
|
"grad_norm": 0.3788718739976897, |
|
"learning_rate": 1.2015850964095555e-05, |
|
"loss": 1.1349, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 2.1284796573875804, |
|
"grad_norm": 0.3821076969792679, |
|
"learning_rate": 1.197186253143738e-05, |
|
"loss": 1.2081, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 2.132762312633833, |
|
"grad_norm": 0.4411851187923958, |
|
"learning_rate": 1.192788064991448e-05, |
|
"loss": 1.1522, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 2.137044967880086, |
|
"grad_norm": 0.404962832392533, |
|
"learning_rate": 1.1883905865087944e-05, |
|
"loss": 1.1383, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.1413276231263385, |
|
"grad_norm": 0.39962573083698255, |
|
"learning_rate": 1.1839938722430849e-05, |
|
"loss": 1.0717, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 2.145610278372591, |
|
"grad_norm": 0.4004973819254198, |
|
"learning_rate": 1.1795979767321451e-05, |
|
"loss": 1.2155, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 2.1498929336188435, |
|
"grad_norm": 0.42839296529898985, |
|
"learning_rate": 1.175202954503647e-05, |
|
"loss": 1.1801, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 2.154175588865096, |
|
"grad_norm": 0.39581686357900003, |
|
"learning_rate": 1.1708088600744292e-05, |
|
"loss": 1.1871, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 2.158458244111349, |
|
"grad_norm": 0.3515337940814968, |
|
"learning_rate": 1.166415747949821e-05, |
|
"loss": 1.0689, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 2.1627408993576016, |
|
"grad_norm": 0.38280355472311695, |
|
"learning_rate": 1.1620236726229684e-05, |
|
"loss": 1.1653, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 2.1670235546038543, |
|
"grad_norm": 0.3601455061997376, |
|
"learning_rate": 1.157632688574155e-05, |
|
"loss": 1.1316, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 2.171306209850107, |
|
"grad_norm": 0.4036025468502878, |
|
"learning_rate": 1.1532428502701303e-05, |
|
"loss": 1.1332, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 2.1755888650963597, |
|
"grad_norm": 0.3689501638767867, |
|
"learning_rate": 1.1488542121634292e-05, |
|
"loss": 1.1398, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 2.1798715203426124, |
|
"grad_norm": 0.44516877676862204, |
|
"learning_rate": 1.1444668286917013e-05, |
|
"loss": 1.1009, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.184154175588865, |
|
"grad_norm": 0.35171086043635746, |
|
"learning_rate": 1.1400807542770314e-05, |
|
"loss": 1.1452, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 2.188436830835118, |
|
"grad_norm": 0.37133980314166626, |
|
"learning_rate": 1.135696043325268e-05, |
|
"loss": 1.1579, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 2.1927194860813706, |
|
"grad_norm": 0.34968878321273367, |
|
"learning_rate": 1.1313127502253462e-05, |
|
"loss": 1.1296, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 2.1970021413276233, |
|
"grad_norm": 0.35409451711365186, |
|
"learning_rate": 1.1269309293486144e-05, |
|
"loss": 1.149, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 2.201284796573876, |
|
"grad_norm": 0.39987353315213703, |
|
"learning_rate": 1.1225506350481577e-05, |
|
"loss": 1.0483, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 2.2055674518201283, |
|
"grad_norm": 0.37950153309424184, |
|
"learning_rate": 1.1181719216581272e-05, |
|
"loss": 1.123, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 2.209850107066381, |
|
"grad_norm": 0.3738479054688087, |
|
"learning_rate": 1.1137948434930622e-05, |
|
"loss": 1.1478, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 2.2141327623126337, |
|
"grad_norm": 0.37447253121660345, |
|
"learning_rate": 1.1094194548472197e-05, |
|
"loss": 1.1929, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 2.2184154175588864, |
|
"grad_norm": 0.36554010421344446, |
|
"learning_rate": 1.1050458099938985e-05, |
|
"loss": 1.1651, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 2.222698072805139, |
|
"grad_norm": 0.35742517390118567, |
|
"learning_rate": 1.1006739631847684e-05, |
|
"loss": 1.0415, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.226980728051392, |
|
"grad_norm": 0.3678474681557672, |
|
"learning_rate": 1.0963039686491942e-05, |
|
"loss": 1.0773, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 2.2312633832976445, |
|
"grad_norm": 0.35021617103631075, |
|
"learning_rate": 1.0919358805935653e-05, |
|
"loss": 1.0147, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 2.235546038543897, |
|
"grad_norm": 0.3725259580183268, |
|
"learning_rate": 1.0875697532006237e-05, |
|
"loss": 1.1326, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 2.23982869379015, |
|
"grad_norm": 0.36036157437462213, |
|
"learning_rate": 1.0832056406287888e-05, |
|
"loss": 1.1178, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 2.2441113490364026, |
|
"grad_norm": 0.38080054734059177, |
|
"learning_rate": 1.0788435970114902e-05, |
|
"loss": 1.2065, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.2483940042826553, |
|
"grad_norm": 0.3744350777602071, |
|
"learning_rate": 1.0744836764564914e-05, |
|
"loss": 1.1504, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 2.252676659528908, |
|
"grad_norm": 0.37119670203538174, |
|
"learning_rate": 1.0701259330452227e-05, |
|
"loss": 1.1754, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 2.2569593147751608, |
|
"grad_norm": 0.3450626261101503, |
|
"learning_rate": 1.0657704208321073e-05, |
|
"loss": 1.1758, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.2612419700214135, |
|
"grad_norm": 0.3761085257204848, |
|
"learning_rate": 1.0614171938438937e-05, |
|
"loss": 1.1058, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 2.265524625267666, |
|
"grad_norm": 0.3534345956983803, |
|
"learning_rate": 1.0570663060789819e-05, |
|
"loss": 1.0396, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.2698072805139184, |
|
"grad_norm": 0.3339089724596173, |
|
"learning_rate": 1.0527178115067577e-05, |
|
"loss": 1.0607, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 2.274089935760171, |
|
"grad_norm": 0.36758786848355013, |
|
"learning_rate": 1.0483717640669198e-05, |
|
"loss": 1.096, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 2.278372591006424, |
|
"grad_norm": 0.37103014849499344, |
|
"learning_rate": 1.0440282176688132e-05, |
|
"loss": 1.2022, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 2.2826552462526766, |
|
"grad_norm": 0.3933653572064292, |
|
"learning_rate": 1.0396872261907578e-05, |
|
"loss": 1.1886, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 2.2869379014989293, |
|
"grad_norm": 0.3478722741253696, |
|
"learning_rate": 1.0353488434793839e-05, |
|
"loss": 1.1061, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 2.291220556745182, |
|
"grad_norm": 0.38454344787523614, |
|
"learning_rate": 1.0310131233489595e-05, |
|
"loss": 1.1058, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 2.2955032119914347, |
|
"grad_norm": 0.3964599267526657, |
|
"learning_rate": 1.0266801195807279e-05, |
|
"loss": 1.1536, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 2.2997858672376874, |
|
"grad_norm": 0.3505311887204956, |
|
"learning_rate": 1.0223498859222367e-05, |
|
"loss": 1.005, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 2.30406852248394, |
|
"grad_norm": 0.42646591198465056, |
|
"learning_rate": 1.018022476086672e-05, |
|
"loss": 1.1385, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 2.308351177730193, |
|
"grad_norm": 0.3516417735648486, |
|
"learning_rate": 1.0136979437521937e-05, |
|
"loss": 1.1299, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.3126338329764455, |
|
"grad_norm": 0.37292041166385276, |
|
"learning_rate": 1.0093763425612677e-05, |
|
"loss": 1.1697, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 2.3169164882226982, |
|
"grad_norm": 0.37139285774167097, |
|
"learning_rate": 1.0050577261200025e-05, |
|
"loss": 1.0958, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 2.3211991434689505, |
|
"grad_norm": 0.36732514272211636, |
|
"learning_rate": 1.000742147997481e-05, |
|
"loss": 1.0663, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 2.325481798715203, |
|
"grad_norm": 0.425696024428236, |
|
"learning_rate": 9.964296617251004e-06, |
|
"loss": 1.0172, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 2.329764453961456, |
|
"grad_norm": 0.45633961518765603, |
|
"learning_rate": 9.92120320795904e-06, |
|
"loss": 1.2115, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 2.3340471092077086, |
|
"grad_norm": 0.42776392011984465, |
|
"learning_rate": 9.878141786639212e-06, |
|
"loss": 1.1263, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 2.3383297644539613, |
|
"grad_norm": 0.4063925688250011, |
|
"learning_rate": 9.835112887435014e-06, |
|
"loss": 1.1167, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 2.342612419700214, |
|
"grad_norm": 0.347005382841865, |
|
"learning_rate": 9.792117044086544e-06, |
|
"loss": 1.0471, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 2.3468950749464668, |
|
"grad_norm": 0.41426650830417605, |
|
"learning_rate": 9.749154789923847e-06, |
|
"loss": 1.2857, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 2.3511777301927195, |
|
"grad_norm": 0.3732639695626659, |
|
"learning_rate": 9.70622665786034e-06, |
|
"loss": 1.133, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.355460385438972, |
|
"grad_norm": 0.3953893693115576, |
|
"learning_rate": 9.663333180386169e-06, |
|
"loss": 1.1723, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 2.359743040685225, |
|
"grad_norm": 0.3945096837746996, |
|
"learning_rate": 9.620474889561629e-06, |
|
"loss": 1.1853, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.3640256959314776, |
|
"grad_norm": 0.353229521713685, |
|
"learning_rate": 9.57765231701053e-06, |
|
"loss": 1.224, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 2.3683083511777303, |
|
"grad_norm": 0.38038911754225274, |
|
"learning_rate": 9.534865993913656e-06, |
|
"loss": 1.0707, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 2.372591006423983, |
|
"grad_norm": 0.40137304773118665, |
|
"learning_rate": 9.492116451002114e-06, |
|
"loss": 1.0614, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 2.3768736616702357, |
|
"grad_norm": 0.3799373348779043, |
|
"learning_rate": 9.4494042185508e-06, |
|
"loss": 1.0317, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 2.3811563169164884, |
|
"grad_norm": 0.35846465331360783, |
|
"learning_rate": 9.4067298263718e-06, |
|
"loss": 1.0816, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 2.385438972162741, |
|
"grad_norm": 0.3892380274193281, |
|
"learning_rate": 9.364093803807807e-06, |
|
"loss": 1.0922, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 2.3897216274089934, |
|
"grad_norm": 0.40336093781540333, |
|
"learning_rate": 9.321496679725596e-06, |
|
"loss": 1.0938, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 2.394004282655246, |
|
"grad_norm": 0.3817697005333532, |
|
"learning_rate": 9.278938982509409e-06, |
|
"loss": 1.0803, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.398286937901499, |
|
"grad_norm": 0.3881301113148313, |
|
"learning_rate": 9.236421240054449e-06, |
|
"loss": 1.1377, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 2.4025695931477515, |
|
"grad_norm": 0.445891116690163, |
|
"learning_rate": 9.193943979760292e-06, |
|
"loss": 1.0991, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 2.4068522483940042, |
|
"grad_norm": 0.4010581039655185, |
|
"learning_rate": 9.151507728524382e-06, |
|
"loss": 1.041, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 2.411134903640257, |
|
"grad_norm": 0.3694140168350837, |
|
"learning_rate": 9.109113012735467e-06, |
|
"loss": 0.9861, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 2.4154175588865097, |
|
"grad_norm": 0.38742555130206846, |
|
"learning_rate": 9.066760358267081e-06, |
|
"loss": 1.0938, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 2.4197002141327624, |
|
"grad_norm": 0.3559783185134848, |
|
"learning_rate": 9.024450290471026e-06, |
|
"loss": 1.0395, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 2.423982869379015, |
|
"grad_norm": 0.3636369864702618, |
|
"learning_rate": 8.982183334170844e-06, |
|
"loss": 1.0933, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 2.428265524625268, |
|
"grad_norm": 0.35525649048200675, |
|
"learning_rate": 8.939960013655311e-06, |
|
"loss": 1.0766, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 2.4325481798715205, |
|
"grad_norm": 0.3775765508703813, |
|
"learning_rate": 8.897780852671939e-06, |
|
"loss": 1.0256, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 2.436830835117773, |
|
"grad_norm": 0.42139839896816106, |
|
"learning_rate": 8.855646374420472e-06, |
|
"loss": 1.1425, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.4411134903640255, |
|
"grad_norm": 0.3511194625690293, |
|
"learning_rate": 8.813557101546408e-06, |
|
"loss": 0.9875, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 2.445396145610278, |
|
"grad_norm": 0.35870293115859425, |
|
"learning_rate": 8.771513556134497e-06, |
|
"loss": 1.1143, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 2.449678800856531, |
|
"grad_norm": 0.3511476581215571, |
|
"learning_rate": 8.729516259702272e-06, |
|
"loss": 1.1216, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 2.4539614561027836, |
|
"grad_norm": 0.3896756471995198, |
|
"learning_rate": 8.6875657331936e-06, |
|
"loss": 1.2131, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 2.4582441113490363, |
|
"grad_norm": 0.346301000515738, |
|
"learning_rate": 8.645662496972186e-06, |
|
"loss": 1.1267, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.462526766595289, |
|
"grad_norm": 0.3279075184069246, |
|
"learning_rate": 8.603807070815152e-06, |
|
"loss": 1.0078, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 2.4668094218415417, |
|
"grad_norm": 0.3524877782412061, |
|
"learning_rate": 8.561999973906554e-06, |
|
"loss": 1.1589, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 2.4710920770877944, |
|
"grad_norm": 0.3744186526110544, |
|
"learning_rate": 8.520241724830983e-06, |
|
"loss": 1.1987, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 2.475374732334047, |
|
"grad_norm": 0.37193508975714884, |
|
"learning_rate": 8.478532841567089e-06, |
|
"loss": 1.143, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 2.4796573875803, |
|
"grad_norm": 0.3563664250992986, |
|
"learning_rate": 8.436873841481197e-06, |
|
"loss": 1.1024, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.4839400428265526, |
|
"grad_norm": 0.3621802163845544, |
|
"learning_rate": 8.395265241320852e-06, |
|
"loss": 1.1237, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 2.4882226980728053, |
|
"grad_norm": 0.3534462614928483, |
|
"learning_rate": 8.353707557208448e-06, |
|
"loss": 0.9731, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 2.492505353319058, |
|
"grad_norm": 0.3756351095987366, |
|
"learning_rate": 8.312201304634775e-06, |
|
"loss": 1.0517, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 2.4967880085653107, |
|
"grad_norm": 0.3810521940082933, |
|
"learning_rate": 8.270746998452688e-06, |
|
"loss": 1.0853, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 2.5010706638115634, |
|
"grad_norm": 0.39222567553145227, |
|
"learning_rate": 8.229345152870666e-06, |
|
"loss": 1.1764, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 2.505353319057816, |
|
"grad_norm": 0.3739366136336243, |
|
"learning_rate": 8.18799628144646e-06, |
|
"loss": 1.1238, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 2.5096359743040684, |
|
"grad_norm": 0.38711368859863554, |
|
"learning_rate": 8.14670089708072e-06, |
|
"loss": 1.1465, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 2.513918629550321, |
|
"grad_norm": 0.41098527253509576, |
|
"learning_rate": 8.105459512010629e-06, |
|
"loss": 1.041, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 2.518201284796574, |
|
"grad_norm": 0.406134178093035, |
|
"learning_rate": 8.064272637803553e-06, |
|
"loss": 1.1861, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 2.5224839400428265, |
|
"grad_norm": 0.3736862306104564, |
|
"learning_rate": 8.02314078535068e-06, |
|
"loss": 1.0904, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.526766595289079, |
|
"grad_norm": 0.3781276058410365, |
|
"learning_rate": 7.982064464860722e-06, |
|
"loss": 1.1083, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 2.531049250535332, |
|
"grad_norm": 0.40011911371910797, |
|
"learning_rate": 7.94104418585353e-06, |
|
"loss": 1.0687, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 2.5353319057815846, |
|
"grad_norm": 0.3683735339293543, |
|
"learning_rate": 7.90008045715383e-06, |
|
"loss": 1.1211, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 2.5396145610278373, |
|
"grad_norm": 0.3878127219742661, |
|
"learning_rate": 7.859173786884867e-06, |
|
"loss": 1.086, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 2.54389721627409, |
|
"grad_norm": 0.37501963993427256, |
|
"learning_rate": 7.818324682462135e-06, |
|
"loss": 1.0673, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 2.5481798715203428, |
|
"grad_norm": 0.37276593704270844, |
|
"learning_rate": 7.77753365058705e-06, |
|
"loss": 1.1055, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 2.552462526766595, |
|
"grad_norm": 0.38843603696651813, |
|
"learning_rate": 7.736801197240703e-06, |
|
"loss": 1.0339, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 2.5567451820128477, |
|
"grad_norm": 0.4110286435387141, |
|
"learning_rate": 7.696127827677551e-06, |
|
"loss": 1.0975, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 2.5610278372591004, |
|
"grad_norm": 0.3610377475070173, |
|
"learning_rate": 7.655514046419169e-06, |
|
"loss": 1.0753, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 2.565310492505353, |
|
"grad_norm": 0.46624031730321613, |
|
"learning_rate": 7.614960357247974e-06, |
|
"loss": 1.0819, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.569593147751606, |
|
"grad_norm": 0.35714403479890183, |
|
"learning_rate": 7.57446726320101e-06, |
|
"loss": 1.0661, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 2.5738758029978586, |
|
"grad_norm": 0.3537005412507155, |
|
"learning_rate": 7.534035266563657e-06, |
|
"loss": 1.0783, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 2.5781584582441113, |
|
"grad_norm": 0.3609965104402262, |
|
"learning_rate": 7.493664868863456e-06, |
|
"loss": 1.1183, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 2.582441113490364, |
|
"grad_norm": 0.3414893487662722, |
|
"learning_rate": 7.453356570863838e-06, |
|
"loss": 1.1513, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 2.5867237687366167, |
|
"grad_norm": 0.34768494822065116, |
|
"learning_rate": 7.413110872557957e-06, |
|
"loss": 1.075, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 2.5910064239828694, |
|
"grad_norm": 0.35110711512371934, |
|
"learning_rate": 7.372928273162444e-06, |
|
"loss": 1.0302, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 2.595289079229122, |
|
"grad_norm": 0.37389978926958345, |
|
"learning_rate": 7.332809271111258e-06, |
|
"loss": 1.127, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 2.599571734475375, |
|
"grad_norm": 0.36202234697320473, |
|
"learning_rate": 7.2927543640494675e-06, |
|
"loss": 1.0841, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 2.6038543897216275, |
|
"grad_norm": 0.3692912620672064, |
|
"learning_rate": 7.252764048827096e-06, |
|
"loss": 1.0937, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 2.6081370449678802, |
|
"grad_norm": 0.371407363782464, |
|
"learning_rate": 7.212838821492962e-06, |
|
"loss": 1.1222, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.612419700214133, |
|
"grad_norm": 0.34843882518833746, |
|
"learning_rate": 7.172979177288505e-06, |
|
"loss": 0.945, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 2.6167023554603857, |
|
"grad_norm": 0.3677558592711015, |
|
"learning_rate": 7.133185610641683e-06, |
|
"loss": 1.1127, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 2.6209850107066384, |
|
"grad_norm": 0.36958952805111067, |
|
"learning_rate": 7.0934586151607764e-06, |
|
"loss": 1.1137, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 2.6252676659528906, |
|
"grad_norm": 0.3474020257100841, |
|
"learning_rate": 7.053798683628335e-06, |
|
"loss": 0.9744, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 2.6295503211991433, |
|
"grad_norm": 0.3558866341734782, |
|
"learning_rate": 7.014206307995016e-06, |
|
"loss": 1.1125, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 2.633832976445396, |
|
"grad_norm": 0.3614597470882593, |
|
"learning_rate": 6.974681979373501e-06, |
|
"loss": 1.1009, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 2.6381156316916488, |
|
"grad_norm": 0.3714477690148325, |
|
"learning_rate": 6.935226188032401e-06, |
|
"loss": 0.9984, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 2.6423982869379015, |
|
"grad_norm": 0.3317262663806771, |
|
"learning_rate": 6.895839423390175e-06, |
|
"loss": 1.0966, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 2.646680942184154, |
|
"grad_norm": 0.36917263116104493, |
|
"learning_rate": 6.856522174009061e-06, |
|
"loss": 1.0764, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 2.650963597430407, |
|
"grad_norm": 0.3777881832761566, |
|
"learning_rate": 6.817274927589014e-06, |
|
"loss": 1.0345, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.6552462526766596, |
|
"grad_norm": 0.35567953357582066, |
|
"learning_rate": 6.7780981709616495e-06, |
|
"loss": 1.1184, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 2.6595289079229123, |
|
"grad_norm": 0.3719255516818532, |
|
"learning_rate": 6.738992390084232e-06, |
|
"loss": 1.1226, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 2.663811563169165, |
|
"grad_norm": 0.3829939577200986, |
|
"learning_rate": 6.699958070033596e-06, |
|
"loss": 1.0708, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 2.6680942184154177, |
|
"grad_norm": 0.36003883214692967, |
|
"learning_rate": 6.660995695000191e-06, |
|
"loss": 1.1787, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 2.67237687366167, |
|
"grad_norm": 0.3688924024392204, |
|
"learning_rate": 6.622105748282031e-06, |
|
"loss": 1.0507, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.6766595289079227, |
|
"grad_norm": 0.37105335768283265, |
|
"learning_rate": 6.583288712278697e-06, |
|
"loss": 1.0864, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 2.6809421841541754, |
|
"grad_norm": 0.3676936052384596, |
|
"learning_rate": 6.544545068485404e-06, |
|
"loss": 1.1649, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 2.685224839400428, |
|
"grad_norm": 0.35833428730388167, |
|
"learning_rate": 6.5058752974869545e-06, |
|
"loss": 1.0467, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 2.689507494646681, |
|
"grad_norm": 0.3560192973325353, |
|
"learning_rate": 6.4672798789518515e-06, |
|
"loss": 1.0385, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 2.6937901498929335, |
|
"grad_norm": 0.3422819495514087, |
|
"learning_rate": 6.428759291626294e-06, |
|
"loss": 1.0643, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.6980728051391862, |
|
"grad_norm": 0.3596524934289582, |
|
"learning_rate": 6.39031401332826e-06, |
|
"loss": 1.0874, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 2.702355460385439, |
|
"grad_norm": 0.3581329395952061, |
|
"learning_rate": 6.35194452094158e-06, |
|
"loss": 1.029, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 2.7066381156316917, |
|
"grad_norm": 0.3646878019734804, |
|
"learning_rate": 6.313651290410021e-06, |
|
"loss": 1.1463, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 2.7109207708779444, |
|
"grad_norm": 0.46965105187278144, |
|
"learning_rate": 6.2754347967313694e-06, |
|
"loss": 1.1599, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 2.715203426124197, |
|
"grad_norm": 0.35199634686850134, |
|
"learning_rate": 6.237295513951577e-06, |
|
"loss": 1.0447, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 2.71948608137045, |
|
"grad_norm": 0.3552040815294978, |
|
"learning_rate": 6.199233915158817e-06, |
|
"loss": 1.0355, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 2.7237687366167025, |
|
"grad_norm": 0.3701464344073716, |
|
"learning_rate": 6.161250472477692e-06, |
|
"loss": 1.1069, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 2.728051391862955, |
|
"grad_norm": 0.3481745786199797, |
|
"learning_rate": 6.123345657063299e-06, |
|
"loss": 1.0379, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 2.732334047109208, |
|
"grad_norm": 0.34908887773290137, |
|
"learning_rate": 6.085519939095463e-06, |
|
"loss": 1.0759, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 2.7366167023554606, |
|
"grad_norm": 0.406969071848584, |
|
"learning_rate": 6.047773787772843e-06, |
|
"loss": 1.1397, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.7408993576017133, |
|
"grad_norm": 0.369214552502764, |
|
"learning_rate": 6.01010767130714e-06, |
|
"loss": 1.1652, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 2.7451820128479656, |
|
"grad_norm": 0.35958281005557274, |
|
"learning_rate": 5.972522056917287e-06, |
|
"loss": 1.0651, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.7494646680942183, |
|
"grad_norm": 0.34773227498527454, |
|
"learning_rate": 5.9350174108236525e-06, |
|
"loss": 1.2105, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 2.753747323340471, |
|
"grad_norm": 0.3785529745910018, |
|
"learning_rate": 5.897594198242253e-06, |
|
"loss": 1.1186, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 2.7580299785867237, |
|
"grad_norm": 0.3476745823127357, |
|
"learning_rate": 5.860252883378986e-06, |
|
"loss": 1.1053, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 2.7623126338329764, |
|
"grad_norm": 0.35740833434939384, |
|
"learning_rate": 5.822993929423872e-06, |
|
"loss": 1.156, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 2.766595289079229, |
|
"grad_norm": 0.3461287440443304, |
|
"learning_rate": 5.78581779854531e-06, |
|
"loss": 1.034, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 2.770877944325482, |
|
"grad_norm": 0.3484778190549007, |
|
"learning_rate": 5.748724951884339e-06, |
|
"loss": 1.147, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 2.7751605995717346, |
|
"grad_norm": 0.3463824371518374, |
|
"learning_rate": 5.711715849548924e-06, |
|
"loss": 1.2487, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 2.7794432548179873, |
|
"grad_norm": 0.3609765242563188, |
|
"learning_rate": 5.674790950608257e-06, |
|
"loss": 1.0038, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.78372591006424, |
|
"grad_norm": 0.3678624338311653, |
|
"learning_rate": 5.6379507130870245e-06, |
|
"loss": 1.1145, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 2.7880085653104922, |
|
"grad_norm": 0.35376315009965914, |
|
"learning_rate": 5.601195593959788e-06, |
|
"loss": 1.0577, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 2.792291220556745, |
|
"grad_norm": 0.3363214828483723, |
|
"learning_rate": 5.5645260491452575e-06, |
|
"loss": 1.0486, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 2.7965738758029977, |
|
"grad_norm": 0.3622636185655521, |
|
"learning_rate": 5.52794253350067e-06, |
|
"loss": 1.0547, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 2.8008565310492504, |
|
"grad_norm": 0.5067875911549902, |
|
"learning_rate": 5.491445500816134e-06, |
|
"loss": 1.1395, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 2.805139186295503, |
|
"grad_norm": 0.34289895282316957, |
|
"learning_rate": 5.4550354038090055e-06, |
|
"loss": 1.1781, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 2.809421841541756, |
|
"grad_norm": 0.35445697790502123, |
|
"learning_rate": 5.41871269411827e-06, |
|
"loss": 1.1037, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 2.8137044967880085, |
|
"grad_norm": 0.360842710721591, |
|
"learning_rate": 5.3824778222989424e-06, |
|
"loss": 1.1276, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 2.817987152034261, |
|
"grad_norm": 0.3432929406538927, |
|
"learning_rate": 5.346331237816477e-06, |
|
"loss": 1.0847, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 2.822269807280514, |
|
"grad_norm": 0.34235194233646365, |
|
"learning_rate": 5.31027338904119e-06, |
|
"loss": 1.099, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.8265524625267666, |
|
"grad_norm": 0.3494573350685968, |
|
"learning_rate": 5.274304723242701e-06, |
|
"loss": 1.0714, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 2.8308351177730193, |
|
"grad_norm": 0.36423601172734904, |
|
"learning_rate": 5.238425686584383e-06, |
|
"loss": 1.0917, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 2.835117773019272, |
|
"grad_norm": 0.3390326644331241, |
|
"learning_rate": 5.2026367241178415e-06, |
|
"loss": 1.0927, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 2.8394004282655247, |
|
"grad_norm": 0.3389574380550951, |
|
"learning_rate": 5.166938279777356e-06, |
|
"loss": 1.0654, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 2.8436830835117775, |
|
"grad_norm": 0.3558059969945493, |
|
"learning_rate": 5.131330796374428e-06, |
|
"loss": 1.2394, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 2.84796573875803, |
|
"grad_norm": 0.3449281004788474, |
|
"learning_rate": 5.095814715592229e-06, |
|
"loss": 1.104, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 2.852248394004283, |
|
"grad_norm": 0.5741950084872994, |
|
"learning_rate": 5.060390477980181e-06, |
|
"loss": 1.1246, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 2.8565310492505356, |
|
"grad_norm": 0.3518602777082471, |
|
"learning_rate": 5.0250585229484445e-06, |
|
"loss": 1.0384, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 2.860813704496788, |
|
"grad_norm": 0.33201611617766386, |
|
"learning_rate": 4.9898192887624946e-06, |
|
"loss": 0.99, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 2.8650963597430406, |
|
"grad_norm": 0.33654063236244514, |
|
"learning_rate": 4.954673212537668e-06, |
|
"loss": 1.0835, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.8693790149892933, |
|
"grad_norm": 0.35749153943774153, |
|
"learning_rate": 4.9196207302337564e-06, |
|
"loss": 1.238, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 2.873661670235546, |
|
"grad_norm": 0.3963712296443138, |
|
"learning_rate": 4.884662276649588e-06, |
|
"loss": 1.0847, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 2.8779443254817987, |
|
"grad_norm": 0.33900776494342877, |
|
"learning_rate": 4.8497982854176475e-06, |
|
"loss": 0.9872, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 2.8822269807280514, |
|
"grad_norm": 0.3390240674831931, |
|
"learning_rate": 4.8150291889986655e-06, |
|
"loss": 1.1353, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 2.886509635974304, |
|
"grad_norm": 0.3789710837716194, |
|
"learning_rate": 4.780355418676305e-06, |
|
"loss": 1.1636, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.890792291220557, |
|
"grad_norm": 0.3773675590887804, |
|
"learning_rate": 4.745777404551755e-06, |
|
"loss": 1.1598, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 2.8950749464668095, |
|
"grad_norm": 0.350034350612991, |
|
"learning_rate": 4.711295575538437e-06, |
|
"loss": 0.9807, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 2.8993576017130622, |
|
"grad_norm": 0.35389009806788396, |
|
"learning_rate": 4.6769103593566805e-06, |
|
"loss": 1.1225, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 2.903640256959315, |
|
"grad_norm": 0.3480099705955127, |
|
"learning_rate": 4.6426221825283804e-06, |
|
"loss": 1.0797, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 2.907922912205567, |
|
"grad_norm": 0.4017077706255267, |
|
"learning_rate": 4.608431470371764e-06, |
|
"loss": 1.0613, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.91220556745182, |
|
"grad_norm": 0.3918078161458431, |
|
"learning_rate": 4.574338646996068e-06, |
|
"loss": 1.1085, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 2.9164882226980726, |
|
"grad_norm": 0.32920278218913035, |
|
"learning_rate": 4.540344135296296e-06, |
|
"loss": 0.9627, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 2.9207708779443253, |
|
"grad_norm": 0.3684497632182809, |
|
"learning_rate": 4.506448356947973e-06, |
|
"loss": 1.1601, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.925053533190578, |
|
"grad_norm": 0.3433737649981929, |
|
"learning_rate": 4.4726517324019165e-06, |
|
"loss": 1.0455, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 2.9293361884368307, |
|
"grad_norm": 0.35325748706550913, |
|
"learning_rate": 4.438954680879015e-06, |
|
"loss": 1.0403, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 2.9336188436830835, |
|
"grad_norm": 0.34196653123502885, |
|
"learning_rate": 4.405357620365032e-06, |
|
"loss": 1.2242, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 2.937901498929336, |
|
"grad_norm": 0.3473358887939904, |
|
"learning_rate": 4.371860967605413e-06, |
|
"loss": 0.9848, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 2.942184154175589, |
|
"grad_norm": 0.3408666843863744, |
|
"learning_rate": 4.338465138100147e-06, |
|
"loss": 1.0415, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 2.9464668094218416, |
|
"grad_norm": 0.3480886088157686, |
|
"learning_rate": 4.305170546098551e-06, |
|
"loss": 1.0479, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 2.9507494646680943, |
|
"grad_norm": 0.35083424116981776, |
|
"learning_rate": 4.271977604594206e-06, |
|
"loss": 1.1681, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.955032119914347, |
|
"grad_norm": 0.35317744200985374, |
|
"learning_rate": 4.238886725319774e-06, |
|
"loss": 1.1004, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 2.9593147751605997, |
|
"grad_norm": 0.36992718168834315, |
|
"learning_rate": 4.205898318741925e-06, |
|
"loss": 1.1501, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 2.9635974304068524, |
|
"grad_norm": 0.368258055811205, |
|
"learning_rate": 4.173012794056235e-06, |
|
"loss": 1.0589, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 2.967880085653105, |
|
"grad_norm": 0.3542218292326262, |
|
"learning_rate": 4.1402305591820945e-06, |
|
"loss": 1.1059, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 2.972162740899358, |
|
"grad_norm": 0.34221816300659097, |
|
"learning_rate": 4.107552020757688e-06, |
|
"loss": 0.9976, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 2.9764453961456105, |
|
"grad_norm": 0.3798509842359927, |
|
"learning_rate": 4.07497758413491e-06, |
|
"loss": 1.0692, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 2.980728051391863, |
|
"grad_norm": 0.3371568887516198, |
|
"learning_rate": 4.0425076533743585e-06, |
|
"loss": 1.1132, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 2.9850107066381155, |
|
"grad_norm": 0.34200886091760746, |
|
"learning_rate": 4.010142631240317e-06, |
|
"loss": 1.1367, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 2.9892933618843682, |
|
"grad_norm": 0.3874331285336969, |
|
"learning_rate": 3.977882919195755e-06, |
|
"loss": 1.1251, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 2.993576017130621, |
|
"grad_norm": 0.6572496407131426, |
|
"learning_rate": 3.945728917397355e-06, |
|
"loss": 1.1292, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.9978586723768736, |
|
"grad_norm": 0.8967911622926727, |
|
"learning_rate": 3.913681024690556e-06, |
|
"loss": 1.2485, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.8967911622926727, |
|
"learning_rate": 3.88173963860457e-06, |
|
"loss": 1.1349, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 3.0042826552462527, |
|
"grad_norm": 0.7045871892163175, |
|
"learning_rate": 3.849905155347512e-06, |
|
"loss": 0.919, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 3.0085653104925054, |
|
"grad_norm": 0.8731451662221503, |
|
"learning_rate": 3.818177969801412e-06, |
|
"loss": 0.9352, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 3.012847965738758, |
|
"grad_norm": 0.5862193210847736, |
|
"learning_rate": 3.7865584755173907e-06, |
|
"loss": 0.8273, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 3.017130620985011, |
|
"grad_norm": 0.4530975739265527, |
|
"learning_rate": 3.7550470647107205e-06, |
|
"loss": 0.8568, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 3.0214132762312635, |
|
"grad_norm": 0.775182178811676, |
|
"learning_rate": 3.723644128255989e-06, |
|
"loss": 0.8563, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 3.0256959314775163, |
|
"grad_norm": 0.8036787462194873, |
|
"learning_rate": 3.6923500556822433e-06, |
|
"loss": 0.9373, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 3.0299785867237685, |
|
"grad_norm": 0.938179132189991, |
|
"learning_rate": 3.6611652351681568e-06, |
|
"loss": 0.9144, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 3.0342612419700212, |
|
"grad_norm": 0.533475946230046, |
|
"learning_rate": 3.630090053537219e-06, |
|
"loss": 0.9413, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.038543897216274, |
|
"grad_norm": 0.4769499859035611, |
|
"learning_rate": 3.5991248962529313e-06, |
|
"loss": 0.8983, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 3.0428265524625266, |
|
"grad_norm": 0.7175275279939133, |
|
"learning_rate": 3.568270147414031e-06, |
|
"loss": 1.0184, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 3.0471092077087794, |
|
"grad_norm": 0.6710751659916476, |
|
"learning_rate": 3.5375261897497208e-06, |
|
"loss": 0.8867, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 3.051391862955032, |
|
"grad_norm": 0.5533721206962046, |
|
"learning_rate": 3.5068934046149303e-06, |
|
"loss": 0.9861, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 3.0556745182012848, |
|
"grad_norm": 0.5096487279270119, |
|
"learning_rate": 3.47637217198557e-06, |
|
"loss": 0.9957, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 3.0599571734475375, |
|
"grad_norm": 0.392777064751308, |
|
"learning_rate": 3.4459628704538503e-06, |
|
"loss": 0.8717, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 3.06423982869379, |
|
"grad_norm": 0.5848251912632335, |
|
"learning_rate": 3.41566587722353e-06, |
|
"loss": 0.9097, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 3.068522483940043, |
|
"grad_norm": 0.6598671290081435, |
|
"learning_rate": 3.3854815681053045e-06, |
|
"loss": 0.8214, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 3.0728051391862956, |
|
"grad_norm": 0.5792866171130799, |
|
"learning_rate": 3.355410317512081e-06, |
|
"loss": 0.939, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 3.0770877944325483, |
|
"grad_norm": 0.5597042015871566, |
|
"learning_rate": 3.3254524984543858e-06, |
|
"loss": 0.973, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.081370449678801, |
|
"grad_norm": 0.43120275321986723, |
|
"learning_rate": 3.2956084825357046e-06, |
|
"loss": 0.9494, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 3.0856531049250537, |
|
"grad_norm": 0.43798987398686245, |
|
"learning_rate": 3.265878639947885e-06, |
|
"loss": 0.9386, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 3.089935760171306, |
|
"grad_norm": 0.5043861622984578, |
|
"learning_rate": 3.2362633394665414e-06, |
|
"loss": 0.8571, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 3.0942184154175587, |
|
"grad_norm": 0.47877006992255494, |
|
"learning_rate": 3.206762948446486e-06, |
|
"loss": 0.8921, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 3.0985010706638114, |
|
"grad_norm": 0.48382021072189335, |
|
"learning_rate": 3.177377832817163e-06, |
|
"loss": 0.9232, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 3.102783725910064, |
|
"grad_norm": 0.4428791922415224, |
|
"learning_rate": 3.148108357078128e-06, |
|
"loss": 0.8745, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 3.107066381156317, |
|
"grad_norm": 0.3690822664254283, |
|
"learning_rate": 3.118954884294495e-06, |
|
"loss": 0.9788, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 3.1113490364025695, |
|
"grad_norm": 0.43897184340546713, |
|
"learning_rate": 3.0899177760924616e-06, |
|
"loss": 0.9244, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 3.1156316916488223, |
|
"grad_norm": 0.4770826738507552, |
|
"learning_rate": 3.060997392654813e-06, |
|
"loss": 0.8922, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 3.119914346895075, |
|
"grad_norm": 0.4254105042307734, |
|
"learning_rate": 3.032194092716449e-06, |
|
"loss": 0.8362, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.1241970021413277, |
|
"grad_norm": 0.4468366976539863, |
|
"learning_rate": 3.0035082335599555e-06, |
|
"loss": 0.87, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 3.1284796573875804, |
|
"grad_norm": 0.4429010036845597, |
|
"learning_rate": 2.9749401710111286e-06, |
|
"loss": 0.9305, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 3.132762312633833, |
|
"grad_norm": 0.4127010809706913, |
|
"learning_rate": 2.9464902594346185e-06, |
|
"loss": 0.9775, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 3.137044967880086, |
|
"grad_norm": 0.4086014968435575, |
|
"learning_rate": 2.9181588517294857e-06, |
|
"loss": 0.999, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 3.1413276231263385, |
|
"grad_norm": 0.3990791790573375, |
|
"learning_rate": 2.8899462993248473e-06, |
|
"loss": 0.9982, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 3.145610278372591, |
|
"grad_norm": 0.39305406800729714, |
|
"learning_rate": 2.861852952175513e-06, |
|
"loss": 0.8755, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 3.1498929336188435, |
|
"grad_norm": 0.42386938503526844, |
|
"learning_rate": 2.8338791587576435e-06, |
|
"loss": 0.9166, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 3.154175588865096, |
|
"grad_norm": 0.39610798719172463, |
|
"learning_rate": 2.80602526606443e-06, |
|
"loss": 0.8548, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 3.158458244111349, |
|
"grad_norm": 0.39866226920058223, |
|
"learning_rate": 2.7782916196017846e-06, |
|
"loss": 0.9252, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 3.1627408993576016, |
|
"grad_norm": 0.37822843502350373, |
|
"learning_rate": 2.7506785633840583e-06, |
|
"loss": 0.9459, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.1670235546038543, |
|
"grad_norm": 0.3821806357973024, |
|
"learning_rate": 2.7231864399297856e-06, |
|
"loss": 0.8745, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 3.171306209850107, |
|
"grad_norm": 0.42244290458780526, |
|
"learning_rate": 2.6958155902574e-06, |
|
"loss": 0.8758, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 3.1755888650963597, |
|
"grad_norm": 0.3891254431155144, |
|
"learning_rate": 2.6685663538810536e-06, |
|
"loss": 0.8505, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 3.1798715203426124, |
|
"grad_norm": 0.40848076108585224, |
|
"learning_rate": 2.6414390688063687e-06, |
|
"loss": 0.9505, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 3.184154175588865, |
|
"grad_norm": 0.3911863355408845, |
|
"learning_rate": 2.6144340715262437e-06, |
|
"loss": 0.9777, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 3.188436830835118, |
|
"grad_norm": 0.3568604123347815, |
|
"learning_rate": 2.58755169701672e-06, |
|
"loss": 0.9195, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 3.1927194860813706, |
|
"grad_norm": 0.4017015638792494, |
|
"learning_rate": 2.560792278732768e-06, |
|
"loss": 0.9821, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 3.1970021413276233, |
|
"grad_norm": 0.4407901593054486, |
|
"learning_rate": 2.534156148604207e-06, |
|
"loss": 0.8664, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 3.201284796573876, |
|
"grad_norm": 0.3486898375672858, |
|
"learning_rate": 2.5076436370315496e-06, |
|
"loss": 0.9108, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 3.2055674518201283, |
|
"grad_norm": 0.38504490186433393, |
|
"learning_rate": 2.4812550728819188e-06, |
|
"loss": 0.9088, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.209850107066381, |
|
"grad_norm": 0.4564848674737477, |
|
"learning_rate": 2.4549907834849644e-06, |
|
"loss": 0.9815, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 3.2141327623126337, |
|
"grad_norm": 0.3627458052575124, |
|
"learning_rate": 2.4288510946288063e-06, |
|
"loss": 0.9947, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 3.2184154175588864, |
|
"grad_norm": 0.39127133347387394, |
|
"learning_rate": 2.4028363305559894e-06, |
|
"loss": 0.855, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 3.222698072805139, |
|
"grad_norm": 1.0193828924775918, |
|
"learning_rate": 2.3769468139594727e-06, |
|
"loss": 0.9804, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 3.226980728051392, |
|
"grad_norm": 0.37456178415299207, |
|
"learning_rate": 2.3511828659785975e-06, |
|
"loss": 0.9075, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 3.2312633832976445, |
|
"grad_norm": 0.39856388723773317, |
|
"learning_rate": 2.3255448061951514e-06, |
|
"loss": 0.8887, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 3.235546038543897, |
|
"grad_norm": 0.38837064304140856, |
|
"learning_rate": 2.3000329526293456e-06, |
|
"loss": 0.9574, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 3.23982869379015, |
|
"grad_norm": 0.40519546139819784, |
|
"learning_rate": 2.2746476217359285e-06, |
|
"loss": 0.9492, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 3.2441113490364026, |
|
"grad_norm": 0.37621301359779613, |
|
"learning_rate": 2.249389128400219e-06, |
|
"loss": 0.9414, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 3.2483940042826553, |
|
"grad_norm": 0.40315000345725827, |
|
"learning_rate": 2.224257785934217e-06, |
|
"loss": 0.8958, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.252676659528908, |
|
"grad_norm": 0.3767855628173954, |
|
"learning_rate": 2.1992539060727137e-06, |
|
"loss": 0.8632, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 3.2569593147751608, |
|
"grad_norm": 0.38794589527885637, |
|
"learning_rate": 2.1743777989694292e-06, |
|
"loss": 0.8607, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 3.2612419700214135, |
|
"grad_norm": 0.365416432156038, |
|
"learning_rate": 2.1496297731931557e-06, |
|
"loss": 0.9429, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 3.265524625267666, |
|
"grad_norm": 0.38839779583938294, |
|
"learning_rate": 2.1250101357239426e-06, |
|
"loss": 0.8837, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 3.2698072805139184, |
|
"grad_norm": 0.3983702485606278, |
|
"learning_rate": 2.1005191919492795e-06, |
|
"loss": 0.9003, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 3.274089935760171, |
|
"grad_norm": 0.36784959637004716, |
|
"learning_rate": 2.0761572456603066e-06, |
|
"loss": 0.9904, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 3.278372591006424, |
|
"grad_norm": 0.4086191337846277, |
|
"learning_rate": 2.051924599048058e-06, |
|
"loss": 0.9865, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 3.2826552462526766, |
|
"grad_norm": 0.385807346281981, |
|
"learning_rate": 2.027821552699695e-06, |
|
"loss": 0.8834, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 3.2869379014989293, |
|
"grad_norm": 0.38623842578363365, |
|
"learning_rate": 2.0038484055948076e-06, |
|
"loss": 0.8881, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 3.291220556745182, |
|
"grad_norm": 0.43545389555296216, |
|
"learning_rate": 1.9800054551016593e-06, |
|
"loss": 0.9753, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.2955032119914347, |
|
"grad_norm": 0.41514320812303884, |
|
"learning_rate": 1.9562929969735494e-06, |
|
"loss": 0.9497, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 3.2997858672376874, |
|
"grad_norm": 0.4038608215680401, |
|
"learning_rate": 1.93271132534511e-06, |
|
"loss": 0.8644, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 3.30406852248394, |
|
"grad_norm": 0.3644719902383785, |
|
"learning_rate": 1.909260732728668e-06, |
|
"loss": 0.9556, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 3.308351177730193, |
|
"grad_norm": 0.42036574911137053, |
|
"learning_rate": 1.885941510010622e-06, |
|
"loss": 0.8886, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 3.3126338329764455, |
|
"grad_norm": 0.42796972706377573, |
|
"learning_rate": 1.8627539464478219e-06, |
|
"loss": 0.9207, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 3.3169164882226982, |
|
"grad_norm": 0.42284493016560876, |
|
"learning_rate": 1.8396983296639928e-06, |
|
"loss": 0.9094, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 3.3211991434689505, |
|
"grad_norm": 0.34934919011874943, |
|
"learning_rate": 1.816774945646163e-06, |
|
"loss": 0.8775, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 3.325481798715203, |
|
"grad_norm": 0.6600800009141096, |
|
"learning_rate": 1.7939840787411135e-06, |
|
"loss": 1.0994, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 3.329764453961456, |
|
"grad_norm": 0.3976354396493046, |
|
"learning_rate": 1.771326011651854e-06, |
|
"loss": 0.9024, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 3.3340471092077086, |
|
"grad_norm": 0.376362118495897, |
|
"learning_rate": 1.7488010254341172e-06, |
|
"loss": 0.8615, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.3383297644539613, |
|
"grad_norm": 0.40607166419814433, |
|
"learning_rate": 1.7264093994928648e-06, |
|
"loss": 0.912, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 3.342612419700214, |
|
"grad_norm": 0.4191724820681144, |
|
"learning_rate": 1.7041514115788428e-06, |
|
"loss": 0.8292, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 3.3468950749464668, |
|
"grad_norm": 0.3781354302914862, |
|
"learning_rate": 1.6820273377850997e-06, |
|
"loss": 0.8707, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 3.3511777301927195, |
|
"grad_norm": 0.42426853842502676, |
|
"learning_rate": 1.6600374525436057e-06, |
|
"loss": 0.7958, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 3.355460385438972, |
|
"grad_norm": 0.39253316989568815, |
|
"learning_rate": 1.6381820286218027e-06, |
|
"loss": 0.9362, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 3.359743040685225, |
|
"grad_norm": 0.42081804014283164, |
|
"learning_rate": 1.6164613371192668e-06, |
|
"loss": 0.8808, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 3.3640256959314776, |
|
"grad_norm": 0.3805908666364616, |
|
"learning_rate": 1.5948756474643098e-06, |
|
"loss": 0.9281, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 3.3683083511777303, |
|
"grad_norm": 0.3931155152751046, |
|
"learning_rate": 1.5734252274106549e-06, |
|
"loss": 0.8649, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 3.372591006423983, |
|
"grad_norm": 0.36893746954226686, |
|
"learning_rate": 1.5521103430341063e-06, |
|
"loss": 0.9245, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 3.3768736616702357, |
|
"grad_norm": 0.421055167309563, |
|
"learning_rate": 1.5309312587292595e-06, |
|
"loss": 0.9075, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.3811563169164884, |
|
"grad_norm": 0.39708496701725404, |
|
"learning_rate": 1.5098882372062084e-06, |
|
"loss": 0.9268, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 3.385438972162741, |
|
"grad_norm": 0.4147457741610103, |
|
"learning_rate": 1.488981539487308e-06, |
|
"loss": 0.9095, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 3.3897216274089934, |
|
"grad_norm": 0.3870528540533133, |
|
"learning_rate": 1.4682114249039007e-06, |
|
"loss": 0.9108, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 3.394004282655246, |
|
"grad_norm": 0.37912624135371875, |
|
"learning_rate": 1.447578151093143e-06, |
|
"loss": 0.8086, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 3.398286937901499, |
|
"grad_norm": 0.39893951982141634, |
|
"learning_rate": 1.427081973994769e-06, |
|
"loss": 0.8207, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 3.4025695931477515, |
|
"grad_norm": 0.41813759081817203, |
|
"learning_rate": 1.4067231478479465e-06, |
|
"loss": 0.8587, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 3.4068522483940042, |
|
"grad_norm": 0.37522463321771077, |
|
"learning_rate": 1.386501925188112e-06, |
|
"loss": 0.9387, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 3.411134903640257, |
|
"grad_norm": 0.40082201779472715, |
|
"learning_rate": 1.3664185568438252e-06, |
|
"loss": 0.8501, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 3.4154175588865097, |
|
"grad_norm": 0.4044778971930763, |
|
"learning_rate": 1.3464732919336877e-06, |
|
"loss": 0.9708, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 3.4197002141327624, |
|
"grad_norm": 0.3999055484285562, |
|
"learning_rate": 1.32666637786322e-06, |
|
"loss": 0.832, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.423982869379015, |
|
"grad_norm": 0.3940297074656928, |
|
"learning_rate": 1.3069980603218165e-06, |
|
"loss": 0.8606, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 3.428265524625268, |
|
"grad_norm": 0.4037209018320114, |
|
"learning_rate": 1.2874685832796856e-06, |
|
"loss": 0.9606, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 3.4325481798715205, |
|
"grad_norm": 0.36235619726375323, |
|
"learning_rate": 1.2680781889848296e-06, |
|
"loss": 0.8037, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 3.436830835117773, |
|
"grad_norm": 0.4134563817140967, |
|
"learning_rate": 1.248827117960033e-06, |
|
"loss": 0.9296, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 3.4411134903640255, |
|
"grad_norm": 0.37477385004204616, |
|
"learning_rate": 1.2297156089998887e-06, |
|
"loss": 0.8875, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 3.445396145610278, |
|
"grad_norm": 0.3598044961225808, |
|
"learning_rate": 1.2107438991678252e-06, |
|
"loss": 0.9181, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 3.449678800856531, |
|
"grad_norm": 0.4068544774348545, |
|
"learning_rate": 1.191912223793179e-06, |
|
"loss": 0.802, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 3.4539614561027836, |
|
"grad_norm": 0.39025679795801216, |
|
"learning_rate": 1.1732208164682567e-06, |
|
"loss": 0.9481, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 3.4582441113490363, |
|
"grad_norm": 0.40099768389636997, |
|
"learning_rate": 1.1546699090454596e-06, |
|
"loss": 0.8793, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 3.462526766595289, |
|
"grad_norm": 0.3527515368666591, |
|
"learning_rate": 1.1362597316343897e-06, |
|
"loss": 0.8926, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.4668094218415417, |
|
"grad_norm": 0.3960092351592858, |
|
"learning_rate": 1.117990512599007e-06, |
|
"loss": 0.8198, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 3.4710920770877944, |
|
"grad_norm": 0.37647074443425715, |
|
"learning_rate": 1.0998624785547916e-06, |
|
"loss": 0.8726, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 3.475374732334047, |
|
"grad_norm": 0.4260177464381465, |
|
"learning_rate": 1.081875854365924e-06, |
|
"loss": 0.8411, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 3.4796573875803, |
|
"grad_norm": 0.3678229667943419, |
|
"learning_rate": 1.0640308631425206e-06, |
|
"loss": 0.9303, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 3.4839400428265526, |
|
"grad_norm": 0.40562771211697285, |
|
"learning_rate": 1.0463277262378418e-06, |
|
"loss": 0.9258, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 3.4882226980728053, |
|
"grad_norm": 0.39758544559495274, |
|
"learning_rate": 1.0287666632455562e-06, |
|
"loss": 0.8981, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 3.492505353319058, |
|
"grad_norm": 0.4330255432907014, |
|
"learning_rate": 1.0113478919970166e-06, |
|
"loss": 0.877, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 3.4967880085653107, |
|
"grad_norm": 0.4091350493182955, |
|
"learning_rate": 9.940716285585572e-07, |
|
"loss": 0.8589, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 3.5010706638115634, |
|
"grad_norm": 0.3756040003940408, |
|
"learning_rate": 9.769380872288112e-07, |
|
"loss": 0.8303, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 3.505353319057816, |
|
"grad_norm": 0.3845542537371508, |
|
"learning_rate": 9.599474805360636e-07, |
|
"loss": 0.8673, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.5096359743040684, |
|
"grad_norm": 0.3621491496685947, |
|
"learning_rate": 9.431000192355904e-07, |
|
"loss": 0.8285, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 3.513918629550321, |
|
"grad_norm": 0.38581119937487457, |
|
"learning_rate": 9.263959123070792e-07, |
|
"loss": 0.9607, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 3.518201284796574, |
|
"grad_norm": 0.40699298803550954, |
|
"learning_rate": 9.098353669519985e-07, |
|
"loss": 0.9999, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 3.5224839400428265, |
|
"grad_norm": 0.36404111618752655, |
|
"learning_rate": 8.934185885910634e-07, |
|
"loss": 0.9621, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 3.526766595289079, |
|
"grad_norm": 0.4080837339902542, |
|
"learning_rate": 8.771457808616615e-07, |
|
"loss": 0.9385, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 3.531049250535332, |
|
"grad_norm": 0.37542101809408207, |
|
"learning_rate": 8.610171456153407e-07, |
|
"loss": 0.8838, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 3.5353319057815846, |
|
"grad_norm": 0.3622139219889446, |
|
"learning_rate": 8.450328829152962e-07, |
|
"loss": 0.9147, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 3.5396145610278373, |
|
"grad_norm": 0.41604941573448845, |
|
"learning_rate": 8.291931910339016e-07, |
|
"loss": 1.0337, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 3.54389721627409, |
|
"grad_norm": 0.3702662014383576, |
|
"learning_rate": 8.134982664502313e-07, |
|
"loss": 0.8722, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 3.5481798715203428, |
|
"grad_norm": 0.3968324847661136, |
|
"learning_rate": 7.979483038476496e-07, |
|
"loss": 0.8719, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.552462526766595, |
|
"grad_norm": 0.37196472198781777, |
|
"learning_rate": 7.825434961113612e-07, |
|
"loss": 0.9101, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 3.5567451820128477, |
|
"grad_norm": 0.404292826856257, |
|
"learning_rate": 7.672840343260503e-07, |
|
"loss": 0.883, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 3.5610278372591004, |
|
"grad_norm": 0.3986607359258053, |
|
"learning_rate": 7.521701077734921e-07, |
|
"loss": 0.914, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 3.565310492505353, |
|
"grad_norm": 0.37342839604299854, |
|
"learning_rate": 7.372019039302111e-07, |
|
"loss": 0.8733, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 3.569593147751606, |
|
"grad_norm": 0.3789431810782268, |
|
"learning_rate": 7.223796084651596e-07, |
|
"loss": 1.0656, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 3.5738758029978586, |
|
"grad_norm": 0.4143391476747435, |
|
"learning_rate": 7.077034052373991e-07, |
|
"loss": 0.9481, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 3.5781584582441113, |
|
"grad_norm": 0.3802282910841205, |
|
"learning_rate": 6.931734762938416e-07, |
|
"loss": 0.8704, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 3.582441113490364, |
|
"grad_norm": 0.4383295863697292, |
|
"learning_rate": 6.787900018669747e-07, |
|
"loss": 0.8664, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 3.5867237687366167, |
|
"grad_norm": 0.3620529674823113, |
|
"learning_rate": 6.645531603726287e-07, |
|
"loss": 0.8701, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 3.5910064239828694, |
|
"grad_norm": 0.4003391688371413, |
|
"learning_rate": 6.50463128407773e-07, |
|
"loss": 0.956, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.595289079229122, |
|
"grad_norm": 0.35710168185845254, |
|
"learning_rate": 6.365200807483138e-07, |
|
"loss": 0.9395, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 3.599571734475375, |
|
"grad_norm": 0.3888127985496108, |
|
"learning_rate": 6.227241903469322e-07, |
|
"loss": 0.868, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 3.6038543897216275, |
|
"grad_norm": 0.3788842530917126, |
|
"learning_rate": 6.090756283309379e-07, |
|
"loss": 0.9023, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 3.6081370449678802, |
|
"grad_norm": 0.4293764780811211, |
|
"learning_rate": 5.955745640001453e-07, |
|
"loss": 0.912, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 3.612419700214133, |
|
"grad_norm": 0.36701829937079145, |
|
"learning_rate": 5.822211648247797e-07, |
|
"loss": 0.9178, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 3.6167023554603857, |
|
"grad_norm": 0.420252154230346, |
|
"learning_rate": 5.690155964433868e-07, |
|
"loss": 0.9341, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 3.6209850107066384, |
|
"grad_norm": 0.4321448436806155, |
|
"learning_rate": 5.559580226607921e-07, |
|
"loss": 0.9177, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 3.6252676659528906, |
|
"grad_norm": 0.37257126041542216, |
|
"learning_rate": 5.430486054460629e-07, |
|
"loss": 0.9424, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 3.6295503211991433, |
|
"grad_norm": 0.3772731501472801, |
|
"learning_rate": 5.30287504930492e-07, |
|
"loss": 0.9146, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 3.633832976445396, |
|
"grad_norm": 0.3877711033336446, |
|
"learning_rate": 5.176748794056316e-07, |
|
"loss": 0.912, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.6381156316916488, |
|
"grad_norm": 0.3770006556479151, |
|
"learning_rate": 5.052108853213e-07, |
|
"loss": 1.0339, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 3.6423982869379015, |
|
"grad_norm": 0.40082811910610466, |
|
"learning_rate": 4.928956772836751e-07, |
|
"loss": 0.9, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 3.646680942184154, |
|
"grad_norm": 0.4080349447803649, |
|
"learning_rate": 4.807294080533486e-07, |
|
"loss": 0.9017, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 3.650963597430407, |
|
"grad_norm": 0.3750103705444987, |
|
"learning_rate": 4.687122285434456e-07, |
|
"loss": 0.9218, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 3.6552462526766596, |
|
"grad_norm": 0.4168122554116308, |
|
"learning_rate": 4.568442878177467e-07, |
|
"loss": 0.9165, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 3.6595289079229123, |
|
"grad_norm": 0.42052436299883195, |
|
"learning_rate": 4.451257330888442e-07, |
|
"loss": 1.0046, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 3.663811563169165, |
|
"grad_norm": 0.3775819321872966, |
|
"learning_rate": 4.33556709716311e-07, |
|
"loss": 0.8148, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 3.6680942184154177, |
|
"grad_norm": 0.40588411521050055, |
|
"learning_rate": 4.2213736120490373e-07, |
|
"loss": 0.9766, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 3.67237687366167, |
|
"grad_norm": 0.3879183257896917, |
|
"learning_rate": 4.1086782920276845e-07, |
|
"loss": 0.9038, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 3.6766595289079227, |
|
"grad_norm": 0.371088950938356, |
|
"learning_rate": 3.997482534997071e-07, |
|
"loss": 0.9691, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.6809421841541754, |
|
"grad_norm": 0.3974254305078794, |
|
"learning_rate": 3.8877877202541793e-07, |
|
"loss": 0.9505, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 3.685224839400428, |
|
"grad_norm": 0.38333801357842573, |
|
"learning_rate": 3.779595208478065e-07, |
|
"loss": 0.8308, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 3.689507494646681, |
|
"grad_norm": 0.37315579927328224, |
|
"learning_rate": 3.6729063417128285e-07, |
|
"loss": 0.8951, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 3.6937901498929335, |
|
"grad_norm": 0.41169860046752177, |
|
"learning_rate": 3.567722443351032e-07, |
|
"loss": 0.856, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 3.6980728051391862, |
|
"grad_norm": 0.3540168865001641, |
|
"learning_rate": 3.464044818117268e-07, |
|
"loss": 0.9567, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 3.702355460385439, |
|
"grad_norm": 0.41805384086496045, |
|
"learning_rate": 3.361874752051991e-07, |
|
"loss": 0.8485, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 3.7066381156316917, |
|
"grad_norm": 0.3932453571640372, |
|
"learning_rate": 3.2612135124955453e-07, |
|
"loss": 0.8981, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 3.7109207708779444, |
|
"grad_norm": 0.35556756655208993, |
|
"learning_rate": 3.1620623480724807e-07, |
|
"loss": 0.7991, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 3.715203426124197, |
|
"grad_norm": 0.38025591039841, |
|
"learning_rate": 3.064422488675986e-07, |
|
"loss": 0.921, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 3.71948608137045, |
|
"grad_norm": 0.39447979117902376, |
|
"learning_rate": 2.968295145452715e-07, |
|
"loss": 0.8516, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.7237687366167025, |
|
"grad_norm": 0.36729974047622016, |
|
"learning_rate": 2.8736815107877626e-07, |
|
"loss": 0.9292, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 3.728051391862955, |
|
"grad_norm": 0.3892287341045359, |
|
"learning_rate": 2.7805827582897683e-07, |
|
"loss": 0.8804, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 3.732334047109208, |
|
"grad_norm": 0.41914843746271097, |
|
"learning_rate": 2.6890000427765157e-07, |
|
"loss": 0.8756, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 3.7366167023554606, |
|
"grad_norm": 0.39627355945962395, |
|
"learning_rate": 2.598934500260455e-07, |
|
"loss": 0.9612, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 3.7408993576017133, |
|
"grad_norm": 0.40215083865929563, |
|
"learning_rate": 2.510387247934759e-07, |
|
"loss": 1.0171, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 3.7451820128479656, |
|
"grad_norm": 0.3908638307412036, |
|
"learning_rate": 2.4233593841593295e-07, |
|
"loss": 0.8599, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 3.7494646680942183, |
|
"grad_norm": 0.4326871280589204, |
|
"learning_rate": 2.3378519884472428e-07, |
|
"loss": 1.0263, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 3.753747323340471, |
|
"grad_norm": 0.38245250647594886, |
|
"learning_rate": 2.25386612145137e-07, |
|
"loss": 0.9593, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 3.7580299785867237, |
|
"grad_norm": 0.3778573404558164, |
|
"learning_rate": 2.1714028249511798e-07, |
|
"loss": 0.9466, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 3.7623126338329764, |
|
"grad_norm": 0.3700075006593136, |
|
"learning_rate": 2.0904631218398445e-07, |
|
"loss": 0.8128, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.766595289079229, |
|
"grad_norm": 0.3843775256635492, |
|
"learning_rate": 2.011048016111544e-07, |
|
"loss": 0.9134, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 3.770877944325482, |
|
"grad_norm": 0.385219325392379, |
|
"learning_rate": 1.9331584928490159e-07, |
|
"loss": 0.8527, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 3.7751605995717346, |
|
"grad_norm": 0.36661581147669026, |
|
"learning_rate": 1.8567955182113295e-07, |
|
"loss": 0.8592, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 3.7794432548179873, |
|
"grad_norm": 0.401361109957553, |
|
"learning_rate": 1.7819600394218956e-07, |
|
"loss": 0.9088, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 3.78372591006424, |
|
"grad_norm": 0.32988480791991265, |
|
"learning_rate": 1.7086529847566979e-07, |
|
"loss": 0.7957, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 3.7880085653104922, |
|
"grad_norm": 0.37989640262936986, |
|
"learning_rate": 1.6368752635328998e-07, |
|
"loss": 0.8675, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 3.792291220556745, |
|
"grad_norm": 0.3937658078234294, |
|
"learning_rate": 1.5666277660973533e-07, |
|
"loss": 0.8864, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 3.7965738758029977, |
|
"grad_norm": 0.3722219853982238, |
|
"learning_rate": 1.49791136381576e-07, |
|
"loss": 0.9096, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 3.8008565310492504, |
|
"grad_norm": 0.37559569493426515, |
|
"learning_rate": 1.430726909061722e-07, |
|
"loss": 0.8924, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 3.805139186295503, |
|
"grad_norm": 0.38719709372883876, |
|
"learning_rate": 1.3650752352062508e-07, |
|
"loss": 0.8479, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.809421841541756, |
|
"grad_norm": 0.3911144136584381, |
|
"learning_rate": 1.3009571566073853e-07, |
|
"loss": 0.9491, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 3.8137044967880085, |
|
"grad_norm": 0.37807417768830925, |
|
"learning_rate": 1.238373468600118e-07, |
|
"loss": 0.9301, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 3.817987152034261, |
|
"grad_norm": 0.37694080855509665, |
|
"learning_rate": 1.1773249474865133e-07, |
|
"loss": 0.8065, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 3.822269807280514, |
|
"grad_norm": 0.388921594089528, |
|
"learning_rate": 1.1178123505260623e-07, |
|
"loss": 0.9592, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 3.8265524625267666, |
|
"grad_norm": 0.4116324419131167, |
|
"learning_rate": 1.0598364159263436e-07, |
|
"loss": 0.8211, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 3.8308351177730193, |
|
"grad_norm": 0.36448244518924466, |
|
"learning_rate": 1.0033978628338214e-07, |
|
"loss": 0.8574, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 3.835117773019272, |
|
"grad_norm": 0.37097780876337194, |
|
"learning_rate": 9.484973913249096e-08, |
|
"loss": 0.9514, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 3.8394004282655247, |
|
"grad_norm": 0.36937494307460916, |
|
"learning_rate": 8.95135682397366e-08, |
|
"loss": 1.0152, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 3.8436830835117775, |
|
"grad_norm": 0.38701761947361546, |
|
"learning_rate": 8.433133979617313e-08, |
|
"loss": 0.944, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 3.84796573875803, |
|
"grad_norm": 0.4062184881145919, |
|
"learning_rate": 7.930311808332092e-08, |
|
"loss": 0.9758, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.852248394004283, |
|
"grad_norm": 0.37343762843807315, |
|
"learning_rate": 7.442896547237011e-08, |
|
"loss": 0.8735, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 3.8565310492505356, |
|
"grad_norm": 0.3671379727642055, |
|
"learning_rate": 6.970894242339516e-08, |
|
"loss": 0.8647, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 3.860813704496788, |
|
"grad_norm": 0.3958355267876771, |
|
"learning_rate": 6.514310748462205e-08, |
|
"loss": 0.9561, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 3.8650963597430406, |
|
"grad_norm": 0.382409326734392, |
|
"learning_rate": 6.073151729168585e-08, |
|
"loss": 0.8091, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 3.8693790149892933, |
|
"grad_norm": 0.4074968347015751, |
|
"learning_rate": 5.6474226566938236e-08, |
|
"loss": 0.9165, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 3.873661670235546, |
|
"grad_norm": 0.3600503231444295, |
|
"learning_rate": 5.2371288118764626e-08, |
|
"loss": 0.8608, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 3.8779443254817987, |
|
"grad_norm": 0.4385570932475021, |
|
"learning_rate": 4.8422752840933393e-08, |
|
"loss": 1.0001, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 3.8822269807280514, |
|
"grad_norm": 0.3526075337659528, |
|
"learning_rate": 4.462866971195745e-08, |
|
"loss": 0.8845, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 3.886509635974304, |
|
"grad_norm": 0.3912267742586606, |
|
"learning_rate": 4.098908579449334e-08, |
|
"loss": 0.9521, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 3.890792291220557, |
|
"grad_norm": 0.38329918065994895, |
|
"learning_rate": 3.750404623475284e-08, |
|
"loss": 0.9337, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.8950749464668095, |
|
"grad_norm": 0.3671570660694989, |
|
"learning_rate": 3.4173594261947826e-08, |
|
"loss": 0.8763, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 3.8993576017130622, |
|
"grad_norm": 0.36467443117322795, |
|
"learning_rate": 3.099777118774766e-08, |
|
"loss": 0.7929, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 3.903640256959315, |
|
"grad_norm": 0.3850495904484138, |
|
"learning_rate": 2.797661640577265e-08, |
|
"loss": 0.8685, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 3.907922912205567, |
|
"grad_norm": 0.3947552978578375, |
|
"learning_rate": 2.511016739110139e-08, |
|
"loss": 1.0001, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 3.91220556745182, |
|
"grad_norm": 0.35654913515444236, |
|
"learning_rate": 2.2398459699811415e-08, |
|
"loss": 0.8357, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 3.9164882226980726, |
|
"grad_norm": 0.3755511134463352, |
|
"learning_rate": 1.9841526968528145e-08, |
|
"loss": 0.8337, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 3.9207708779443253, |
|
"grad_norm": 0.4178807150767805, |
|
"learning_rate": 1.74394009140183e-08, |
|
"loss": 1.0103, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 3.925053533190578, |
|
"grad_norm": 0.36343375952599793, |
|
"learning_rate": 1.5192111332791582e-08, |
|
"loss": 1.0066, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 3.9293361884368307, |
|
"grad_norm": 0.42967159160836504, |
|
"learning_rate": 1.3099686100728758e-08, |
|
"loss": 0.8981, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 3.9336188436830835, |
|
"grad_norm": 0.37863498483420355, |
|
"learning_rate": 1.1162151172741664e-08, |
|
"loss": 0.9011, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.937901498929336, |
|
"grad_norm": 0.36158296723772976, |
|
"learning_rate": 9.379530582445672e-09, |
|
"loss": 0.9935, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 3.942184154175589, |
|
"grad_norm": 0.3992757868964545, |
|
"learning_rate": 7.751846441866883e-09, |
|
"loss": 0.9523, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 3.9464668094218416, |
|
"grad_norm": 0.4093754834796768, |
|
"learning_rate": 6.279118941163176e-09, |
|
"loss": 0.9193, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 3.9507494646680943, |
|
"grad_norm": 0.3812402104730706, |
|
"learning_rate": 4.961366348374408e-09, |
|
"loss": 0.8255, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 3.955032119914347, |
|
"grad_norm": 0.3932024496097198, |
|
"learning_rate": 3.798605009198986e-09, |
|
"loss": 0.8468, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 3.9593147751605997, |
|
"grad_norm": 0.36137752700716075, |
|
"learning_rate": 2.790849346788471e-09, |
|
"loss": 0.8799, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 3.9635974304068524, |
|
"grad_norm": 0.39672575824023565, |
|
"learning_rate": 1.9381118615699467e-09, |
|
"loss": 0.9367, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 3.967880085653105, |
|
"grad_norm": 0.4049246679049995, |
|
"learning_rate": 1.240403131090584e-09, |
|
"loss": 0.9305, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 3.972162740899358, |
|
"grad_norm": 0.36851044379383624, |
|
"learning_rate": 6.977318098844165e-10, |
|
"loss": 0.8928, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 3.9764453961456105, |
|
"grad_norm": 0.3887303558382742, |
|
"learning_rate": 3.1010462936825745e-10, |
|
"loss": 0.8732, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.980728051391863, |
|
"grad_norm": 0.38791626187967704, |
|
"learning_rate": 7.752639775565618e-11, |
|
"loss": 0.9141, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 3.9850107066381155, |
|
"grad_norm": 0.3676480337759505, |
|
"learning_rate": 0.0, |
|
"loss": 0.9296, |
|
"step": 932 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 932, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 117, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.2867186494210048e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|