{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9921259842519685,
  "eval_steps": 500,
  "global_step": 380,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.005249343832020997, "grad_norm": 4.262586140207107, "learning_rate": 1.2500000000000002e-07, "loss": 1.2143, "step": 1 },
    { "epoch": 0.010498687664041995, "grad_norm": 4.1559742669756154, "learning_rate": 2.5000000000000004e-07, "loss": 1.2307, "step": 2 },
    { "epoch": 0.015748031496062992, "grad_norm": 4.2196647284049895, "learning_rate": 3.75e-07, "loss": 1.2286, "step": 3 },
    { "epoch": 0.02099737532808399, "grad_norm": 4.13634077943981, "learning_rate": 5.000000000000001e-07, "loss": 1.2002, "step": 4 },
    { "epoch": 0.026246719160104987, "grad_norm": 4.015668455829927, "learning_rate": 6.25e-07, "loss": 1.1672, "step": 5 },
    { "epoch": 0.031496062992125984, "grad_norm": 3.832855314884781, "learning_rate": 7.5e-07, "loss": 1.1993, "step": 6 },
    { "epoch": 0.03674540682414698, "grad_norm": 3.8323407788221733, "learning_rate": 8.75e-07, "loss": 1.1554, "step": 7 },
    { "epoch": 0.04199475065616798, "grad_norm": 3.7465244180174917, "learning_rate": 1.0000000000000002e-06, "loss": 1.1672, "step": 8 },
    { "epoch": 0.047244094488188976, "grad_norm": 3.7827251172961986, "learning_rate": 1.125e-06, "loss": 1.1755, "step": 9 },
    { "epoch": 0.05249343832020997, "grad_norm": 3.470602675526565, "learning_rate": 1.25e-06, "loss": 1.1419, "step": 10 },
    { "epoch": 0.05774278215223097, "grad_norm": 3.556221853917274, "learning_rate": 1.3750000000000002e-06, "loss": 1.194, "step": 11 },
    { "epoch": 0.06299212598425197, "grad_norm": 3.324934060085957, "learning_rate": 1.5e-06, "loss": 1.1336, "step": 12 },
    { "epoch": 0.06824146981627296, "grad_norm": 2.965981688480075, "learning_rate": 1.6250000000000001e-06, "loss": 1.1349, "step": 13 },
    { "epoch": 0.07349081364829396, "grad_norm": 2.8658973663115046, "learning_rate": 1.75e-06, "loss": 1.1776, "step": 14 },
    { "epoch": 0.07874015748031496, "grad_norm": 2.720689909744523, "learning_rate": 1.8750000000000003e-06, "loss": 1.1549, "step": 15 },
    { "epoch": 0.08398950131233596, "grad_norm": 2.439062154183451, "learning_rate": 2.0000000000000003e-06, "loss": 1.141, "step": 16 },
    { "epoch": 0.08923884514435695, "grad_norm": 2.1353279033918002, "learning_rate": 2.125e-06, "loss": 1.139, "step": 17 },
    { "epoch": 0.09448818897637795, "grad_norm": 1.2092875477650313, "learning_rate": 2.25e-06, "loss": 1.0516, "step": 18 },
    { "epoch": 0.09973753280839895, "grad_norm": 1.0763847439987342, "learning_rate": 2.375e-06, "loss": 1.0802, "step": 19 },
    { "epoch": 0.10498687664041995, "grad_norm": 1.0340250902529846, "learning_rate": 2.5e-06, "loss": 1.0607, "step": 20 },
    { "epoch": 0.11023622047244094, "grad_norm": 0.8630354040462489, "learning_rate": 2.6250000000000003e-06, "loss": 1.0496, "step": 21 },
    { "epoch": 0.11548556430446194, "grad_norm": 0.8072735722523627, "learning_rate": 2.7500000000000004e-06, "loss": 1.08, "step": 22 },
    { "epoch": 0.12073490813648294, "grad_norm": 0.7305262800316248, "learning_rate": 2.875e-06, "loss": 1.0539, "step": 23 },
    { "epoch": 0.12598425196850394, "grad_norm": 0.7406394208995156, "learning_rate": 3e-06, "loss": 1.0529, "step": 24 },
    { "epoch": 0.13123359580052493, "grad_norm": 0.7903255346265977, "learning_rate": 3.125e-06, "loss": 0.9914, "step": 25 },
    { "epoch": 0.13648293963254593, "grad_norm": 0.8017049075586423, "learning_rate": 3.2500000000000002e-06, "loss": 1.0313, "step": 26 },
    { "epoch": 0.14173228346456693, "grad_norm": 0.7764417012146556, "learning_rate": 3.3750000000000003e-06, "loss": 1.0422, "step": 27 },
    { "epoch": 0.14698162729658792, "grad_norm": 0.7112571206699242, "learning_rate": 3.5e-06, "loss": 1.0187, "step": 28 },
    { "epoch": 0.15223097112860892, "grad_norm": 0.6069574236656299, "learning_rate": 3.625e-06, "loss": 0.9958, "step": 29 },
    { "epoch": 0.15748031496062992, "grad_norm": 0.5747553586770598, "learning_rate": 3.7500000000000005e-06, "loss": 0.9858, "step": 30 },
    { "epoch": 0.16272965879265092, "grad_norm": 0.590510044443011, "learning_rate": 3.875e-06, "loss": 0.9841, "step": 31 },
    { "epoch": 0.1679790026246719, "grad_norm": 0.5240510507345066, "learning_rate": 4.000000000000001e-06, "loss": 1.0171, "step": 32 },
    { "epoch": 0.1732283464566929, "grad_norm": 0.4913378134702597, "learning_rate": 4.125e-06, "loss": 1.0218, "step": 33 },
    { "epoch": 0.1784776902887139, "grad_norm": 0.47909247638813307, "learning_rate": 4.25e-06, "loss": 1.02, "step": 34 },
    { "epoch": 0.1837270341207349, "grad_norm": 0.4949722185401137, "learning_rate": 4.3750000000000005e-06, "loss": 1.0206, "step": 35 },
    { "epoch": 0.1889763779527559, "grad_norm": 0.48456911030733396, "learning_rate": 4.5e-06, "loss": 1.0321, "step": 36 },
    { "epoch": 0.1942257217847769, "grad_norm": 0.49965866704889284, "learning_rate": 4.625000000000001e-06, "loss": 1.0184, "step": 37 },
    { "epoch": 0.1994750656167979, "grad_norm": 0.4898360279427832, "learning_rate": 4.75e-06, "loss": 1.0165, "step": 38 },
    { "epoch": 0.2047244094488189, "grad_norm": 0.5039246597121783, "learning_rate": 4.875e-06, "loss": 0.9811, "step": 39 },
    { "epoch": 0.2099737532808399, "grad_norm": 0.462371012430454, "learning_rate": 5e-06, "loss": 0.9999, "step": 40 },
    { "epoch": 0.2152230971128609, "grad_norm": 0.4564370676360458, "learning_rate": 4.99989327925842e-06, "loss": 1.0056, "step": 41 },
    { "epoch": 0.2204724409448819, "grad_norm": 0.4838548423542603, "learning_rate": 4.999573126145132e-06, "loss": 1.0099, "step": 42 },
    { "epoch": 0.22572178477690288, "grad_norm": 0.47657399375831033, "learning_rate": 4.999039567993719e-06, "loss": 1.0059, "step": 43 },
    { "epoch": 0.23097112860892388, "grad_norm": 0.4609228968128241, "learning_rate": 4.998292650357558e-06, "loss": 0.9613, "step": 44 },
    { "epoch": 0.23622047244094488, "grad_norm": 0.5379441335347738, "learning_rate": 4.997332437005932e-06, "loss": 0.9912, "step": 45 },
    { "epoch": 0.24146981627296588, "grad_norm": 0.4488276134846175, "learning_rate": 4.996159009918586e-06, "loss": 0.9623, "step": 46 },
    { "epoch": 0.24671916010498687, "grad_norm": 0.7899879742350473, "learning_rate": 4.994772469278726e-06, "loss": 0.9373, "step": 47 },
    { "epoch": 0.25196850393700787, "grad_norm": 0.45351107525432893, "learning_rate": 4.99317293346447e-06, "loss": 0.9312, "step": 48 },
    { "epoch": 0.2572178477690289, "grad_norm": 0.4145057356223518, "learning_rate": 4.991360539038737e-06, "loss": 0.9133, "step": 49 },
    { "epoch": 0.26246719160104987, "grad_norm": 0.4371226648356658, "learning_rate": 4.989335440737587e-06, "loss": 0.9763, "step": 50 },
    { "epoch": 0.2677165354330709, "grad_norm": 0.49987252416920314, "learning_rate": 4.987097811457015e-06, "loss": 0.9753, "step": 51 },
    { "epoch": 0.27296587926509186, "grad_norm": 0.4737378597917066, "learning_rate": 4.984647842238185e-06, "loss": 0.9509, "step": 52 },
    { "epoch": 0.2782152230971129, "grad_norm": 0.4803218242006868, "learning_rate": 4.981985742251123e-06, "loss": 1.0008, "step": 53 },
    { "epoch": 0.28346456692913385, "grad_norm": 0.4238793960297473, "learning_rate": 4.9791117387768575e-06, "loss": 1.0024, "step": 54 },
    { "epoch": 0.2887139107611549, "grad_norm": 0.4135698693902407, "learning_rate": 4.976026077188013e-06, "loss": 0.9208, "step": 55 },
    { "epoch": 0.29396325459317585, "grad_norm": 0.49893312330659967, "learning_rate": 4.972729020927866e-06, "loss": 0.9771, "step": 56 },
    { "epoch": 0.2992125984251969, "grad_norm": 0.43878390791709027, "learning_rate": 4.9692208514878445e-06, "loss": 0.937, "step": 57 },
    { "epoch": 0.30446194225721784, "grad_norm": 0.42250251333050837, "learning_rate": 4.965501868383507e-06, "loss": 0.9287, "step": 58 },
    { "epoch": 0.30971128608923887, "grad_norm": 0.42426620742455357, "learning_rate": 4.961572389128959e-06, "loss": 0.9374, "step": 59 },
    { "epoch": 0.31496062992125984, "grad_norm": 0.4622588583575654, "learning_rate": 4.957432749209755e-06, "loss": 0.99, "step": 60 },
    { "epoch": 0.32020997375328086, "grad_norm": 0.4324798534582787, "learning_rate": 4.953083302054247e-06, "loss": 1.0035, "step": 61 },
    { "epoch": 0.32545931758530183, "grad_norm": 0.4303590460079348, "learning_rate": 4.948524419003415e-06, "loss": 0.9585, "step": 62 },
    { "epoch": 0.33070866141732286, "grad_norm": 0.42861048906851473, "learning_rate": 4.943756489279164e-06, "loss": 0.9772, "step": 63 },
    { "epoch": 0.3359580052493438, "grad_norm": 0.4115697149677722, "learning_rate": 4.938779919951092e-06, "loss": 0.9426, "step": 64 },
    { "epoch": 0.34120734908136485, "grad_norm": 0.417300147661056, "learning_rate": 4.933595135901733e-06, "loss": 0.9447, "step": 65 },
    { "epoch": 0.3464566929133858, "grad_norm": 0.4040601468423496, "learning_rate": 4.928202579790285e-06, "loss": 0.966, "step": 66 },
    { "epoch": 0.35170603674540685, "grad_norm": 0.3677161548087925, "learning_rate": 4.9226027120148195e-06, "loss": 0.941, "step": 67 },
    { "epoch": 0.3569553805774278, "grad_norm": 0.3832286188469758, "learning_rate": 4.916796010672969e-06, "loss": 0.9822, "step": 68 },
    { "epoch": 0.36220472440944884, "grad_norm": 0.4345291089971557, "learning_rate": 4.910782971521112e-06, "loss": 0.9687, "step": 69 },
    { "epoch": 0.3674540682414698, "grad_norm": 0.4133843702108161, "learning_rate": 4.904564107932048e-06, "loss": 0.9283, "step": 70 },
    { "epoch": 0.37270341207349084, "grad_norm": 0.38465081625519515, "learning_rate": 4.898139950851163e-06, "loss": 0.9479, "step": 71 },
    { "epoch": 0.3779527559055118, "grad_norm": 0.424340283488392, "learning_rate": 4.891511048751102e-06, "loss": 0.9593, "step": 72 },
    { "epoch": 0.38320209973753283, "grad_norm": 0.40638719244646565, "learning_rate": 4.884677967584945e-06, "loss": 0.9264, "step": 73 },
    { "epoch": 0.3884514435695538, "grad_norm": 0.4168684939745582, "learning_rate": 4.8776412907378845e-06, "loss": 0.9399, "step": 74 },
    { "epoch": 0.3937007874015748, "grad_norm": 0.3928820462579945, "learning_rate": 4.870401618977415e-06, "loss": 0.9736, "step": 75 },
    { "epoch": 0.3989501312335958, "grad_norm": 0.48005159652101537, "learning_rate": 4.86295957040205e-06, "loss": 0.9935, "step": 76 },
    { "epoch": 0.4041994750656168, "grad_norm": 0.4229896582271501, "learning_rate": 4.855315780388541e-06, "loss": 0.9358, "step": 77 },
    { "epoch": 0.4094488188976378, "grad_norm": 0.4258651800976577, "learning_rate": 4.847470901537642e-06, "loss": 0.9583, "step": 78 },
    { "epoch": 0.4146981627296588, "grad_norm": 0.4069975102427097, "learning_rate": 4.839425603618382e-06, "loss": 0.9237, "step": 79 },
    { "epoch": 0.4199475065616798, "grad_norm": 0.3853018091795304, "learning_rate": 4.83118057351089e-06, "loss": 0.9528, "step": 80 },
    { "epoch": 0.4251968503937008, "grad_norm": 0.41640976933906576, "learning_rate": 4.822736515147748e-06, "loss": 0.9281, "step": 81 },
    { "epoch": 0.4304461942257218, "grad_norm": 0.4341414623727107, "learning_rate": 4.814094149453891e-06, "loss": 0.983, "step": 82 },
    { "epoch": 0.4356955380577428, "grad_norm": 0.41893835966431175, "learning_rate": 4.805254214285061e-06, "loss": 0.9691, "step": 83 },
    { "epoch": 0.4409448818897638, "grad_norm": 0.3778203088676148, "learning_rate": 4.796217464364808e-06, "loss": 0.9386, "step": 84 },
    { "epoch": 0.4461942257217848, "grad_norm": 0.4049971835755209, "learning_rate": 4.786984671220053e-06, "loss": 0.9604, "step": 85 },
    { "epoch": 0.45144356955380577, "grad_norm": 0.39430021496671025, "learning_rate": 4.7775566231152216e-06, "loss": 0.9625, "step": 86 },
    { "epoch": 0.4566929133858268, "grad_norm": 0.4303135030837173, "learning_rate": 4.767934124984941e-06, "loss": 0.9421, "step": 87 },
    { "epoch": 0.46194225721784776, "grad_norm": 0.4037289734626591, "learning_rate": 4.7581179983653224e-06, "loss": 0.9395, "step": 88 },
    { "epoch": 0.4671916010498688, "grad_norm": 0.389977925556389, "learning_rate": 4.7481090813238145e-06, "loss": 0.9494, "step": 89 },
    { "epoch": 0.47244094488188976, "grad_norm": 0.4221791671439025, "learning_rate": 4.737908228387656e-06, "loss": 0.9236, "step": 90 },
    { "epoch": 0.4776902887139108, "grad_norm": 0.38571752654505004, "learning_rate": 4.72751631047092e-06, "loss": 0.9836, "step": 91 },
    { "epoch": 0.48293963254593175, "grad_norm": 0.3756683416227555, "learning_rate": 4.716934214800155e-06, "loss": 0.9847, "step": 92 },
    { "epoch": 0.4881889763779528, "grad_norm": 0.43805821160946656, "learning_rate": 4.70616284483864e-06, "loss": 0.9759, "step": 93 },
    { "epoch": 0.49343832020997375, "grad_norm": 0.42186430904629324, "learning_rate": 4.695203120209245e-06, "loss": 0.9381, "step": 94 },
    { "epoch": 0.49868766404199477, "grad_norm": 0.3816672833085737, "learning_rate": 4.684055976615924e-06, "loss": 0.9381, "step": 95 },
    { "epoch": 0.5039370078740157, "grad_norm": 0.4151475033902527, "learning_rate": 4.672722365763821e-06, "loss": 0.9449, "step": 96 },
    { "epoch": 0.5091863517060368, "grad_norm": 0.39782263887391656, "learning_rate": 4.66120325527802e-06, "loss": 0.9617, "step": 97 },
    { "epoch": 0.5144356955380578, "grad_norm": 0.40910931809868095, "learning_rate": 4.649499628620931e-06, "loss": 0.9334, "step": 98 },
    { "epoch": 0.5196850393700787, "grad_norm": 0.3910070221772484, "learning_rate": 4.637612485008328e-06, "loss": 0.9344, "step": 99 },
    { "epoch": 0.5249343832020997, "grad_norm": 0.3983780342477285, "learning_rate": 4.625542839324036e-06, "loss": 0.9201, "step": 100 },
    { "epoch": 0.5301837270341208, "grad_norm": 0.41440216503056093, "learning_rate": 4.613291722033285e-06, "loss": 0.9854, "step": 101 },
    { "epoch": 0.5354330708661418, "grad_norm": 0.399617487921351, "learning_rate": 4.600860179094732e-06, "loss": 0.9317, "step": 102 },
    { "epoch": 0.5406824146981627, "grad_norm": 0.38269422826680943, "learning_rate": 4.588249271871164e-06, "loss": 0.9026, "step": 103 },
    { "epoch": 0.5459317585301837, "grad_norm": 0.4161308186315819, "learning_rate": 4.575460077038877e-06, "loss": 0.9402, "step": 104 },
    { "epoch": 0.5511811023622047, "grad_norm": 0.38768080702286994, "learning_rate": 4.562493686495756e-06, "loss": 0.9276, "step": 105 },
    { "epoch": 0.5564304461942258, "grad_norm": 0.4454064598197059, "learning_rate": 4.5493512072680535e-06, "loss": 0.9452, "step": 106 },
    { "epoch": 0.5616797900262467, "grad_norm": 0.3695333796660741, "learning_rate": 4.536033761415871e-06, "loss": 0.9493, "step": 107 },
    { "epoch": 0.5669291338582677, "grad_norm": 0.4111826761670116, "learning_rate": 4.522542485937369e-06, "loss": 0.9109, "step": 108 },
    { "epoch": 0.5721784776902887, "grad_norm": 0.384299356893817, "learning_rate": 4.508878532671684e-06, "loss": 0.938, "step": 109 },
    { "epoch": 0.5774278215223098, "grad_norm": 0.39579785919606036, "learning_rate": 4.4950430682005995e-06, "loss": 0.924, "step": 110 },
    { "epoch": 0.5826771653543307, "grad_norm": 0.40856237862457445, "learning_rate": 4.481037273748935e-06, "loss": 0.9092, "step": 111 },
    { "epoch": 0.5879265091863517, "grad_norm": 0.38651275760437465, "learning_rate": 4.4668623450837085e-06, "loss": 0.9311, "step": 112 },
    { "epoch": 0.5931758530183727, "grad_norm": 0.378464413540759, "learning_rate": 4.452519492412039e-06, "loss": 0.9255, "step": 113 },
    { "epoch": 0.5984251968503937, "grad_norm": 0.3779607392785478, "learning_rate": 4.438009940277825e-06, "loss": 0.9024, "step": 114 },
    { "epoch": 0.6036745406824147, "grad_norm": 0.410919812420786, "learning_rate": 4.423334927457198e-06, "loss": 0.9105, "step": 115 },
    { "epoch": 0.6089238845144357, "grad_norm": 0.41911457133998964, "learning_rate": 4.408495706852758e-06, "loss": 0.9483, "step": 116 },
    { "epoch": 0.6141732283464567, "grad_norm": 0.39458100856937656, "learning_rate": 4.393493545386607e-06, "loss": 0.9388, "step": 117 },
    { "epoch": 0.6194225721784777, "grad_norm": 0.40200990450921853, "learning_rate": 4.378329723892184e-06, "loss": 0.927, "step": 118 },
    { "epoch": 0.6246719160104987, "grad_norm": 0.42346347209583185, "learning_rate": 4.3630055370049065e-06, "loss": 0.9439, "step": 119 },
    { "epoch": 0.6299212598425197, "grad_norm": 0.40678132773256936, "learning_rate": 4.3475222930516484e-06, "loss": 0.994, "step": 120 },
    { "epoch": 0.6351706036745407, "grad_norm": 0.4215857320252809, "learning_rate": 4.3318813139390295e-06, "loss": 0.8946, "step": 121 },
    { "epoch": 0.6404199475065617, "grad_norm": 0.39311078218513845, "learning_rate": 4.316083935040561e-06, "loss": 0.9129, "step": 122 },
    { "epoch": 0.6456692913385826, "grad_norm": 0.38855202294392815, "learning_rate": 4.300131505082637e-06, "loss": 0.9229, "step": 123 },
    { "epoch": 0.6509186351706037, "grad_norm": 0.437029643792513, "learning_rate": 4.284025386029381e-06, "loss": 0.9151, "step": 124 },
    { "epoch": 0.6561679790026247, "grad_norm": 0.41975844377898647, "learning_rate": 4.267766952966369e-06, "loss": 0.9224, "step": 125 },
    { "epoch": 0.6614173228346457, "grad_norm": 0.39675424131353054, "learning_rate": 4.251357593983228e-06, "loss": 0.9371, "step": 126 },
    { "epoch": 0.6666666666666666, "grad_norm": 0.3980469727389334, "learning_rate": 4.234798710055124e-06, "loss": 0.928, "step": 127 },
    { "epoch": 0.6719160104986877, "grad_norm": 0.3868709015712275, "learning_rate": 4.218091714923157e-06, "loss": 0.925, "step": 128 },
    { "epoch": 0.6771653543307087, "grad_norm": 0.4150283358784954, "learning_rate": 4.2012380349736544e-06, "loss": 0.9632, "step": 129 },
    { "epoch": 0.6824146981627297, "grad_norm": 0.3965769285465639, "learning_rate": 4.184239109116393e-06, "loss": 0.916, "step": 130 },
    { "epoch": 0.6876640419947506, "grad_norm": 0.4040770175389451, "learning_rate": 4.167096388661754e-06, "loss": 0.9013, "step": 131 },
    { "epoch": 0.6929133858267716, "grad_norm": 0.4065859256340288, "learning_rate": 4.149811337196808e-06, "loss": 0.9115, "step": 132 },
    { "epoch": 0.6981627296587927, "grad_norm": 0.39095752489168806, "learning_rate": 4.132385430460361e-06, "loss": 0.9401, "step": 133 },
    { "epoch": 0.7034120734908137, "grad_norm": 0.37457240781741025, "learning_rate": 4.114820156216969e-06, "loss": 0.9178, "step": 134 },
    { "epoch": 0.7086614173228346, "grad_norm": 0.399797934048477, "learning_rate": 4.097117014129903e-06, "loss": 0.9579, "step": 135 },
    { "epoch": 0.7139107611548556, "grad_norm": 0.4477976289426918, "learning_rate": 4.079277515633127e-06, "loss": 0.9448, "step": 136 },
    { "epoch": 0.7191601049868767, "grad_norm": 0.38322925064614577, "learning_rate": 4.061303183802248e-06, "loss": 0.9192, "step": 137 },
    { "epoch": 0.7244094488188977, "grad_norm": 0.40594305556063015, "learning_rate": 4.043195553224482e-06, "loss": 0.9254, "step": 138 },
    { "epoch": 0.7296587926509186, "grad_norm": 0.36120175115175723, "learning_rate": 4.024956169867642e-06, "loss": 0.9591, "step": 139 },
    { "epoch": 0.7349081364829396, "grad_norm": 0.38003421628995726, "learning_rate": 4.006586590948141e-06, "loss": 0.9154, "step": 140 },
    { "epoch": 0.7401574803149606, "grad_norm": 0.3779196146369032, "learning_rate": 3.9880883847980475e-06, "loss": 0.9412, "step": 141 },
    { "epoch": 0.7454068241469817, "grad_norm": 0.3908051818324559, "learning_rate": 3.969463130731183e-06, "loss": 0.9037, "step": 142 },
    { "epoch": 0.7506561679790026, "grad_norm": 0.4043989800878751, "learning_rate": 3.95071241890829e-06, "loss": 0.9032, "step": 143 },
    { "epoch": 0.7559055118110236, "grad_norm": 0.396255171092691, "learning_rate": 3.9318378502012636e-06, "loss": 0.9288, "step": 144 },
    { "epoch": 0.7611548556430446, "grad_norm": 0.40757200005039956, "learning_rate": 3.91284103605648e-06, "loss": 0.9179, "step": 145 },
    { "epoch": 0.7664041994750657, "grad_norm": 0.3862826523020415, "learning_rate": 3.893723598357214e-06, "loss": 0.8894, "step": 146 },
    { "epoch": 0.7716535433070866, "grad_norm": 0.3770664118848218, "learning_rate": 3.874487169285168e-06, "loss": 0.8898, "step": 147 },
    { "epoch": 0.7769028871391076, "grad_norm": 0.3854873052422692, "learning_rate": 3.855133391181124e-06, "loss": 0.9135, "step": 148 },
    { "epoch": 0.7821522309711286, "grad_norm": 0.38997928953459443, "learning_rate": 3.835663916404721e-06, "loss": 0.8843, "step": 149 },
    { "epoch": 0.7874015748031497, "grad_norm": 0.45712052623884775, "learning_rate": 3.81608040719339e-06, "loss": 0.9694, "step": 150 },
    { "epoch": 0.7926509186351706, "grad_norm": 0.41182630339038373, "learning_rate": 3.7963845355204303e-06, "loss": 0.9194, "step": 151 },
    { "epoch": 0.7979002624671916, "grad_norm": 0.4065794661619882, "learning_rate": 3.7765779829522674e-06, "loss": 0.9278, "step": 152 },
    { "epoch": 0.8031496062992126, "grad_norm": 0.38122636394786497, "learning_rate": 3.7566624405048847e-06, "loss": 0.892, "step": 153 },
    { "epoch": 0.8083989501312336, "grad_norm": 0.4026809828765533, "learning_rate": 3.736639608499448e-06, "loss": 0.9246, "step": 154 },
    { "epoch": 0.8136482939632546, "grad_norm": 0.3884561226623423, "learning_rate": 3.7165111964171407e-06, "loss": 0.9438, "step": 155 },
    { "epoch": 0.8188976377952756, "grad_norm": 0.3844815855908215, "learning_rate": 3.6962789227532165e-06, "loss": 0.9316, "step": 156 },
    { "epoch": 0.8241469816272966, "grad_norm": 0.3715029261772477, "learning_rate": 3.675944514870274e-06, "loss": 0.924, "step": 157 },
    { "epoch": 0.8293963254593176, "grad_norm": 0.4062567635907239, "learning_rate": 3.6555097088507837e-06, "loss": 0.9616, "step": 158 },
    { "epoch": 0.8346456692913385, "grad_norm": 0.40370287012056855, "learning_rate": 3.634976249348867e-06, "loss": 0.9526, "step": 159 },
    { "epoch": 0.8398950131233596, "grad_norm": 0.3899308479039462, "learning_rate": 3.6143458894413463e-06, "loss": 0.9215, "step": 160 },
    { "epoch": 0.8451443569553806, "grad_norm": 0.3813874729696962, "learning_rate": 3.5936203904780665e-06, "loss": 0.953, "step": 161 },
    { "epoch": 0.8503937007874016, "grad_norm": 0.38224710953621555, "learning_rate": 3.5728015219315226e-06, "loss": 0.894, "step": 162 },
    { "epoch": 0.8556430446194225, "grad_norm": 0.4038493099805114, "learning_rate": 3.5518910612457885e-06, "loss": 0.9614, "step": 163 },
    { "epoch": 0.8608923884514436, "grad_norm": 0.3976228258032158, "learning_rate": 3.530890793684759e-06, "loss": 0.9364, "step": 164 },
    { "epoch": 0.8661417322834646, "grad_norm": 0.4141980981190029, "learning_rate": 3.5098025121797375e-06, "loss": 0.9316, "step": 165 },
    { "epoch": 0.8713910761154856, "grad_norm": 0.4156903411116242, "learning_rate": 3.4886280171763563e-06, "loss": 0.923, "step": 166 },
    { "epoch": 0.8766404199475065, "grad_norm": 0.38250613966133334, "learning_rate": 3.467369116480864e-06, "loss": 0.9153, "step": 167 },
    { "epoch": 0.8818897637795275, "grad_norm": 0.4070344049084728, "learning_rate": 3.446027625105776e-06, "loss": 0.9347, "step": 168 },
    { "epoch": 0.8871391076115486, "grad_norm": 0.3844877252304378, "learning_rate": 3.424605365114923e-06, "loss": 0.9214, "step": 169 },
    { "epoch": 0.8923884514435696, "grad_norm": 0.37585915847896717, "learning_rate": 3.403104165467883e-06, "loss": 0.9133, "step": 170 },
    { "epoch": 0.8976377952755905, "grad_norm": 0.6300998021233689, "learning_rate": 3.3815258618638316e-06, "loss": 0.9395, "step": 171 },
    { "epoch": 0.9028871391076115, "grad_norm": 0.3994171061317929, "learning_rate": 3.359872296584821e-06, "loss": 0.917, "step": 172 },
    { "epoch": 0.9081364829396326, "grad_norm": 0.3806190962949758, "learning_rate": 3.338145318338485e-06, "loss": 0.9408, "step": 173 },
    { "epoch": 0.9133858267716536, "grad_norm": 0.36777162273585867, "learning_rate": 3.3163467821002082e-06, "loss": 0.9346, "step": 174 },
    { "epoch": 0.9186351706036745, "grad_norm": 0.3927948155295108, "learning_rate": 3.2944785489547544e-06, "loss": 0.9121, "step": 175 },
    { "epoch": 0.9238845144356955, "grad_norm": 0.3958182964876464, "learning_rate": 3.272542485937369e-06, "loss": 0.9318, "step": 176 },
    { "epoch": 0.9291338582677166, "grad_norm": 0.39754399123912254, "learning_rate": 3.250540465874382e-06, "loss": 0.9244, "step": 177 },
    { "epoch": 0.9343832020997376, "grad_norm": 0.38861358420887904, "learning_rate": 3.228474367223312e-06, "loss": 0.9051, "step": 178 },
    { "epoch": 0.9396325459317585, "grad_norm": 0.3926071787199394, "learning_rate": 3.206346073912488e-06, "loss": 0.9409, "step": 179 },
    { "epoch": 0.9448818897637795, "grad_norm": 0.4008739814562732, "learning_rate": 3.184157475180208e-06, "loss": 0.9222, "step": 180 },
    { "epoch": 0.9501312335958005, "grad_norm": 0.3665607732753151, "learning_rate": 3.1619104654134397e-06, "loss": 0.913, "step": 181 },
    { "epoch": 0.9553805774278216, "grad_norm": 0.38983081632202093, "learning_rate": 3.1396069439860894e-06, "loss": 0.9297, "step": 182 },
    { "epoch": 0.9606299212598425, "grad_norm": 0.3645800135143814, "learning_rate": 3.117248815096833e-06, "loss": 0.8883, "step": 183 },
    { "epoch": 0.9658792650918635, "grad_norm": 0.3720108758786826, "learning_rate": 3.094837987606547e-06, "loss": 0.9204, "step": 184 },
    { "epoch": 0.9711286089238845, "grad_norm": 0.3818234182095755, "learning_rate": 3.0723763748753354e-06, "loss": 0.8814, "step": 185 },
    { "epoch": 0.9763779527559056, "grad_norm": 0.3847151463563777, "learning_rate": 3.049865894599172e-06, "loss": 0.9133, "step": 186 },
    { "epoch": 0.9816272965879265, "grad_norm": 0.37971775342950864, "learning_rate": 3.027308468646175e-06, "loss": 0.8906, "step": 187 },
    { "epoch": 0.9868766404199475, "grad_norm": 0.41572124992250203, "learning_rate": 3.0047060228925256e-06, "loss": 0.9672, "step": 188 },
    { "epoch": 0.9921259842519685, "grad_norm": 0.40219608509658256, "learning_rate": 2.9820604870580426e-06, "loss": 0.9011, "step": 189 },
    { "epoch": 0.9973753280839895, "grad_norm": 0.397234257384178, "learning_rate": 2.9593737945414264e-06, "loss": 0.9174, "step": 190 },
    { "epoch": 1.0, "grad_norm": 0.397234257384178, "learning_rate": 2.9366478822551973e-06, "loss": 0.9204, "step": 191 },
    { "epoch": 1.005249343832021, "grad_norm": 0.6279719871788059, "learning_rate": 2.913884690460325e-06, "loss": 0.8376, "step": 192 },
    { "epoch": 1.010498687664042, "grad_norm": 0.41242697308859033, "learning_rate": 2.8910861626005774e-06, "loss": 0.8369, "step": 193 },
    { "epoch": 1.015748031496063, "grad_norm": 0.38981163388237194, "learning_rate": 2.8682542451365943e-06, "loss": 0.8493, "step": 194 },
    { "epoch": 1.020997375328084, "grad_norm": 0.42318354204075453, "learning_rate": 2.845390887379706e-06, "loss": 0.8618, "step": 195 },
    { "epoch": 1.026246719160105, "grad_norm": 0.5052809667467608, "learning_rate": 2.822498041325509e-06, "loss": 0.8644, "step": 196 },
    { "epoch": 1.031496062992126, "grad_norm": 0.36960238923766053, "learning_rate": 2.7995776614872083e-06, "loss": 0.8484, "step": 197 },
    { "epoch": 1.036745406824147, "grad_norm": 0.3606472920225704, "learning_rate": 2.776631704728752e-06, "loss": 0.8413, "step": 198 },
    { "epoch": 1.041994750656168, "grad_norm": 0.38778609923815943, "learning_rate": 2.753662130097758e-06, "loss": 0.8266, "step": 199 },
    { "epoch": 1.047244094488189, "grad_norm": 0.3636856280047818, "learning_rate": 2.730670898658255e-06, "loss": 0.8285, "step": 200 },
    { "epoch": 1.05249343832021, "grad_norm": 0.3886565787437705, "learning_rate": 2.70765997332326e-06, "loss": 0.8628, "step": 201 },
    { "epoch": 1.057742782152231, "grad_norm": 0.41378173196429036, "learning_rate": 2.684631318687185e-06, "loss": 0.8549, "step": 202 },
    { "epoch": 1.0629921259842519, "grad_norm": 0.3657527566283362, "learning_rate": 2.661586900858111e-06, "loss": 0.8472, "step": 203 },
    { "epoch": 1.068241469816273, "grad_norm": 0.3666340150026852, "learning_rate": 2.638528687289925e-06, "loss": 0.8331, "step": 204 },
    { "epoch": 1.073490813648294, "grad_norm": 0.3661321106045701, "learning_rate": 2.6154586466143495e-06, "loss": 0.8706, "step": 205 },
    { "epoch": 1.078740157480315, "grad_norm": 0.3796204270047528, "learning_rate": 2.592378748472863e-06, "loss": 0.8329, "step": 206 },
    { "epoch": 1.083989501312336, "grad_norm": 0.4003268539729557, "learning_rate": 2.5692909633485414e-06, "loss": 0.8762, "step": 207 },
    { "epoch": 1.0892388451443569, "grad_norm": 0.3816091507612548, "learning_rate": 2.546197262397825e-06, "loss": 0.8499, "step": 208 },
    { "epoch": 1.094488188976378, "grad_norm": 0.4135433735229758, "learning_rate": 2.5230996172822274e-06, "loss": 0.8191, "step": 209 },
    { "epoch": 1.099737532808399, "grad_norm": 0.38519176727175336, "learning_rate": 2.5e-06, "loss": 0.8164, "step": 210 },
    { "epoch": 1.10498687664042, "grad_norm": 0.397224934471037, "learning_rate": 2.4769003827177735e-06, "loss": 0.8373, "step": 211 },
    { "epoch": 1.110236220472441, "grad_norm": 0.37696141546345585, "learning_rate": 2.453802737602176e-06, "loss": 0.8575, "step": 212 },
    { "epoch": 1.1154855643044619, "grad_norm": 0.38562550508165394, "learning_rate": 2.4307090366514594e-06, "loss": 0.8372, "step": 213 },
    { "epoch": 1.120734908136483, "grad_norm": 0.392610655726213, "learning_rate": 2.4076212515271384e-06, "loss": 0.8561, "step": 214 },
    { "epoch": 1.125984251968504, "grad_norm": 0.3752434088251031, "learning_rate": 2.3845413533856517e-06, "loss": 0.8539, "step": 215 },
    { "epoch": 1.1312335958005248, "grad_norm": 0.3769283394800778, "learning_rate": 2.3614713127100752e-06, "loss": 0.8134, "step": 216 },
    { "epoch": 1.136482939632546, "grad_norm": 0.39533971296250825, "learning_rate": 2.3384130991418896e-06, "loss": 0.8608, "step": 217 },
    { "epoch": 1.141732283464567, "grad_norm": 0.5791866799791944, "learning_rate": 2.3153686813128153e-06, "loss": 0.8334, "step": 218 },
    { "epoch": 1.1469816272965878, "grad_norm": 0.3810153033409976, "learning_rate": 2.2923400266767406e-06, "loss": 0.8472, "step": 219 },
    { "epoch": 1.152230971128609, "grad_norm": 0.3728143509479016, "learning_rate": 2.269329101341745e-06, "loss": 0.8376, "step": 220 },
    { "epoch": 1.1574803149606299, "grad_norm": 0.40305096887095054, "learning_rate": 2.246337869902243e-06, "loss": 0.8275, "step": 221 },
    { "epoch": 1.162729658792651, "grad_norm": 0.3692851271916895, "learning_rate": 2.2233682952712484e-06, "loss": 0.8413, "step": 222 },
    { "epoch": 1.167979002624672, "grad_norm": 0.3963527736664121, "learning_rate": 2.2004223385127925e-06, "loss": 0.8295, "step": 223 },
    { "epoch": 1.1732283464566928, "grad_norm": 0.3587695306587395, "learning_rate": 2.1775019586744924e-06, "loss": 0.8547, "step": 224 },
    { "epoch": 1.178477690288714, "grad_norm": 0.3614709935886563, "learning_rate": 2.1546091126202955e-06, "loss": 0.813, "step": 225 },
    { "epoch": 1.1837270341207349, "grad_norm": 0.4140010983968987, "learning_rate": 2.131745754863406e-06, "loss": 0.8398, "step": 226 },
    { "epoch": 1.188976377952756, "grad_norm": 0.45996885685295197, "learning_rate": 2.1089138373994226e-06, "loss": 0.8518, "step": 227 },
    { "epoch": 1.194225721784777, "grad_norm": 0.36254259181474985, "learning_rate": 2.086115309539675e-06, "loss": 0.8647, "step": 228 },
    { "epoch": 1.1994750656167978, "grad_norm": 0.4122269000782737, "learning_rate": 2.063352117744803e-06, "loss": 0.8881, "step": 229 },
    { "epoch": 1.204724409448819, "grad_norm": 0.40615776242804974, "learning_rate": 2.040626205458574e-06, "loss": 0.8328, "step": 230 },
    { "epoch": 1.20997375328084, "grad_norm": 0.41826600558691523, "learning_rate": 2.017939512941958e-06, "loss": 0.8281, "step": 231 },
    { "epoch": 1.2152230971128608, "grad_norm": 0.4153271729906844, "learning_rate": 1.995293977107475e-06, "loss": 0.8693, "step": 232 },
    { "epoch": 1.220472440944882, "grad_norm": 0.3654074693662248, "learning_rate": 1.972691531353826e-06, "loss": 0.821, "step": 233 },
    { "epoch": 1.2257217847769029, "grad_norm": 0.3981178374801672, "learning_rate": 1.9501341054008292e-06, "loss": 0.8962, "step": 234 },
    { "epoch": 1.2309711286089238, "grad_norm": 0.4049324135862524, "learning_rate": 1.9276236251246655e-06, "loss": 0.7905, "step": 235 },
    { "epoch": 1.236220472440945, "grad_norm": 0.33631867862267323, "learning_rate": 1.9051620123934538e-06, "loss": 0.8284, "step": 236 },
    { "epoch": 1.2414698162729658, "grad_norm": 0.37393386640045784, "learning_rate": 1.882751184903167e-06, "loss": 0.8405, "step": 237 },
    { "epoch": 1.246719160104987, "grad_norm": 0.3462880519364805, "learning_rate": 1.860393056013911e-06, "loss": 0.7939, "step": 238 },
    { "epoch": 1.2519685039370079, "grad_norm": 0.3848360075044728, "learning_rate": 1.8380895345865603e-06, "loss": 0.8375, "step": 239 },
    { "epoch": 1.257217847769029, "grad_norm": 0.3617106402936481, "learning_rate": 1.8158425248197931e-06, "loss": 0.8162, "step": 240 },
    { "epoch": 1.26246719160105, "grad_norm": 0.34772753646733273, "learning_rate": 1.7936539260875125e-06, "loss": 0.8408, "step": 241 },
    { "epoch": 1.2677165354330708, "grad_norm": 0.36406532287683085, "learning_rate": 1.7715256327766887e-06, "loss": 0.8103, "step": 242 },
    { "epoch": 1.272965879265092, "grad_norm": 0.3765574533307364, "learning_rate": 1.7494595341256185e-06, "loss": 0.8461, "step": 243 },
    { "epoch": 1.2782152230971129, "grad_norm": 0.3859435308304487, "learning_rate": 1.7274575140626318e-06, "loss": 0.8761, "step": 244 },
    { "epoch": 1.2834645669291338, "grad_norm": 0.3936691957558663, "learning_rate": 1.7055214510452462e-06, "loss": 0.9159, "step": 245 },
    { "epoch": 1.288713910761155, "grad_norm": 0.5107365379023212, "learning_rate": 1.6836532178997922e-06, "loss": 0.8649, "step": 246 },
    { "epoch": 1.2939632545931758, "grad_norm": 0.388972979502565, "learning_rate": 1.6618546816615162e-06, "loss": 0.8734, "step": 247 },
    { "epoch": 1.2992125984251968, "grad_norm": 0.4410810635653612, "learning_rate": 1.6401277034151798e-06, "loss": 0.8405, "step": 248 },
    { "epoch": 1.304461942257218, "grad_norm": 0.38502463729196623, "learning_rate": 1.6184741381361684e-06, "loss": 0.8546, "step": 249 },
    { "epoch": 1.3097112860892388, "grad_norm": 0.38032181472927906, "learning_rate": 1.5968958345321178e-06, "loss": 0.8253, "step": 250 },
    { "epoch": 1.3149606299212597, "grad_norm": 0.36970392450888717, "learning_rate": 1.5753946348850774e-06, "loss": 0.8558, "step": 251 },
    { "epoch": 1.3202099737532809, "grad_norm": 0.3934484470589474, "learning_rate": 1.5539723748942246e-06, "loss": 0.8299, "step": 252 },
    { "epoch": 1.3254593175853018, "grad_norm": 0.3602834073019444, "learning_rate": 1.5326308835191372e-06, "loss": 0.8476, "step": 253 },
    { "epoch": 1.330708661417323, "grad_norm": 0.3482676328694225, "learning_rate": 1.5113719828236439e-06, "loss": 0.858, "step": 254 },
    { "epoch": 1.3359580052493438, "grad_norm": 0.44035367856347457, "learning_rate": 1.490197487820263e-06, "loss": 0.8351, "step": 255 },
    { "epoch": 1.341207349081365, "grad_norm": 0.371715281121202, "learning_rate": 1.4691092063152417e-06, "loss": 0.827, "step": 256 },
    { "epoch": 1.3464566929133859, "grad_norm": 0.35070038567136974, "learning_rate": 1.4481089387542134e-06, "loss": 0.8492, "step": 257 },
    { "epoch": 1.3517060367454068, "grad_norm": 0.3499117772233927, "learning_rate": 1.4271984780684778e-06, "loss": 0.8399, "step": 258 },
    { "epoch": 1.356955380577428, "grad_norm": 0.3811483044855955, "learning_rate": 1.4063796095219345e-06, "loss": 0.8688, "step": 259 },
    { "epoch": 1.3622047244094488, "grad_norm": 0.3714007196954483, "learning_rate": 1.3856541105586545e-06, "loss": 0.813, "step": 260 },
    { "epoch": 1.3674540682414698, "grad_norm": 0.3577252388966486, "learning_rate": 1.3650237506511333e-06, "loss": 0.8506, "step": 261 },
    { "epoch": 1.372703412073491, "grad_norm": 0.3670679328461459, "learning_rate": 1.3444902911492174e-06, "loss": 0.8267, "step": 262 },
    { "epoch": 1.3779527559055118, "grad_norm": 0.3521888751612076, "learning_rate": 1.324055485129727e-06, "loss": 0.8079, "step": 263 },
    { "epoch": 1.3832020997375327, "grad_norm": 0.3751986141403746, "learning_rate": 1.303721077246784e-06, "loss": 0.8491, "step": 264 },
    { "epoch": 1.3884514435695539, "grad_norm": 0.3589588384505892, "learning_rate": 1.2834888035828597e-06, "loss": 0.8768, "step": 265 },
    { "epoch": 1.3937007874015748, "grad_norm": 0.372280310354174, "learning_rate": 1.2633603915005535e-06, "loss": 0.8335, "step": 266 },
    { "epoch": 1.3989501312335957, "grad_norm": 0.40534475247811924, "learning_rate": 1.2433375594951166e-06, "loss": 0.8719, "step": 267 },
    { "epoch": 1.4041994750656168, "grad_norm": 0.40997625659212245, "learning_rate": 1.2234220170477332e-06, "loss": 0.8269, "step": 268 },
    { "epoch": 1.4094488188976377, "grad_norm": 0.3668404917652971, "learning_rate": 1.2036154644795697e-06, "loss": 0.7913, "step": 269 },
    { "epoch": 1.4146981627296589, "grad_norm": 0.3641040286199539, "learning_rate": 1.1839195928066101e-06, "loss": 0.8469, "step": 270 },
    { "epoch": 1.4199475065616798, "grad_norm": 0.36110674154533795, "learning_rate": 1.164336083595279e-06, "loss": 0.8698, "step": 271 },
    { "epoch": 1.425196850393701, "grad_norm": 0.376323216718325, "learning_rate": 1.1448666088188766e-06, "loss": 0.8355, "step": 272 },
    { "epoch": 1.4304461942257218, "grad_norm": 0.37039873117700245, "learning_rate": 1.1255128307148319e-06, "loss": 0.8219, "step": 273 },
    { "epoch": 1.4356955380577427, "grad_norm": 0.3612230977145218, "learning_rate": 1.1062764016427864e-06, "loss": 0.8568, "step": 274 },
    { "epoch": 1.4409448818897639, "grad_norm": 0.3962177187198973, "learning_rate": 1.0871589639435204e-06, "loss": 0.8115, "step": 275 },
    { "epoch": 1.4461942257217848, "grad_norm": 0.38303094180829605, "learning_rate": 1.068162149798737e-06, "loss": 0.818, "step": 276 },
    { "epoch": 1.4514435695538057, "grad_norm": 0.364448052838668, "learning_rate": 1.049287581091711e-06, "loss": 0.803, "step": 277 },
    { "epoch": 1.4566929133858268, "grad_norm": 0.37746023670056045, "learning_rate": 1.0305368692688175e-06, "loss": 0.8495, "step": 278 },
    { "epoch": 1.4619422572178478, "grad_norm": 0.40984945067844814, "learning_rate": 1.0119116152019535e-06, "loss": 0.8631, "step": 279 },
    { "epoch": 1.4671916010498687, "grad_norm": 0.3840327770579671, "learning_rate": 9.934134090518593e-07, "loss": 0.8318, "step": 280 },
    { "epoch": 1.4724409448818898, "grad_norm": 0.3575557636540703, "learning_rate": 9.750438301323584e-07, "loss": 0.8244, "step": 281 },
    { "epoch": 1.4776902887139107, "grad_norm": 0.363642443404998, "learning_rate": 9.56804446775518e-07, "loss": 0.8109, "step": 282 },
    { "epoch": 1.4829396325459316, "grad_norm": 0.3410295671236108, "learning_rate": 9.386968161977528e-07, "loss": 0.8227, "step": 283 },
    { "epoch": 1.4881889763779528, "grad_norm": 0.3693264048471325, "learning_rate": 9.207224843668733e-07, "loss": 0.8349, "step": 284 },
    { "epoch": 1.4934383202099737, "grad_norm": 0.3912247837861982, "learning_rate": 9.028829858700974e-07, "loss": 0.8218, "step": 285 },
    { "epoch": 1.4986876640419948, "grad_norm": 0.3771536484006796, "learning_rate": 8.851798437830323e-07, "loss": 0.8328, "step": 286 },
    { "epoch": 1.5039370078740157, "grad_norm": 0.3956914054034924, "learning_rate": 8.676145695396399e-07, "loss": 0.8779, "step": 287 },
    { "epoch": 1.5091863517060369, "grad_norm": 0.34503047792761804, "learning_rate": 8.501886628031941e-07, "loss": 0.8265, "step": 288 },
    { "epoch": 1.5144356955380578, "grad_norm": 0.3888965654379534, "learning_rate": 8.329036113382474e-07, "loss": 0.8354, "step": 289 },
    { "epoch": 1.5196850393700787, "grad_norm": 0.38477861699293, "learning_rate": 8.157608908836071e-07, "loss": 0.8109, "step": 290 },
    { "epoch": 1.5249343832020998, "grad_norm": 0.378643274506092, "learning_rate": 7.987619650263462e-07, "loss": 0.8507, "step": 291 },
    { "epoch": 1.5301837270341208, "grad_norm": 0.3619947185774947, "learning_rate": 7.819082850768433e-07, "loss": 0.8321, "step": 292 },
    { "epoch": 1.5354330708661417, "grad_norm": 0.3793890733316615, "learning_rate": 7.652012899448761e-07, "loss": 0.846, "step": 293 },
    { "epoch": 1.5406824146981628, "grad_norm": 0.37104232865461917, "learning_rate": 7.486424060167726e-07, "loss": 0.8113, "step": 294 },
    { "epoch": 1.5459317585301837, "grad_norm": 2.1051715634012864, "learning_rate": 7.322330470336314e-07, "loss": 0.8174, "step": 295 },
    { "epoch": 1.5511811023622046, "grad_norm": 0.45390059444688674, "learning_rate": 7.159746139706194e-07, "loss": 0.8414, "step": 296 },
    { "epoch": 1.5564304461942258, "grad_norm": 0.39115421496994207, "learning_rate": 6.99868494917364e-07, "loss": 0.8085, "step": 297 },
    { "epoch": 1.5616797900262467, "grad_norm": 0.37250458373292433, "learning_rate": 6.839160649594401e-07, "loss": 0.7906, "step": 298 },
    { "epoch": 1.5669291338582676, "grad_norm": 0.3746838213451273, "learning_rate": 6.68118686060972e-07, "loss": 0.8314, "step": 299 },
    { "epoch": 1.5721784776902887, "grad_norm": 0.5365749531577879, "learning_rate": 6.524777069483526e-07, "loss": 0.8332, "step": 300 },
    { "epoch": 1.5774278215223099, "grad_norm": 0.3788491507367419, "learning_rate": 6.369944629950933e-07, "loss": 0.8611, "step": 301 },
    { "epoch": 1.5826771653543306, "grad_norm": 0.3690009715495826, "learning_rate": 6.216702761078167e-07, "loss": 0.8099, "step": 302 },
    { "epoch": 1.5879265091863517, "grad_norm": 0.34643553497540036, "learning_rate": 6.06506454613393e-07, "loss": 0.8255, "step": 303 },
    { "epoch": 1.5931758530183728, "grad_norm": 0.37384009617772274, "learning_rate": 5.915042931472426e-07, "loss": 0.8024, "step": 304 },
    { "epoch": 1.5984251968503937, "grad_norm": 0.34112911330910745, "learning_rate": 5.766650725428027e-07, "loss": 0.8172, "step": 305 },
    { "epoch": 1.6036745406824147, "grad_norm": 0.3675924179740712, "learning_rate": 5.619900597221753e-07, "loss": 0.8195, "step": 306 },
    { "epoch": 1.6089238845144358, "grad_norm": 0.36296097997746496, "learning_rate": 5.474805075879616e-07, "loss": 0.808, "step": 307 },
    { "epoch": 1.6141732283464567, "grad_norm": 0.37901033900237063, "learning_rate": 5.33137654916292e-07, "loss": 0.8068, "step": 308 },
    { "epoch": 1.6194225721784776, "grad_norm": 0.3662350700773784, "learning_rate": 5.189627262510655e-07, "loss": 0.8695, "step": 309 },
    { "epoch": 1.6246719160104988, "grad_norm": 0.36967015418887056, "learning_rate": 5.049569317994013e-07, "loss": 0.8448, "step": 310 },
    { "epoch": 1.6299212598425197, "grad_norm": 0.3579476966594129, "learning_rate": 4.911214673283157e-07, "loss": 0.8375, "step": 311 },
    { "epoch": 1.6351706036745406, "grad_norm": 0.3611024965491316, "learning_rate": 4.774575140626317e-07, "loss": 0.8519, "step": 312 },
    { "epoch": 1.6404199475065617, "grad_norm": 0.3485470659035558, "learning_rate": 4.639662385841293e-07, "loss": 0.8217, "step": 313 },
    { "epoch": 1.6456692913385826, "grad_norm": 0.37287096492671606, "learning_rate": 4.506487927319475e-07, "loss": 0.8413, "step": 314 },
    { "epoch": 1.6509186351706036, "grad_norm": 0.38705573070810245, "learning_rate": 4.3750631350424456e-07, "loss": 0.8499, "step": 315 },
    { "epoch": 1.6561679790026247, "grad_norm": 0.3582574621798544, "learning_rate": 4.2453992296112384e-07, "loss": 0.8422, "step": 316 },
    { "epoch": 1.6614173228346458, "grad_norm": 0.36916485545424343, "learning_rate": 4.117507281288366e-07, "loss": 0.8312, "step": 317 },
    { "epoch": 1.6666666666666665, "grad_norm": 0.36367894508342596, "learning_rate": 3.991398209052685e-07, "loss": 0.8342, "step": 318 },
    { "epoch": 1.6719160104986877, "grad_norm": 0.39384159873675784, "learning_rate": 3.8670827796671637e-07, "loss": 0.8006, "step": 319 },
    { "epoch": 1.6771653543307088, "grad_norm": 0.39604366934016394, "learning_rate": 3.7445716067596506e-07, "loss": 0.8556, "step": 320 },
    { "epoch": 1.6824146981627297, "grad_norm": 0.3672901516398384, "learning_rate": 3.623875149916725e-07, "loss": 0.8621, "step": 321 },
    { "epoch": 1.6876640419947506, "grad_norm": 0.38389464950775926, "learning_rate": 3.505003713790689e-07, "loss": 0.8343, "step": 322 },
    { "epoch": 1.6929133858267718, "grad_norm": 0.36767539320644105, "learning_rate": 3.387967447219803e-07, "loss": 0.842, "step": 323 },
    { "epoch": 1.6981627296587927, "grad_norm": 0.3907752041858798, "learning_rate": 3.2727763423617915e-07, "loss": 0.8767, "step": 324 },
    { "epoch": 1.7034120734908136, "grad_norm": 0.37713933030901214, "learning_rate": 3.1594402338407633e-07, "loss": 0.8326, "step": 325 },
    { "epoch": 1.7086614173228347, "grad_norm": 0.38192173068300844, "learning_rate": 3.047968797907552e-07, "loss": 0.8168, "step": 326 },
    { "epoch": 1.7139107611548556, "grad_norm": 0.35632236660696437, "learning_rate": 2.9383715516136083e-07, "loss": 0.8344, "step": 327 },
    { "epoch": 1.7191601049868765, "grad_norm": 0.39281712398945773, "learning_rate": 2.8306578519984526e-07, "loss": 0.8057, "step": 328 },
    { "epoch": 1.7244094488188977, "grad_norm": 0.3713265865015316, "learning_rate": 2.7248368952908055e-07, "loss": 0.8128, "step": 329 },
    { "epoch": 1.7296587926509186, "grad_norm": 0.3704631773575278, "learning_rate": 2.620917716123444e-07, "loss": 0.8593, "step": 330 },
    { "epoch": 1.7349081364829395, "grad_norm": 0.3833563384401533, "learning_rate": 2.5189091867618615e-07, "loss": 0.8353, "step": 331 },
    { "epoch": 1.7401574803149606, "grad_norm": 0.39756909736931284, "learning_rate": 2.418820016346779e-07, "loss": 0.8596, "step": 332 },
    { "epoch": 1.7454068241469818, "grad_norm": 0.36212623044011794, "learning_rate": 2.3206587501505866e-07, "loss": 0.8311, "step": 333 },
    { "epoch": 1.7506561679790025, "grad_norm": 0.36098218836000906, "learning_rate": 2.224433768847789e-07, "loss": 0.8091, "step": 334 },
    { "epoch": 1.7559055118110236, "grad_norm": 0.3713293844163563, "learning_rate": 2.1301532877994747e-07, "loss": 0.8147, "step": 335 },
    { "epoch": 1.7611548556430447, "grad_norm": 0.34562968172819897, "learning_rate": 2.0378253563519247e-07, "loss": 0.8284, "step": 336 },
    { "epoch": 1.7664041994750657, "grad_norm": 0.39461152581923564, "learning_rate": 1.9474578571493874e-07, "loss": 0.8632, "step": 337 },
    { "epoch": 1.7716535433070866, "grad_norm": 0.3689417081438894, "learning_rate": 1.859058505461095e-07, "loss": 0.8259, "step": 338 },
    { "epoch": 1.7769028871391077, "grad_norm": 0.3926353728632124, "learning_rate": 1.7726348485225337e-07, "loss": 0.8364, "step": 339 },
    { "epoch": 1.7821522309711286, "grad_norm": 0.354022004999706, "learning_rate": 1.6881942648911077e-07, "loss": 0.8773, "step": 340 },
    { "epoch": 1.7874015748031495, "grad_norm": 0.3876142010040814, "learning_rate": 1.6057439638161891e-07, "loss": 0.82, "step": 341 },
    { "epoch": 1.7926509186351707, "grad_norm": 0.35474834193863947, "learning_rate": 1.5252909846235898e-07, "loss": 0.8193, "step": 342 },
    { "epoch": 1.7979002624671916, "grad_norm": 0.35916164129360717, "learning_rate": 1.4468421961145924e-07, "loss": 0.8398, "step": 343 },
    { "epoch": 1.8031496062992125, "grad_norm": 0.3592392678728242, "learning_rate": 1.3704042959795132e-07, "loss": 0.8384, "step": 344 },
    { "epoch": 1.8083989501312336, "grad_norm": 0.3717050715996407, "learning_rate": 1.2959838102258537e-07, "loss": 0.827, "step": 345 },
    { "epoch": 1.8136482939632546, "grad_norm": 0.3714676540830701, "learning_rate": 1.223587092621162e-07, "loss": 0.811, "step": 346 },
    { "epoch": 1.8188976377952755, "grad_norm": 0.36420858424939995, "learning_rate": 1.1532203241505474e-07, "loss": 0.8769, "step": 347 },
    { "epoch": 1.8241469816272966, "grad_norm": 0.3993728488042469, "learning_rate": 1.0848895124889819e-07, "loss": 0.8437, "step": 348 },
    { "epoch": 1.8293963254593177, "grad_norm": 0.36458761423411645, "learning_rate": 1.0186004914883779e-07, "loss": 0.8378, "step": 349 },
    { "epoch": 1.8346456692913384, "grad_norm": 0.3874822926899934, "learning_rate": 9.54358920679524e-08, "loss": 0.8284, "step": 350 },
    { "epoch": 1.8398950131233596, "grad_norm": 0.38256154200443654, "learning_rate": 8.921702847888791e-08, "loss": 0.8602, "step": 351 },
    { "epoch": 1.8451443569553807, "grad_norm": 0.36179666269244404, "learning_rate": 8.320398932703145e-08, "loss": 0.8274, "step": 352 },
    { "epoch": 1.8503937007874016, "grad_norm": 0.365807165050502, "learning_rate": 7.739728798518115e-08, "loss": 0.8709, "step": 353 },
    { "epoch": 1.8556430446194225, "grad_norm": 0.35458800620057285, "learning_rate": 7.17974202097152e-08, "loss": 0.8111, "step": 354 },
    { "epoch": 1.8608923884514437, "grad_norm": 0.37957333795063364, "learning_rate": 6.640486409826785e-08, "loss": 0.7994, "step": 355 },
    { "epoch": 1.8661417322834646, "grad_norm": 0.3527171222570775, "learning_rate": 6.12200800489085e-08, "loss": 0.853, "step": 356 },
    { "epoch": 1.8713910761154855, "grad_norm": 0.39242714191446193, "learning_rate": 5.624351072083561e-08, "loss": 0.8203, "step": 357 },
    { "epoch": 1.8766404199475066, "grad_norm": 0.35707175993984824, "learning_rate": 5.1475580996585285e-08, "loss": 0.7958, "step": 358 },
    { "epoch": 1.8818897637795275, "grad_norm": 0.3335686552470708, "learning_rate": 4.691669794575388e-08, "loss": 0.8433, "step": 359 },
    { "epoch": 1.8871391076115485, "grad_norm": 0.38462069858291403, "learning_rate": 4.256725079024554e-08, "loss": 0.7975, "step": 360 },
    { "epoch": 1.8923884514435696, "grad_norm": 0.34131290146851945, "learning_rate": 3.8427610871041024e-08, "loss": 0.8223, "step": 361 },
    { "epoch": 1.8976377952755905, "grad_norm": 0.35781280354041417, "learning_rate": 3.449813161649357e-08, "loss": 0.9063, "step": 362 },
    { "epoch": 1.9028871391076114, "grad_norm": 0.33992014890280325, "learning_rate": 3.077914851215585e-08, "loss": 0.8081, "step": 363 },
    { "epoch": 1.9081364829396326, "grad_norm": 0.3582145429279079, "learning_rate": 2.7270979072135106e-08, "loss": 0.8487, "step": 364 },
    { "epoch": 1.9133858267716537, "grad_norm": 0.36355378789809917, "learning_rate": 2.3973922811987295e-08, "loss": 0.8128, "step": 365 },
    { "epoch": 1.9186351706036744, "grad_norm": 0.36811282667120626, "learning_rate": 2.0888261223143136e-08, "loss": 0.8442, "step": 366 },
    { "epoch": 1.9238845144356955, "grad_norm": 0.36592765014539297, "learning_rate": 1.8014257748877606e-08, "loss": 0.8385, "step": 367 },
    { "epoch": 1.9291338582677167, "grad_norm": 0.3695353834129384, "learning_rate": 1.5352157761815978e-08, "loss": 0.809, "step": 368 },
    { "epoch": 1.9343832020997376, "grad_norm": 0.3907850295180158, "learning_rate": 1.2902188542986139e-08, "loss": 0.8295, "step": 369 },
    { "epoch": 1.9396325459317585, "grad_norm": 0.34933462418116523, "learning_rate": 1.0664559262413831e-08, "loss": 0.8238, "step": 370 },
    { "epoch": 1.9448818897637796, "grad_norm": 0.3537875486215043, "learning_rate": 8.639460961263612e-09, "loss": 0.8377, "step": 371 },
    { "epoch": 1.9501312335958005, "grad_norm": 0.35563927647325777, "learning_rate": 6.827066535529947e-09, "loss": 0.7943, "step": 372 },
    { "epoch": 1.9553805774278215, "grad_norm": 0.3474400330562922, "learning_rate": 5.2275307212742986e-09, "loss": 0.8511, "step": 373 },
    { "epoch": 1.9606299212598426, "grad_norm": 0.3593397542649248, "learning_rate": 3.840990081415141e-09, "loss": 0.8186, "step": 374 },
    { "epoch": 1.9658792650918635, "grad_norm": 0.3612580709923566, "learning_rate": 2.6675629940689508e-09, "loss": 0.848, "step": 375 },
    { "epoch": 1.9711286089238844, "grad_norm": 0.3614238458155997, "learning_rate": 1.707349642442735e-09, "loss": 0.7933, "step": 376 },
    { "epoch": 1.9763779527559056, "grad_norm": 0.39289720311660936, "learning_rate": 9.604320062814309e-10, "loss": 0.8323, "step": 377 },
    { "epoch": 1.9816272965879265, "grad_norm": 0.36167265662489084, "learning_rate": 4.268738548682261e-10, "loss": 0.8512, "step": 378 },
    { "epoch": 1.9868766404199474, "grad_norm": 0.3690750146631276, "learning_rate": 1.0672074158030176e-10, "loss": 0.832, "step": 379 },
    { "epoch": 1.9921259842519685, "grad_norm": 0.35408134628230764, "learning_rate": 0.0, "loss": 0.8172, "step": 380 }
  ],
  "logging_steps": 1,
  "max_steps": 380,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 95,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.333503833071944e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}